...
Another option for submitting jobs is to use the Launcher_GPU module and bind each run/task to one GPU. When doing so, set '--cpus-per-task' to the total number of CPUs on the node divided by '--ntasks-per-node' (in the example below, 8 tasks × 6 CPUs per task account for the node's 48 CPUs). The example below runs 8 jobs in parallel on a single node, with OpenMM installed locally in a conda environment (it can also be loaded as a module; see the commented alternative in the script).
```bash
#!/bin/bash -l
#SBATCH --job-name=my_job
#SBATCH --account=commons
#SBATCH --partition=commons
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=6
#SBATCH --threads-per-core=2
#SBATCH --mem-per-cpu=3G
#SBATCH --gres=gpu:8
#SBATCH --time=24:00:00
#SBATCH --export=ALL

echo "Submitting simulations..."
module purge

# Using Launcher_GPU on ARIES
module load foss/2020b Launcher_GPU OpenMPI

# OpenMM loaded via conda
echo "Initiating conda environment:"
source $HOME/anaconda3/bin/activate
conda activate openmm

# Alternative: load OpenMM as a module instead of the conda environment
# module load GCC/10.2.0 OpenMPI/4.0.5 OpenMM/7.5.0 foss/2020b Launcher_GPU

# This is for controlling Launcher
export LAUNCHER_WORKDIR=`pwd`
export LAUNCHER_JOB_FILE=$PWD/launcher_jobs_sim
export LAUNCHER_BIND=1

echo "Job started on " `date`
echo "Running on hostname" `hostname`
echo "Job $SLURM_JOB_ID is running on: $SLURM_NODELIST"
echo "Job SLURM_SUBMIT_DIR is $SLURM_SUBMIT_DIR"
echo "Running on $SLURM_NNODES nodes"
echo "Running on $SLURM_NPROCS processors"
echo "CPUS per task is $SLURM_CPUS_PER_TASK"
echo "LAUNCHER_WORKDIR: $LAUNCHER_WORKDIR"

df -h

# This will adjust the total number of runs to nodes*8
max_replicas=$((SLURM_NNODES*8))
echo "Number of replicas is $max_replicas"

rm $LAUNCHER_WORKDIR/launcher_jobs_sim &> /dev/null

# Create the launcher job file needed by $LAUNCHER_DIR/paramrun
for i in `seq 1 $max_replicas`
do
    echo "python run_code.py input_$i output_${i}.log" >> $LAUNCHER_WORKDIR/launcher_jobs_sim
done

# This line launches the jobs listed in the launcher_jobs_sim file
$LAUNCHER_DIR/paramrun

echo "My job finished at:" `date`
```
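Each line of the generated launcher_jobs_sim file is one independent job (e.g. `python run_code.py input_1 output_1.log`), and Launcher_GPU works through these lines while exposing a different GPU to each task. The script below is a minimal, hypothetical sketch of what `run_code.py` could look like for such an OpenMM run; the input/output names, the assumption that `input_$i` is a PDB file, and the force field choice are illustrative placeholders, not part of the original example.

```python
# Hypothetical run_code.py: run a short OpenMM simulation of the structure given
# as the first argument and write state data to the log file given as the second.
import sys
from simtk.openmm.app import PDBFile, ForceField, Simulation, StateDataReporter, PME, HBonds
from simtk.openmm import LangevinIntegrator
from simtk import unit

# Assumption: the launcher passes a PDB file path and a log file path.
pdb_path, log_path = sys.argv[1], sys.argv[2]

pdb = PDBFile(pdb_path)
# Placeholder force field; replace with whatever your system requires.
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
system = forcefield.createSystem(pdb.topology, nonbondedMethod=PME,
                                 nonbondedCutoff=1.0*unit.nanometer,
                                 constraints=HBonds)
integrator = LangevinIntegrator(300*unit.kelvin, 1.0/unit.picosecond,
                                0.002*unit.picoseconds)

# With LAUNCHER_BIND=1, each task should see only its assigned GPU, so OpenMM's
# default platform selection is expected to pick up that GPU automatically.
simulation = Simulation(pdb.topology, system, integrator)
simulation.context.setPositions(pdb.positions)
simulation.minimizeEnergy()
simulation.reporters.append(StateDataReporter(log_path, 1000, step=True,
                                              potentialEnergy=True,
                                              temperature=True))
simulation.step(50000)
```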
...