add CH4 & Al param.json for deepmd-kit-1.1.0; modify doc #190

Merged (4 commits) on Dec 10, 2019
4 changes: 3 additions & 1 deletion README.md
@@ -1079,7 +1079,9 @@ The following table gives explicit descriptions on keys in param.json.
 | # The following are keys in resources
 | numb_node | Integer | 1 | Node count required for the job
 | task_per_node | Integer | 4 | Number of CPU cores required
 | numb_gpu | Integer | 4 | Number of GPUs required
+| manual_cuda_devices | Integer | 1 | Used with the key "manual_cuda_multiplicity" to specify the number of GPUs
+| manual_cuda_multiplicity | Integer | 5 | Used in 01.model_devi together with the key "manual_cuda_devices" to specify the number of MD programs running on one GPU at the same time; dpgen will automatically allocate MD jobs across the GPUs. This can improve GPU usage for GPUs such as the V100.
 | node_cpu | Integer | 4 | Only for LSF. The number of CPU cores on each node that should be allocated to the job.
 | source_list | List of string | "....../vasp.env" | Environment needed for certain jobs. For example, if "env" is in the list, "source env" will be written in the script.
 | module_list | List of string | [ "Intel/2018", "Anaconda3"] | For example, if "Intel/2018" is in the list, "module load Intel/2018" will be written in the script.
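As a quick illustration of the keys documented above, a "resources" block in param.json might look like the following. This is a sketch with illustrative values, not a recommended configuration; only the keys shown in the table are assumed to exist.

```json
"resources": {
    "numb_node": 1,
    "task_per_node": 4,
    "numb_gpu": 4,
    "manual_cuda_devices": 4,
    "manual_cuda_multiplicity": 5,
    "source_list": [],
    "module_list": ["Intel/2018", "Anaconda3"]
}
```

With this setting, up to 4 GPUs are used and up to 5 MD jobs share each GPU before dpgen inserts a synchronization barrier.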
18 changes: 9 additions & 9 deletions dpgen/dispatcher/Batch.py
@@ -74,13 +74,13 @@ def sub_script(self,
         # loop over commands
         self.cmd_cnt = 0
         try:
-            self.manual_gpu = res['manual_cuda_devices']
-        except:
-            self.manual_gpu = 0
+            self.manual_cuda_devices = res['manual_cuda_devices']
+        except KeyError:
+            self.manual_cuda_devices = 0
         try:
-            self.manual_gpu_multiplicity = res['manual_cuda_multiplicity']
-        except:
-            self.manual_gpu_multiplicity = 1
+            self.manual_cuda_multiplicity = res['manual_cuda_multiplicity']
+        except KeyError:
+            self.manual_cuda_multiplicity = 1
         for ii in range(len(cmd)):
             # for one command
             ret += self._sub_script_inner(job_dirs,
@@ -140,7 +140,7 @@ def _sub_script_inner(self,
         for ii,jj in zip(job_dirs, args) :
             ret += 'cd %s\n' % ii
             ret += 'test $? -ne 0 && exit\n\n'
-            if self.manual_gpu <= 0:
+            if self.manual_cuda_devices <= 0:
                 ret += 'if [ ! -f tag_%d_finished ] ;then\n' % idx
                 ret += ' %s 1>> %s 2>> %s \n' % (self.sub_script_cmd(cmd, jj, res), outlog, errlog)
                 if res['allow_failure'] is False:
@@ -151,11 +151,11 @@
             else :
                 # do not support task-wise restart
                 tmp_cmd = ' %s 1>> %s 2>> %s ' % (self.sub_script_cmd(cmd, jj, res), outlog, errlog)
-                ret += 'CUDA_VISIBLE_DEVICES=%d %s &\n\n' % ((self.cmd_cnt % self.manual_gpu), tmp_cmd)
+                ret += 'CUDA_VISIBLE_DEVICES=%d %s &\n\n' % ((self.cmd_cnt % self.manual_cuda_devices), tmp_cmd)
                 self.cmd_cnt += 1
             ret += 'cd %s\n' % self.context.remote_root
             ret += 'test $? -ne 0 && exit\n'
-            if self.manual_gpu > 0 and self.cmd_cnt % (self.manual_gpu * self.manual_gpu_multiplicity) == 0:
+            if self.manual_cuda_devices > 0 and self.cmd_cnt % (self.manual_cuda_devices * self.manual_cuda_multiplicity) == 0:
                 ret += '\nwait\n\n'
         ret += '\nwait\n\n'
         return ret
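The allocation logic in this hunk can be summarized outside the batch-script machinery: each submitted command is pinned to a GPU via `CUDA_VISIBLE_DEVICES = cmd_cnt % manual_cuda_devices`, and a `wait` barrier is emitted after every `manual_cuda_devices * manual_cuda_multiplicity` commands so that at most that many background jobs run concurrently. The sketch below is an illustration of this behavior, not dpgen's actual API; the function name `assign_gpus` is hypothetical.

```python
def assign_gpus(n_cmds, manual_cuda_devices, manual_cuda_multiplicity):
    """Mimic the diff's round-robin GPU assignment.

    Returns a list of (gpu_id, emit_wait) pairs: gpu_id is the value
    that would go into CUDA_VISIBLE_DEVICES, and emit_wait is True when
    the batch script would insert a 'wait' barrier after that command.
    """
    plan = []
    cmd_cnt = 0
    for _ in range(n_cmds):
        # round-robin over the available GPUs
        gpu_id = cmd_cnt % manual_cuda_devices
        cmd_cnt += 1
        # barrier after every (devices * multiplicity) commands
        emit_wait = cmd_cnt % (manual_cuda_devices * manual_cuda_multiplicity) == 0
        plan.append((gpu_id, emit_wait))
    return plan

# 5 commands on 2 GPUs, 2 jobs per GPU: barrier after every 4th command
print(assign_gpus(5, 2, 2))
# → [(0, False), (1, False), (0, False), (1, True), (0, False)]
```

This is why the feature improves utilization on large GPUs such as the V100: several light MD processes share one device, and the periodic `wait` keeps the oversubscription bounded.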