6
6
# the terms of the BSD 2-Clause License. A copy of the BSD 2-Clause License
7
7
# should have been distributed with this file.
8
8
9
- #TODO: add ability to get stats about the job (i.e. number of jobs, run time, etc.)
10
- #TODO: add ability to submit to remote schedulers
11
-
12
- import os , subprocess , re
9
+ import os , subprocess , re , uuid
13
10
from collections import OrderedDict
11
+ from tethyscluster .sshutils import SSHClient
12
+ from tethyscluster .exception import RemoteCommandFailed , SSHError
14
13
15
14
class Job (object ):
16
15
"""classdocs
@@ -20,7 +19,17 @@ class Job(object):
20
19
"""
21
20
22
21
23
- def __init__ (self , name , attributes = None , executable = None , arguments = None , num_jobs = 1 ):
22
+ def __init__ (self ,
23
+ name ,
24
+ attributes = None ,
25
+ executable = None ,
26
+ arguments = None ,
27
+ num_jobs = 1 ,
28
+ host = None ,
29
+ username = None ,
30
+ password = None ,
31
+ private_key = None ,
32
+ private_key_pass = None ):
24
33
"""Constructor
25
34
26
35
"""
@@ -31,11 +40,17 @@ def __init__(self, name, attributes=None, executable=None, arguments=None, num_j
31
40
object .__setattr__ (self , '_num_jobs' , int (num_jobs ))
32
41
object .__setattr__ (self , '_cluster_id' , 0 )
33
42
object .__setattr__ (self , '_job_file' , '' )
43
+ object .__setattr__ (self , '_remote' , None )
44
+ object .__setattr__ (self , '_remote_input_files' , None )
45
+ if host :
46
+ object .__setattr__ (self , '_remote' , SSHClient (host , username , password , private_key , private_key_pass ))
47
+ object .__setattr__ (self , '_remote_id' , uuid .uuid4 ().hex )
34
48
self .job_name = name
35
49
self .executable = executable
36
50
self .arguments = arguments
37
51
38
52
53
+
39
54
def __str__ (self ):
40
55
"""docstring
41
56
@@ -145,7 +160,6 @@ def job_file(self):
145
160
146
161
:return:
147
162
"""
148
- #TODO: should the job file be just the name or the name and initdir?
149
163
job_file_name = '%s.job' % (self .name )
150
164
job_file_path = os .path .join (self .initial_dir , job_file_name )
151
165
self ._job_file = job_file_path
@@ -157,12 +171,11 @@ def log_file(self):
157
171
158
172
:return:
159
173
"""
160
- #TODO: should the log file be just the name or the name and initdir?
161
174
log_file = self .get ('log' )
162
175
if not log_file :
163
176
log_file = '%s.log' % (self .name )
164
177
self .set ('log' , log_file )
165
- return self ._resolve_attribute ('log' )
178
+ return os . path . join ( self .initial_dir , self . _resolve_attribute ('log' ) )
166
179
167
180
@property
168
181
def initial_dir (self ):
@@ -172,9 +185,19 @@ def initial_dir(self):
172
185
"""
173
186
initial_dir = self ._resolve_attribute ('initialdir' )
174
187
if not initial_dir :
175
- initial_dir = os .getcwd ()
188
+ initial_dir = os .path .relpath (os .getcwd ())
189
+ if self ._remote and os .path .isabs (initial_dir ):
190
+ raise Exception ('Cannot define an absolute path as an initial_dir on a remote scheduler' )
176
191
return initial_dir
177
192
193
+ @property
194
+ def remote_input_files (self ):
195
+ return self ._remote_input_files
196
+
197
+ @remote_input_files .setter
198
+ def remote_input_files (self , files ):
199
+ self ._remote_input_files = files
200
+
178
201
def submit (self , queue = None , options = []):
179
202
"""docstring
180
203
@@ -191,9 +214,7 @@ def submit(self, queue=None, options=[]):
191
214
args .extend (options )
192
215
args .append (self .job_file )
193
216
194
- process = subprocess .Popen (args , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
195
- out ,err = process .communicate ()
196
-
217
+ out , err = self ._execute (args )
197
218
if err :
198
219
if re .match ('WARNING' ,err ):
199
220
print (err )
@@ -214,8 +235,7 @@ def remove(self, options=[], job_num=None):
214
235
args .extend (options )
215
236
job_id = '%s.%s' % (self .cluster_id , job_num ) if job_num else self .cluster_id
216
237
args .append (job_id )
217
- process = subprocess .Popen (args , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
218
- out ,err = process .communicate ()
238
+ out , err = self ._execute (args )
219
239
print (out ,err )
220
240
221
241
def edit (self ):
@@ -238,11 +258,9 @@ def wait(self, options=[], job_num=None):
238
258
args = ['condor_wait' ]
239
259
args .extend (options )
240
260
job_id = '%s.%s' % (self .cluster_id , job_num ) if job_num else str (self .cluster_id )
241
- abs_log_file = os .path .join ( self . initial_dir , self .log_file )
261
+ abs_log_file = os .path .abspath ( self .log_file )
242
262
args .extend ([abs_log_file , job_id ])
243
- print args
244
- process = subprocess .Popen (args , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
245
- process .communicate ()
263
+ out , err = self ._execute (args )
246
264
247
265
def get (self , attr , value = None ):
248
266
"""get attribute from job file
@@ -267,12 +285,40 @@ def delete(self, attr):
267
285
"""
268
286
self .attributes .pop (attr )
269
287
288
def sync_remote_output(self):
    """Fetch this job's output from the remote host.

    Delegates to ``_copy_output_from_remote``. A job that has no remote
    connection (``_remote`` is falsy) has nothing to fetch, so the call is a
    no-op instead of failing on the missing SSH client.

    :return: None
    """
    if not self._remote:
        return
    self._copy_output_from_remote()
290
+
291
+ def _execute (self , args ):
292
+ out = None
293
+ err = None
294
+ if self ._remote :
295
+ cmd = ' ' .join (args )
296
+ try :
297
+ cmd = 'cd %s && %s' % (self ._remote_id , cmd )
298
+ out = '\n ' .join (self ._remote .execute (cmd ))
299
+ except RemoteCommandFailed as e :
300
+ err = e .output
301
+ except SSHError as e :
302
+ err = e .msg
303
+ else :
304
+ process = subprocess .Popen (args , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
305
+ out ,err = process .communicate ()
306
+
307
+ return out , err
308
+
309
+ def _copy_input_files_to_remote (self ):
310
+ self ._remote .put (self .remote_input_files , self ._remote_id )
311
+
312
+ def _copy_output_from_remote (self ):
313
+ self ._remote .get (os .path .join (self ._remote_id , self .initial_dir ))
270
314
271
315
def _write_job_file (self ):
272
316
self ._make_job_dirs ()
273
- job_file = open (self .job_file , 'w' )
317
+ job_file = self . _open (self .job_file , 'w' )
274
318
job_file .write (self .__str__ ())
275
319
job_file .close ()
320
+ if self ._remote :
321
+ self ._copy_input_files_to_remote ()
276
322
277
323
def _list_attributes (self ):
278
324
list = []
@@ -281,12 +327,21 @@ def _list_attributes(self):
281
327
list .append (k + ' = ' + str (v ))
282
328
return list
283
329
330
+ def _open (self , file_name , mode = 'w' ):
331
+ if self ._remote :
332
+ return self ._remote .remote_file (os .path .join (self ._remote_id ,file_name ), mode )
333
+ else :
334
+ return open (file_name , mode )
335
+
284
336
def _make_dir (self , dir_name ):
285
337
"""docstring
286
338
287
339
"""
288
340
try :
289
- os .makedirs (dir_name )
341
+ if self ._remote :
342
+ self ._remote .makedirs (os .path .join (self ._remote_id ,dir_name ))
343
+ else :
344
+ os .makedirs (dir_name )
290
345
except OSError :
291
346
pass
292
347
@@ -321,6 +376,12 @@ def _resolve_attribute_match(self, match):
321
376
322
377
return self .get (match .group (1 ), match .group (0 ))
323
378
379
+ def __del__ (self ):
380
+ if self ._remote :
381
+ self ._remote .execute ('rm -rf %s' % (self ._remote_id ,))
382
+ self ._remote .close ()
383
+ del self ._remote
384
+
324
385
325
386
326
387
class NoExecutable (Exception ):
0 commit comments