Skip to content

Commit

Permalink
Merge branch 'master' into get-cluster-stats
Browse files Browse the repository at this point in the history
  • Loading branch information
lukew3 committed Apr 7, 2022
2 parents 898395d + b311447 commit 9fcbd52
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 4 deletions.
7 changes: 6 additions & 1 deletion lib/ood_core/job/adapters/slurm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,10 @@ def get_state(st)
STATE_MAP.fetch(st, :undetermined)
end

# Count the gpus named in a Slurm GRES string.
#
# Every `gpu[:type...]:count` entry contributes its count; entries without a
# numeric count and non-gpu entries contribute nothing.
# @param gres [#to_s, nil] GRES field, e.g. "gres:gpu:v100-32g:2,gres:pfsdir:1"
# @return [Integer] sum of the counts of all gpu entries (0 when there are none)
def gpus_from_gres(gres)
  # Skip any number of colon-terminated type segments, then capture the whole
  # count. A greedy `[^,]*` in front of `(\d+)` would swallow all but the last
  # digit (turning "gpu:v100:12" into 2), and excluding "(" keeps suffixes
  # such as "(IDX:0-1)" from being mistaken for the count.
  gres.to_s.scan(/gpu:(?:[^,:(]+:)*(\d+)/).flatten.sum(&:to_i)
end

# Parse hash describing Slurm job status
def parse_job_info(v)
allocated_nodes = parse_nodes(v[:node_list])
Expand All @@ -668,7 +672,8 @@ def parse_job_info(v)
cpu_time: nil,
submission_time: v[:submit_time] ? Time.parse(v[:submit_time]) : nil,
dispatch_time: (v[:start_time].nil? || v[:start_time] == "N/A") ? nil : Time.parse(v[:start_time]),
native: v
native: v,
gpus: gpus_from_gres(v[:gres])
)
end

Expand Down
13 changes: 12 additions & 1 deletion lib/ood_core/job/info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ class Info
# @return [Object] native info
attr_reader :native

# Number of gpus allocated for job
# @return [Integer, nil] allocated total number of gpus
attr_reader :gpus

# List of job array child task statuses
# @note only relevant for job arrays
# @return [Array<Task>] tasks
Expand All @@ -86,11 +90,12 @@ class Info
# @param dispatch_time [#to_i, nil] dispatch time
# @param tasks [Array<Hash>] tasks e.g. { id: '12345.owens-batch', status: :running }
# @param native [Object] native info
# @param gpus [#to_i, nil] allocated total number of gpus (default: 0)
def initialize(id:, status:, allocated_nodes: [], submit_host: nil,
job_name: nil, job_owner: nil, accounting_id: nil,
procs: nil, queue_name: nil, wallclock_time: nil,
wallclock_limit: nil, cpu_time: nil, submission_time: nil,
dispatch_time: nil, native: nil, tasks: [],
dispatch_time: nil, native: nil, gpus: 0, tasks: [],
**_)
@id = id.to_s
@status = Status.new(state: status.to_sym)
Expand All @@ -111,6 +116,7 @@ def initialize(id:, status:, allocated_nodes: [], submit_host: nil,
@status = job_array_aggregate_status unless @tasks.empty?

@native = native
@gpus = gpus && gpus.to_i
end

# Create a new Info for a child task
Expand Down Expand Up @@ -147,10 +153,15 @@ def to_h
submission_time: submission_time,
dispatch_time: dispatch_time,
native: native,
gpus: gpus,
tasks: tasks
}
end

# Whether the job has any gpus allocated
# @return [Boolean] true when the gpu count is greater than zero
def gpu?
  # gpus may be nil (the reader is documented as Integer or nil), so coerce
  # before comparing instead of calling #positive? on nil.
  gpus.to_i.positive?
end

# The comparison operator
# @param other [#to_h] object to compare against
# @return [Boolean] whether objects are equivalent
Expand Down
4 changes: 2 additions & 2 deletions spec/fixtures/scripts/squeue.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env ruby

puts "\u001EACCOUNT\u001FJOBID\u001FEXEC_HOST\u001FMIN_CPUS\u001FCPUS\u001FMIN_TMP_DISK\u001FNODES\u001FEND_TIME\u001FDEPENDENCY\u001FFEATURES\u001FARRAY_JOB_ID\u001FGROUP\u001FGROUP\u001FOVER_SUBSCRIBE\u001FSOCKETS_PER_NODE\u001FJOBID\u001FCORES_PER_SOCKET\u001FNAME\u001FTHREADS_PER_CORE\u001FCOMMENT\u001FARRAY_TASK_ID\u001FTIME_LIMIT\u001FTIME_LEFT\u001FMIN_MEMORY\u001FTIME\u001FREQ_NODES\u001FNODELIST\u001FCOMMAND\u001FCONTIGUOUS\u001FQOS\u001FPARTITION\u001FPRIORITY\u001FREASON\u001FSTART_TIME\u001FST\u001FSTATE\u001FUSER\u001FUSER\u001FRESERVATION\u001FSUBMIT_TIME\u001FWCKEY\u001FLICENSES\u001FEXC_NODES\u001FCORE_SPEC\u001FNICE\u001FSCHEDNODES\u001FS:C:T\u001FWORK_DIR\u001FGRES"
puts "\u001Eoscstaff\u001F5096321\u001Fbr006\u001F1\u001F1\u001F0\u001F1\u001F2019-03-18T10:50:25\u001F\u001F(null)\u001F5096321\u001Foscstaff\u001F15312\u001FOK\u001F*\u001F5096321\u001F*\u001FInteract\u001F*\u001F(null)\u001FN/A\u001F1:00:00\u001F59:55\u001F4400M\u001F0:05\u001F\u001Fr001\u001Fbash\u001F0\u001Frm-interact\u001FRM-small\u001F3985\u001FNone\u001F2019-03-18T10:50:20\u001FCD\u001FCOMPLETED\u001Fefranz\u001F1448\u001F(null)\u001F2019-03-18T10:50:20\u001F(null)\u001F(null)\u001F\u001FN/A\u001F0\u001F(null)\u001F*:*:*\u001F/home/efranz\u001F(null)"
puts "\u001Ect4s8dp\u001F4320602\u001Fn/a\u001F28\u001F1792\u001F0\u001F64\u001FN/A\u001F\u001F(null)\u001F4320602\u001Fct4s8dp\u001F15900\u001FNO\u001F*\u001F4320602\u001F*\u001FLES-data-init\u001F*\u001F(null)\u001FN/A\u001F2-00:00:00\u001F2-00:00:00\u001F123200M\u001F0:00\u001F\u001F\u001F/scratch/ct4s8dp/kyu2/LES-data/run.q\u001F0\u001Frmlrg\u001FRM\u001F11043\u001FResources\u001FN/A\u001FPD\u001FPENDING\u001Fkyu2\u001F66288\u001F(null)\u001F2018-10-30T20:42:56\u001F(null)\u001F(null)\u001F\u001FN/A\u001F0\u001F(null)\u001F*:*:*\u001F/scratch/ct4s8dp/kyu2/LES-data\u001F(null)"
puts "\u001Eoscstaff\u001F5096321\u001Fbr006\u001F1\u001F1\u001F0\u001F1\u001F2019-03-18T10:50:25\u001F\u001F(null)\u001F5096321\u001Foscstaff\u001F15312\u001FOK\u001F*\u001F5096321\u001F*\u001FInteract\u001F*\u001F(null)\u001FN/A\u001F1:00:00\u001F59:55\u001F4400M\u001F0:05\u001F\u001Fr001\u001Fbash\u001F0\u001Frm-interact\u001FRM-small\u001F3985\u001FNone\u001F2019-03-18T10:50:20\u001FCD\u001FCOMPLETED\u001Fefranz\u001F1448\u001F(null)\u001F2019-03-18T10:50:20\u001F(null)\u001F(null)\u001F\u001FN/A\u001F0\u001F(null)\u001F*:*:*\u001F/home/efranz\u001Fgres:gpu:1,gres:gpfs"
puts "\u001Ect4s8dp\u001F4320602\u001Fn/a\u001F28\u001F1792\u001F0\u001F64\u001FN/A\u001F\u001F(null)\u001F4320602\u001Fct4s8dp\u001F15900\u001FNO\u001F*\u001F4320602\u001F*\u001FLES-data-init\u001F*\u001F(null)\u001FN/A\u001F2-00:00:00\u001F2-00:00:00\u001F123200M\u001F0:00\u001F\u001F\u001F/scratch/ct4s8dp/kyu2/LES-data/run.q\u001F0\u001Frmlrg\u001FRM\u001F11043\u001FResources\u001FN/A\u001FPD\u001FPENDING\u001Fkyu2\u001F66288\u001F(null)\u001F2018-10-30T20:42:56\u001F(null)\u001F(null)\u001F\u001FN/A\u001F0\u001F(null)\u001F*:*:*\u001F/scratch/ct4s8dp/kyu2/LES-data\u001Fgres:pfsdir:ess"
30 changes: 30 additions & 0 deletions spec/job/adapters/slurm_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ def build_script(opts = {})
expect(j1.status).to eq("completed")
expect(j1.status).to eq(OodCore::Job::Status.new(state: :completed))
expect(j1.status.to_s).to eq("completed")
expect(j1.gpus).to eq(1)
expect(j1.gpu?).to eq(true)

j2 = jobs.last
expect(j2.id).to eq("4320602")
Expand All @@ -316,6 +318,8 @@ def build_script(opts = {})
expect(j2.status).to eq("queued")
expect(j2.status).to eq(OodCore::Job::Status.new(state: :queued))
expect(j2.status.to_s).to eq("queued")
expect(j2.gpus).to eq(0)
expect(j2.gpu?).to eq(false)
end
end

Expand Down Expand Up @@ -1195,4 +1199,30 @@ def job_info(opts = {})
end
end
end

# Table-driven examples that call gpus_from_gres through `send` with
# representative GRES strings and compare against the expected gpu count.
describe "#gpus_from_gres" do
  batch = OodCore::Job::Adapters::Slurm::Batch.new(cluster: "owens.osc.edu", conf: "/etc/slurm/conf/", bin: nil, bin_overrides: {}, submit_host: "owens.osc.edu", strict_host_checking: false)
  adapter = OodCore::Job::Adapters::Slurm.new(slurm: batch)

  context "when called" do
    # Each pair is [gres input, expected gpu count]
    gres_cases = [
      [nil, 0],
      ["", 0],
      ["N/A", 0],
      ["gres:gpu:v100-32g:2", 2],
      ["gres:gpu:v100-32g:2,gres:pfsdir:1", 2],
      ["gres:third-thing:sub-thing:17,gres:gpu:v100-32g:2,gres:pfsdir:1", 2],
      ["gres:third-thing:sub-thing:17,gres:pfsdir:1,gres:gpu:v100-32g:2", 2],
      ["gres:gpu:v30-12g:2,gres:gpu:v31-32g:1", 3],
      ["gres:gpu:1", 1],
      ["gres:pfsdir:ess", 0]
    ]
    gres_cases.each do |gres, expected|
      it "returns the correct number of gpus when gres=\"#{gres}\"" do
        expect(adapter.send(:gpus_from_gres, gres)).to eq(expected)
      end
    end
  end
end
end
8 changes: 8 additions & 0 deletions spec/job/info_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def build_info(opts = {})
it { is_expected.to respond_to(:submission_time) }
it { is_expected.to respond_to(:dispatch_time) }
it { is_expected.to respond_to(:native) }
it { is_expected.to respond_to(:gpus) }
it { is_expected.to respond_to(:to_h) }
it { is_expected.to respond_to(:tasks) }

Expand Down Expand Up @@ -151,6 +152,12 @@ def build_info(opts = {})
it { is_expected.to eq("native") }
end

# Verifies the gpus reader returns the value given at construction
# (previously this example exercised #native by mistake).
describe "#gpus" do
  subject { build_info(gpus: 2).gpus }

  it { is_expected.to eq(2) }
end

describe "#to_h" do
subject { build_info.to_h }

Expand All @@ -170,6 +177,7 @@ def build_info(opts = {})
it { is_expected.to have_key(:submission_time) }
it { is_expected.to have_key(:dispatch_time) }
it { is_expected.to have_key(:native) }
it { is_expected.to have_key(:gpus) }
end

describe "#==" do
Expand Down

0 comments on commit 9fcbd52

Please sign in to comment.