Skip to content

Commit

Permalink
start auto queues (#787)
Browse files Browse the repository at this point in the history
Add the `queue` API to the adapter interface.

Co-authored-by: Travis Ravert <travert@osc.edu>
  • Loading branch information
johrstrom and Oglopf authored Jan 17, 2023
1 parent 5da214c commit ad7cc61
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/ood_core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module Job
require "ood_core/job/script"
require "ood_core/job/info"
require "ood_core/job/cluster_info"
require "ood_core/job/queue_info"
require "ood_core/job/status"
require "ood_core/job/adapter"
require "ood_core/job/factory"
Expand Down
7 changes: 7 additions & 0 deletions lib/ood_core/job/adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,13 @@ def sanitize_job_name(job_name)
def job_name_illegal_chars
  # Value of the OOD_JOB_NAME_ILLEGAL_CHARS environment variable as a
  # String; an unset variable yields the empty string.
  configured = ENV["OOD_JOB_NAME_ILLEGAL_CHARS"]
  String(configured)
end

# Return the list of queues for this scheduler.
#
# This default implementation reports no queues (an empty array).
# Adapters that can enumerate their queues are expected to provide
# their own implementation.
#
# @return [Array<QueueInfo>] always empty here
def queues
[]
end
end
end
end
34 changes: 34 additions & 0 deletions lib/ood_core/job/adapters/slurm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,37 @@ def all_squeue_fields
}
end

# List the partitions reported by Slurm.
#
# Runs `scontrol show part -o` and converts every line of its output
# with str_to_acct_info.
#
# @return [Array] one converted entry per line of scontrol output
def queues
  partition_lines = call('scontrol', 'show', 'part', '-o').each_line
  partition_lines.map { |partition| str_to_acct_info(partition) }
end

private
# Convert one line of `scontrol show part -o` output into a QueueInfo.
#
# The line is treated as a space separated list of KEY=VALUE tokens.
# Every pair is handed to QueueInfo under its original key (as a
# Symbol), and these derived keys are added:
#
#   :name           - the PartitionName value
#   :qos            - QoS split on commas; [] when it is 'N/A' or absent
#   :allow_accounts - AllowAccounts split on commas; nil when it is
#                     'ALL' or absent
#   :deny_accounts  - DenyAccounts split on commas; [] when absent
#
# @param line [String] a single partition record
# @return [OodCore::Job::QueueInfo]
def str_to_acct_info(line)
  hsh = line.split(' ').each_with_object({}) do |token, pairs|
    m = token.match(/^(?<key>\w+)=(?<value>.+)$/)
    # A token that is not KEY=VALUE (a stray word, or a key with an empty
    # value) yields a nil match; skip it rather than raising NoMethodError.
    next if m.nil?

    pairs[m[:key].to_sym] = m[:value]
  end

  hsh[:name] = hsh[:PartitionName]

  qos = hsh[:QoS].to_s
  hsh[:qos] = qos == 'N/A' ? [] : qos.split(',')

  allowed = hsh[:AllowAccounts]
  hsh[:allow_accounts] = if allowed.nil? || allowed == 'ALL'
                           nil
                         else
                           allowed.split(',')
                         end

  # nil.to_s.split(',') is [], so an absent DenyAccounts denies nobody.
  hsh[:deny_accounts] = hsh[:DenyAccounts].to_s.split(',')

  OodCore::Job::QueueInfo.new(**hsh)
end

# Modify the StringIO instance by advancing past the squeue header
#
# The first two "records" should always be discarded. Consider the
Expand Down Expand Up @@ -605,6 +635,10 @@ def directive_prefix
'#SBATCH'
end

# Return the queues known to this adapter.
#
# Delegates directly to @slurm.queues and returns whatever it returns.
# NOTE(review): presumably an Array<QueueInfo>, matching the base
# adapter's contract - confirm against the object stored in @slurm.
def queues
@slurm.queues
end

private
# Convert duration to seconds
def duration_in_seconds(time)
Expand Down
33 changes: 33 additions & 0 deletions lib/ood_core/job/queue_info.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true

module OodCore
  module Job
    # QueueInfo is information about a given queue on a scheduler.
    class QueueInfo
      # The name of the queue.
      #
      # @return [String]
      attr_reader :name
      alias to_s name

      # The QoSes associated with this queue
      attr_reader :qos

      # The accounts that are allowed to use this queue.
      #
      # nil means ALL accounts are allowed.
      attr_reader :allow_accounts

      # The accounts that are not allowed to use this queue.
      attr_reader :deny_accounts

      # Build a QueueInfo from keyword options. Keys other than the four
      # listed below are accepted and ignored.
      #
      # @param opts [Hash] the attributes of the queue
      # @option opts [#to_s] :name ('unknown') the name of the queue
      # @option opts [Object] :qos ([]) the QoSes of the queue
      # @option opts [Object] :allow_accounts (nil) the allowed accounts
      # @option opts [Object] :deny_accounts ([]) the denied accounts
      def initialize(**opts)
        # #to_s is an alias of #name, so the name is coerced to a String;
        # otherwise a nil or Symbol name would make #to_s return a
        # non-String.
        @name = opts.fetch(:name, 'unknown').to_s
        @qos = opts.fetch(:qos, [])
        @allow_accounts = opts.fetch(:allow_accounts, nil)
        @deny_accounts = opts.fetch(:deny_accounts, [])
      end

      # Convert this object to a Hash keyed by attribute name.
      #
      # @return [Hash{Symbol => Object}]
      def to_h
        {
          name: name,
          qos: qos,
          allow_accounts: allow_accounts,
          deny_accounts: deny_accounts
        }
      end
    end
  end
end
14 changes: 14 additions & 0 deletions spec/fixtures/output/slurm/owens_partitions.txt

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions spec/job/adapter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,10 @@
end
end
end

# The adapter under test here is expected to report no queues.
describe '#queues' do
it 'returns an empty array' do
expect(adapter.queues).to eq([])
end
end
end
50 changes: 50 additions & 0 deletions spec/job/adapters/slurm_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1226,4 +1226,54 @@ def job_info(opts = {})
end
end
end

describe '#queues' do
  # Both contexts use a default Batch object and stub the scontrol call
  # made through Open3.capture3.
  let(:slurm) { OodCore::Job::Adapters::Slurm::Batch.new }

  context 'when scontrol returns successfully' do
    let(:expected_queue_names) do
      %w[
        batch debug gpubackfill-parallel gpubackfill-serial gpudebug
        gpuparallel gpuserial hugemem hugemem-parallel longserial
        parallel quick serial systems
      ]
    end

    let(:quick_deny_accounts) do
      %w[
        pcon0003 pcon0014 pcon0015 pcon0016 pcon0401 pcon0008 pas1429 pcon0009
        pcon0020 pcon0022 pcon0023 pcon0024 pcon0025 pcon0040 pcon0026 pcon0041
        pcon0080 pcon0100 pcon0101 pcon0120 pcon0140 pcon0160 pcon0180 pcon0200
        pas1901 pcon0220 pcon0240 pcon0260 pcon0280 pcon0300 pcon0320 pcon0340
        pcon0341 pcon0360 pcon0380 pcon0381 pcon0441 pcon0481 pcon0501 pcon0421
      ]
    end

    it 'returns the correct queue info objects' do
      partitions = File.read('spec/fixtures/output/slurm/owens_partitions.txt')
      allow(Open3).to receive(:capture3)
        .with({}, 'scontrol', 'show', 'part', '-o', {stdin_data: ''})
        .and_return([partitions, '', double("success?" => true)])

      queues = subject.queues
      expect(queues.map(&:to_s)).to eq(expected_queue_names)

      # A partition restricted to an explicit list of accounts.
      systems_queue = queues.find { |queue| queue.name == 'systems' }
      expect(systems_queue.allow_accounts).to eq(['root', 'pzs0708', 'pzs0710', 'pzs0722'])
      expect(systems_queue.deny_accounts).to eq([])
      expect(systems_queue.qos).to eq([])

      # A partition open to all accounts except a deny list.
      quick_queue = queues.find { |queue| queue.name == 'quick' }
      expect(quick_queue.allow_accounts).to be_nil
      expect(quick_queue.deny_accounts).to eq(quick_deny_accounts)
      expect(quick_queue.qos).to eq(['quick'])
    end
  end

  context 'when scontrol fails' do
    it 'raises the error' do
      allow(Open3).to receive(:capture3)
        .with({}, 'scontrol', 'show', 'part', '-o', {stdin_data: ''})
        .and_return(['', 'the error message', double("success?" => false)])

      expect { subject.queues }.to raise_error(OodCore::Job::Adapters::Slurm::Batch::Error, 'the error message')
    end
  end
end
end

0 comments on commit ad7cc61

Please sign in to comment.