Skip to content

Commit

Permalink
make sure the number of available_gpus is greater than or equal to 1, add `available_gpus` to index page
Browse files Browse the repository at this point in the history
  • Loading branch information
Spico197 committed Jul 14, 2021
1 parent c953bb2 commit 6dc0eaa
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
10 changes: 6 additions & 4 deletions watchmen/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def check_gpu_info():
def check_work(queue_timeout):
logger.info("regular check")
marked_finished = []
reserved_gpus = set() # whether there can be multiple `ok` in one scan
reserved_gpus = set()
client_list = []
queue_num = 0
for client_id, client in cc.work_queue.items():
Expand All @@ -257,7 +257,8 @@ def check_work(queue_timeout):
client.status = ClientStatus.TIMEOUT
else:
client.status = ClientStatus.OK
client.queue_num = -1 # invalid client
# invalid client
client.queue_num = -1
marked_finished.append(client_id)
continue
client.queue_num = queue_num
Expand All @@ -282,11 +283,12 @@ def check_work(queue_timeout):
except RuntimeError as err:
client.msg = str(err)

client_list.append(client_id, client, ok, set(available_gpus))
client_list.append([client_id, client, ok, set(available_gpus)])
queue_num += 1

# post check and assignment, and make sure gpus of `ready` clients will not be assigned to the others
for client_id, client, ok, available_gpu_set in client_list:
if ok and len(available_gpu_set & reserved_gpus) < 1:
if ok and len(available_gpu_set) > 0 and len(available_gpu_set & reserved_gpus) < 1:
client.status = ClientStatus.READY
client.available_gpus = available_gpus
reserved_gpus |= set(client.available_gpus)
Expand Down
3 changes: 2 additions & 1 deletion watchmen/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@
<th>Mode</th>
<th>GPU Scope</th>
<th>Request GPU Num</th>
<th>Available GPUs</th>
<th>Register Time</th>
<th>Last Request Time</th>
</tr>
Expand Down Expand Up @@ -249,7 +250,7 @@
for (let i = 0; i < data.work_queue.length; i++) {
let c = data.work_queue[i]
let tr = document.createElement("tr")
tr.innerHTML = `<td>${c.queue_num}</td> <td class="${c.status}">${c.status}</td> <td>${c.id}</td> <td>${c.mode}</td> <td>${c.gpus}</td> <td>${c.req_gpu_num}</td> <td>${c.register_time}</td> <td>${c.last_request_time}</td>`
tr.innerHTML = `<td>${c.queue_num}</td> <td class="${c.status}">${c.status}</td> <td>${c.id}</td> <td>${c.mode}</td> <td>${c.gpus}</td> <td>${c.req_gpu_num}</td> <td>${c.available_gpus}</td> <td>${c.register_time}</td> <td>${c.last_request_time}</td>`
workingStats.appendChild(tr)
}
} else if (selected === "finished-queue-template") {
Expand Down

0 comments on commit 6dc0eaa

Please sign in to comment.