From 947c73ee6609a0b64e93f3c83e677a79b2c5ad8b Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Sun, 29 Oct 2023 22:17:31 +0100 Subject: [PATCH] Exclusively lock Runners during code executions Previously, the same runner could be used multiple times with different submissions simultaneously. This, however, yielded errors, for example when one submission time oud (causing the running to be deleted) while another submission was still executed. Admin actions, such as the shell, can be still executed regardless of any other code execution. Fixes CODEOCEAN-HG Fixes openHPI/poseidon#423 --- app/assets/javascripts/editor/editor.js.erb | 9 ++++ app/assets/javascripts/editor/evaluation.js | 5 ++ .../execution_environments_controller.rb | 6 +-- app/controllers/live_streams_controller.rb | 6 +-- .../remote_evaluation_controller.rb | 3 ++ .../request_for_comments_controller.rb | 19 ++++++-- app/controllers/submissions_controller.rb | 23 ++++++++- app/errors/runner/error.rb | 2 + app/models/execution_environment.rb | 2 +- app/models/runner.rb | 47 ++++++++++++++++--- app/models/testrun.rb | 1 + config/locales/de.yml | 1 + config/locales/en.yml | 1 + ...029172331_add_reserved_until_to_runners.rb | 7 +++ spec/models/execution_environment_spec.rb | 2 +- 15 files changed, 113 insertions(+), 21 deletions(-) create mode 100644 db/migrate/20231029172331_add_reserved_until_to_runners.rb diff --git a/app/assets/javascripts/editor/editor.js.erb b/app/assets/javascripts/editor/editor.js.erb index f05914e67..970a46968 100644 --- a/app/assets/javascripts/editor/editor.js.erb +++ b/app/assets/javascripts/editor/editor.js.erb @@ -773,6 +773,8 @@ var CodeOceanEditor = { this.showContainerDepletedMessage(); } else if (output.status === 'out_of_memory') { this.showOutOfMemoryMessage(); + } else if (output.status === 'runner_in_use') { + this.showRunnerInUseMessage(); } }, @@ -832,6 +834,13 @@ var CodeOceanEditor = { }); }, + showRunnerInUseMessage: function () { + $.flash.warning({ + icon: ['fa-solid', 'fa-triangle-exclamation'], + text: I18n.t('exercises.editor.runner_in_use') + }); + }, + showTimeoutMessage: function () { $.flash.info({ icon: ['fa-regular', 'fa-clock'], diff --git a/app/assets/javascripts/editor/evaluation.js b/app/assets/javascripts/editor/evaluation.js index 317a3a86f..c493f98d2 100644 --- a/app/assets/javascripts/editor/evaluation.js +++ b/app/assets/javascripts/editor/evaluation.js @@ -109,6 +109,11 @@ CodeOceanEditorEvaluation = { })) { this.showOutOfMemoryMessage(); } + if (_.some(response, function (result) { + return result.status === 'runner_in_use'; + })) { + this.showRunnerInUseMessage(); + } if (_.some(response, function (result) { return result.status === 'container_depleted'; })) { diff --git a/app/controllers/execution_environments_controller.rb b/app/controllers/execution_environments_controller.rb index 39c09c433..640e47de1 100644 --- a/app/controllers/execution_environments_controller.rb +++ b/app/controllers/execution_environments_controller.rb @@ -33,7 +33,7 @@ def new def execute_command runner = Runner.for(current_user, @execution_environment) @privileged_execution = ActiveModel::Type::Boolean.new.cast(params[:sudo]) || @execution_environment.privileged_execution - output = runner.execute_command(params[:command], privileged_execution: @privileged_execution, raise_exception: false) + output = runner.execute_command(params[:command], privileged_execution: @privileged_execution, raise_exception: false, exclusive: false) render json: output.except(:messages) end @@ -41,11 +41,11 @@ def list_files runner = Runner.for(current_user, @execution_environment) @privileged_execution = ActiveModel::Type::Boolean.new.cast(params[:sudo]) || @execution_environment.privileged_execution begin - files = runner.retrieve_files(path: params[:path], recursive: false, privileged_execution: @privileged_execution) + files = runner.retrieve_files(path: params[:path], recursive: false, privileged_execution: @privileged_execution, exclusive: false) downloadable_files, additional_directories = convert_files_json_to_files files js_tree = FileTree.new(downloadable_files, additional_directories, force_closed: true).to_js_tree render json: js_tree[:core][:data] - rescue Runner::Error::RunnerNotFound, Runner::Error::WorkspaceError + rescue Runner::Error::RunnerNotFound, Runner::Error::WorkspaceError, Runner::Error::RunnerInUse render json: [] end end diff --git a/app/controllers/live_streams_controller.rb b/app/controllers/live_streams_controller.rb index 47f5b3137..c51a60758 100644 --- a/app/controllers/live_streams_controller.rb +++ b/app/controllers/live_streams_controller.rb @@ -26,15 +26,15 @@ def download_arbitrary_file runner = Runner.for(current_user, @execution_environment) fallback_location = shell_execution_environment_path(@execution_environment) privileged = params[:sudo] || @execution_environment.privileged_execution? - send_runner_file(runner, desired_file, fallback_location, privileged:) + send_runner_file(runner, desired_file, fallback_location, privileged:, exclusive: false) end private - def send_runner_file(runner, desired_file, redirect_fallback = root_path, privileged: false) + def send_runner_file(runner, desired_file, redirect_fallback = root_path, privileged: false, exclusive: true) filename = File.basename(desired_file) send_stream(filename:, type: 'application/octet-stream', disposition: 'attachment') do |stream| - runner.download_file(desired_file, privileged_execution: privileged) do |chunk, overall_size, _content_type| + runner.download_file(desired_file, privileged_execution: privileged, exclusive:) do |chunk, overall_size, _content_type| unless response.committed? # Disable Rack::ETag, which would otherwise cause the response to be cached # See https://github.com/rack/rack/issues/1619#issuecomment-848460528 diff --git a/app/controllers/remote_evaluation_controller.rb b/app/controllers/remote_evaluation_controller.rb index 1c4b4b6b8..caa63f664 100644 --- a/app/controllers/remote_evaluation_controller.rb +++ b/app/controllers/remote_evaluation_controller.rb @@ -72,6 +72,9 @@ def create_and_score_submission(cause) # TODO: check token expired? {message: 'No exercise found for this validation_token! Please keep out!', status: 401} end + rescue Runner::Error::RunnerInUse => e + Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" } + {message: I18n.t('exercises.editor.runner_in_use'), status: 409} rescue Runner::Error => e Rails.logger.debug { "Runner error while scoring submission #{@submission.id}: #{e.message}" } {message: I18n.t('exercises.editor.depleted'), status: 503} diff --git a/app/controllers/request_for_comments_controller.rb b/app/controllers/request_for_comments_controller.rb index 634fa0489..b92de462c 100644 --- a/app/controllers/request_for_comments_controller.rb +++ b/app/controllers/request_for_comments_controller.rb @@ -167,11 +167,20 @@ def create respond_to do |format| if @request_for_comment.save - # execute the tests here and wait until they finished. - # As the same runner is used for the score and test run, no parallelization is possible - # A run is triggered from the frontend and does not need to be handled here. - @request_for_comment.submission.calculate_score(current_user) - format.json { render :show, status: :created, location: @request_for_comment } + begin + # execute the tests here and wait until they finished. + # As the same runner is used for the score and test run, no parallelization is possible + # A run is triggered from the frontend and does not need to be handled here. + @request_for_comment.submission.calculate_score(current_user) + rescue Runner::Error::RunnerInUse => e + Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" } + format.json { render json: {error: t('exercises.editor.runner_in_use'), status: :runner_in_use}, status: :conflict } + rescue Runner::Error => e + Rails.logger.debug { "Runner error while requesting comments: #{e.message}" } + format.json { render json: {danger: t('exercises.editor.depleted'), status: :container_depleted}, status: :service_unavailable } + else + format.json { render :show, status: :created, location: @request_for_comment } + end else format.html { render :new } format.json { render json: @request_for_comment.errors, status: :unprocessable_entity } diff --git a/app/controllers/submissions_controller.rb b/app/controllers/submissions_controller.rb index b60eb36a5..9283063b5 100644 --- a/app/controllers/submissions_controller.rb +++ b/app/controllers/submissions_controller.rb @@ -234,6 +234,12 @@ def run # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplex @testrun[:exit_code] ||= 137 @testrun[:output] = "out_of_memory: #{@testrun[:output]}" extract_durations(e) + rescue Runner::Error::RunnerInUse => e + send_and_store client_socket, {cmd: :status, status: :runner_in_use} + Rails.logger.debug { "Running a submission failed because the runner was already in use: #{e.message}" } + @testrun[:status] ||= :runner_in_use + @testrun[:output] = "runner_in_use: #{@testrun[:output]}" + extract_durations(e) rescue Runner::Error => e # Regardless of the specific error cause, we send a `container_depleted` status to the client. send_and_store client_socket, {cmd: :status, status: :container_depleted} @@ -266,6 +272,14 @@ def score client_socket&.send_data(JSON.dump(@submission.calculate_score(current_user))) # To enable hints when scoring a submission, uncomment the next line: # send_hints(client_socket, StructuredError.where(submission: @submission)) + rescue Runner::Error::RunnerInUse => e + extract_durations(e) + send_and_store client_socket, {cmd: :status, status: :runner_in_use} + Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" } + @testrun[:passed] = false + @testrun[:status] ||= :runner_in_use + @testrun[:output] = "runner_in_use: #{@testrun[:output]}" + save_testrun_output 'assess' rescue Runner::Error => e extract_durations(e) send_and_store client_socket, {cmd: :status, status: :container_depleted} @@ -301,10 +315,17 @@ def test # The score is stored separately, we can forward it to the client immediately client_socket&.send_data(JSON.dump(@submission.test(@file, current_user))) + rescue Runner::Error::RunnerInUse => e + extract_durations(e) + send_and_store client_socket, {cmd: :status, status: :runner_in_use} + Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" } + @testrun[:passed] = false + @testrun[:status] ||= :runner_in_use + @testrun[:output] = "runner_in_use: #{@testrun[:output]}" + save_testrun_output 'assess' rescue Runner::Error => e extract_durations(e) send_and_store client_socket, {cmd: :status, status: :container_depleted} - kill_client_socket(client_socket) Rails.logger.debug { "Runner error while testing submission #{@submission.id}: #{e.message}" } Sentry.capture_exception(e) @testrun[:passed] = false diff --git a/app/errors/runner/error.rb b/app/errors/runner/error.rb index cc88de17b..0084902c4 100644 --- a/app/errors/runner/error.rb +++ b/app/errors/runner/error.rb @@ -16,6 +16,8 @@ class NotAvailable < Error; end class Unauthorized < Error; end + class RunnerInUse < Error; end + class RunnerNotFound < Error; end class FaradayError < Error; end diff --git a/app/models/execution_environment.rb b/app/models/execution_environment.rb index 7e5fa9ac2..9bb3eab75 100644 --- a/app/models/execution_environment.rb +++ b/app/models/execution_environment.rb @@ -92,7 +92,7 @@ def working_docker_image? retries = 0 begin runner = Runner.for(author, self) - output = runner.execute_command(VALIDATION_COMMAND) + output = runner.execute_command(VALIDATION_COMMAND, exclusive: false) errors.add(:docker_image, "error: #{output[:stderr]}") if output[:stderr].present? rescue Runner::Error => e # In case of an Runner::Error, we retry multiple times before giving up. diff --git a/app/models/runner.rb b/app/models/runner.rb index 293a6c185..dfcd16d1f 100644 --- a/app/models/runner.rb +++ b/app/models/runner.rb @@ -44,18 +44,24 @@ def self.for(contributor, execution_environment) end def copy_files(files) + reserve! @strategy.copy_files(files) rescue Runner::Error::RunnerNotFound request_new_id save @strategy.copy_files(files) + ensure + release! end - def download_file(...) - @strategy.download_file(...) + def download_file(desired_file, privileged_execution:, exclusive: true) + reserve! if exclusive + @strategy.download_file(desired_file, privileged_execution:) + release! if exclusive end - def retrieve_files(raise_exception: true, **) + def retrieve_files(raise_exception: true, exclusive: true, **) + reserve! if exclusive try = 0 begin if try.nonzero? @@ -77,12 +83,14 @@ def retrieve_files(raise_exception: true, **) # We forward the exception if requested raise e if raise_exception && defined?(e) && e.present? - # Otherwise, we return an hash with empty files + # Otherwise, we return an hash with empty files and release the runner + release! if exclusive {'files' => []} end end - def attach_to_execution(command, privileged_execution: false, &block) + def attach_to_execution(command, privileged_execution: false, exclusive: true, &block) + reserve! if exclusive Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Starting execution with Runner #{id} for #{contributor_type} #{contributor_id}." } starting_time = Time.zone.now begin @@ -100,11 +108,12 @@ def attach_to_execution(command, privileged_execution: false, &block) e.execution_duration = Time.zone.now - starting_time raise end + release! if exclusive Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Stopped execution with Runner #{id} for #{contributor_type} #{contributor_id}." } Time.zone.now - starting_time # execution duration end - def execute_command(command, privileged_execution: false, raise_exception: true) + def execute_command(command, privileged_execution: false, raise_exception: true, exclusive: true) output = { stdout: +'', stderr: +'', @@ -119,7 +128,7 @@ def execute_command(command, privileged_execution: false, raise_exception: true) save end - execution_time = attach_to_execution(command, privileged_execution:) do |socket, starting_time| + execution_time = attach_to_execution(command, privileged_execution:, exclusive:) do |socket, starting_time| socket.on :stderr do |data| output[:stderr] << data output[:messages].push({cmd: :write, stream: :stderr, log: data, timestamp: Time.zone.now - starting_time}) @@ -139,6 +148,9 @@ def execute_command(command, privileged_execution: false, raise_exception: true) rescue Runner::Error::OutOfMemory => e Rails.logger.debug { "Running command `#{command}` caused an out of memory error: #{e.message}" } output.merge!(status: :out_of_memory, container_execution_time: e.execution_duration) + rescue Runner::Error::RunnerInUse => e + Rails.logger.debug { "Running command `#{command}` failed because the runner was already in use: #{e.message}" } + output.merge!(status: :runner_in_use, container_execution_time: e.execution_duration) rescue Runner::Error::RunnerNotFound => e Rails.logger.debug { "Running command `#{command}` failed for the first time: #{e.message}" } try += 1 @@ -166,6 +178,27 @@ def execute_command(command, privileged_execution: false, raise_exception: true) def destroy_at_management @strategy.destroy_at_management + update!(runner_id: nil, reserved_until: nil) + end + + def reserve! + with_lock do + if reserved_until.present? && reserved_until > Time.zone.now + @error = Runner::Error::RunnerInUse.new("The desired Runner #{id} is already in use until #{reserved_until.iso8601}.") + raise @error + else + update!(reserved_until: Time.zone.now + execution_environment.permitted_execution_time.seconds) + @error = nil + end + end + end + + def release! + return if @error.present? + + with_lock do + update!(reserved_until: nil) + end end private diff --git a/app/models/testrun.rb b/app/models/testrun.rb index ed680461a..9131790c5 100644 --- a/app/models/testrun.rb +++ b/app/models/testrun.rb @@ -14,6 +14,7 @@ class Testrun < ApplicationRecord timeout: 3, out_of_memory: 4, terminated_by_client: 5, + runner_in_use: 6, }, _default: :ok, _prefix: true validates :exit_code, numericality: {only_integer: true, min: 0, max: 255}, allow_nil: true diff --git a/config/locales/de.yml b/config/locales/de.yml index dcc637689..8c2a08d9a 100644 --- a/config/locales/de.yml +++ b/config/locales/de.yml @@ -398,6 +398,7 @@ de: network: 'Während Ihr Code läuft, ist Port %{port} unter folgender Adresse erreichbar: %{address}.' render: Anzeigen run: Ausführen + runner_in_use: Sie führen momentan Code aus. Bitte stoppen Sie die vorherige Ausführung oder warten Sie einen Moment, bevor Sie fortfahren. run_failure: Ihr Code konnte nicht auf der Plattform ausgeführt werden. run_success: Ihr Code wurde auf der Plattform ausgeführt. requestComments: Kommentare erbitten diff --git a/config/locales/en.yml b/config/locales/en.yml index a42d0ae7a..6ccb59c55 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -398,6 +398,7 @@ en: network: 'While your code is running, port %{port} is accessible using the following address: %{address}.' render: Render run: Run + runner_in_use: You are currently running code. Please stop the previous execution, or wait a moment before proceeding. run_failure: Your code could not be run. run_success: Your code was run on our servers. requestComments: Request Comments diff --git a/db/migrate/20231029172331_add_reserved_until_to_runners.rb b/db/migrate/20231029172331_add_reserved_until_to_runners.rb new file mode 100644 index 000000000..b4045eb6b --- /dev/null +++ b/db/migrate/20231029172331_add_reserved_until_to_runners.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class AddReservedUntilToRunners < ActiveRecord::Migration[7.1] + def change + add_column :runners, :reserved_until, :datetime, null: true, default: nil + end +end diff --git a/spec/models/execution_environment_spec.rb b/spec/models/execution_environment_spec.rb index bbaeddab2..fc03843e4 100644 --- a/spec/models/execution_environment_spec.rb +++ b/spec/models/execution_environment_spec.rb @@ -175,7 +175,7 @@ it 'executes the validation command' do allow(runner).to receive(:execute_command).and_return({}) working_docker_image? - expect(runner).to have_received(:execute_command).with(ExecutionEnvironment::VALIDATION_COMMAND) + expect(runner).to have_received(:execute_command).with(ExecutionEnvironment::VALIDATION_COMMAND, exclusive: false) end context 'when the command produces an error' do