From 5823fb4d4afa7daaa2216a78f644db0c6939b652 Mon Sep 17 00:00:00 2001 From: Atreya Shankar <35427332+atreyasha@users.noreply.github.com> Date: Tue, 8 Nov 2022 19:55:01 +0100 Subject: [PATCH 1/2] Add explicit JSON indent to all prediction dumps --- src/reading_comprehension.py | 2 +- src/sequence_classification.py | 2 +- tests/test_reading_comprehension.py | 1 + tests/test_sequence_classification.py | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/reading_comprehension.py b/src/reading_comprehension.py index c3d6a71a..70f82d43 100644 --- a/src/reading_comprehension.py +++ b/src/reading_comprehension.py @@ -623,4 +623,4 @@ def _run_train_loop(self) -> None: os.path.join(self.train_args.output_dir, "predictions.json"), "w", ) as output_file_stream: - json.dump(results.predictions, output_file_stream) + json.dump(results.predictions, output_file_stream, indent=4) diff --git a/src/sequence_classification.py b/src/sequence_classification.py index a0d9bd8c..557aa64c 100644 --- a/src/sequence_classification.py +++ b/src/sequence_classification.py @@ -400,4 +400,4 @@ def _run_train_loop(self) -> None: with open( os.path.join(self.train_args.output_dir, "predictions.json"), "w" ) as output_file_stream: - json.dump(prediction_dump, output_file_stream) + json.dump(prediction_dump, output_file_stream, indent=4) diff --git a/tests/test_reading_comprehension.py b/tests/test_reading_comprehension.py index 4d766bcb..cedca76d 100644 --- a/tests/test_reading_comprehension.py +++ b/tests/test_reading_comprehension.py @@ -1301,6 +1301,7 @@ def test__run_train_loop( }, ], mocker.ANY, + indent=4, ) else: json_open_dump.open.assert_not_called() diff --git a/tests/test_sequence_classification.py b/tests/test_sequence_classification.py index 70764c96..b3bcce22 100644 --- a/tests/test_sequence_classification.py +++ b/tests/test_sequence_classification.py @@ -938,6 +938,7 @@ def test__run_train_loop( }, ], mocker.ANY, + indent=4, ) elif task == "opp_115": json_open_dump.json_dump.assert_called_once_with( @@ -962,6 +963,7 @@ def test__run_train_loop( }, ], mocker.ANY, + indent=4, ) else: json_open_dump.json_dump.assert_called_once_with( @@ -986,6 +988,7 @@ def test__run_train_loop( }, ], mocker.ANY, + indent=4, ) else: json_open_dump.open.assert_not_called() From 1fbf0686934b5abe7b63c98922baab2549546cef Mon Sep 17 00:00:00 2001 From: Atreya Shankar <35427332+atreyasha@users.noreply.github.com> Date: Tue, 8 Nov 2022 20:23:55 +0100 Subject: [PATCH 2/2] Enable `--full_determinism` in our entrypoint Update integration tests as well --- scripts/run_privacy_glue.sh | 1 + tests/test_reproducibility.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/run_privacy_glue.sh b/scripts/run_privacy_glue.sh index 2653efe9..4c138d14 100644 --- a/scripts/run_privacy_glue.sh +++ b/scripts/run_privacy_glue.sh @@ -170,6 +170,7 @@ main() { --warmup_ratio 0.1 \ --early_stopping_patience 5 \ --report_to "$WANDB" \ + --full_determinism \ --per_device_train_batch_size "$DEVICE_BATCH_SIZE" \ --per_device_eval_batch_size "$DEVICE_BATCH_SIZE" \ "${PREPROCESSING_NUM_WORKERS[@]}" \ diff --git a/tests/test_reproducibility.py b/tests/test_reproducibility.py index e81555b7..f2a82259 100644 --- a/tests/test_reproducibility.py +++ b/tests/test_reproducibility.py @@ -88,6 +88,7 @@ def get_cli_arguments( "--learning_rate": "3e-5", "--warmup_ratio": "0.1", "--report_to": "none", + "--full_determinism": None, "--per_device_train_batch_size": "2", "--per_device_eval_batch_size": "2", "--max_train_samples": "16",