From 5823fb4d4afa7daaa2216a78f644db0c6939b652 Mon Sep 17 00:00:00 2001
From: Atreya Shankar <35427332+atreyasha@users.noreply.github.com>
Date: Tue, 8 Nov 2022 19:55:01 +0100
Subject: [PATCH 1/2] Add explicit JSON indent to all prediction dumps

---
 src/reading_comprehension.py          | 2 +-
 src/sequence_classification.py        | 2 +-
 tests/test_reading_comprehension.py   | 1 +
 tests/test_sequence_classification.py | 3 +++
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/reading_comprehension.py b/src/reading_comprehension.py
index c3d6a71a..70f82d43 100644
--- a/src/reading_comprehension.py
+++ b/src/reading_comprehension.py
@@ -623,4 +623,4 @@ def _run_train_loop(self) -> None:
                     os.path.join(self.train_args.output_dir, "predictions.json"),
                     "w",
                 ) as output_file_stream:
-                    json.dump(results.predictions, output_file_stream)
+                    json.dump(results.predictions, output_file_stream, indent=4)
diff --git a/src/sequence_classification.py b/src/sequence_classification.py
index a0d9bd8c..557aa64c 100644
--- a/src/sequence_classification.py
+++ b/src/sequence_classification.py
@@ -400,4 +400,4 @@ def _run_train_loop(self) -> None:
                 with open(
                     os.path.join(self.train_args.output_dir, "predictions.json"), "w"
                 ) as output_file_stream:
-                    json.dump(prediction_dump, output_file_stream)
+                    json.dump(prediction_dump, output_file_stream, indent=4)
diff --git a/tests/test_reading_comprehension.py b/tests/test_reading_comprehension.py
index 4d766bcb..cedca76d 100644
--- a/tests/test_reading_comprehension.py
+++ b/tests/test_reading_comprehension.py
@@ -1301,6 +1301,7 @@ def test__run_train_loop(
                     },
                 ],
                 mocker.ANY,
+                indent=4,
             )
         else:
             json_open_dump.open.assert_not_called()
diff --git a/tests/test_sequence_classification.py b/tests/test_sequence_classification.py
index 70764c96..b3bcce22 100644
--- a/tests/test_sequence_classification.py
+++ b/tests/test_sequence_classification.py
@@ -938,6 +938,7 @@ def test__run_train_loop(
                         },
                     ],
                     mocker.ANY,
+                    indent=4,
                 )
             elif task == "opp_115":
                 json_open_dump.json_dump.assert_called_once_with(
@@ -962,6 +963,7 @@ def test__run_train_loop(
                         },
                     ],
                     mocker.ANY,
+                    indent=4,
                 )
             else:
                 json_open_dump.json_dump.assert_called_once_with(
@@ -986,6 +988,7 @@ def test__run_train_loop(
                         },
                     ],
                     mocker.ANY,
+                    indent=4,
                 )
         else:
             json_open_dump.open.assert_not_called()

From 1fbf0686934b5abe7b63c98922baab2549546cef Mon Sep 17 00:00:00 2001
From: Atreya Shankar <35427332+atreyasha@users.noreply.github.com>
Date: Tue, 8 Nov 2022 20:23:55 +0100
Subject: [PATCH 2/2] Enable `--full_determinism` in our entrypoint

Update the integration tests to pass the same flag as well.
---
 scripts/run_privacy_glue.sh   | 1 +
 tests/test_reproducibility.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/scripts/run_privacy_glue.sh b/scripts/run_privacy_glue.sh
index 2653efe9..4c138d14 100644
--- a/scripts/run_privacy_glue.sh
+++ b/scripts/run_privacy_glue.sh
@@ -170,6 +170,7 @@ main() {
     --warmup_ratio 0.1 \
     --early_stopping_patience 5 \
     --report_to "$WANDB" \
+    --full_determinism \
     --per_device_train_batch_size "$DEVICE_BATCH_SIZE" \
     --per_device_eval_batch_size "$DEVICE_BATCH_SIZE" \
     "${PREPROCESSING_NUM_WORKERS[@]}" \
diff --git a/tests/test_reproducibility.py b/tests/test_reproducibility.py
index e81555b7..f2a82259 100644
--- a/tests/test_reproducibility.py
+++ b/tests/test_reproducibility.py
@@ -88,6 +88,7 @@ def get_cli_arguments(
             "--learning_rate": "3e-5",
             "--warmup_ratio": "0.1",
             "--report_to": "none",
+            "--full_determinism": None,
             "--per_device_train_batch_size": "2",
             "--per_device_eval_batch_size": "2",
             "--max_train_samples": "16",