From cf8f61b0b22f97330c3e3e3f85a63fb0f69bf2ab Mon Sep 17 00:00:00 2001
From: Theodore Ehrenborg
Date: Tue, 30 Jan 2024 23:13:50 +0000
Subject: [PATCH] Typos

---
 sparse_autoencoder/autoencoder/types.py             | 2 +-
 sparse_autoencoder/loss/decoded_activations_l2.py   | 2 +-
 sparse_autoencoder/source_data/text_dataset.py      | 2 +-
 sparse_autoencoder/train/utils/wandb_sweep_types.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sparse_autoencoder/autoencoder/types.py b/sparse_autoencoder/autoencoder/types.py
index a0b32fdc..ed76f2b7 100644
--- a/sparse_autoencoder/autoencoder/types.py
+++ b/sparse_autoencoder/autoencoder/types.py
@@ -8,7 +8,7 @@ class ResetOptimizerParameterDetails(NamedTuple):
     """Reset Optimizer Parameter Details.
 
     Details of a parameter that should be reset in the optimizer, when resetting
-    it's corresponding dictionary vectors.
+    its corresponding dictionary vectors.
     """
 
     parameter: Parameter
diff --git a/sparse_autoencoder/loss/decoded_activations_l2.py b/sparse_autoencoder/loss/decoded_activations_l2.py
index 2109dc3b..5ef61876 100644
--- a/sparse_autoencoder/loss/decoded_activations_l2.py
+++ b/sparse_autoencoder/loss/decoded_activations_l2.py
@@ -14,7 +14,7 @@ class L2ReconstructionLoss(AbstractLoss):
     """L2 Reconstruction loss.
 
     L2 reconstruction loss is calculated as the sum squared error between each each input vector
-    and it's corresponding decoded vector. The original paper found that models trained with some
+    and its corresponding decoded vector. The original paper found that models trained with some
     loss functions such as cross-entropy loss generally prefer to represent features
     polysemantically, whereas models trained with L2 may achieve the same loss for both polysemantic
     and monosemantic representations of true features.
diff --git a/sparse_autoencoder/source_data/text_dataset.py b/sparse_autoencoder/source_data/text_dataset.py
index 2d12f043..15f739a3 100644
--- a/sparse_autoencoder/source_data/text_dataset.py
+++ b/sparse_autoencoder/source_data/text_dataset.py
@@ -151,7 +151,7 @@ def push_to_hugging_face_hub(
                 is set.
             n_shards: Number of shards to split the dataset into. A high number is recommended
                 here to allow for flexible distributed training of SAEs across nodes (where e.g.
-                each node fetches it's own shard).
+                each node fetches its own shard).
             revision: Branch to push to.
             private: Whether to save the dataset privately.
 
diff --git a/sparse_autoencoder/train/utils/wandb_sweep_types.py b/sparse_autoencoder/train/utils/wandb_sweep_types.py
index a8530041..ebbb0e56 100644
--- a/sparse_autoencoder/train/utils/wandb_sweep_types.py
+++ b/sparse_autoencoder/train/utils/wandb_sweep_types.py
@@ -341,7 +341,7 @@ def __repr__(self) -> str:
 
 
 @dataclass(frozen=True)
-class NestedParameter(ABC):  # noqa: B024 (abstract so that we can check against it's type)
+class NestedParameter(ABC):  # noqa: B024 (abstract so that we can check against its type)
     """Nested Parameter.
 
     Example: