From cf8f61b0b22f97330c3e3e3f85a63fb0f69bf2ab Mon Sep 17 00:00:00 2001
From: Theodore Ehrenborg
Date: Tue, 30 Jan 2024 23:13:50 +0000
Subject: [PATCH] Typos

---
 sparse_autoencoder/autoencoder/types.py             | 2 +-
 sparse_autoencoder/loss/decoded_activations_l2.py   | 2 +-
 sparse_autoencoder/source_data/text_dataset.py      | 2 +-
 sparse_autoencoder/train/utils/wandb_sweep_types.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sparse_autoencoder/autoencoder/types.py b/sparse_autoencoder/autoencoder/types.py
index a0b32fdc..ed76f2b7 100644
--- a/sparse_autoencoder/autoencoder/types.py
+++ b/sparse_autoencoder/autoencoder/types.py
@@ -8,7 +8,7 @@ class ResetOptimizerParameterDetails(NamedTuple):
     """Reset Optimizer Parameter Details.
 
     Details of a parameter that should be reset in the optimizer, when resetting
-    it's corresponding dictionary vectors.
+    its corresponding dictionary vectors.
     """
 
     parameter: Parameter
diff --git a/sparse_autoencoder/loss/decoded_activations_l2.py b/sparse_autoencoder/loss/decoded_activations_l2.py
index 2109dc3b..5ef61876 100644
--- a/sparse_autoencoder/loss/decoded_activations_l2.py
+++ b/sparse_autoencoder/loss/decoded_activations_l2.py
@@ -14,7 +14,7 @@ class L2ReconstructionLoss(AbstractLoss):
     """L2 Reconstruction loss.
 
     L2 reconstruction loss is calculated as the sum squared error between each each input vector
-    and it's corresponding decoded vector. The original paper found that models trained with some
+    and its corresponding decoded vector. The original paper found that models trained with some
     loss functions such as cross-entropy loss generally prefer to represent features
     polysemantically, whereas models trained with L2 may achieve the same loss for both polysemantic
     and monosemantic representations of true features.
diff --git a/sparse_autoencoder/source_data/text_dataset.py b/sparse_autoencoder/source_data/text_dataset.py
index 2d12f043..15f739a3 100644
--- a/sparse_autoencoder/source_data/text_dataset.py
+++ b/sparse_autoencoder/source_data/text_dataset.py
@@ -151,7 +151,7 @@ def push_to_hugging_face_hub(
                 is set.
             n_shards: Number of shards to split the dataset into. A high number is recommended
                 here to allow for flexible distributed training of SAEs across nodes (where e.g.
-                each node fetches it's own shard).
+                each node fetches its own shard).
             revision: Branch to push to.
             private: Whether to save the dataset privately.
 
diff --git a/sparse_autoencoder/train/utils/wandb_sweep_types.py b/sparse_autoencoder/train/utils/wandb_sweep_types.py
index a8530041..ebbb0e56 100644
--- a/sparse_autoencoder/train/utils/wandb_sweep_types.py
+++ b/sparse_autoencoder/train/utils/wandb_sweep_types.py
@@ -341,7 +341,7 @@ def __repr__(self) -> str:
 
 
 @dataclass(frozen=True)
-class NestedParameter(ABC):  # noqa: B024 (abstract so that we can check against it's type)
+class NestedParameter(ABC):  # noqa: B024 (abstract so that we can check against its type)
     """Nested Parameter.
 
     Example: