Migrate to ruff format #225

Merged · 6 commits · Oct 25, 2023

Changes from all commits
13 changes: 3 additions & 10 deletions .pre-commit-config.yaml
@@ -18,17 +18,10 @@ repos:
- id: end-of-file-fixer
exclude: LICENSE

- repo: local
hooks:
- id: black
name: black
entry: poetry run black --config pyproject.toml
types: [python]
language: system

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.0.267'
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.2
hooks:
- id: ruff-format
- id: ruff

- repo: local
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -24,7 +24,7 @@ make lint

### Checks

Many checks are configured for this project. Command `make check-style` will check black and isort.
Many checks are configured for this project. Command `make check-style` will check style with `ruff`.
The `make check-safety` command will look at the security of your code.

Command `make lint` applies all checks.
8 changes: 4 additions & 4 deletions Makefile
@@ -78,15 +78,15 @@ update-deps:
#* Linting
.PHONY: check-style
check-style:
poetry run black --diff --check --config pyproject.toml ./
poetry run ruff --no-fix --config pyproject.toml ./
poetry run ruff format --check --config pyproject.toml ./
poetry run ruff check --no-fix --config pyproject.toml ./
# poetry run darglint --verbosity 2 inseq tests
# poetry run mypy --config-file pyproject.toml ./

.PHONY: fix-style
fix-style:
poetry run black --config pyproject.toml ./
poetry run ruff --config pyproject.toml ./
poetry run ruff format --config pyproject.toml ./
poetry run ruff check --config pyproject.toml ./

.PHONY: check-safety
check-safety:
48 changes: 28 additions & 20 deletions examples/inseq_tutorial.ipynb
@@ -170,7 +170,7 @@
"source": [
"import inseq\n",
"\n",
"# Load the model Helsinki-NLP/opus-mt-en-fr (6-layer encoder-decoder transformer) from the \n",
"# Load the model Helsinki-NLP/opus-mt-en-fr (6-layer encoder-decoder transformer) from the\n",
"# Huggingface Hub and hook it with the Input X Gradient feature attribution method\n",
"model = inseq.load_model(\"Helsinki-NLP/opus-mt-en-it\", \"input_x_gradient\")\n",
"\n",
@@ -180,7 +180,7 @@
"out = model.attribute(\n",
" input_texts=\"Hello everyone, hope you're enjoying the tutorial!\",\n",
" attribute_target=True,\n",
" step_scores=[\"probability\"]\n",
" step_scores=[\"probability\"],\n",
")\n",
"# Visualize the attributions and step scores\n",
"out.show()"
@@ -349,9 +349,7 @@
],
"source": [
"out = model.attribute(\n",
" input_texts=\"Hello everyone, hope you're enjoying the tutorial!\",\n",
" attribute_target=True,\n",
" method=\"attention\"\n",
" input_texts=\"Hello everyone, hope you're enjoying the tutorial!\", attribute_target=True, method=\"attention\"\n",
")\n",
"# out[0] is a shortcut for out.sequence_attributions[0]\n",
"out[0].source_attributions.shape"
@@ -535,10 +533,10 @@
],
"source": [
"# Gets the mean weights of the first three attention heads only, no normalization\n",
"# do_post_aggregation_checks=False is needed since the output has >2 dimensions and \n",
"# do_post_aggregation_checks=False is needed since the output has >2 dimensions and\n",
"# could not be visualized\n",
"aggregated_heads_seq_attr_out = out[0].aggregate(\n",
" \"mean\", select_idx=(0,3), normalize=False, do_post_aggregation_checks=False\n",
" \"mean\", select_idx=(0, 3), normalize=False, do_post_aggregation_checks=False\n",
")\n",
"\n",
"# (source_len, target_len, num_layers)\n",
@@ -726,7 +724,7 @@
" \"Domanda: Quanti studenti hanno partecipato alle LCL nel 2023?\"\n",
")\n",
"\n",
"qa_model = inseq.load_model(\"it5/it5-base-question-answering\", \"input_x_gradient\")\n",
"qa_model = inseq.load_model(\"it5/it5-base-question-answering\", \"input_x_gradient\")\n",
"out = qa_model.attribute(question, attribute_target=True, step_scores=[\"probability\"])\n",
"\n",
"# Aggregate only source tokens, leave target tokens as they are\n",
@@ -1097,7 +1095,7 @@
" contrast_targets=\"Ho salutato la manager\",\n",
" attribute_target=True,\n",
" # We also visualize the score used as target using the same function as step score\n",
" step_scores=[\"contrast_prob_diff\"]\n",
" step_scores=[\"contrast_prob_diff\"],\n",
")\n",
"\n",
"# Weight attribution scores by the difference in probabilities\n",
@@ -1212,10 +1210,18 @@
")\n",
"\n",
"source_without_context = \"Do you already know when you'll be back?\"\n",
"source_with_context = \"Thank you for your help, my friend, you really saved my life. Do you already know when you'll be back?\"\n",
"source_with_context = (\n",
" \"Thank you for your help, my friend, you really saved my life. Do you already know when you'll be back?\"\n",
")\n",
"\n",
"print(\"Generation without context:\", model.generate(source_without_context, forced_bos_token_id=model.tokenizer.lang_code_to_id[\"it_IT\"]))\n",
"print(\"Generation with context:\", model.generate(source_with_context, forced_bos_token_id=model.tokenizer.lang_code_to_id[\"it_IT\"]))\n",
"print(\n",
" \"Generation without context:\",\n",
" model.generate(source_without_context, forced_bos_token_id=model.tokenizer.lang_code_to_id[\"it_IT\"]),\n",
")\n",
"print(\n",
" \"Generation with context:\",\n",
" model.generate(source_with_context, forced_bos_token_id=model.tokenizer.lang_code_to_id[\"it_IT\"]),\n",
")\n",
"\n",
"out = model.attribute(\n",
" source_without_context,\n",
@@ -1224,7 +1230,7 @@
" contrast_targets=\"Grazie per il tuo aiuto, mi hai davvero salvato la vita. Sai già quando tornerai?\",\n",
" attribute_target=True,\n",
" # We also visualize the score used as target using the same function as step score\n",
" step_scores=[\"pcxmi\", \"probability\"]\n",
" step_scores=[\"pcxmi\", \"probability\"],\n",
")\n",
"\n",
"out.show()"
@@ -1336,8 +1342,8 @@
],
"source": [
"# Print tokens to get token indices\n",
"print([(i, x) for i, x in enumerate(model.encode(mt_target, as_targets=True).input_tokens[0])])\n",
"print([(i, x) for i, x in enumerate(model.encode(pe_target, as_targets=True).input_tokens[0])])"
"print(list(enumerate(model.encode(mt_target, as_targets=True).input_tokens[0])))\n",
"print(list(enumerate(model.encode(pe_target, as_targets=True).input_tokens[0])))"
]
},
{
@@ -1394,7 +1400,7 @@
" attributed_fn=\"contrast_prob_diff\",\n",
" step_scores=[\"contrast_prob_diff\"],\n",
" contrast_targets=pe_target,\n",
" contrast_targets_alignments=[(0,0), (1,1), (2,2), (3,4), (4,4), (5,5), (6,7), (7,9)],\n",
" contrast_targets_alignments=[(0, 0), (1, 1), (2, 2), (3, 4), (4, 4), (5, 5), (6, 7), (7, 9)],\n",
")\n",
"\n",
"# Reasonable alignments\n",
@@ -1504,9 +1510,10 @@
"metadata": {},
"outputs": [],
"source": [
"import inseq\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"\n",
"import inseq\n",
"\n",
"# The model is loaded in 8-bit on available GPUs using the bitsandbytes library integrated in HF Transformers\n",
"# This will make the model much smaller for inference purposes, but attributions are not guaranteed to match those\n",
"# of the full-precision model.\n",
@@ -1930,15 +1937,16 @@
}
],
"source": [
"from inseq import FeatureAttributionOutput\n",
"import pandas as pd\n",
"\n",
"from inseq import FeatureAttributionOutput\n",
"\n",
"scores = {}\n",
"\n",
"for layer_idx in range(48):\n",
" curr_out = FeatureAttributionOutput.load(f\"../data/cat_outputs/layer_{layer_idx}.json\")\n",
" out_dict = curr_out.get_scores_dicts(do_aggregation=False)[0]\n",
" scores[layer_idx] = [score for score in out_dict[\"target_attributions\"][\"ĠParis\"].values()][:-1]\n",
" scores[layer_idx] = list(out_dict[\"target_attributions\"][\"ĠParis\"].values())[:-1]\n",
"\n",
"prefix_tokens = list(out_dict[\"target_attributions\"][\"ĠParis\"].keys())\n",
"attributions_df = pd.DataFrame(scores, index=prefix_tokens[:-1])\n",
@@ -1989,7 +1997,7 @@
"ax.set_xticks([0.5 + i for i in range(0, attributions_df.values.shape[1], 4)])\n",
"ax.set_xticklabels(list(range(0, 48, 4)))\n",
"ax.set_yticklabels(attributions_df.index)\n",
"cb = plt.colorbar(h, ticks=[0, .15, .3, .45, .6, .75])\n",
"cb = plt.colorbar(h, ticks=[0, 0.15, 0.3, 0.45, 0.6, 0.75])\n",
"fig.suptitle(\"What activations are contributing to predicting 'Paris' over 'Rome'?\")\n",
"plt.savefig(filename)\n",
"plt.show()"
2 changes: 0 additions & 2 deletions inseq/attr/__init__.py
@@ -2,7 +2,6 @@
from .step_functions import (
STEP_SCORES_MAP,
StepFunctionArgs,
StepFunctionEncoderDecoderArgs,
list_step_functions,
register_step_function,
)
@@ -15,5 +14,4 @@
"STEP_SCORES_MAP",
"extract_args",
"StepFunctionArgs",
"StepFunctionEncoderDecoderArgs",
]
10 changes: 4 additions & 6 deletions inseq/attr/feat/attribution_utils.py
@@ -19,7 +19,6 @@
from ...models import AttributionModel
from .feature_attribution import FeatureAttribution


logger = logging.getLogger(__name__)


Expand All @@ -43,7 +42,7 @@ def rescale_attributions_to_tokens(
attributions: OneOrMoreAttributionSequences, tokens: OneOrMoreTokenSequences
) -> OneOrMoreAttributionSequences:
return [
attr[: len(tokens)] if not all([math.isnan(x) for x in attr]) else []
attr[: len(tokens)] if not all(math.isnan(x) for x in attr) else []
for attr, tokens in zip(attributions, tokens)
]

@@ -154,8 +153,7 @@ def get_source_target_attributions(
return attr[0], None
else:
return attr, None
elif isinstance(attr, tuple):
return None, attr[0]
else:
if isinstance(attr, tuple):
return None, attr[0]
else:
return None, attr
return None, attr
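
Note: this hunk, like the later one in `inseq/data/data_utils.py`, swaps an `all([...])` list comprehension for a generator expression. A minimal standalone sketch (the `is_all_nan` helper is hypothetical, not part of the PR) of why the generator form is preferable — `all()` can short-circuit instead of building the whole list first:

```python
import math

def is_all_nan(attr: list) -> bool:
    # Hypothetical helper mirroring the rewritten pattern: with a generator
    # expression, all() stops at the first non-NaN value; with all([...]),
    # every math.isnan call would run before any result is inspected.
    return all(math.isnan(x) for x in attr)

print(is_all_nan([float("nan"), float("nan")]))       # True
print(is_all_nan([float("nan"), 0.5, float("nan")]))  # False, stops at 0.5
```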
1 change: 1 addition & 0 deletions inseq/attr/feat/internals_attribution.py
@@ -29,6 +29,7 @@

class InternalsAttributionRegistry(FeatureAttribution, Registry):
r"""Model Internals-based attribution method registry."""

pass


16 changes: 6 additions & 10 deletions inseq/attr/feat/ops/lime.py
@@ -1,25 +1,21 @@
import inspect
import logging
import math
import warnings
from functools import partial
from typing import Any, Callable, Optional, cast

import torch
from captum._utils.common import (
_expand_additional_forward_args,
_expand_target,
)
from captum._utils.common import _expand_additional_forward_args, _expand_target
from captum._utils.models.linear_model import SkLearnLinearModel
from captum._utils.models.model import Model
from captum._utils.progress import progress
from captum._utils.typing import (
TargetType,
TensorOrTupleOfTensorsGeneric,
)
from captum._utils.typing import TargetType, TensorOrTupleOfTensorsGeneric
from captum.attr import LimeBase
from torch import Tensor
from torch.utils.data import DataLoader, TensorDataset

logger = logging.getLogger(__name__)


class Lime(LimeBase):
def __init__(
@@ -135,7 +131,7 @@ def attribute(
try:
curr_sample = next(perturb_generator)
except StopIteration:
warnings.warn("Generator completed prior to given n_samples iterations!")
logger.warning("Generator completed prior to given n_samples iterations!")
break
else:
curr_sample = self.perturb_func(inputs, **kwargs)
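
The `warnings.warn` call above becomes `logger.warning`, using the module-level logger this same hunk adds at the top of `lime.py`. A small sketch (not from the PR) of the practical difference:

```python
import logging

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.WARNING)

# A logging call respects the application's logging configuration, so library
# users can silence, capture, or reformat it; warnings.warn instead goes
# through the warnings filter and typically fires only once per call site.
logger.warning("Generator completed prior to given n_samples iterations!")
```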
5 changes: 4 additions & 1 deletion inseq/attr/feat/ops/sequential_integrated_gradients.py
@@ -151,7 +151,10 @@ def attribute( # type: ignore
method: str = "gausslegendre",
internal_batch_size: Union[None, int] = None,
return_convergence_delta: bool = False,
) -> Union[TensorOrTupleOfTensorsGeneric, Tuple[TensorOrTupleOfTensorsGeneric, Tensor],]:
) -> Union[
TensorOrTupleOfTensorsGeneric,
Tuple[TensorOrTupleOfTensorsGeneric, Tensor],
]:
r"""
This method attributes the output of the model with given target index
(in case it is provided, otherwise it assumes that output is a
2 changes: 1 addition & 1 deletion inseq/data/data_utils.py
@@ -114,7 +114,7 @@ def _eq(self_attr: TensorClass, other_attr: TensorClass) -> bool:
if isinstance(self_attr, torch.Tensor):
return torch.allclose(self_attr, other_attr, equal_nan=True)
elif isinstance(self_attr, dict):
return all([TensorWrapper._eq(self_attr[k], other_attr[k]) for k in self_attr.keys()])
return all(TensorWrapper._eq(self_attr[k], other_attr[k]) for k in self_attr.keys())
else:
return self_attr == other_attr
except: # noqa: E722
5 changes: 3 additions & 2 deletions inseq/models/attribution_model.py
@@ -5,7 +5,7 @@

import torch

from ..attr import STEP_SCORES_MAP, StepFunctionArgs, StepFunctionEncoderDecoderArgs
from ..attr import STEP_SCORES_MAP, StepFunctionArgs
from ..attr.feat import FeatureAttribution, extract_args, join_token_ids
from ..data import (
BatchEncoding,
@@ -144,8 +144,9 @@ def get_text_sequences(
raise NotImplementedError()

@staticmethod
@abstractmethod
def get_step_function_reserved_args() -> List[str]:
return [f.name for f in StepFunctionEncoderDecoderArgs.__dataclass_fields__.values()]
raise NotImplementedError()

@staticmethod
def format_contrast_targets_alignments(
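
With `get_step_function_reserved_args` now abstract, each concrete model class must supply its own reserved-argument names; the removed base implementation did this by reading the fields of `StepFunctionEncoderDecoderArgs`. A hedged sketch of the pattern using stand-in classes (the real dataclass fields may differ):

```python
from dataclasses import dataclass
from typing import List

@dataclass
class StepFunctionEncoderDecoderArgs:
    # Stand-in for the real dataclass in inseq.attr; field names are illustrative.
    attribution_model: object
    forward_output: object
    target_ids: object

class EncoderDecoderAttributionModel:
    # Stand-in for a concrete subclass of AttributionModel.
    @staticmethod
    def get_step_function_reserved_args() -> List[str]:
        # The same field-name lookup the old base-class default performed,
        # now owned by the concrete class.
        return [f.name for f in StepFunctionEncoderDecoderArgs.__dataclass_fields__.values()]

print(EncoderDecoderAttributionModel.get_step_function_reserved_args())
# ['attribution_model', 'forward_output', 'target_ids']
```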
4 changes: 2 additions & 2 deletions inseq/utils/alignment_utils.py
@@ -107,7 +107,7 @@ def compute_word_aligns(
align_words = set()
for i, j in align_subwords:
align_words.add((sub2word_map_src[i], sub2word_map_tgt[j]))
word_alignments = [(a_idx, b_idx) for a_idx, b_idx in sorted(align_words, key=lambda x: (x[0], x[1]))]
word_alignments = sorted(align_words, key=lambda x: (x[0], x[1]))
return AlignedSequences(
source_tokens=src.copy(),
target_tokens=tgt.copy(),
@@ -246,7 +246,7 @@ def auto_align_sequences(
rm_b_idx = removed_b_token_idxs[removed_b_tokens.index(rm_a)]
aligned_special_tokens.append((rm_a_idx, rm_b_idx))
else:
aligned_special_tokens = [(rm_a, rm_b) for rm_a, rm_b in zip(removed_a_token_idxs, removed_b_token_idxs)]
aligned_special_tokens = list(zip(removed_a_token_idxs, removed_b_token_idxs))
a_word_to_token_align = align_tokenizations(a_words, clean_a_tokens)
b_word_to_token_align = align_tokenizations(b_words, clean_b_tokens)
# 3. Propagate word-level alignments to token-level alignments.
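
Both alignment rewrites are behavior-preserving: the comprehensions only repackaged what `sorted(...)` and `zip(...)` already return. A quick standalone check (not from the PR) under made-up inputs:

```python
align_words = {(2, 1), (0, 3), (0, 1)}
removed_a_token_idxs, removed_b_token_idxs = [0, 1, 2], [5, 6, 7]

# sorted() already yields a list of the tuples, so the wrapping comprehension
# added nothing.
assert sorted(align_words, key=lambda x: (x[0], x[1])) == [
    (a, b) for a, b in sorted(align_words, key=lambda x: (x[0], x[1]))
]

# Likewise, list(zip(...)) equals the comprehension's output verbatim.
assert list(zip(removed_a_token_idxs, removed_b_token_idxs)) == [
    (a, b) for a, b in zip(removed_a_token_idxs, removed_b_token_idxs)
]
print("both rewrites are equivalent")
```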