From a1bb88169a6da996ac0249f2bea3c0a7eba68528 Mon Sep 17 00:00:00 2001
From: Gabriele Sarti
Date: Thu, 19 Oct 2023 08:13:06 +0200
Subject: [PATCH] Format fixes, add Attanasio et al. (2023) to readme

---
 README.md                         | 1 +
 inseq/models/attribution_model.py | 9 ++++-----
 inseq/utils/alignment_utils.py    | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 92c8a5e8..35650658 100644
--- a/README.md
+++ b/README.md
@@ -268,6 +268,7 @@ Inseq has been used in various research projects. A list of known publications t
 - Are Character-level Translations Worth the Wait? Comparing Character- and Subword-level Models for Machine Translation (Edman et al., 2023)
 - Response Generation in Longitudinal Dialogues: Which Knowledge Representation Helps? (Mousavi et al., 2023)
 - Quantifying the Plausibility of Context Reliance in Neural Machine Translation (Sarti et al., 2023)
+- A Tale of Pronouns: Interpretability Informs Gender Bias Mitigation for Fairer Instruction-Tuned Machine Translation (Attanasio et al., 2023)
diff --git a/inseq/models/attribution_model.py b/inseq/models/attribution_model.py
index 0d0e4e70..f1015774 100644
--- a/inseq/models/attribution_model.py
+++ b/inseq/models/attribution_model.py
@@ -404,11 +404,10 @@ def attribute(
             generated_texts = self.generate(
                 encoded_input, return_generation_output=False, batch_size=batch_size, **generation_args
             )
-        else:
-            if generation_args:
-                logger.warning(
-                    f"Generation arguments {generation_args} are provided, but will be ignored (constrained decoding)."
-                )
+        elif generation_args:
+            logger.warning(
+                f"Generation arguments {generation_args} are provided, but will be ignored (constrained decoding)."
+            )
         logger.debug(f"reference_texts={generated_texts}")
         attribution_method = self.get_attribution_method(method, override_default_attribution)
         attributed_fn = self.get_attributed_fn(attributed_fn)
diff --git a/inseq/utils/alignment_utils.py b/inseq/utils/alignment_utils.py
index 8000dc69..8cf001b2 100644
--- a/inseq/utils/alignment_utils.py
+++ b/inseq/utils/alignment_utils.py
@@ -74,10 +74,10 @@ def _get_aligner_subword_aligns(
 ) -> torch.Tensor:
     aligner = get_aligner_model()
     tokenizer = get_aligner_tokenizer()
-    tok_aenized = [tokenizer.tokenize(word) for word in src]
-    tok_benized = [tokenizer.tokenize(word) for word in tgt]
-    ids_src, sub2word_map_src = _preprocess_sequence_for_alignment(tok_aenized)
-    ids_tgt, sub2word_map_tgt = _preprocess_sequence_for_alignment(tok_benized)
+    tokenized_src = [tokenizer.tokenize(word) for word in src]
+    tokenized_tgt = [tokenizer.tokenize(word) for word in tgt]
+    ids_src, sub2word_map_src = _preprocess_sequence_for_alignment(tokenized_src)
+    ids_tgt, sub2word_map_tgt = _preprocess_sequence_for_alignment(tokenized_tgt)
     aligner.eval()
     with torch.no_grad():
         out_src = aligner(ids_src.unsqueeze(0), output_hidden_states=True)[2][align_layer][0, 1:-1]
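
Note (not part of the patch): a minimal standalone sketch of the control flow produced by the attribution_model.py hunk. The names resolve_reference_texts and generate_fn are hypothetical stand-ins, and the `generated_texts is None` guard is an assumption about the surrounding code, which the hunk does not show:

    import logging

    logger = logging.getLogger(__name__)

    def resolve_reference_texts(generated_texts, generation_args, generate_fn):
        # No reference texts: generate them, forwarding any generation arguments.
        if generated_texts is None:
            generated_texts = generate_fn(**generation_args)
        # Reference texts given: decoding is constrained by them, so generation
        # arguments are unused and the user is warned (the patched elif branch).
        elif generation_args:
            logger.warning(
                f"Generation arguments {generation_args} are provided, but will be ignored (constrained decoding)."
            )
        return generated_texts

    resolve_reference_texts(None, {"max_new_tokens": 5}, lambda **kw: ["generated"])
    resolve_reference_texts(["Bonjour le monde"], {"max_new_tokens": 5}, lambda **kw: [])  # emits the warning

The refactor is behavior-preserving: `else:` followed by a nested `if cond:` is equivalent to `elif cond:`, so the change only removes one level of nesting.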
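
Note (not part of the patch): the renamed variables in alignment_utils.py feed a subword-to-word map used to collapse subword-level alignments back to word level, in the style of awesome-align-like aligners. A minimal sketch of that preprocessing, assuming an illustrative multilingual BERT tokenizer from Hugging Face transformers (the actual tokenizer comes from get_aligner_tokenizer()):

    from transformers import AutoTokenizer

    # Illustrative choice; inseq resolves its own aligner tokenizer internally.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")

    src = ["Interpretability", "informs", "mitigation"]
    tokenized_src = [tokenizer.tokenize(word) for word in src]

    # Flatten the per-word subwords and record which word each subword came
    # from, so alignments computed over subwords map back to whole words.
    subwords_src = [sw for word in tokenized_src for sw in word]
    sub2word_map_src = [i for i, word in enumerate(tokenized_src) for _ in word]

    print(subwords_src)      # e.g. ['Inter', '##pre', '##tab', ...]
    print(sub2word_map_src)  # e.g. [0, 0, 0, 0, 1, 2] (one word index per subword)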