Skip to content

Commit

Permalink
revert and simplify
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Jun 28, 2024
1 parent ed3428a commit f53e514
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 19 deletions.
5 changes: 1 addition & 4 deletions tokenizers/src/tokenizer/added_vocabulary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,17 +216,14 @@ impl AddedVocabulary {
}

/// Get the token matching the given id if it exists.
///
/// The added-token map (`added_tokens_map_r`) is consulted first; only when it holds no
/// entry for `id` is the lookup delegated to `model.id_to_token(id)`.
///
/// Returns an owned copy of the added token's `content` when the id is found in the map,
/// otherwise whatever `model.id_to_token(id)` returns.
#[deprecated(
    since = "0.19.0",
    note = "please use `added_vocabulary.simple_id_to_token(id).or_else(|| model.id_to_token(id))` instead"
)]
pub fn id_to_token(&self, id: u32, model: &impl Model) -> Option<String> {
    self.added_tokens_map_r
        .get(&id)
        .map(|t| t.content.clone())
        // Lazily fall back to the model so it is only queried on a miss.
        .or_else(|| model.id_to_token(id))
}

/// Look up `id` among the added tokens only and return a copy of its content.
///
/// Returns `None` when `added_tokens_map_r` has no entry for `id`; unlike
/// `id_to_token`, no model is consulted as a fallback.
pub fn simple_id_to_token(&self, id: u32) -> Option<String> {
    let added = self.added_tokens_map_r.get(&id)?;
    Some(added.content.clone())
}
Expand Down
19 changes: 4 additions & 15 deletions tokenizers/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -851,24 +851,13 @@ where
.iter()
.filter_map(|id| {
self.added_vocabulary
.simple_id_to_token(*id)
.and_then(|token| {
if skip_special_tokens && self.added_vocabulary.is_special_token(&token) {
None
} else if let Some(pre_tok) = &self.pre_tokenizer {
let mut string = PreTokenizedString::from(token);
pre_tok.pre_tokenize(&mut string);
println!("Pre-tok String: {}", string.original);
Some(string.original)
} else {
println!("String: {}", token);
Some(token)
}
.id_to_token(*id, &self.model)
.filter(|token| {
!skip_special_tokens || !self.added_vocabulary.is_special_token(token)
})
.or_else(|| self.model.id_to_token(*id))
})
.collect::<Vec<_>>();
println!("This should print: {:?}", tokens);

if let Some(decoder) = &self.decoder {
decoder.decode(tokens)
} else {
Expand Down

0 comments on commit f53e514

Please sign in to comment.