Skip to content

Commit

Permalink
Tighten up generate_with_config
Browse files Browse the repository at this point in the history
What?
=====

This improvement consists mostly of minor modifications to how the
code is structured. It removes some intermediate steps, resulting in
slightly more terse code that works well as a pipeline.

The one somewhat fancy thing it does is apply a pattern documented
in https://abramov.io/rust-dropping-things-in-another-thread to manage
the Aho-Corasick structure (dropping it on a background thread).
  • Loading branch information
joshuaclayton committed May 25, 2020
1 parent d3dd186 commit b1495f3
Showing 1 changed file with 16 additions and 23 deletions.
39 changes: 16 additions & 23 deletions crates/token_search/src/token_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,17 +147,14 @@ impl TokenSearchResults {
pub fn generate_with_config(config: &TokenSearchConfig) -> Self {
let loaded_files = Self::load_all_files(&config.files);

let filtered_results = config
let filtered_results: Vec<Token> = config
.tokens
.clone()
.into_iter()
.filter(|t| config.filter_token(t))
.filter(|t| config.filter_language(t));

let tokens: Vec<String> = filtered_results
.clone()
.map(|r| r.token.to_string())
.filter(|t| config.filter_token(t) && config.filter_language(t))
.collect();

let tokens: Vec<_> = filtered_results.iter().map(|r| &r.token).collect();
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostLongest)
.build(tokens);
Expand All @@ -166,13 +163,9 @@ impl TokenSearchResults {
.par_iter()
.progress_with(config.toggleable_progress_bar(&"🤔 Working...", loaded_files.len()))
.fold(HashMap::new, |mut results, (f, contents)| {
let mut matches: Vec<usize> = vec![];

for mat in ac.find_iter(contents) {
matches.push(mat.pattern());
}

for (key, res) in matches
for (key, res) in ac
.find_iter(contents)
.map(|v| v.pattern())
.into_iter()
.sorted_by_key(|&v| v)
.group_by(|&v| v)
Expand All @@ -197,15 +190,15 @@ impl TokenSearchResults {
})
});

let lookup: Vec<Token> = filtered_results.collect();
let final_results = res.iter().fold(Vec::new(), |mut acc, (idx, occurrences)| {
let token = lookup[*idx].clone();
acc.push(TokenSearchResult {
token,
occurrences: occurrences.clone(),
});
acc
});
let final_results = res
.into_iter()
.map(|(idx, occurrences)| TokenSearchResult {
token: filtered_results[idx].clone(),
occurrences,
})
.collect();

std::thread::spawn(move || drop(ac));

TokenSearchResults(final_results)
}
Expand Down

0 comments on commit b1495f3

Please sign in to comment.