Skip to content

Commit

Permalink
perf(traverse): generate_uid cache available binding names
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Sep 8, 2024
1 parent 4e748b5 commit 044958c
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 84 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/oxc_traverse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ oxc_syntax = { workspace = true }

compact_str = { workspace = true }
memoffset = { workspace = true }
rustc-hash = { workspace = true }
229 changes: 145 additions & 84 deletions crates/oxc_traverse/src/context/scoping.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use std::{cell::Cell, str};
use std::{cell::Cell, collections::hash_map::Entry, str};

use compact_str::{format_compact, CompactString};
use rustc_hash::FxHashMap;

#[allow(clippy::wildcard_imports)]
use oxc_ast::{ast::*, visit::Visit};
use oxc_semantic::{AstNodeId, Reference, ScopeTree, SymbolTable};
use oxc_span::{Atom, CompactStr, Span, SPAN};
use oxc_span::{format_compact_str, Atom, CompactStr, Span, SPAN};
use oxc_syntax::{
reference::{ReferenceFlags, ReferenceId},
scope::{ScopeFlags, ScopeId},
Expand All @@ -23,6 +25,7 @@ use crate::scopes_collector::ChildScopeCollector;
pub struct TraverseScoping {
scopes: ScopeTree,
symbols: SymbolTable,
uid_names: Option<FxHashMap<CompactStr, Vec<CompactStr>>>,
current_scope_id: ScopeId,
}

Expand Down Expand Up @@ -174,20 +177,18 @@ impl TraverseScoping {
///
/// # Potential improvements
///
/// TODO(improve-on-babel)
/// TODO(improve-on-babel):
///
/// This function is fairly expensive, because it aims to replicate Babel's output.
/// `name_is_unique` method below searches through every single binding in the entire program
/// and does a string comparison on each.
/// If the first name tried is already in use, it will repeat that entire search with a new name,
/// potentially multiple times.
///
/// We could improve this in one of 2 ways:
/// `init_uid_names` iterates through every single binding and unresolved reference in the entire AST,
/// and builds a hashmap of `Vec`s of symbols which could clash with UIDs.
/// Once that's built, it's cached, but still `find_uid_name` has to do a hashmap lookup, and if the
/// base name is in the hash table, it has to iterate through all the existing symbols for that base.
///
/// We could improve this in one of 3 ways:
///
/// 1. Semantic generate a hash set of all identifier names used in program.
/// Check for uniqueness would then be just 1 x hashset lookup for each name that's tried.
/// This would maintain output parity with Babel.
/// But building the hash set would add some overhead to semantic.
/// 1. Build the hash table in `SemanticBuilder` instead of iterating through all symbols again here.
///
/// 2. Use a much simpler method:
///
Expand All @@ -201,11 +202,21 @@ impl TraverseScoping {
///
/// Minimal cost in semantic, and generating UIDs extremely cheap.
///
/// This is a slightly different method from Babel, but hopefully close enough that output will
/// match Babel for most (or maybe all) test cases.
/// This is a slightly different method from Babel, and unfortunately does produce UID names
/// which differ from Babel for some of its test cases.
///
/// 3. If output is being minified anyway, use an even cheaper method which produces less debuggable
/// output, but is even simpler:
///
/// * During initial semantic pass, check for any existing identifiers starting with `_`.
/// * Find the highest number of leading `_`s for any existing symbol.
/// * Generate UIDs with a counter starting at 0, prefixed with number of `_`s one greater than
/// what was found in AST.
/// i.e. if source contains identifiers `_foo` and `__bar`, create UIDs names `___0`, `___1`,
/// `___2` etc. They'll all be unique within the program.
pub fn generate_uid(&mut self, name: &str, scope_id: ScopeId, flags: SymbolFlags) -> SymbolId {
// Get name for UID
let name = CompactStr::new(&self.find_uid_name(name));
let name = self.find_uid_name(name);

// Add binding to scope
let symbol_id =
Expand Down Expand Up @@ -428,6 +439,7 @@ impl TraverseScoping {
Self {
scopes,
symbols,
uid_names: None,
// Dummy value. Immediately overwritten in `walk_program`.
current_scope_id: ScopeId::new(0),
}
Expand All @@ -440,82 +452,70 @@ impl TraverseScoping {
}

/// Find a variable name which can be used as a UID
fn find_uid_name(&self, name: &str) -> CompactString {
let mut name = create_uid_name_base(name);

// Try the name without a numerical postfix (i.e. plain `_temp`)
if self.name_is_unique(&name) {
return name;
}

// It's fairly common that UIDs may need a numerical postfix, so we try to keep string
// operations to a minimum for postfixes up to 99 - using `replace_range` on a single
// `CompactStr`, rather than generating a new string on each attempt.
// Postfixes greater than 99 should be very uncommon, so don't bother optimizing.

// Try single-digit postfixes (i.e. `_temp2`, `_temp3` ... `_temp9`)
name.push('2');
if self.name_is_unique(&name) {
return name;
fn find_uid_name(&mut self, name: &str) -> CompactStr {
// If `uid_names` is not already populated, initialize it
if self.uid_names.is_none() {
self.init_uid_names();
}
for c in b'3'..=b'9' {
name.replace_range(name.len() - 1.., str::from_utf8(&[c]).unwrap());
if self.name_is_unique(&name) {
return name;
let uid_names = self.uid_names.as_mut().unwrap();

let base = get_uid_name_base(name);
match uid_names.entry(CompactStr::from(base)) {
Entry::Occupied(mut entry) => {
let uid = CompactStr::from(get_unique_name(base, entry.get()));
entry.get_mut().push(uid.clone());
uid
}
Entry::Vacant(entry) => {
let uid = format_compact_str!("_{base}");
entry.insert(vec![uid.clone()]);
uid
}
}
}

// Try double-digit postfixes (i.e. `_temp10` ... `_temp99`)
name.replace_range(name.len() - 1.., "1");
name.push('0');
let mut c1 = b'1';
loop {
if self.name_is_unique(&name) {
return name;
/// Initialize `uid_names`.
///
/// Iterate through all symbols and unresolved references in AST and identify any var names
/// which could clash with UIDs (start with `_`).
///
/// Compile a hashmap mapping var names (without leading `_`s or trailing digits) to any symbols
/// used in AST matching `<1 or more underscores>name<any number of digits>`.
///
/// Once this map is created, generating a UID is a relatively quick operation, rather than
/// iterating over all symbols and unresolved references every time generate a UID.
fn init_uid_names(&mut self) {
let mut uid_names = FxHashMap::<CompactStr, Vec<CompactStr>>::default();
for name in self.scopes.root_unresolved_references().keys().chain(self.symbols.names.iter())
{
if name.as_bytes().first() != Some(&b'_') {
continue;
}
for c2 in b'1'..=b'9' {
name.replace_range(name.len() - 1.., str::from_utf8(&[c2]).unwrap());
if self.name_is_unique(&name) {
return name;
// SAFETY: We just checked 1st byte is `_`, so safe to trim it off
let base = unsafe { str::from_utf8_unchecked(&name.as_bytes()[1..]) };
let base = get_uid_name_base(base);

match uid_names.entry(CompactStr::from(base)) {
Entry::Occupied(mut entry) => {
if !entry.get().iter().any(|existing_name| existing_name == name) {
entry.get_mut().push(name.clone());
}
}
Entry::Vacant(entry) => {
entry.insert(vec![name.clone()]);
}
}
if c1 == b'9' {
break;
}
c1 += 1;
name.replace_range(name.len() - 2.., str::from_utf8(&[c1, b'0']).unwrap());
}

// Try longer postfixes (`_temp100` upwards)
let name_base = {
name.pop();
name.pop();
&*name
};
for n in 100..=usize::MAX {
let name = format_compact!("{}{}", name_base, n);
if self.name_is_unique(&name) {
return name;
}
}

panic!("Cannot generate UID");
}

fn name_is_unique(&self, name: &str) -> bool {
!self.scopes.root_unresolved_references().contains_key(name)
&& !self.symbols.names.iter().any(|n| n.as_str() == name)
self.uid_names = Some(uid_names);
}
}

/// Create base for UID name based on provided `name`.
/// i.e. if `name` is "foo", returns "_foo".
/// We use `CompactString` to avoid any allocations where `name` is less than 22 bytes (the common case).
fn create_uid_name_base(name: &str) -> CompactString {
// Trim `_`s from start, and `0-9`s from end.
// Code below is equivalent to
// `let name = name.trim_start_matches('_').trim_end_matches(|c: char| c.is_ascii_digit());`
// but more efficient as operates on bytes not chars.
/// Trim `_`s from start and digits from end.
/// i.e. `__foo123` -> `foo`
fn get_uid_name_base(name: &str) -> &str {
// Equivalent to `name.trim_start_matches('_').trim_end_matches(|c: char| c.is_ascii_digit())`
// but more efficient as operates on bytes not chars
let mut bytes = name.as_bytes();
while bytes.first() == Some(&b'_') {
bytes = &bytes[1..];
Expand All @@ -525,14 +525,75 @@ fn create_uid_name_base(name: &str) -> CompactString {
}
// SAFETY: We started with a valid UTF8 `&str` and have only trimmed off ASCII characters,
// so remainder must still be valid UTF8
let name = unsafe { str::from_utf8_unchecked(bytes) };
unsafe { str::from_utf8_unchecked(bytes) }
}

fn get_unique_name(base: &str, used: &[CompactStr]) -> CompactString {
// Create `CompactString` prepending name with `_`, and with 1 byte excess capacity.
// The extra byte is to avoid reallocation if need to add a digit on the end later,
// which will not be too uncommon.
// Having to add 2 digits will be uncommon, so we don't allocate 2 extra bytes for 2 digits.
let mut str = CompactString::with_capacity(name.len() + 2);
str.push('_');
str.push_str(name);
str
let mut name = CompactString::with_capacity(base.len() + 2);
name.push('_');
name.push_str(base);

let name_is_unique = |name: &str| !used.iter().any(|used_name| used_name == name);

// Try the name without a numerical postfix (i.e. plain `_temp`)
if name_is_unique(&name) {
return name;
}

// It's fairly common that UIDs may need a numerical postfix, so we try to keep string
// operations to a minimum for postfixes up to 99 - using `replace_range` on a single
// `CompactStr`, rather than generating a new string on each attempt.
// Postfixes greater than 99 should be very uncommon, so don't bother optimizing.

// Try single-digit postfixes (i.e. `_temp2`, `_temp3` ... `_temp9`)
name.push('2');
if name_is_unique(&name) {
return name;
}
for c in b'3'..=b'9' {
name.replace_range(name.len() - 1.., str::from_utf8(&[c]).unwrap());
if name_is_unique(&name) {
return name;
}
}

// Try double-digit postfixes (i.e. `_temp10` ... `_temp99`)
name.replace_range(name.len() - 1.., "1");
name.push('0');
let mut c1 = b'1';
loop {
if name_is_unique(&name) {
return name;
}
for c2 in b'1'..=b'9' {
name.replace_range(name.len() - 1.., str::from_utf8(&[c2]).unwrap());
if name_is_unique(&name) {
return name;
}
}
if c1 == b'9' {
break;
}
c1 += 1;
name.replace_range(name.len() - 2.., str::from_utf8(&[c1, b'0']).unwrap());
}

// Try longer postfixes (`_temp100` upwards)
let name_base = {
name.pop();
name.pop();
&*name
};
for n in 100..=usize::MAX {
let name = format_compact!("{}{}", name_base, n);
if name_is_unique(&name) {
return name;
}
}

panic!("Cannot generate UID");
}

0 comments on commit 044958c

Please sign in to comment.