Skip to content

Commit

Permalink
Auto merge of #60659 - nnethercote:tweak-Symbol-and-InternedString, r…
Browse files Browse the repository at this point in the history
…=<try>

Tweak `Symbol` and `InternedString`

Some minor improvements to speed and code cleanliness.

r? @Zoxc
  • Loading branch information
bors committed May 9, 2019
2 parents 9f83961 + 3e5e8d3 commit 9a6dab6
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 29 deletions.
4 changes: 2 additions & 2 deletions src/librustc_codegen_llvm/debuginfo/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ impl TypeMap<'ll, 'tcx> {
// the id is unknown.
fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> &str {
let UniqueTypeId(interner_key) = unique_type_id;
self.unique_id_interner.get(interner_key)
self.unique_id_interner.symbol_str(interner_key)
}

// Get the UniqueTypeId for the given type. If the UniqueTypeId for the given
Expand Down Expand Up @@ -226,7 +226,7 @@ impl TypeMap<'ll, 'tcx> {
let variant_part_type_id = format!("{}_variant_part",
self.get_unique_type_id_as_string(enum_type_id));
let interner_key = self.unique_id_interner.intern(&variant_part_type_id);
self.unique_id_interner.get(interner_key)
self.unique_id_interner.symbol_str(interner_key)
}
}

Expand Down
82 changes: 55 additions & 27 deletions src/libsyntax_pos/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,18 @@ impl Decodable for Ident {
}
}

/// A symbol is an interned or gensymed string. The use of `newtype_index!` means
/// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index!` reserves
/// the last 256 values for tagging purposes.
/// A symbol is an interned or gensymed string. (A gensym is a special kind of
/// symbol that is never equal to any other symbol. E.g.:
/// - `Symbol::intern("x") == Symbol::intern("x")`
/// - `Symbol::gensym("x") != Symbol::intern("x")`
/// - `Symbol::gensym("x") != Symbol::gensym("x")`
///
/// Gensyms are useful when creating new identifiers that must not match any
/// existing identifiers during macro expansion and syntax desugaring.)
///
/// The use of `newtype_index!` means that `Option<Symbol>` only takes up 4
/// bytes, because `newtype_index!` reserves the last 256 values for tagging
/// purposes.
///
/// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
/// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
Expand All @@ -367,10 +376,6 @@ impl Symbol {
with_interner(|interner| interner.intern(string))
}

pub fn interned(self) -> Self {
with_interner(|interner| interner.interned(self))
}

/// Gensyms a new `usize`, using the current interner.
pub fn gensym(string: &str) -> Self {
with_interner(|interner| interner.gensym(string))
Expand All @@ -387,7 +392,7 @@ impl Symbol {
pub fn as_str(self) -> LocalInternedString {
with_interner(|interner| unsafe {
LocalInternedString {
string: std::mem::transmute::<&str, &str>(interner.get(self))
string: std::mem::transmute::<&str, &str>(interner.symbol_or_gensym_str(self))
}
})
}
Expand Down Expand Up @@ -488,11 +493,11 @@ impl Interner {
name
}

pub fn interned(&self, symbol: Symbol) -> Symbol {
fn interned(&self, symbol: Symbol) -> Symbol {
if (symbol.0.as_usize()) < self.strings.len() {
symbol
} else {
self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]
}
}

Expand All @@ -510,10 +515,19 @@ impl Interner {
symbol.0.as_usize() >= self.strings.len()
}

pub fn get(&self, symbol: Symbol) -> &str {
/// Get the chars of a normal (non-gensym) symbol.
pub fn symbol_str(&self, symbol: Symbol) -> &str {
self.strings[symbol.0.as_usize()]
}

/// Get the chars of a normal or gensym symbol.
fn symbol_or_gensym_str(&self, symbol: Symbol) -> &str {
match self.strings.get(symbol.0.as_usize()) {
Some(string) => string,
None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
None => {
let symbol = self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize];
self.strings[symbol.0.as_usize()]
}
}
}
}
Expand Down Expand Up @@ -611,11 +625,17 @@ fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
}

/// Represents a string stored in the interner. Because the interner outlives any thread
/// which uses this type, we can safely treat `string` which points to interner data,
/// as an immortal string, as long as this type never crosses between threads.
// FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
// by creating a new thread right after constructing the interner.
/// An alternative to `Symbol` and `LocalInternedString`, useful when the chars
/// within the symbol need to be accessed. It is best used for temporary
/// values.
///
/// Because the interner outlives any thread which uses this type, we can
/// safely treat `string` which points to interner data, as an immortal string,
/// as long as this type never crosses between threads.
//
// FIXME: ensure that the interner outlives any thread which uses
// `LocalInternedString`, by creating a new thread right after constructing the
// interner.
#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
pub struct LocalInternedString {
string: &'static str,
Expand Down Expand Up @@ -708,7 +728,12 @@ impl Encodable for LocalInternedString {
}
}

/// Represents a string stored in the string interner.
/// A thin wrapper around `Symbol`. It has two main differences to `Symbol`.
/// - Its implementations of `Hash`, `PartialOrd` and `Ord` work with the
/// string chars rather than the symbol integer. This is useful when
/// hash stability is required across compile sessions, or a guaranteed sort
/// ordering is required.
/// - It is guaranteed to not be a gensym symbol.
#[derive(Clone, Copy, Eq)]
pub struct InternedString {
symbol: Symbol,
Expand All @@ -717,14 +742,23 @@ pub struct InternedString {
impl InternedString {
pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
let str = with_interner(|interner| {
interner.get(self.symbol) as *const str
interner.symbol_str(self.symbol) as *const str
});
// This is safe because the interner keeps string alive until it is dropped.
// We can access it because we know the interner is still alive since we use a
// scoped thread local to access it, and it was alive at the beginning of this scope
unsafe { f(&*str) }
}

pub fn with2<F: FnOnce(&str, &str) -> R, R>(self, other: &InternedString, f: F) -> R {
let (self_str, other_str) = with_interner(|interner| {
(interner.symbol_str(self.symbol) as *const str,
interner.symbol_str(other.symbol) as *const str)
});
// This is safe for the same reason that `with` is safe.
unsafe { f(&*self_str, &*other_str) }
}

pub fn as_symbol(self) -> Symbol {
self.symbol
}
Expand All @@ -745,7 +779,7 @@ impl PartialOrd<InternedString> for InternedString {
if self.symbol == other.symbol {
return Some(Ordering::Equal);
}
self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
self.with2(other, |self_str, other_str| self_str.partial_cmp(other_str))
}
}

Expand All @@ -754,7 +788,7 @@ impl Ord for InternedString {
if self.symbol == other.symbol {
return Ordering::Equal;
}
self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
self.with2(other, |self_str, other_str| self_str.cmp(other_str))
}
}

Expand Down Expand Up @@ -794,12 +828,6 @@ impl<'a> PartialEq<InternedString> for &'a String {
}
}

impl std::convert::From<InternedString> for String {
fn from(val: InternedString) -> String {
val.as_symbol().to_string()
}
}

impl fmt::Debug for InternedString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.with(|str| fmt::Debug::fmt(&str, f))
Expand Down

0 comments on commit 9a6dab6

Please sign in to comment.