Skip to content

Commit

Permalink
Merge pull request #64775 from CyrusNajmabadi/cheaperIndex2
Browse files Browse the repository at this point in the history
Simplify SymbolTreeInfo creation
  • Loading branch information
CyrusNajmabadi committed Oct 17, 2022
2 parents 47a2b15 + adab1d3 commit 426b5f9
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 161 deletions.
179 changes: 69 additions & 110 deletions src/Workspaces/Core/Portable/FindSymbols/SymbolTree/SymbolTreeInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Collections;
using Microsoft.CodeAnalysis.Storage;
using Microsoft.CodeAnalysis.Utilities;
using Roslyn.Utilities;

Expand All @@ -32,14 +29,15 @@ namespace Microsoft.CodeAnalysis.FindSymbols
/// </summary>
internal partial class SymbolTreeInfo : IChecksummedObject
{
private static readonly StringComparer s_caseInsensitiveComparer =
CaseInsensitiveComparison.Comparer;

public Checksum Checksum { get; }

/// <summary>
/// The list of nodes that represent symbols. The primary key into the sorting of this
/// list is the name. They are sorted case-insensitively with the <see cref="s_totalComparer" />.
/// Finding case-sensitive matches can be found by binary searching for something that
/// matches insensitively, and then searching around that equivalence class for one that
/// matches.
/// The list of nodes that represent symbols. The primary key into the sorting of this list is the name. They
/// are sorted case-insensitively . Finding case-sensitive matches can be found by binary searching for
/// something that matches insensitively, and then searching around that equivalence class for one that matches.
/// </summary>
private readonly ImmutableArray<Node> _nodes;

Expand Down Expand Up @@ -74,25 +72,6 @@ public MultiDictionary<string, ExtensionMethodInfo>.ValueSet GetExtensionMethodI

private readonly SpellChecker _spellChecker;

private static readonly StringSliceComparer s_caseInsensitiveComparer =
StringSliceComparer.OrdinalIgnoreCase;

// We first sort in a case insensitive manner. But, within items that match insensitively,
// we then sort in a case sensitive manner. This helps for searching as we'll walk all
// the items of a specific casing at once. This way features can cache values for that
// casing and reuse them. i.e. if we didn't do this we might get "Prop, prop, Prop, prop"
// which might cause other features to continually recalculate if that string matches what
// they're searching for. However, with this sort of comparison we now get
// "prop, prop, Prop, Prop". Features can take advantage of that by caching their previous
// result and reusing it when they see they're getting the same string again.
private static readonly Comparison<string> s_totalComparer = (s1, s2) =>
{
var diff = CaseInsensitiveComparison.Comparer.Compare(s1, s2);
return diff != 0
? diff
: StringComparer.Ordinal.Compare(s1, s2);
};

private SymbolTreeInfo(
Checksum checksum,
ImmutableArray<Node> sortedNodes,
Expand Down Expand Up @@ -122,7 +101,7 @@ private SymbolTreeInfo(
public static SymbolTreeInfo CreateEmpty(Checksum checksum)
{
var unsortedNodes = ImmutableArray.Create(BuilderNode.RootNode);
SortNodes(unsortedNodes, out var sortedNodes);
var sortedNodes = SortNodes(unsortedNodes);

return new SymbolTreeInfo(checksum, sortedNodes,
CreateSpellChecker(checksum, sortedNodes),
Expand Down Expand Up @@ -212,76 +191,46 @@ private async Task<ImmutableArray<ISymbol>> FindAsync(
bool ignoreCase,
CancellationToken cancellationToken)
{
var comparer = GetComparer(ignoreCase);
IAssemblySymbol? assemblySymbol = null;

using var results = TemporaryArray<ISymbol>.Empty;
foreach (var node in FindNodeIndices(name, comparer))

var (startIndexInclusive, endIndexExclusive) = FindCaseInsensitiveNodeIndices(_nodes, name);

IAssemblySymbol? assemblySymbol = null;
for (var index = startIndexInclusive; index < endIndexExclusive; index++)
{
cancellationToken.ThrowIfCancellationRequested();
assemblySymbol ??= await lazyAssembly.GetValueAsync(cancellationToken).ConfigureAwait(false);
var node = _nodes[index];

Bind(node, assemblySymbol.GlobalNamespace, ref results.AsRef(), cancellationToken);
// The find-operation found the case-insensitive range of results. So if the caller wants
// case-insensitive, then just check all of them. If they caller wants case-sensitive, then
// actually check that the node matches case-sensitively
if (ignoreCase || StringComparer.Ordinal.Equals(name, node.Name))
{
assemblySymbol ??= await lazyAssembly.GetValueAsync(cancellationToken).ConfigureAwait(false);
Bind(index, assemblySymbol.GlobalNamespace, ref results.AsRef(), cancellationToken);
}
}

return results.ToImmutableAndClear();
}

private static StringSliceComparer GetComparer(bool ignoreCase)
{
return ignoreCase
? StringSliceComparer.OrdinalIgnoreCase
: StringSliceComparer.Ordinal;
}

private IEnumerable<int> FindNodeIndices(string name, StringSliceComparer comparer)
=> FindNodeIndices(_nodes, name, comparer);

/// <summary>
/// Gets all the node indices with matching names per the <paramref name="comparer" />.
/// </summary>
private static IEnumerable<int> FindNodeIndices(
ImmutableArray<Node> nodes,
string name, StringSliceComparer comparer)
private static (int startIndexInclusive, int endIndexExclusive) FindCaseInsensitiveNodeIndices(
ImmutableArray<Node> nodes, string name)
{
// find any node that matches case-insensitively
var startingPosition = BinarySearch(nodes, name);
var nameSlice = name.AsMemory();

if (startingPosition != -1)
{
// yield if this matches by the actual given comparer
if (comparer.Equals(nameSlice, GetNameSlice(nodes, startingPosition)))
{
yield return startingPosition;
}
if (startingPosition == -1)
return default;

var position = startingPosition;
while (position > 0 && s_caseInsensitiveComparer.Equals(GetNameSlice(nodes, position - 1), nameSlice))
{
position--;
if (comparer.Equals(GetNameSlice(nodes, position), nameSlice))
{
yield return position;
}
}
var startIndex = startingPosition;
while (startIndex > 0 && s_caseInsensitiveComparer.Equals(nodes[startIndex - 1].Name, name))
startIndex--;

position = startingPosition;
while (position + 1 < nodes.Length && s_caseInsensitiveComparer.Equals(GetNameSlice(nodes, position + 1), nameSlice))
{
position++;
if (comparer.Equals(GetNameSlice(nodes, position), nameSlice))
{
yield return position;
}
}
}
}
var endIndex = startingPosition;
while (endIndex + 1 < nodes.Length && s_caseInsensitiveComparer.Equals(nodes[endIndex + 1].Name, name))
endIndex++;

private static ReadOnlyMemory<char> GetNameSlice(
ImmutableArray<Node> nodes, int nodeIndex)
{
return nodes[nodeIndex].Name.AsMemory();
return (startIndex, endIndex + 1);
}

private int BinarySearch(string name)
Expand All @@ -292,16 +241,14 @@ private int BinarySearch(string name)
/// </summary>
private static int BinarySearch(ImmutableArray<Node> nodes, string name)
{
var nameSlice = name.AsMemory();
var max = nodes.Length - 1;
var min = 0;

while (max >= min)
{
var mid = min + ((max - min) >> 1);

var comparison = s_caseInsensitiveComparer.Compare(
GetNameSlice(nodes, mid), nameSlice);
var comparison = s_caseInsensitiveComparer.Compare(nodes[mid].Name, name);
if (comparison < 0)
{
min = mid + 1;
Expand All @@ -322,35 +269,28 @@ private static int BinarySearch(ImmutableArray<Node> nodes, string name)
#region Construction

private static SpellChecker CreateSpellChecker(Checksum checksum, ImmutableArray<Node> sortedNodes)
=> new(checksum, sortedNodes.Select(n => n.Name.AsMemory()));
=> new(checksum, sortedNodes.Select(n => n.Name));

private static void SortNodes(
ImmutableArray<BuilderNode> unsortedNodes,
out ImmutableArray<Node> sortedNodes)
private static ImmutableArray<Node> SortNodes(ImmutableArray<BuilderNode> unsortedNodes)
{
// Generate index numbers from 0 to Count-1
var tmp = new int[unsortedNodes.Length];
for (var i = 0; i < tmp.Length; i++)
{
using var _1 = ArrayBuilder<int>.GetInstance(unsortedNodes.Length, out var tmp);
tmp.Count = unsortedNodes.Length;
for (var i = 0; i < tmp.Count; i++)
tmp[i] = i;
}

// Sort the index according to node elements
Array.Sort<int>(tmp, (a, b) => CompareNodes(unsortedNodes[a], unsortedNodes[b], unsortedNodes));
tmp.Sort((a, b) => CompareNodes(unsortedNodes[a], unsortedNodes[b], unsortedNodes));

// Use the sort order to build the ranking table which will
// be used as the map from original (unsorted) location to the
// sorted location.
var ranking = new int[unsortedNodes.Length];
for (var i = 0; i < tmp.Length; i++)
{
using var _2 = ArrayBuilder<int>.GetInstance(unsortedNodes.Length, out var ranking);
ranking.Count = unsortedNodes.Length;
for (var i = 0; i < tmp.Count; i++)
ranking[tmp[i]] = i;
}

// No longer need the tmp array
tmp = null;

var result = ArrayBuilder<Node>.GetInstance(unsortedNodes.Length);
using var _3 = ArrayBuilder<Node>.GetInstance(unsortedNodes.Length, out var result);
result.Count = unsortedNodes.Length;

string? lastName = null;
Expand All @@ -375,13 +315,13 @@ private static void SortNodes(
lastName = currentName;
}

sortedNodes = result.ToImmutableAndFree();
return result.ToImmutableAndClear();
}

private static int CompareNodes(
BuilderNode x, BuilderNode y, ImmutableArray<BuilderNode> nodeList)
{
var comp = s_totalComparer(x.Name, y.Name);
var comp = TotalComparer(x.Name, y.Name);
if (comp == 0)
{
if (x.ParentIndex != y.ParentIndex)
Expand All @@ -402,6 +342,22 @@ private static int CompareNodes(
}

return comp;

// We first sort in a case insensitive manner. But, within items that match insensitively,
// we then sort in a case sensitive manner. This helps for searching as we'll walk all
// the items of a specific casing at once. This way features can cache values for that
// casing and reuse them. i.e. if we didn't do this we might get "Prop, prop, Prop, prop"
// which might cause other features to continually recalculate if that string matches what
// they're searching for. However, with this sort of comparison we now get
// "prop, prop, Prop, Prop". Features can take advantage of that by caching their previous
// result and reusing it when they see they're getting the same string again.
static int TotalComparer(string s1, string s2)
{
var diff = CaseInsensitiveComparison.Comparer.Compare(s1, s2);
return diff != 0
? diff
: StringComparer.Ordinal.Compare(s1, s2);
}
}

#endregion
Expand Down Expand Up @@ -476,7 +432,7 @@ private static SymbolTreeInfo CreateSymbolTreeInfo(
OrderPreservingMultiDictionary<string, string> inheritanceMap,
MultiDictionary<string, ExtensionMethodInfo>? receiverTypeNameToExtensionMethodMap)
{
SortNodes(unsortedNodes, out var sortedNodes);
var sortedNodes = SortNodes(unsortedNodes);
var spellChecker = CreateSpellChecker(checksum, sortedNodes);

return new SymbolTreeInfo(
Expand All @@ -487,8 +443,6 @@ private static OrderPreservingMultiDictionary<int, int> CreateIndexBasedInherita
ImmutableArray<Node> nodes,
OrderPreservingMultiDictionary<string, string> inheritanceMap)
{
// All names in metadata will be case sensitive.
var comparer = GetComparer(ignoreCase: false);
var result = new OrderPreservingMultiDictionary<int, int>();

foreach (var (baseName, derivedNames) in inheritanceMap)
Expand All @@ -498,9 +452,14 @@ private static OrderPreservingMultiDictionary<int, int> CreateIndexBasedInherita

foreach (var derivedName in derivedNames)
{
foreach (var derivedNameIndex in FindNodeIndices(nodes, derivedName, comparer))
var (startIndexInclusive, endIndexExclusive) = FindCaseInsensitiveNodeIndices(nodes, derivedName);

for (var derivedNameIndex = startIndexInclusive; derivedNameIndex < endIndexExclusive; derivedNameIndex++)
{
result.Add(baseNameIndex, derivedNameIndex);
var node = nodes[derivedNameIndex];
// All names in metadata will be case sensitive.
if (StringComparer.Ordinal.Equals(derivedName, node.Name))
result.Add(baseNameIndex, derivedNameIndex);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/Workspaces/Core/Portable/Utilities/SpellChecker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public SpellChecker(Checksum checksum, BKTree bKTree)
_bkTree = bKTree;
}

public SpellChecker(Checksum checksum, IEnumerable<ReadOnlyMemory<char>> corpus)
public SpellChecker(Checksum checksum, IEnumerable<string> corpus)
: this(checksum, BKTree.Create(corpus))
{
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,6 @@
<Compile Include="$(MSBuildThisFileDirectory)Utilities\StringBreaker.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Utilities\WordSimilarityChecker.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Utilities\StringEscapeEncoder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Utilities\StringSlice.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Utilities\SyntaxPath.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Utilities\TaskExtensions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)Utilities\TaskFactoryExtensions.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ private class Builder
private readonly Edge[] _compactEdges;
private readonly BuilderNode[] _builderNodes;

public Builder(IEnumerable<ReadOnlyMemory<char>> values)
public Builder(IEnumerable<string> values)
{
// TODO(cyrusn): Properly handle unicode normalization here.
var distinctValues = values.Where(v => v.Length > 0).Distinct(StringSliceComparer.OrdinalIgnoreCase).ToArray();
var distinctValues = values.Where(v => v.Length > 0).Distinct(CaseInsensitiveComparison.Comparer).ToArray();
var charCount = values.Sum(v => v.Length);

_concatenatedLowerCaseWords = new char[charCount];
Expand All @@ -107,7 +107,7 @@ public Builder(IEnumerable<ReadOnlyMemory<char>> values)
var value = distinctValues[i];
_wordSpans[i] = new TextSpan(characterIndex, value.Length);

foreach (var ch in value.Span)
foreach (var ch in value)
{
_concatenatedLowerCaseWords[characterIndex] = CaseInsensitiveComparison.ToLower(ch);
characterIndex++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ private BKTree(char[] concatenatedLowerCaseWords, ImmutableArray<Node> nodes, Im
}

public static BKTree Create(params string[] values)
=> Create(values.Select(v => v.AsMemory()));
=> Create((IEnumerable<string>)values);

public static BKTree Create(IEnumerable<ReadOnlyMemory<char>> values)
public static BKTree Create(IEnumerable<string> values)
=> new Builder(values).Create();

public IList<string> Find(string value, int? threshold = null)
Expand Down
Loading

0 comments on commit 426b5f9

Please sign in to comment.