Skip to content

Commit

Permalink
Reduce CPU/Allocations in elfie loading
Browse files Browse the repository at this point in the history
In addition to persisting the elfie index as a txt file, persist it as a binary file, allowing subsequent loads to read it from the binary file. This showed pretty drastic CPU/allocation improvements during the initial solution load.
  • Loading branch information
ToddGrun authored Dec 19, 2024
1 parent 42f217f commit 9a27ef7
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#nullable disable

using Microsoft.CodeAnalysis.Elfie.Model;

namespace Microsoft.CodeAnalysis.SymbolSearch;

/// <summary>
/// Factory for building an <see cref="AddReferenceDatabase"/> out of raw bytes.
/// </summary>
internal interface IDatabaseFactoryService
{
AddReferenceDatabase CreateDatabaseFromBytes(byte[] bytes);
/// <summary>
/// Creates a database from <paramref name="bytes"/>. <paramref name="isBinary"/> states whether the bytes
/// hold the binary form of the index (<see langword="true"/>) or its text form (<see langword="false"/>).
/// </summary>
AddReferenceDatabase CreateDatabaseFromBytes(byte[] bytes, bool isBinary);
}
7 changes: 3 additions & 4 deletions src/Features/Core/Portable/SymbolSearch/Windows/IIOService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#nullable disable

using System;
using System.IO;

namespace Microsoft.CodeAnalysis.SymbolSearch;
Expand All @@ -17,7 +16,7 @@ internal interface IIOService
void Delete(FileInfo file);
bool Exists(FileSystemInfo info);
byte[] ReadAllBytes(string path);
void Replace(string sourceFileName, string destinationFileName, string destinationBackupFileName, bool ignoreMetadataErrors);
void Replace(string sourceFileName, string destinationFileName, string? destinationBackupFileName, bool ignoreMetadataErrors);
void Move(string sourceFileName, string destinationFileName);
void WriteAndFlushAllBytes(string path, byte[] bytes);
void WriteAndFlushAllBytes(string path, ArraySegment<byte> bytes);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#nullable disable

using System.IO;
using Microsoft.CodeAnalysis.Elfie.Model;

Expand All @@ -13,12 +11,22 @@ internal sealed partial class SymbolSearchUpdateEngine
{
/// <summary>
/// <see cref="IDatabaseFactoryService"/> implementation that builds an <see cref="AddReferenceDatabase"/>
/// by reading it from an in-memory byte array.
/// </summary>
private sealed class DatabaseFactoryService : IDatabaseFactoryService
{
public AddReferenceDatabase CreateDatabaseFromBytes(byte[] bytes)
public AddReferenceDatabase CreateDatabaseFromBytes(byte[] bytes, bool isBinary)
{
// Wrap the bytes in a stream; the readers below consume it from the start.
using var memoryStream = new MemoryStream(bytes);
using var streamReader = new StreamReader(memoryStream);
var database = new AddReferenceDatabase(ArdbVersion.V1);
database.ReadText(streamReader);

// Binary payloads are loaded through ReadBinary; otherwise the bytes are read as text via ReadText.
if (isBinary)
{
using var binaryReader = new BinaryReader(memoryStream);
database.ReadBinary(binaryReader);
}
else
{
using var streamReader = new StreamReader(memoryStream);
database.ReadText(streamReader);
}

return database;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#nullable disable

using System;
using System.IO;

namespace Microsoft.CodeAnalysis.SymbolSearch;
Expand All @@ -20,16 +19,17 @@ private sealed class IOService : IIOService

/// <summary>
/// Returns the full contents of the file at <paramref name="path"/> by delegating to
/// <see cref="File.ReadAllBytes(string)"/>.
/// </summary>
public byte[] ReadAllBytes(string path)
{
    return File.ReadAllBytes(path);
}

// Thin wrapper over File.Replace; all four arguments are passed through unchanged.
public void Replace(string sourceFileName, string destinationFileName, string destinationBackupFileName, bool ignoreMetadataErrors)
public void Replace(string sourceFileName, string destinationFileName, string? destinationBackupFileName, bool ignoreMetadataErrors)
=> File.Replace(sourceFileName, destinationFileName, destinationBackupFileName, ignoreMetadataErrors);

/// <summary>
/// Moves <paramref name="sourceFileName"/> to <paramref name="destinationFileName"/> by delegating to
/// <see cref="File.Move(string, string)"/>.
/// </summary>
public void Move(string sourceFileName, string destinationFileName)
{
    File.Move(sourceFileName, destinationFileName);
}

// Opens the file at 'path' with FileMode.Create, writes the supplied bytes to it and flushes the stream
// with flushToDisk: true before the stream is disposed.
public void WriteAndFlushAllBytes(string path, byte[] bytes)
public void WriteAndFlushAllBytes(string path, ArraySegment<byte> bytes)
{
using var fileStream = new FileStream(path, FileMode.Create);
fileStream.Write(bytes, 0, bytes.Length);

// Only the segment's window (Offset, Count) of the backing array is written, not the whole array.
// NOTE(review): 'bytes.Array!' assumes the segment is never default(ArraySegment<byte>) - confirm with callers.
fileStream.Write(bytes.Array!, bytes.Offset, bytes.Count);
fileStream.Flush(flushToDisk: true);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#nullable disable

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
Expand All @@ -18,6 +16,7 @@
using Microsoft.CodeAnalysis.AddImport;
using Microsoft.CodeAnalysis.Elfie.Model;
using Microsoft.CodeAnalysis.Shared.Utilities;
using Roslyn.Utilities;
using static System.FormattableString;

namespace Microsoft.CodeAnalysis.SymbolSearch;
Expand Down Expand Up @@ -267,13 +266,14 @@ private async Task<TimeSpan> DownloadFullDatabaseAsync(FileInfo databaseFileInfo
return (succeeded: false, failureDelay);
}

var bytes = contentBytes;
var bytes = contentBytes!;
AddReferenceDatabase database;

// Make a database out of that and set it to our in memory database that we'll be
// searching.
try
{
CreateAndSetInMemoryDatabase(bytes);
database = CreateAndSetInMemoryDatabase(bytes, isBinary: false);
}
catch (Exception e) when (_service._reportAndSwallowExceptionUnlessCanceled(e, cancellationToken))
{
Expand All @@ -288,18 +288,55 @@ private async Task<TimeSpan> DownloadFullDatabaseAsync(FileInfo databaseFileInfo

// Write the file out to disk so we'll have it the next time we launch VS. Do this
// after we set the in-memory instance so we at least have something to search while
// we're waiting to write.
await WriteDatabaseFileAsync(databaseFileInfo, bytes, cancellationToken).ConfigureAwait(false);
// we're waiting to write. It's ok if either of these writes doesn't succeed. If the txt
// file fails to persist, a subsequent VS session will redownload the index and again try
// to persist. If the binary file fails to persist, a subsequent VS session will just load
// the index from the txt file and again try to persist the binary file.
await WriteDatabaseTextFileAsync(databaseFileInfo, bytes, cancellationToken).ConfigureAwait(false);
await WriteDatabaseBinaryFileAsync(database, databaseFileInfo, cancellationToken).ConfigureAwait(false);

var delay = _service._delayService.UpdateSucceededDelay;
LogInfo($"Processing full database element completed. Update again in {delay}");
return (succeeded: true, delay);
}

private async Task WriteDatabaseFileAsync(FileInfo databaseFileInfo, byte[] bytes, CancellationToken cancellationToken)
/// <summary>
/// Persists the text form of the database. Logs the start of the write, hands <paramref name="bytes"/> to
/// <c>WriteDatabaseFileAsync</c> for <paramref name="databaseFileInfo"/>, then logs completion.
/// </summary>
private async Task WriteDatabaseTextFileAsync(FileInfo databaseFileInfo, byte[] bytes, CancellationToken cancellationToken)
{
    LogInfo("Writing database file");

    // The shared writer takes an ArraySegment<byte>, so wrap the entire array.
    var allBytes = new ArraySegment<byte>(bytes);
    await WriteDatabaseFileAsync(databaseFileInfo, allBytes, cancellationToken).ConfigureAwait(false);

    LogInfo("Writing database file completed");
}

/// <summary>
/// Serializes <paramref name="database"/> with <c>WriteBinary</c> into memory and persists the result to the
/// file returned by <c>GetBinaryFileInfo</c> for <paramref name="databaseFileInfo"/>.
/// </summary>
/// <param name="database">The database whose binary form should be written.</param>
/// <param name="databaseFileInfo">The text database file; the binary file path is derived from it.</param>
/// <param name="cancellationToken">Passed through to the file write.</param>
private async Task WriteDatabaseBinaryFileAsync(AddReferenceDatabase database, FileInfo databaseFileInfo, CancellationToken cancellationToken)
{
    using var memoryStream = new MemoryStream();
    using var writer = new BinaryWriter(memoryStream);

    LogInfo("Writing database binary file");

    database.WriteBinary(writer);

    // Push anything the writer still holds into the memory stream before inspecting its buffer.
    writer.Flush();

    // Prefer the stream's own buffer so the serialized bytes are not copied. If the buffer is not exposed,
    // fall back to ToArray, which copies the full stream contents regardless of the current position
    // (so there is no need to rewind the stream first).
    if (!memoryStream.TryGetBuffer(out var arraySegmentBuffer))
    {
        arraySegmentBuffer = new ArraySegment<byte>(memoryStream.ToArray());
    }

    var databaseBinaryFileInfo = GetBinaryFileInfo(databaseFileInfo);
    await WriteDatabaseFileAsync(databaseBinaryFileInfo, arraySegmentBuffer, cancellationToken).ConfigureAwait(false);

    LogInfo("Writing database binary file completed");
}

private async Task WriteDatabaseFileAsync(FileInfo databaseFileInfo, ArraySegment<byte> bytes, CancellationToken cancellationToken)
{
await RepeatIOAsync(
cancellationToken =>
{
Expand Down Expand Up @@ -343,17 +380,19 @@ await RepeatIOAsync(
IOUtilities.PerformIO(() => _service._ioService.Delete(new FileInfo(tempFilePath)));
}
}, cancellationToken).ConfigureAwait(false);

LogInfo("Writing database file completed");
}

/// <summary>
/// Returns a <see cref="FileInfo"/> for the binary counterpart of <paramref name="databaseFileInfo"/>:
/// the same full path with its extension changed to <c>.bin</c>.
/// </summary>
private static FileInfo GetBinaryFileInfo(FileInfo databaseFileInfo)
{
    var binaryFilePath = Path.ChangeExtension(databaseFileInfo.FullName, ".bin");
    return new FileInfo(binaryFilePath);
}

private async Task<TimeSpan> PatchLocalDatabaseAsync(FileInfo databaseFileInfo, CancellationToken cancellationToken)
{
LogInfo("Patching local database");

LogInfo("Reading in local database");
// (intentionally not wrapped in IOUtilities. If this throws we want to restart).
var databaseBytes = _service._ioService.ReadAllBytes(databaseFileInfo.FullName);

var (databaseBytes, isBinary) = GetDatabaseBytes(databaseFileInfo);

LogInfo($"Reading in local database completed. databaseBytes.Length={databaseBytes.Length}");

// Make a database instance out of those bytes and set it as the current in memory database
Expand All @@ -363,7 +402,7 @@ private async Task<TimeSpan> PatchLocalDatabaseAsync(FileInfo databaseFileInfo,
AddReferenceDatabase database;
try
{
database = CreateAndSetInMemoryDatabase(databaseBytes);
database = CreateAndSetInMemoryDatabase(databaseBytes, isBinary);
}
catch (Exception e) when (_service._reportAndSwallowExceptionUnlessCanceled(e, cancellationToken))
{
Expand All @@ -381,12 +420,34 @@ private async Task<TimeSpan> PatchLocalDatabaseAsync(FileInfo databaseFileInfo,
LogInfo("Downloading and processing patch file: " + serverPath);

var element = await DownloadFileAsync(serverPath, cancellationToken).ConfigureAwait(false);
var delayUntilUpdate = await ProcessPatchXElementAsync(databaseFileInfo, element, databaseBytes, cancellationToken).ConfigureAwait(false);
var delayUntilUpdate = await ProcessPatchXElementAsync(
databaseFileInfo,
element,
// We pass a delegate to get the database bytes so that we can avoid reading the bytes when we don't need them due to no patch to apply.
getDatabaseBytes: () => isBinary ? _service._ioService.ReadAllBytes(databaseFileInfo.FullName) : databaseBytes,
cancellationToken).ConfigureAwait(false);

LogInfo("Downloading and processing patch file completed");
LogInfo("Patching local database completed");

return delayUntilUpdate;

// Reads the persisted database, preferring the binary file and falling back to the text file.
// The returned flag tells the caller which of the two forms the bytes are in.
(byte[] dataBytes, bool isBinary) GetDatabaseBytes(FileInfo databaseFileInfo)
{
    var binaryFileInfo = GetBinaryFileInfo(databaseFileInfo);

    try
    {
        // Binary file first.
        var binaryBytes = _service._ioService.ReadAllBytes(binaryFileInfo.FullName);
        return (binaryBytes, isBinary: true);
    }
    catch (Exception e) when (IOUtilities.IsNormalIOException(e))
    {
        // Reading the binary file failed with what IOUtilities classifies as a normal IO exception;
        // fall through and use the text file instead.
    }

    // (intentionally not wrapped in IOUtilities. If this throws we want to restart).
    var textBytes = _service._ioService.ReadAllBytes(databaseFileInfo.FullName);
    return (textBytes, isBinary: false);
}
}

/// <summary>
Expand All @@ -395,20 +456,20 @@ private async Task<TimeSpan> PatchLocalDatabaseAsync(FileInfo databaseFileInfo,
/// indicates that our data is corrupt), the exception will bubble up and must be appropriately
/// dealt with by the caller.
/// </summary>
private AddReferenceDatabase CreateAndSetInMemoryDatabase(byte[] bytes)
private AddReferenceDatabase CreateAndSetInMemoryDatabase(byte[] bytes, bool isBinary)
{
var database = CreateDatabaseFromBytes(bytes);
var database = CreateDatabaseFromBytes(bytes, isBinary);
// Store the new database, wrapped, as the entry for this engine's source. The same instance is also
// returned so the caller can keep working with it.
_service._sourceToDatabase[_source] = new AddReferenceDatabaseWrapper(database);
return database;
}

private async Task<TimeSpan> ProcessPatchXElementAsync(
FileInfo databaseFileInfo, XElement patchElement, byte[] databaseBytes, CancellationToken cancellationToken)
FileInfo databaseFileInfo, XElement patchElement, Func<byte[]> getDatabaseBytes, CancellationToken cancellationToken)
{
try
{
LogInfo("Processing patch element");
var delayUntilUpdate = await TryProcessPatchXElementAsync(databaseFileInfo, patchElement, databaseBytes, cancellationToken).ConfigureAwait(false);
var delayUntilUpdate = await TryProcessPatchXElementAsync(databaseFileInfo, patchElement, getDatabaseBytes, cancellationToken).ConfigureAwait(false);
if (delayUntilUpdate != null)
{
LogInfo($"Processing patch element completed. Update again in {delayUntilUpdate.Value}");
Expand All @@ -427,13 +488,19 @@ private async Task<TimeSpan> ProcessPatchXElementAsync(
}

private async Task<TimeSpan?> TryProcessPatchXElementAsync(
FileInfo databaseFileInfo, XElement patchElement, byte[] databaseBytes, CancellationToken cancellationToken)
FileInfo databaseFileInfo, XElement patchElement, Func<byte[]> getDatabaseBytes, CancellationToken cancellationToken)
{
ParsePatchElement(patchElement, out var upToDate, out var tooOld, out var patchBytes);
AddReferenceDatabase database;

if (upToDate)
{
LogInfo("Local version is up to date");

var databaseBinaryFileInfo = GetBinaryFileInfo(databaseFileInfo);
if (!_service._ioService.Exists(databaseBinaryFileInfo))
await WriteDatabaseBinaryFileAsync(_service._sourceToDatabase[_source].Database, databaseFileInfo, cancellationToken).ConfigureAwait(false);

return _service._delayService.UpdateSucceededDelay;
}

Expand All @@ -443,22 +510,29 @@ private async Task<TimeSpan> ProcessPatchXElementAsync(
return null;
}

LogInfo($"Got patch. databaseBytes.Length={databaseBytes.Length} patchBytes.Length={patchBytes.Length}.");
var databaseBytes = getDatabaseBytes();
LogInfo($"Got patch. databaseBytes.Length={databaseBytes.Length} patchBytes.Length={patchBytes!.Length}.");

// We have patch data. Apply it to our current database bytes to produce the new
// database.
LogInfo("Applying patch");
var finalBytes = _service._patchService.ApplyPatch(databaseBytes, patchBytes);
LogInfo($"Applying patch completed. finalBytes.Length={finalBytes.Length}");

CreateAndSetInMemoryDatabase(finalBytes);
// finalBytes is generated from the current database and the patch, not from the binary file.
database = CreateAndSetInMemoryDatabase(finalBytes, isBinary: false);

await WriteDatabaseFileAsync(databaseFileInfo, finalBytes, cancellationToken).ConfigureAwait(false);
// Attempt to persist the txt and binary forms of the index. It's ok if either of these writes
// doesn't succeed. If the txt file fails to persist, a subsequent VS session will redownload the
// index and again try to persist. If the binary file fails to persist, a subsequent VS session
// will just load the index from the txt file and again try to persist the binary file.
await WriteDatabaseTextFileAsync(databaseFileInfo, finalBytes, cancellationToken).ConfigureAwait(false);
await WriteDatabaseBinaryFileAsync(database, databaseFileInfo, cancellationToken).ConfigureAwait(false);

return _service._delayService.UpdateSucceededDelay;
}

private static void ParsePatchElement(XElement patchElement, out bool upToDate, out bool tooOld, out byte[] patchBytes)
private static void ParsePatchElement(XElement patchElement, out bool upToDate, out bool tooOld, out byte[]? patchBytes)
{
patchBytes = null;

Expand Down Expand Up @@ -486,10 +560,10 @@ private static void ParsePatchElement(XElement patchElement, out bool upToDate,
}
}

// Logging wrapper: forwards to the database factory service and logs before and after the call.
private AddReferenceDatabase CreateDatabaseFromBytes(byte[] bytes)
private AddReferenceDatabase CreateDatabaseFromBytes(byte[] bytes, bool isBinary)
{
LogInfo("Creating database from bytes");
var result = _service._databaseFactoryService.CreateDatabaseFromBytes(bytes);
var result = _service._databaseFactoryService.CreateDatabaseFromBytes(bytes, isBinary);
LogInfo("Creating database from bytes completed");
return result;
}
Expand Down Expand Up @@ -534,7 +608,7 @@ private async Task<XElement> DownloadFileAsync(string serverPath, CancellationTo
}

/// <summary>Returns 'null' if download is not available and caller should keep polling.</summary>
private async Task<(XElement element, TimeSpan delay)> TryDownloadFileAsync(IFileDownloader fileDownloader, CancellationToken cancellationToken)
private async Task<(XElement? element, TimeSpan delay)> TryDownloadFileAsync(IFileDownloader fileDownloader, CancellationToken cancellationToken)
{
LogInfo("Read file from client");

Expand Down Expand Up @@ -609,7 +683,7 @@ private async Task RepeatIOAsync(Action<CancellationToken> action, CancellationT
}
}

private async Task<(bool succeeded, byte[] contentBytes)> TryParseDatabaseElementAsync(XElement element, CancellationToken cancellationToken)
private async Task<(bool succeeded, byte[]? contentBytes)> TryParseDatabaseElementAsync(XElement element, CancellationToken cancellationToken)
{
LogInfo("Parsing database element");
var contentsAttribute = element.Attribute(ContentAttributeName);
Expand Down
Loading

0 comments on commit 9a27ef7

Please sign in to comment.