Skip to content

Commit

Permalink
Fix bug where pipreport used index-urls from requirements.txt (#1227)
Browse files Browse the repository at this point in the history
* fix bug where pipreport used index urls in requirements.txt

* update tests

* docs

* add --no-input to pip install, so we do not hang waiting for user input

* pr feedback: performance and cleanup

* bump version
  • Loading branch information
pauld-msft committed Aug 19, 2024
1 parent f27fe8e commit edf0c8d
Show file tree
Hide file tree
Showing 11 changed files with 300 additions and 61 deletions.
4 changes: 4 additions & 0 deletions docs/detectors/pip.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,7 @@ The environment variable `PipReportSkipFallbackOnFailure` is used to skip the de
The environment variable `PipReportFileLevelTimeoutSeconds` is used to control the timeout limit for generating the PipReport for individual files. This defaults to the overall timeout.

The environment variable `PipReportDisableFastDeps` is used to disable the fast deps feature in PipReport.

The enviroment variable `PipReportIgnoreFileLevelIndexUrl` is used to ignore the `--index-url` argument that can be provided in the requirements.txt file: https://pip.pypa.io/en/stable/cli/pip_install/#install-index-url

The enviroment variable `PipReportPersistReports` allows the PipReport detector to persist the reports that it generates, rather than cleaning them up after constructing the dependency graph.
32 changes: 32 additions & 0 deletions src/Microsoft.ComponentDetection.Common/FileUtilityService.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
namespace Microsoft.ComponentDetection.Common;

using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
Expand Down Expand Up @@ -36,4 +37,35 @@ public Stream MakeFileStream(string fileName)
{
return new FileStream(fileName, FileMode.Open, FileAccess.Read);
}

/// <inheritdoc />
public (string DuplicateFilePath, bool CreatedDuplicate) DuplicateFileWithoutLines(string fileName, string removalIndicator)
{
// Read all lines from the file and filter out the lines that start with the removal indicator.
var removedAnyLines = false;
var linesToKeep = new List<string>();
foreach (var line in File.ReadLines(fileName))
{
if (line == null || line.Trim().StartsWith(removalIndicator, System.StringComparison.OrdinalIgnoreCase))
{
removedAnyLines = true;
}
else
{
linesToKeep.Add(line);
}
}

// If the file did not have any lines to remove, return null.
if (!removedAnyLines)
{
return (null, false);
}

// Otherwise, write the lines to a new file and return the new file path.
var duplicateFileName = $"temp.{Path.GetFileName(fileName)}";
var duplicateFilePath = Path.Combine(Path.GetDirectoryName(fileName), duplicateFileName);
File.WriteAllLines(duplicateFilePath, linesToKeep);
return (duplicateFilePath, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,15 @@ protected Task<IEnumerable<IComponentStream>> GetFileStreamsAsync(DirectoryInfo

private async Task<IndividualDetectorScanResult> ProcessAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs, int maxThreads, CancellationToken cancellationToken = default)
{
var threadsToUse = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1;
this.Telemetry["ThreadsUsed"] = $"{threadsToUse}";

var processor = new ActionBlock<ProcessRequest>(
async processRequest => await this.OnFileFoundAsync(processRequest, detectorArgs, cancellationToken),
new ExecutionDataflowBlockOptions
{
// MaxDegreeOfParallelism is the lower of the processor count and the max threads arg that the customer passed in
MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1,
MaxDegreeOfParallelism = threadsToUse,
});

var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs, cancellationToken);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,12 @@ public interface IFileUtilityService
/// <param name="fileName">Path to the file.</param>
/// <returns>Returns a stream representing the file.</returns>
Stream MakeFileStream(string fileName);

/// <summary>
/// Duplicates a file, removing any lines that starts with the given string.
/// </summary>
/// <param name="fileName">Path to the file.</param>
/// <param name="removalIndicator">The string that indicates a line should be removed.</param>
/// <returns>Returns the path of the new file, and whether or not one was created.</returns>
(string DuplicateFilePath, bool CreatedDuplicate) DuplicateFileWithoutLines(string fileName, string removalIndicator);
}
139 changes: 81 additions & 58 deletions src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
public class PipCommandService : IPipCommandService
{
private const string PipReportDisableFastDepsEnvVar = "PipReportDisableFastDeps";
private const string PipReportIgnoreFileLevelIndexUrlEnvVar = "PipReportIgnoreFileLevelIndexUrl";

private readonly ICommandLineInvocationService commandLineInvocationService;
private readonly IPathUtilityService pathUtilityService;
Expand Down Expand Up @@ -134,71 +135,93 @@ private async Task<CommandLineExecutionResult> ExecuteCommandAsync(
var reportName = Path.GetFileNameWithoutExtension(Path.GetRandomFileName()) + ".component-detection-pip-report.json";
var reportFile = new FileInfo(Path.Combine(workingDir.FullName, reportName));

FileInfo duplicateFile = null;
string pipReportCommand;
if (path.EndsWith(".py"))
{
pipReportCommand = $"install -e .";
}
else if (path.EndsWith(".txt"))
{
pipReportCommand = "install -r requirements.txt";
}
else
{
// Failure case, but this shouldn't be hit since detection is only running
// on setup.py and requirements.txt files.
this.logger.LogDebug("PipReport: Unsupported file type for pip installation report: {Path}", path);
return (new PipInstallationReport(), null);
}

// When PIP_INDEX_URL is set, we need to pass it as a parameter to pip install command.
// This should be done before running detection by the build system, otherwise the detection
// will default to the public PyPI index if not configured in pip defaults. Note this index URL may have credentials, we need to remove it when logging.
pipReportCommand += $" --dry-run --ignore-installed --quiet --report {reportName}";
if (this.environmentService.DoesEnvironmentVariableExist("PIP_INDEX_URL"))
{
pipReportCommand += $" --index-url {this.environmentService.GetEnvironmentVariable("PIP_INDEX_URL")}";
}

if (!this.environmentService.DoesEnvironmentVariableExist(PipReportDisableFastDepsEnvVar) || !this.environmentService.IsEnvironmentVariableValueTrue(PipReportDisableFastDepsEnvVar))
{
pipReportCommand += $" --use-feature=fast-deps";
}

this.logger.LogDebug("PipReport: Generating pip installation report for {Path} with command: {Command}", formattedPath, pipReportCommand.RemoveSensitiveInformation());
command = await this.ExecuteCommandAsync(
pipExecutable,
pythonExecutable,
null,
workingDir,
cancellationToken,
pipReportCommand);

if (command.ExitCode == -1 && cancellationToken.IsCancellationRequested)
try
{
var errorMessage = $"PipReport: Cancelled for file '{formattedPath}' with command '{pipReportCommand.RemoveSensitiveInformation()}'.";
using var failureRecord = new PipReportFailureTelemetryRecord
if (path.EndsWith(".py"))
{
pipReportCommand = "install -e .";
}
else if (path.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
{
ExitCode = command.ExitCode,
StdErr = $"{errorMessage} {command.StdErr}",
};
pipReportCommand = "install -r requirements.txt";
if (this.environmentService.IsEnvironmentVariableValueTrue(PipReportIgnoreFileLevelIndexUrlEnvVar))
{
// check for --index-url in requirements.txt and remove it from the file, since we want to use PIP_INDEX_URL from the environment.
var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(formattedPath, "--index-url");
if (createdDuplicate)
{
var duplicateFileName = Path.GetFileName(duplicateFilePath);
duplicateFile = new FileInfo(duplicateFilePath);
pipReportCommand = $"install -r {duplicateFileName}";
}
}
}
else
{
// Failure case, but this shouldn't be hit since detection is only running
// on setup.py and requirements.txt files.
this.logger.LogDebug("PipReport: Unsupported file type for pip installation report: {Path}", path);
return (new PipInstallationReport(), null);
}

// When PIP_INDEX_URL is set, we need to pass it as a parameter to pip install command.
// This should be done before running detection by the build system, otherwise the detection
// will default to the public PyPI index if not configured in pip defaults. Note this index URL may have credentials, we need to remove it when logging.
pipReportCommand += $" --dry-run --ignore-installed --quiet --no-input --report {reportName}";
if (this.environmentService.DoesEnvironmentVariableExist("PIP_INDEX_URL"))
{
pipReportCommand += $" --index-url {this.environmentService.GetEnvironmentVariable("PIP_INDEX_URL")}";
}

this.logger.LogWarning("{Error}", errorMessage);
throw new InvalidOperationException(errorMessage);
if (!this.environmentService.IsEnvironmentVariableValueTrue(PipReportDisableFastDepsEnvVar))
{
pipReportCommand += " --use-feature=fast-deps";
}

this.logger.LogDebug("PipReport: Generating pip installation report for {Path} with command: {Command}", formattedPath, pipReportCommand.RemoveSensitiveInformation());
command = await this.ExecuteCommandAsync(
pipExecutable,
pythonExecutable,
null,
workingDir,
cancellationToken,
pipReportCommand);

if (command.ExitCode == -1 && cancellationToken.IsCancellationRequested)
{
var errorMessage = $"PipReport: Cancelled for file '{formattedPath}' with command '{pipReportCommand.RemoveSensitiveInformation()}'.";
using var failureRecord = new PipReportFailureTelemetryRecord
{
ExitCode = command.ExitCode,
StdErr = $"{errorMessage} {command.StdErr}",
};

this.logger.LogWarning("{Error}", errorMessage);
throw new InvalidOperationException(errorMessage);
}
else if (command.ExitCode != 0)
{
using var failureRecord = new PipReportFailureTelemetryRecord
{
ExitCode = command.ExitCode,
StdErr = command.StdErr,
};

this.logger.LogDebug("PipReport: Pip installation report error: {StdErr}", command.StdErr);
throw new InvalidOperationException($"PipReport: Failed to generate pip installation report for file {path} with exit code {command.ExitCode}");
}

var reportOutput = await this.fileUtilityService.ReadAllTextAsync(reportFile);
return (JsonConvert.DeserializeObject<PipInstallationReport>(reportOutput), reportFile);
}
else if (command.ExitCode != 0)
finally
{
using var failureRecord = new PipReportFailureTelemetryRecord
if (duplicateFile != null && duplicateFile.Exists)
{
ExitCode = command.ExitCode,
StdErr = command.StdErr,
};

this.logger.LogDebug("PipReport: Pip installation report error: {StdErr}", command.StdErr);
throw new InvalidOperationException($"PipReport: Failed to generate pip installation report for file {path} with exit code {command.ExitCode}");
duplicateFile.Delete();
}
}

var reportOutput = await this.fileUtilityService.ReadAllTextAsync(reportFile);
return (JsonConvert.DeserializeObject<PipInstallationReport>(reportOutput), reportFile);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private enum PipReportOverrideBehavior

public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

public override int Version { get; } = 6;
public override int Version { get; } = 7;

protected override bool EnableParallelism { get; set; } = true;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
namespace Microsoft.ComponentDetection.Common.Tests;

using System.IO;
using FluentAssertions;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.VisualStudio.TestTools.UnitTesting;

[TestClass]
[TestCategory("Governance/All")]
[TestCategory("Governance/ComponentDetection")]
public class FileUtilityServiceTests
{
private readonly IFileUtilityService fileUtilityService;

public FileUtilityServiceTests() =>
this.fileUtilityService = new FileUtilityService();

[TestMethod]
public void DuplicateFileWithoutLines_WithLinesToRemove_ShouldCreateDuplicateFileWithoutLines()
{
// Get directory of current executing assembly
var directory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);

// Arrange
var fileName = $"{directory}/Resources/test-file-duplicate.txt";
var removalIndicator = "//REMOVE";
var expectedDuplicateFilePath = Path.Combine(directory, "Resources", "temp.test-file-duplicate.txt");

// Act
var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(fileName, removalIndicator);

// Assert
createdDuplicate.Should().BeTrue();
duplicateFilePath.Should().Be(expectedDuplicateFilePath);
File.Exists(expectedDuplicateFilePath).Should().BeTrue();
}

[TestMethod]
public void DuplicateFileWithoutLines_WithoutLinesToRemove_ShouldNotCreateDuplicateFile()
{
// Get directory of current executing assembly
var directory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);

// Arrange
var fileName = $"{directory}/Resources/test-file-duplicate.txt";
var removalIndicator = "//NOTEXIST";

// Act
var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(fileName, removalIndicator);

// Assert
createdDuplicate.Should().BeFalse();
duplicateFilePath.Should().BeNull();
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<ItemGroup Label="Project References">
<ProjectReference Include="..\Microsoft.ComponentDetection.TestsUtilities\Microsoft.ComponentDetection.TestsUtilities.csproj" />
Expand All @@ -13,4 +13,10 @@
<PackageReference Include="System.Threading.Tasks.Dataflow" />
</ItemGroup>

<ItemGroup>
<None Update="Resources\test-file-duplicate.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
hello
//REMOVE
world
Loading

0 comments on commit edf0c8d

Please sign in to comment.