-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix sparse checkout and set list of service directories based upon the project list #29151
Changes from all commits
cdd4837
40ab4ab
c37fe77
f481796
f0d2e5f
f863eb7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,7 +74,6 @@ jobs: | |
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml | ||
parameters: | ||
Paths: | ||
- 'sdk/${{ parameters.ServiceDirectory }}' | ||
- '**/*.xml' | ||
- '**/*.md' | ||
- '!sdk/**/test-recordings' | ||
|
@@ -91,25 +90,27 @@ jobs: | |
inputs: | ||
versionSpec: '3.6' | ||
|
||
- ${{ parameters.PreBuildSteps }} | ||
|
||
# This step needs to run before we update to using dev versions otherwise dependency | ||
# check in the script will not fully work because the versions will mismatch | ||
- task: PythonScript@0 | ||
displayName: 'Generate directories variable for sparse checkout' | ||
- task: PowerShell@2 | ||
displayName: Generate directories variable for sparse checkout | ||
inputs: | ||
scriptPath: 'eng/scripts/generate_from_source_pom.py' | ||
arguments: '--set-pipeline-variable CheckoutDirectories --project-list $(ProjectList)' | ||
workingDirectory: '$(System.DefaultWorkingDirectory)' | ||
pwsh: true | ||
filePath: $(Build.SourcesDirectory)/eng/scripts/Generate-ServiceDirectories-From-Project-List.ps1 | ||
arguments: > | ||
-SourcesDirectory $(Build.SourcesDirectory) | ||
-ProjectList $(ProjectList) | ||
|
||
# Skip sparse checkout for the `azure-sdk-for-<lang>-pr` private mirrored repositories | ||
# as we require the github service connection to be loaded. | ||
- ${{ if not(contains(variables['Build.DefinitionName'], 'java-pr')) }}: | ||
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml | ||
parameters: | ||
Paths: $(CheckoutDirectories) | ||
Paths: $(SparseCheckoutDirectories) | ||
SkipDefaultCheckout: true | ||
|
||
- ${{ parameters.PreBuildSteps }} | ||
|
||
- script: | | ||
echo "##vso[build.addbuildtag]Scheduled" | ||
displayName: 'Tag scheduled builds' | ||
|
@@ -275,16 +276,18 @@ jobs: | |
AdditionalModules: ${{ parameters.AdditionalModules }} | ||
JobType: 'Analyze' | ||
|
||
- task: PythonScript@0 | ||
displayName: 'Generate directories variable for sparse checkout' | ||
- task: PowerShell@2 | ||
displayName: Generate directories variable for sparse checkout | ||
inputs: | ||
scriptPath: 'eng/scripts/generate_from_source_pom.py' | ||
arguments: '--set-pipeline-variable CheckoutDirectories --project-list $(ProjectList)' | ||
workingDirectory: '$(System.DefaultWorkingDirectory)' | ||
pwsh: true | ||
filePath: $(Build.SourcesDirectory)/eng/scripts/Generate-ServiceDirectories-From-Project-List.ps1 | ||
arguments: > | ||
-SourcesDirectory $(Build.SourcesDirectory) | ||
-ProjectList $(ProjectList) | ||
|
||
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml | ||
parameters: | ||
Paths: $(CheckoutDirectories) | ||
Paths: $(SparseCheckoutDirectories) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I was trying to figure out where this is set and until I reviewed the script it wasn't clear that is what set it. In other cases we have made that a parameter into the script so things were a little more connected. It looks like you are setting 2 variables which is probably why you opted not to do that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously they were passing this into the python script and calling it prior to every sparse checkout that wasn't the default. There's no real point in passing in an environment variable to set if it's always being set in multiple places. Further, I'd added a second variable for ServiceDirectires which isn't yet being used but will be so I saw no point in passing in variables that were being expected. |
||
SkipDefaultCheckout: true | ||
|
||
- template: /eng/common/pipelines/templates/steps/set-test-pipeline-version.yml | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
# Use case: | ||
# Given a project list, set following environment variables in JSON format: | ||
# 1. SparseCheckoutDirectories - This the list of sparse checkout paths that will be used by sparse-checkout.yml | ||
# 2. ServiceDirectories - A list of ServiceDirectories. | ||
# | ||
# The project list should be generated by a call to generate-project-list-and-cache-maven-repository.yml | ||
# prior to calling this script. | ||
param( | ||
# $(Build.SourcesDirectory) - root of the repository | ||
[Parameter(Mandatory=$true)][string]$SourcesDirectory, | ||
# ArtifactsList will be using ('${{ convertToJson(parameters.Artifacts) }}' | ConvertFrom-Json | Select-Object name, groupId) | ||
[Parameter(Mandatory=$true)][array] $ProjectList | ||
) | ||
|
||
$StartTime = $(get-date) | ||
. "${PSScriptRoot}/../common/scripts/common.ps1" | ||
$Path = Resolve-Path ($PSScriptRoot + "/../../") | ||
$script:FoundError = $false | ||
|
||
function Build-Unreleased-List-From-File { | ||
param([string]$versionFile) | ||
$unreleasedList = @() | ||
foreach($line in Get-Content $versionFile) { | ||
if (!$line -or $line.Trim() -eq '' -or $line.StartsWith("#") -or !$line.StartsWith("unreleased_")) { | ||
continue | ||
} else { | ||
$split = $line.Split(";") | ||
if ($split.Length -ne 2) { | ||
LogError("Malformed unreleased_ dependency line in $versionFile. line=$line") | ||
# If there's a malformed unreleased_ dependency line in version_client.txt then | ||
# immediately exit. This is the only place we need to immediately exit. | ||
exit(1) | ||
JimSuplizio marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
$temp = $split[0].Replace("unreleased_","") | ||
$unreleasedList += $temp | ||
} | ||
} | ||
return ,$unreleasedList | ||
} | ||
|
||
$unreleasedList = @() | ||
# version_client is the only version file that will have unreleased_ dependencies as these | ||
# are disallowed for data track libraries. | ||
$unreleasedList = Build-Unreleased-List-From-File $Path\eng\versioning\version_client.txt | ||
|
||
$sparseCheckoutDirHash = @{} | ||
$serviceDirHash = @{} | ||
foreach($file in Get-ChildItem -Path $SourcesDirectory -Filter pom*.xml -Recurse -Depth 3 -File) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should consider using Get-java-PackageInfoFromRepo from language-settings.ps1 to try and start using a common function for gathering properties for the projects. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @weshaggard, we'd have a chicken and egg there. That function requires the service directory. The common functions, for Java anyways, are far less common than one would think. Things under common aren't really common outside of the fact that they're specifically being used for a targeted piece of processing (like docs updating or .MD file verifications) and are mostly not useful outside of the context in which they were initially written. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe that function should work without a servicedirectory as well. Mostly I was wanting to try and have a common function we can use to parse and produce the properties. |
||
$xmlPomFile = $null | ||
$xmlPomFile = New-Object xml | ||
$xmlPomFile.Load($file.FullName) | ||
$library = $xmlPomFile.project.groupId + ":" + $xmlPomFile.project.artifactId | ||
$serviceDirectory = (Get-Item $file).Directory.Parent | ||
# This if check is only necessary because resourcemanager and resourcemanagerhybrid contain the | ||
# exact same group/artifact ids | ||
if ($file.FullName.Split([IO.Path]::DirectorySeparatorChar) -notcontains "resourcemanagerhybrid") { | ||
benbp marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# The directories for sparse checkout and ServiceDirectories need the $SourcesDirectory | ||
# stripped off | ||
$tempDir = $serviceDirectory.FullName.Replace("$SourcesDirectory", "") | ||
$tempDir = $tempDir.Replace([IO.Path]::DirectorySeparatorChar, '/') | ||
$sparseCheckoutDirHash.Add($library, $tempDir) | ||
if ($tempDir.StartsWith("/sdk/")) { | ||
# Strip off the "/sdk/" to get the service directory. A ServiceDirectory will always have | ||
# the format of /SDK/<ServiceDirectory>. Whereas sparse checkout can have other directories | ||
# outside of /SDK, eg. /common | ||
$tempDir = $tempDir.Replace("/sdk/", "") | ||
JimSuplizio marked this conversation as resolved.
Show resolved
Hide resolved
|
||
$serviceDirHash.Add($library, $tempDir) | ||
} | ||
} | ||
} | ||
|
||
$sparseCheckoutDirectories = @() | ||
$serviceDirectories = @() | ||
$sparseCheckoutDirectories += "/sdk/parents" | ||
foreach ($project in $ProjectList) { | ||
if ($sparseCheckoutDirHash.ContainsKey($project)) { | ||
$sparseCheckoutDirectories += $sparseCheckoutDirHash[$project] | ||
$serviceDirectories += $serviceDirHash[$project] | ||
} else { | ||
LogError("Did not find $project in any pom files") | ||
$script:FoundError = $true | ||
} | ||
} | ||
# Unreleased_ libraries are special. They're the only case, outside of FromSource runs where | ||
# libraries from other service directories, outside of the one we're building, need to get built. | ||
# It's easier to just add any unreleased_ dependency service directories to the sparse checkout | ||
# and service directory variables. The alternative requires a lot of specific pom file and | ||
# dependent pom file processing which is messy. The result here is that we may have an extra | ||
# service directory being sunk up when it wasn't necessary. The overall time of that sync | ||
# is a measured in seconds which isn't expensive and far outweighs the alternative mentioned | ||
# above. | ||
foreach ($project in $unreleasedList) { | ||
if ($sparseCheckoutDirHash.ContainsKey($project)) { | ||
$sparseCheckoutDirectories += $sparseCheckoutDirHash[$project] | ||
} else { | ||
LogError("Did not find unreleased $project in any pom files") | ||
$script:FoundError = $true | ||
} | ||
} | ||
|
||
if ($script:FoundError) { | ||
exit 1 | ||
} | ||
|
||
$temp = ConvertTo-Json @($sparseCheckoutDirectories | Sort-Object | Get-Unique) -Compress | ||
Write-Host "setting env variable SparseCheckoutDirectories = $temp" | ||
Write-Host "##vso[task.setvariable variable=SparseCheckoutDirectories;]$temp" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what the limit is but I suspect there is a limit on the size of a DevOps variable or a Env variable. So might be something we need to think about. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @weshaggard - Previously the sparse checkout was granular down to the artifact's directory in some cases there were 120+ items added to the sparse checkout (like core's FromSource run for example). I changed things to just simply do the service directory which turned 120+ into like 30. If there's a limit it would have been hit long before I'd made my changes which dramatically trimmed things down. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't find the docs for it right now, but I remember the limit being large enough that it hasn't been a concern thus far. |
||
$temp = ConvertTo-Json @($serviceDirectories | Sort-Object | Get-Unique) -Compress | ||
Write-Host "setting env variable ServiceDirectories = $temp" | ||
Write-Host "##vso[task.setvariable variable=ServiceDirectories;]$temp" | ||
$ElapsedTime = $(get-date) - $StartTime | ||
$TotalRunTime = "{0:HH:mm:ss}" -f ([datetime]$ElapsedTime.Ticks) | ||
Write-Host "Total run time=$($TotalRunTime)" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,12 @@ | |
# Flags | ||
# --project-list/--pl: List of project included in the From Source run. | ||
# | ||
# Output: | ||
# 1. ClientFromSourcePom.xml which is the aggregate pom required by the From Source run | ||
# Set following environment variables in JSON format: | ||
# 2. SparseCheckoutDirectories - This the list of sparse checkout paths that will be used by sparse-checkout.yml | ||
# 3. ServiceDirectories - A list of ServiceDirectories. | ||
# | ||
# For example: To create an aggregate POM for Azure Storage | ||
# python eng/scripts/generate_from_source_pom.py --pl com.azure:azure-storage-blob,com.azure:azure-storage-common,... | ||
# | ||
|
@@ -91,7 +97,7 @@ def matches_version(self, version: str, match_any_version: bool = False): | |
maven_xml_namespace = '{http://maven.apache.org/POM/4.0.0}' | ||
|
||
# Function that creates the aggregate POM. | ||
def create_from_source_pom(project_list: str, set_pipeline_variable: str, set_skip_linting_projects: str, match_any_version: bool): | ||
def create_from_source_pom(project_list: str, set_skip_linting_projects: str, match_any_version: bool): | ||
project_list_identifiers = project_list.split(',') | ||
|
||
# Get the artifact identifiers from client_versions.txt to act as our source of truth. | ||
|
@@ -129,20 +135,36 @@ def create_from_source_pom(project_list: str, set_pipeline_variable: str, set_sk | |
|
||
fromSourcePom.write(pom_file_end) | ||
|
||
if set_pipeline_variable: | ||
# The directory_path is too granular. There are build rules for some libraries that | ||
# create empty sources/javadocs jars using the README.md. Not every library | ||
# has a README.md and, in these cases, it uses the README.md from the root service | ||
# directory. This will also trim the number of paths down considerably. | ||
service_directories: Set[str] = set() | ||
for p in source_projects: | ||
# get the service directory, which is one level up from the library's directory | ||
service_directory = '/'.join(p.directory_path.split('/')[0:-1]) | ||
service_directories.add(service_directory) | ||
|
||
checkout_paths = list(sorted(service_directories)) | ||
print('setting env variable {} = {}'.format(set_pipeline_variable, checkout_paths)) | ||
print('##vso[task.setvariable variable={};]{}'.format(set_pipeline_variable, json.dumps(checkout_paths))) | ||
# The directory_path is too granular. There are build rules for some libraries that | ||
# create empty sources/javadocs jars using the README.md. Not every library | ||
# has a README.md and, in these cases, it uses the README.md from the root service | ||
# directory. This will also trim the number of paths down considerably. | ||
sparse_checkout_directories: Set[str] = set() | ||
service_directories: Set[str] = set() | ||
sdk_string = "/sdk/" | ||
for p in source_projects: | ||
# get the service directory, which is one level up from the library's directory | ||
sparse_checkout_directory = '/'.join(p.directory_path.split('/')[0:-1]) | ||
sparse_checkout_directories.add(sparse_checkout_directory) | ||
# The ServiceDirectories list should only ever contain the list of service | ||
# directories for the project list and nothing else. | ||
if p.identifier in project_list_identifiers: | ||
# Sparse checkout directories can contain directories that aren't service directories. | ||
# (aka. /common). Any service directory will start with "/sdk/", everything else is | ||
# would be attributed to supporting libraries (ex. perf-test-core). | ||
if sdk_string in sparse_checkout_directory: | ||
service_directory = sparse_checkout_directory.replace(sdk_string, "") | ||
service_directories.add(service_directory) | ||
|
||
# output the SparseCheckoutDirectories environment variable | ||
sparse_checkout_paths = list(sorted(sparse_checkout_directories)) | ||
print('setting env variable SparseCheckoutDirectories = {}'.format(sparse_checkout_paths)) | ||
print('##vso[task.setvariable variable=SparseCheckoutDirectories;]{}'.format(json.dumps(sparse_checkout_paths))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any good reason to do this in both scripts as opposed to just running both scripts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The scripts generate dramatically different lists for the same list of artifacts. The generate_from_source_pom produces a list of sparse checkout directories that's necessary for the FromSource run which includes the service directory for the list and every service directory that has a dependency on any project in the project list. It also generates the ClientFromSourcePom.xml which is only needed in two places. Generate-ServiceDirectories-From-Project-List.ps1 only generates the list of service directories for the project list and nothing more. |
||
|
||
# output the ServiceDirectories environment variable | ||
service_dirs = list(sorted(service_directories)) | ||
print('setting env variable ServiceDirectories = {}'.format(service_dirs)) | ||
print('##vso[task.setvariable variable=ServiceDirectories;]{}'.format(json.dumps(service_dirs))) | ||
|
||
# Sets the DevOps variable that is used to skip certain projects during linting validation. | ||
if set_skip_linting_projects: | ||
|
@@ -331,14 +353,13 @@ def project_uses_client_parent(project: Project, projects: Dict[str, Project]) - | |
def main(): | ||
parser = argparse.ArgumentParser(description='Generated an aggregate POM for a From Source run.') | ||
parser.add_argument('--project-list', '--pl', type=str) | ||
parser.add_argument('--set-pipeline-variable', type=str) | ||
parser.add_argument('--set-skip-linting-projects', type=str) | ||
parser.add_argument('--match-any-version', action='store_true') | ||
args = parser.parse_args() | ||
if args.project_list == None: | ||
raise ValueError('Missing project list.') | ||
start_time = time.time() | ||
create_from_source_pom(args.project_list, args.set_pipeline_variable, args.set_skip_linting_projects, args.match_any_version) | ||
create_from_source_pom(args.project_list, args.set_skip_linting_projects, args.match_any_version) | ||
elapsed_time = time.time() - start_time | ||
|
||
print('Effective From Source POM File') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you intent to move the PreBuildSteps after the sparse checkout?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was intentional, otherwise there's a chicken or egg problem with the pre build scripts not being checked out with the initial sparse checkout pass.