Skip to content

Commit

Permalink
Fix sparse checkout and set list of service directories based upon th…
Browse files Browse the repository at this point in the history
…e project list (#29151)

* Fix sparse checkout and set list of service directories based upon the project list

* add Generate-ServiceDirectories-From-Project-List.ps1 script

* remove set-pipeline-variable command line argument

* remove set-pipeline-variable command line argument part 2

* For want of -Compress, the kingdom fell

* Update for review feedback
  • Loading branch information
JimSuplizio authored Jun 2, 2022
1 parent 0bd6adf commit a5a1f9a
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 41 deletions.
12 changes: 7 additions & 5 deletions eng/pipelines/code-quality-reports.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ jobs:
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml
parameters:
Paths:
# Checkout Core as it triggers building all SDKs that use code-quality-reports
- 'sdk/core'
- '**/*.xml'
- '!sdk/**/test-recordings'
- '!sdk/**/session-records'
Expand Down Expand Up @@ -49,17 +47,21 @@ jobs:
safeName: azurecore
JobType: 'linting'

# The only time generate_from_source_pom.py should be used to set the SparseCheckoutDirectories
# is for FromSource runs or, in the case of code quality reports, a run that needs to build
# everything using the latest source. It'll greedily set any service directories as it figures
# out what libraries, their dependents and so on, that need to be in ClientFromSourcePom.xml
- task: PythonScript@0
displayName: 'Generate directories variable for sparse checkout'
displayName: 'Generate FromSource POM and directories for sparse checkout'
inputs:
scriptPath: 'eng/scripts/generate_from_source_pom.py'
arguments: '--set-pipeline-variable CheckoutDirectories --set-skip-linting-projects SkipLintingProjects --project-list $(ProjectList)'
arguments: '--set-skip-linting-projects SkipLintingProjects --project-list $(ProjectList)'
workingDirectory: '$(System.DefaultWorkingDirectory)'

- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml
parameters:
SkipDefaultCheckout: true
Paths: $(CheckoutDirectories)
Paths: $(SparseCheckoutDirectories)

- task: PowerShell@2
inputs:
Expand Down
33 changes: 18 additions & 15 deletions eng/pipelines/templates/jobs/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ jobs:
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml
parameters:
Paths:
- 'sdk/${{ parameters.ServiceDirectory }}'
- '**/*.xml'
- '**/*.md'
- '!sdk/**/test-recordings'
Expand All @@ -94,25 +93,27 @@ jobs:
inputs:
versionSpec: '3.6'

- ${{ parameters.PreBuildSteps }}

# This step needs to run before we update to using dev versions otherwise dependency
# check in the script will not fully work because the versions will mismatch
- task: PythonScript@0
displayName: 'Generate directories variable for sparse checkout'
- task: PowerShell@2
displayName: Generate directories variable for sparse checkout
inputs:
scriptPath: 'eng/scripts/generate_from_source_pom.py'
arguments: '--set-pipeline-variable CheckoutDirectories --project-list $(ProjectList)'
workingDirectory: '$(System.DefaultWorkingDirectory)'
pwsh: true
filePath: $(Build.SourcesDirectory)/eng/scripts/Generate-ServiceDirectories-From-Project-List.ps1
arguments: >
-SourcesDirectory $(Build.SourcesDirectory)
-ProjectList $(ProjectList)
# Skip sparse checkout for the `azure-sdk-for-<lang>-pr` private mirrored repositories
# as we require the github service connection to be loaded.
- ${{ if not(contains(variables['Build.DefinitionName'], 'java-pr')) }}:
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml
parameters:
Paths: $(CheckoutDirectories)
Paths: $(SparseCheckoutDirectories)
SkipDefaultCheckout: true

- ${{ parameters.PreBuildSteps }}

- script: |
echo "##vso[build.addbuildtag]Scheduled"
displayName: 'Tag scheduled builds'
Expand Down Expand Up @@ -278,16 +279,18 @@ jobs:
AdditionalModules: ${{ parameters.AdditionalModules }}
JobType: 'Analyze'

- task: PythonScript@0
displayName: 'Generate directories variable for sparse checkout'
- task: PowerShell@2
displayName: Generate directories variable for sparse checkout
inputs:
scriptPath: 'eng/scripts/generate_from_source_pom.py'
arguments: '--set-pipeline-variable CheckoutDirectories --project-list $(ProjectList)'
workingDirectory: '$(System.DefaultWorkingDirectory)'
pwsh: true
filePath: $(Build.SourcesDirectory)/eng/scripts/Generate-ServiceDirectories-From-Project-List.ps1
arguments: >
-SourcesDirectory $(Build.SourcesDirectory)
-ProjectList $(ProjectList)
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml
parameters:
Paths: $(CheckoutDirectories)
Paths: $(SparseCheckoutDirectories)
SkipDefaultCheckout: true

- template: /eng/common/pipelines/templates/steps/set-test-pipeline-version.yml
Expand Down
23 changes: 19 additions & 4 deletions eng/pipelines/templates/steps/initialize-test-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ steps:
parameters:
Paths:
- '**/*.xml'
- 'sdk/${{ parameters.ServiceDirectory }}'
- ${{ if not(parameters.CheckoutRecordings) }}:
- '!sdk/**/test-recordings'
- '!sdk/**/session-records'
Expand Down Expand Up @@ -53,14 +52,30 @@ steps:
AdditionalModules: ${{ parameters.AdditionalModules }}
JobType: 'test'

# This call is used generate the sparse checkout variables for non-FromSource runs.
- task: PowerShell@2
displayName: Generate directories variable for sparse checkout
inputs:
pwsh: true
filePath: $(Build.SourcesDirectory)/eng/scripts/Generate-ServiceDirectories-From-Project-List.ps1
arguments: >
-SourcesDirectory $(Build.SourcesDirectory)
-ProjectList $(ProjectList)
condition: and(succeeded(), ne(variables['TestFromSource'], 'true'))

# The only time generate_from_source_pom.py should be used to set the SparseCheckoutDirectories
# is for FromSource runs or, in the case of code quality reports, a run that needs to build
# everything using the latest source. It'll greedily set any service directories as it figures
# out what libraries, their dependents and so on, that need to be in ClientFromSourcePom.xml
- task: PythonScript@0
displayName: 'Generate directories variable for sparse checkout'
displayName: 'Generate FromSource POM and directories for sparse checkout'
inputs:
scriptPath: 'eng/scripts/generate_from_source_pom.py'
arguments: '--set-pipeline-variable CheckoutDirectories --project-list $(ProjectList)'
arguments: '--project-list $(ProjectList)'
workingDirectory: '$(System.DefaultWorkingDirectory)'
condition: and(succeeded(), eq(variables['TestFromSource'], 'true'))

- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml
parameters:
SkipDefaultCheckout: true
Paths: $(CheckoutDirectories)
Paths: $(SparseCheckoutDirectories)
116 changes: 116 additions & 0 deletions eng/scripts/Generate-ServiceDirectories-From-Project-List.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Use case:
# Given a project list, set following environment variables in JSON format:
# 1. SparseCheckoutDirectories - This the list of sparse checkout paths that will be used by sparse-checkout.yml
# 2. ServiceDirectories - A list of ServiceDirectories.
#
# The project list should be generated by a call to generate-project-list-and-cache-maven-repository.yml
# prior to calling this script.
param(
# $(Build.SourcesDirectory) - root of the repository
[Parameter(Mandatory=$true)][string]$SourcesDirectory,
# ArtifactsList will be using ('${{ convertToJson(parameters.Artifacts) }}' | ConvertFrom-Json | Select-Object name, groupId)
[Parameter(Mandatory=$true)][array] $ProjectList
)

$StartTime = $(get-date)
. "${PSScriptRoot}/../common/scripts/common.ps1"
$Path = Resolve-Path ($PSScriptRoot + "/../../")
$script:FoundError = $false

function Build-Unreleased-List-From-File {
param([string]$versionFile)
$unreleasedList = @()
foreach($line in Get-Content $versionFile) {
if (!$line -or $line.Trim() -eq '' -or $line.StartsWith("#") -or !$line.StartsWith("unreleased_")) {
continue
} else {
$split = $line.Split(";")
if ($split.Length -ne 2) {
LogError("Malformed unreleased_ dependency line in $versionFile. line=$line")
# If there's a malformed unreleased_ dependency line in version_client.txt then
# immediately exit. This is the only place we need to immediately exit.
exit(1)
}
$temp = $split[0].Replace("unreleased_","")
$unreleasedList += $temp
}
}
return ,$unreleasedList
}

$unreleasedList = @()
# version_client is the only version file that will have unreleased_ dependencies as these
# are disallowed for data track libraries.
$unreleasedList = Build-Unreleased-List-From-File $Path\eng\versioning\version_client.txt

$sparseCheckoutDirHash = @{}
$serviceDirHash = @{}
foreach($file in Get-ChildItem -Path $SourcesDirectory -Filter pom*.xml -Recurse -Depth 3 -File) {
$xmlPomFile = $null
$xmlPomFile = New-Object xml
$xmlPomFile.Load($file.FullName)
$library = $xmlPomFile.project.groupId + ":" + $xmlPomFile.project.artifactId
$serviceDirectory = (Get-Item $file).Directory.Parent
# This if check is only necessary because resourcemanager and resourcemanagerhybrid contain the
# exact same group/artifact ids
if ($file.FullName.Split([IO.Path]::DirectorySeparatorChar) -notcontains "resourcemanagerhybrid") {
# The directories for sparse checkout and ServiceDirectories need the $SourcesDirectory
# stripped off
$tempDir = $serviceDirectory.FullName.Replace("$SourcesDirectory", "")
$tempDir = $tempDir.Replace([IO.Path]::DirectorySeparatorChar, '/')
$sparseCheckoutDirHash.Add($library, $tempDir)
if ($tempDir.StartsWith("/sdk/")) {
# Strip off the "/sdk/" to get the service directory. A ServiceDirectory will always have
# the format of /SDK/<ServiceDirectory>. Whereas sparse checkout can have other directories
# outside of /SDK, eg. /common
$tempDir = $tempDir.Replace("/sdk/", "")
$serviceDirHash.Add($library, $tempDir)
}
}
}

$sparseCheckoutDirectories = @()
$serviceDirectories = @()
$sparseCheckoutDirectories += "/sdk/parents"
foreach ($project in $ProjectList) {
if ($sparseCheckoutDirHash.ContainsKey($project)) {
$sparseCheckoutDirectories += $sparseCheckoutDirHash[$project]
$serviceDirectories += $serviceDirHash[$project]
} else {
LogError("Did not find $project in any pom files")
$script:FoundError = $true
}
}
# Unreleased_ libraries are special. They're the only case, outside of FromSource runs where
# libraries from other service directories, outside of the one we're building, need to get built.
# It's easier to just add any unreleased_ dependency service directories to the sparse checkout
# and service directory variables. The alternative requires a lot of specific pom file and
# dependent pom file processing which is messy. The result here is that we may have an extra
# service directory being sunk up when it wasn't necessary. The overall time of that sync
# is a measured in seconds which isn't expensive and far outweighs the alternative mentioned
# above.
foreach ($project in $unreleasedList) {
if ($sparseCheckoutDirHash.ContainsKey($project)) {
$sparseCheckoutDirectories += $sparseCheckoutDirHash[$project]
} else {
LogError("Did not find unreleased $project in any pom files")
$script:FoundError = $true
}
}

if ($script:FoundError) {
exit 1
}

$temp = ConvertTo-Json @($sparseCheckoutDirectories | Sort-Object | Get-Unique) -Compress
Write-Host "setting env variable SparseCheckoutDirectories = $temp"
Write-Host "##vso[task.setvariable variable=SparseCheckoutDirectories;]$temp"
$temp = ConvertTo-Json @($serviceDirectories | Sort-Object | Get-Unique) -Compress
Write-Host "setting env variable ServiceDirectories = $temp"
Write-Host "##vso[task.setvariable variable=ServiceDirectories;]$temp"
$ElapsedTime = $(get-date) - $StartTime
$TotalRunTime = "{0:HH:mm:ss}" -f ([datetime]$ElapsedTime.Ticks)
Write-Host "Total run time=$($TotalRunTime)"
55 changes: 38 additions & 17 deletions eng/scripts/generate_from_source_pom.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
# Flags
# --project-list/--pl: List of project included in the From Source run.
#
# Output:
# 1. ClientFromSourcePom.xml which is the aggregate pom required by the From Source run
# Set following environment variables in JSON format:
# 2. SparseCheckoutDirectories - This the list of sparse checkout paths that will be used by sparse-checkout.yml
# 3. ServiceDirectories - A list of ServiceDirectories.
#
# For example: To create an aggregate POM for Azure Storage
# python eng/scripts/generate_from_source_pom.py --pl com.azure:azure-storage-blob,com.azure:azure-storage-common,...
#
Expand Down Expand Up @@ -91,7 +97,7 @@ def matches_version(self, version: str, match_any_version: bool = False):
maven_xml_namespace = '{http://maven.apache.org/POM/4.0.0}'

# Function that creates the aggregate POM.
def create_from_source_pom(project_list: str, set_pipeline_variable: str, set_skip_linting_projects: str, match_any_version: bool):
def create_from_source_pom(project_list: str, set_skip_linting_projects: str, match_any_version: bool):
project_list_identifiers = project_list.split(',')

# Get the artifact identifiers from client_versions.txt to act as our source of truth.
Expand Down Expand Up @@ -129,20 +135,36 @@ def create_from_source_pom(project_list: str, set_pipeline_variable: str, set_sk

fromSourcePom.write(pom_file_end)

if set_pipeline_variable:
# The directory_path is too granular. There are build rules for some libraries that
# create empty sources/javadocs jars using the README.md. Not every library
# has a README.md and, in these cases, it uses the README.md from the root service
# directory. This will also trim the number of paths down considerably.
service_directories: Set[str] = set()
for p in source_projects:
# get the service directory, which is one level up from the library's directory
service_directory = '/'.join(p.directory_path.split('/')[0:-1])
service_directories.add(service_directory)

checkout_paths = list(sorted(service_directories))
print('setting env variable {} = {}'.format(set_pipeline_variable, checkout_paths))
print('##vso[task.setvariable variable={};]{}'.format(set_pipeline_variable, json.dumps(checkout_paths)))
# The directory_path is too granular. There are build rules for some libraries that
# create empty sources/javadocs jars using the README.md. Not every library
# has a README.md and, in these cases, it uses the README.md from the root service
# directory. This will also trim the number of paths down considerably.
sparse_checkout_directories: Set[str] = set()
service_directories: Set[str] = set()
sdk_string = "/sdk/"
for p in source_projects:
# get the service directory, which is one level up from the library's directory
sparse_checkout_directory = '/'.join(p.directory_path.split('/')[0:-1])
sparse_checkout_directories.add(sparse_checkout_directory)
# The ServiceDirectories list should only ever contain the list of service
# directories for the project list and nothing else.
if p.identifier in project_list_identifiers:
# Sparse checkout directories can contain directories that aren't service directories.
# (aka. /common). Any service directory will start with "/sdk/", everything else is
# would be attributed to supporting libraries (ex. perf-test-core).
if sdk_string in sparse_checkout_directory:
service_directory = sparse_checkout_directory.replace(sdk_string, "")
service_directories.add(service_directory)

# output the SparseCheckoutDirectories environment variable
sparse_checkout_paths = list(sorted(sparse_checkout_directories))
print('setting env variable SparseCheckoutDirectories = {}'.format(sparse_checkout_paths))
print('##vso[task.setvariable variable=SparseCheckoutDirectories;]{}'.format(json.dumps(sparse_checkout_paths)))

# output the ServiceDirectories environment variable
service_dirs = list(sorted(service_directories))
print('setting env variable ServiceDirectories = {}'.format(service_dirs))
print('##vso[task.setvariable variable=ServiceDirectories;]{}'.format(json.dumps(service_dirs)))

# Sets the DevOps variable that is used to skip certain projects during linting validation.
if set_skip_linting_projects:
Expand Down Expand Up @@ -331,14 +353,13 @@ def project_uses_client_parent(project: Project, projects: Dict[str, Project]) -
def main():
parser = argparse.ArgumentParser(description='Generated an aggregate POM for a From Source run.')
parser.add_argument('--project-list', '--pl', type=str)
parser.add_argument('--set-pipeline-variable', type=str)
parser.add_argument('--set-skip-linting-projects', type=str)
parser.add_argument('--match-any-version', action='store_true')
args = parser.parse_args()
if args.project_list == None:
raise ValueError('Missing project list.')
start_time = time.time()
create_from_source_pom(args.project_list, args.set_pipeline_variable, args.set_skip_linting_projects, args.match_any_version)
create_from_source_pom(args.project_list, args.set_skip_linting_projects, args.match_any_version)
elapsed_time = time.time() - start_time

print('Effective From Source POM File')
Expand Down

0 comments on commit a5a1f9a

Please sign in to comment.