From c623783ca6b5c850c21b0319ab1d82f12d8e3d56 Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Thu, 23 Feb 2023 13:44:39 +0100
Subject: [PATCH 01/63] update branch

---
 CHANGELOG.md | 2 +
 .../assets/schema_input.json | 3 +-
 .../pipeline-template/lib/NfcoreSchema.groovy | 528 ------------------
 .../pipeline-template/lib/WorkflowMain.groovy | 43 --
 .../modules/local/samplesheet_check.nf | 31 -
 nf_core/pipeline-template/nextflow.config | 4 +
 .../subworkflows/local/input_check.nf | 44 --
 .../pipeline-template/workflows/pipeline.nf | 36 +-
 8 files changed, 34 insertions(+), 657 deletions(-)
 delete mode 100755 nf_core/pipeline-template/lib/NfcoreSchema.groovy
 delete mode 100644 nf_core/pipeline-template/modules/local/samplesheet_check.nf
 delete mode 100644 nf_core/pipeline-template/subworkflows/local/input_check.nf

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 67e6ada896..055a477714 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@
 - Turn on automatic clean up of intermediate files in `work/` on successful pipeline completion in full-test config ([#2163](https://github.com/nf-core/tools/pull/2163)) [Contributed by @jfy133]
 - Add documentation to `usage.md` on how to use `params.yml` files, based on nf-core/ampliseq text ([#2173](https://github.com/nf-core/tools/pull/2173/)) [Contributed by @jfy133, @d4straub]
 - Make jobs automatically resubmit for a much wider range of exit codes (now `104` and `130..145`) ([#2170](https://github.com/nf-core/tools/pull/2170))
+- Remove schema validation from `lib` folder and use Nextflow nf-validation plugin instead ([#1771](https://github.com/nf-core/tools/pull/1771/))
+- Generate input channel from input file using Nextflow nf-validation plugin ([#1771](https://github.com/nf-core/tools/pull/1771/))

 ### Linting

diff --git a/nf_core/pipeline-template/assets/schema_input.json b/nf_core/pipeline-template/assets/schema_input.json
index 509048bd8a..10329ebb63 100644
--- a/nf_core/pipeline-template/assets/schema_input.json
+++ b/nf_core/pipeline-template/assets/schema_input.json
@@ -10,7 +10,8 @@
         "sample": {
             "type": "string",
             "pattern": "^\\S+$",
-            "errorMessage": "Sample name must be provided and cannot contain spaces"
+            "errorMessage": "Sample name must be provided and cannot contain spaces",
+            "meta": ["id"]
         },
         "fastq_1": {
             "type": "string",

diff --git a/nf_core/pipeline-template/lib/NfcoreSchema.groovy b/nf_core/pipeline-template/lib/NfcoreSchema.groovy
deleted file mode 100755
index 33cd4f6e8d..0000000000
--- a/nf_core/pipeline-template/lib/NfcoreSchema.groovy
+++ /dev/null
@@ -1,528 +0,0 @@
-//
-// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/nf_core/pipeline-template/lib/WorkflowMain.groovy b/nf_core/pipeline-template/lib/WorkflowMain.groovy index 05db418b2d..ca9d9780e4 100755 --- a/nf_core/pipeline-template/lib/WorkflowMain.groovy +++ b/nf_core/pipeline-template/lib/WorkflowMain.groovy @@ -18,45 +18,10 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params, log) { - {% if igenomes -%} - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - {% else -%} - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --fasta reference.fa -profile docker" - {% endif -%} - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - // Print workflow version and exit on 
--version if (params.version) { String workflow_version = NfcoreTemplate.version(workflow) @@ -64,14 +29,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/nf_core/pipeline-template/modules/local/samplesheet_check.nf b/nf_core/pipeline-template/modules/local/samplesheet_check.nf deleted file mode 100644 index 5d25800775..0000000000 --- a/nf_core/pipeline-template/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in {{ name }}/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config index cc591a54e5..1c11a8b715 100644 --- a/nf_core/pipeline-template/nextflow.config +++ b/nf_core/pipeline-template/nextflow.config @@ -58,6 +58,10 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + fail_unrecognised_params = false + lenient_mode = false + } // Load base.config by default for all pipelines diff --git a/nf_core/pipeline-template/subworkflows/local/input_check.nf b/nf_core/pipeline-template/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87fb7..0000000000 --- a/nf_core/pipeline-template/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return 
fastq_meta -} diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf index 9bcc0086b5..abc79fa749 100644 --- a/nf_core/pipeline-template/workflows/pipeline.nf +++ b/nf_core/pipeline-template/workflows/pipeline.nf @@ -4,9 +4,29 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { validateParameters } from 'plugin/nf-validation' +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { validateAndConvertSamplesheet } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print help message if needed +if (params.help) { + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} // Validate input parameters +validateParameters() + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation + Workflow{{ short_name[0]|upper }}{{ short_name[1:] }}.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below @@ -14,9 +34,6 @@ Workflow{{ short_name[0]|upper }}{{ short_name[1:] }}.initialise(params, log) def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -37,7 +54,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -66,18 +82,18 @@ workflow {{ short_name|upper }} { ch_versions = Channel.empty() // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // Create input channel from input file // - INPUT_CHECK ( - ch_input + ch_input = Channel.validateAndConvertSamplesheet( + file(params.input, checkIfExists:true), + file("${projectDir}/assets/schema_input.json", checkIfExists:true) ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) // // MODULE: Run FastQC // FASTQC ( - INPUT_CHECK.out.reads + ch_input ) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) From c29f9eb6418714f209a8a0babbda4f444e6d7f73 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 23 Feb 2023 14:04:16 +0100 Subject: [PATCH 02/63] fix linting --- nf_core/lint/files_exist.py | 1 - nf_core/lint/files_unchanged.py | 1 - 2 files changed, 2 deletions(-) diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index eb8c04916a..840d036827 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -52,7 +52,6 @@ def files_exist(self): docs/README.md docs/usage.md lib/nfcore_external_java_deps.jar - lib/NfcoreSchema.groovy lib/NfcoreTemplate.groovy lib/Utils.groovy lib/WorkflowMain.groovy diff --git a/nf_core/lint/files_unchanged.py b/nf_core/lint/files_unchanged.py index c0be64d0d7..2bbcb4f61e 100644 --- a/nf_core/lint/files_unchanged.py +++ b/nf_core/lint/files_unchanged.py @@ -40,7 +40,6 @@ def files_unchanged(self): docs/images/nf-core-PIPELINE_logo_dark.png docs/README.md' lib/nfcore_external_java_deps.jar - lib/NfcoreSchema.groovy lib/NfcoreTemplate.groovy ['LICENSE', 'LICENSE.md', 'LICENCE', 'LICENCE.md'], # NB: British / American spelling From 566bb582f14934197988249353cd0d0af2a09aed Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 23 Feb 2023 16:06:33 +0100 Subject: [PATCH 03/63] remove check_samplesheet.py --- .../bin/check_samplesheet.py | 262 ------------------ 1 file changed, 262 deletions(-) delete mode 100755 nf_core/pipeline-template/bin/check_samplesheet.py diff --git a/nf_core/pipeline-template/bin/check_samplesheet.py b/nf_core/pipeline-template/bin/check_samplesheet.py deleted file mode 100755 index 11b155723a..0000000000 --- a/nf_core/pipeline-template/bin/check_samplesheet.py +++ /dev/null @@ -1,262 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). 
- second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. 
The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit(1) - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) From 0ccb9bcc2571985df0b54c76672736a859136f0e Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 16 May 2023 13:51:14 +0200 Subject: [PATCH 04/63] remove NFcoreSchema.groovy from pipeline linting --- nf_core/lint/files_exist.py | 1 - nf_core/lint/files_unchanged.py | 1 - 2 files changed, 2 deletions(-) diff --git a/nf_core/lint/files_exist.py b/nf_core/lint/files_exist.py index 840d036827..02baae7db8 100644 --- a/nf_core/lint/files_exist.py +++ b/nf_core/lint/files_exist.py @@ -160,7 +160,6 @@ def files_exist(self): [os.path.join("docs", "README.md")], [os.path.join("docs", "usage.md")], [os.path.join("lib", "nfcore_external_java_deps.jar")], - [os.path.join("lib", "NfcoreSchema.groovy")], [os.path.join("lib", "NfcoreTemplate.groovy")], [os.path.join("lib", "Utils.groovy")], [os.path.join("lib", "WorkflowMain.groovy")], diff --git a/nf_core/lint/files_unchanged.py b/nf_core/lint/files_unchanged.py index 2bbcb4f61e..2b64d62638 100644 --- a/nf_core/lint/files_unchanged.py +++ b/nf_core/lint/files_unchanged.py @@ -104,7 +104,6 @@ def files_unchanged(self): [os.path.join("docs", "images", f"nf-core-{short_name}_logo_dark.png")], [os.path.join("docs", "README.md")], [os.path.join("lib", "nfcore_external_java_deps.jar")], - [os.path.join("lib", "NfcoreSchema.groovy")], [os.path.join("lib", "NfcoreTemplate.groovy")], ] files_partial = [ From 8b4d0a599511e7ac097f0d886623a900996f12ec Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 16 May 2023 13:58:33 +0200 Subject: [PATCH 05/63] update validation params --- nf_core/pipeline-template/nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config index 48ae5111df..3ac14ea41f 100644 --- a/nf_core/pipeline-template/nextflow.config +++ b/nf_core/pipeline-template/nextflow.config @@ -59,8 +59,8 @@ params { max_time = '240.h' // Schema validation default options - fail_unrecognised_params = false - lenient_mode = false + failUnrecognisedParams = false + lenientMode = false } From 8c6621314b117502fb76b5e7539681328f3de7f9 Mon Sep 17 00:00:00 
2001 From: mirpedrol Date: Mon, 22 May 2023 13:00:10 +0200 Subject: [PATCH 06/63] update plugin version --- nf_core/pipeline-template/nextflow.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config index 3ac14ea41f..5261b85570 100644 --- a/nf_core/pipeline-template/nextflow.config +++ b/nf_core/pipeline-template/nextflow.config @@ -183,6 +183,11 @@ profiles { docker.registry = 'quay.io' podman.registry = 'quay.io' +// Nextflow plugins +plugins { + id 'nf-validation@0.1.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + {% if igenomes %} // Load igenomes.config if required if (!params.igenomes_ignore) { From bc75e05fa5ac29b62a0ca3aae979564b13893e5e Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 23 May 2023 11:02:58 +0200 Subject: [PATCH 07/63] use fromSamplesheet() --- nf_core/pipeline-template/conf/modules.config | 8 -------- nf_core/pipeline-template/pyproject.toml | 2 +- nf_core/pipeline-template/workflows/pipeline.nf | 13 +++---------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/nf_core/pipeline-template/conf/modules.config b/nf_core/pipeline-template/conf/modules.config index da58a5d881..2cb3b13049 100644 --- a/nf_core/pipeline-template/conf/modules.config +++ b/nf_core/pipeline-template/conf/modules.config @@ -18,14 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: FASTQC { ext.args = '--quiet' } diff --git a/nf_core/pipeline-template/pyproject.toml b/nf_core/pipeline-template/pyproject.toml index 0d62beb6f9..bc01239b3e 100644 --- a/nf_core/pipeline-template/pyproject.toml +++ b/nf_core/pipeline-template/pyproject.toml @@ -1,4 +1,4 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Black. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
[tool.black]
 line-length = 120

diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
index abc79fa749..d63e2b719d 100644
--- a/nf_core/pipeline-template/workflows/pipeline.nf
+++ b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -4,11 +4,7 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-include { validateParameters } from 'plugin/nf-validation'
-include { paramsHelp } from 'plugin/nf-validation'
-include { paramsSummaryLog } from 'plugin/nf-validation'
-include { paramsSummaryMap } from 'plugin/nf-validation'
-include { validateAndConvertSamplesheet } from 'plugin/nf-validation'
+include { validateParameters; paramsHelp paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'

 def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
 def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
@@ -82,12 +78,9 @@ workflow {{ short_name|upper }} {
     ch_versions = Channel.empty()

     //
-    // Create input channel from input file
+    // Create input channel from input file provided through params.input
     //
-    ch_input = Channel.validateAndConvertSamplesheet(
-        file(params.input, checkIfExists:true),
-        file("${projectDir}/assets/schema_input.json", checkIfExists:true)
-    )
+    ch_input = Channel.fromSamplesheet("input")

     //
     // MODULE: Run FastQC

From cd9dadfcde8fe65d310e3494350a32c6ff1ebeb3 Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Tue, 23 May 2023 14:52:18 +0200
Subject: [PATCH 08/63] refactoring

---
 nf_core/pipeline-template/nextflow.config | 4 ++--
 nf_core/pipeline-template/workflows/pipeline.nf | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config
index 5261b85570..30c9a9d451 100644
--- a/nf_core/pipeline-template/nextflow.config
+++ b/nf_core/pipeline-template/nextflow.config
@@ -59,8 +59,8 @@ params {
     max_time = '240.h'

     // Schema validation default options
-    failUnrecognisedParams = false
-    lenientMode = false
+    validationFailUnrecognisedParams = false
+    validationLenientMode = false

 }

diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
index d63e2b719d..c4ad82417e 100644
--- a/nf_core/pipeline-template/workflows/pipeline.nf
+++ b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -4,7 +4,7 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-include { validateParameters; paramsHelp paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
+include { validateParameters; paramsHelp; paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
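Note on the new input handling introduced in patches 07-08: `Channel.fromSamplesheet("input")` resolves `params.input` against `assets/schema_input.json`, so the channel shape is dictated by the schema rather than by a local subworkflow. A minimal sketch of what the template workflow now receives, assuming the nf-validation version pinned in the plugins block and the template's default three-column samplesheet (the exact tuple layout comes from the plugin, not from these patches):

    // Sketch only: columns flagged with "meta" (here "sample" -> meta.id) are
    // gathered into a meta map; the remaining columns follow in schema order.
    ch_input = Channel.fromSamplesheet("input")
    ch_input.view()
    // e.g. for the row WT_REP1,WT_REP1_R1.fastq.gz,WT_REP1_R2.fastq.gz:
    // [ [id:WT_REP1], /path/to/WT_REP1_R1.fastq.gz, /path/to/WT_REP1_R2.fastq.gz ]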
From fe6da9b55baa4d8fc2ef078b3682ec8ad6510aa8 Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Tue, 23 May 2023 16:58:53 +0200
Subject: [PATCH 09/63] refactoring according to nf-validation params

---
 nf_core/lint/nextflow_config.py | 4 +---
 nf_core/pipeline-template/nextflow.config | 7 ++++---
 nf_core/pipeline-template/nextflow_schema.json | 2 +-
 nf_core/pipeline-template/workflows/pipeline.nf | 5 ++++-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/nf_core/lint/nextflow_config.py b/nf_core/lint/nextflow_config.py
index af018331f0..0b45b55b7c 100644
--- a/nf_core/lint/nextflow_config.py
+++ b/nf_core/lint/nextflow_config.py
@@ -62,7 +62,7 @@ def nextflow_config(self):

     * Should always be set to default value:
       ``https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}``

-    * ``params.show_hidden_params``
+    * ``params.validationShowHiddenParams``

         * Determines whether boilerplate params are showed by schema. Set to ``false`` by default
@@ -130,8 +130,6 @@ def nextflow_config(self):
         ["process.time"],
         ["params.outdir"],
         ["params.input"],
-        ["params.show_hidden_params"],
-        ["params.schema_ignore_params"],
     ]
     # Throw a warning if these are missing
     config_warn = [
diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config
index 30c9a9d451..20b831163a 100644
--- a/nf_core/pipeline-template/nextflow.config
+++ b/nf_core/pipeline-template/nextflow.config
@@ -38,9 +38,7 @@ params {
     hook_url = null
     help = false
     version = false
-    validate_params = true
-    show_hidden_params = false
-    schema_ignore_params = 'genomes'
+
     {% if nf_core_configs %}
     // Config options
@@ -61,6 +59,9 @@
     // Schema validation default options
     validationFailUnrecognisedParams = false
     validationLenientMode = false
+    validate_params = true
+    schema_ignore_params = 'genomes'
+    validationShowHiddenParams = false

 }
diff --git a/nf_core/pipeline-template/nextflow_schema.json b/nf_core/pipeline-template/nextflow_schema.json
index 2743562d6c..2c204f386f 100644
--- a/nf_core/pipeline-template/nextflow_schema.json
+++ b/nf_core/pipeline-template/nextflow_schema.json
@@ -257,7 +257,7 @@
             "fa_icon": "fas fa-check-square",
             "hidden": true
         },
-        "show_hidden_params": {
+        "validationShowHiddenParams": {
             "type": "boolean",
             "fa_icon": "far fa-eye-slash",
             "description": "Show all params when using `--help`",
diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
index c4ad82417e..30e0c34c68 100644
--- a/nf_core/pipeline-template/workflows/pipeline.nf
+++ b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -18,7 +18,10 @@ if (params.help) {
 }

 // Validate input parameters
-validateParameters()
+if (params.validate_params) {
+    validateParameters()
+}
+

 // Print parameter summary log to screen
 log.info logo + paramsSummaryLog(workflow) + citation

From 6dec44c4327d3f03b6b4a807b5d2dc8cab68505b Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Thu, 25 May 2023 13:15:56 +0200
Subject: [PATCH 10/63] update nextflow_schema.json

---
 nf_core/pipeline-template/nextflow_schema.json | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/nf_core/pipeline-template/nextflow_schema.json b/nf_core/pipeline-template/nextflow_schema.json
index 2c204f386f..d3ad943a5d 100644
--- a/nf_core/pipeline-template/nextflow_schema.json
+++ b/nf_core/pipeline-template/nextflow_schema.json
@@ -263,6 +263,20 @@
             "description": "Show all params when using `--help`",
             "hidden": true,
             "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+        },
+        "validationFailUnrecognisedParams": {
+            "type": "boolean",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters fails when an unrecognised parameter is found.",
+            "hidden": true,
+            "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
+        },
+        "validationLenientMode": {
+            "type": "boolean",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters in lenient mode.",
+            "hidden": true,
+            "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+        }
     }
 }
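With patches 08-10 in place, the template exposes three plugin-level switches alongside the familiar `validate_params`. A hedged sketch of how a user might tune them in a custom config, using only the names and semantics documented in the schema entries above:

    // Sketch: user-side overrides for the nf-validation options.
    params {
        validate_params                  = true  // run validateParameters() at startup
        validationFailUnrecognisedParams = true  // turn the unrecognised-parameter warning into an error
        validationLenientMode            = true  // accept e.g. "8" where an integer is declared
        validationShowHiddenParams       = false // keep boilerplate params out of --help
    }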
From 39ad411426e84149a5a77ffd73b55122659e12cf Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Thu, 25 May 2023 13:16:09 +0200
Subject: [PATCH 11/63] fix padding

---
 nf_core/pipeline-template/nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config
index 20b831163a..0aff59624b 100644
--- a/nf_core/pipeline-template/nextflow.config
+++ b/nf_core/pipeline-template/nextflow.config
@@ -186,7 +186,7 @@ podman.registry = 'quay.io'

 // Nextflow plugins
 plugins {
-    id 'nf-validation@0.1.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id 'nf-validation@0.1.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }

From 9b08dd9cb239260b5e969c562da678ab40acf86a Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Fri, 26 May 2023 13:07:36 +0200
Subject: [PATCH 12/63] add link to nf-validation documentation

---
 nf_core/pipeline-template/workflows/pipeline.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
index 30e0c34c68..d55c375c2b 100644
--- a/nf_core/pipeline-template/workflows/pipeline.nf
+++ b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -83,6 +83,7 @@ workflow {{ short_name|upper }} {
     //
     // Create input channel from input file provided through params.input
     //
+    // TODO: For more information on how to format your samplesheet and assets/schema_input.json, please refer to nf-validation plugin documentation https://nextflow-io.github.io/nf-validation/
     ch_input = Channel.fromSamplesheet("input")

     //
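The TODO above makes `assets/schema_input.json` the single contract for input handling, so per-row logic that previously lived in the deleted INPUT_CHECK subworkflow can be expressed as a plain map over the validated channel. A minimal sketch, assuming the template's default three columns and the `meta: ["id"]` flag from patch 01 (the `single_end` derivation mirrors the removed create_fastq_channel() helper and is illustrative only, not part of this series):

    // Sketch: branch on whether fastq_2 was left empty in the samplesheet.
    ch_reads = Channel.fromSamplesheet("input")
        .map { meta, fastq_1, fastq_2 ->
            fastq_2 ? [ meta + [single_end: false], [ fastq_1, fastq_2 ] ]
                    : [ meta + [single_end: true],  [ fastq_1 ] ]
        }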
From fd5f963103080ba27d0e8a54ff45fe59e7f937a4 Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Tue, 30 May 2023 09:09:39 +0200
Subject: [PATCH 13/63] refactor schema_ignore_params to
 validationSchemaIgnoreParams

---
 nf_core/lint/nextflow_config.py | 2 +-
 nf_core/pipeline-template/nextflow.config | 4 ++--
 nf_core/schema.py | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/nf_core/lint/nextflow_config.py b/nf_core/lint/nextflow_config.py
index 0b45b55b7c..f317410cdd 100644
--- a/nf_core/lint/nextflow_config.py
+++ b/nf_core/lint/nextflow_config.py
@@ -66,7 +66,7 @@ def nextflow_config(self):

     * Determines whether boilerplate params are showed by schema. Set to ``false`` by default

-    * ``params.schema_ignore_params``
+    * ``params.validationSchemaIgnoreParams``

         * A comma separated string of inputs the schema validation should ignore.
diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config
index 0aff59624b..73ed72425f 100644
--- a/nf_core/pipeline-template/nextflow.config
+++ b/nf_core/pipeline-template/nextflow.config
@@ -59,9 +59,9 @@ params {
     // Schema validation default options
     validationFailUnrecognisedParams = false
     validationLenientMode = false
-    validate_params = true
-    schema_ignore_params = 'genomes'
+    validationSchemaIgnoreParams = 'genomes'
     validationShowHiddenParams = false
+    validate_params = true
 }
diff --git a/nf_core/schema.py b/nf_core/schema.py
index ba88e762ea..75dbebce04 100644
--- a/nf_core/schema.py
+++ b/nf_core/schema.py
@@ -245,8 +245,8 @@ def validate_default_params(self):
         self.get_wf_params()

         # Collect parameters to ignore
-        if "schema_ignore_params" in self.pipeline_params:
-            params_ignore = self.pipeline_params.get("schema_ignore_params", "").strip("\"'").split(",")
+        if "validationSchemaIgnoreParams" in self.pipeline_params:
+            params_ignore = self.pipeline_params.get("validationSchemaIgnoreParams", "").strip("\"'").split(",")
         else:
             params_ignore = []
@@ -759,8 +759,8 @@ def add_schema_found_configs(self):
         Add anything that's found in the Nextflow params that's missing in the pipeline schema
         """
         params_added = []
-        params_ignore = self.pipeline_params.get("schema_ignore_params", "").strip("\"'").split(",")
-        params_ignore.append("schema_ignore_params")
+        params_ignore = self.pipeline_params.get("validationSchemaIgnoreParams", "").strip("\"'").split(",")
+        params_ignore.append("validationSchemaIgnoreParams")
         for p_key, p_val in self.pipeline_params.items():
             # Check if key is in schema parameters
             if p_key not in self.schema_params and p_key not in params_ignore:

From 0658d0b465b5f2c5991fc2fceaa625871b2945fa Mon Sep 17 00:00:00 2001
From: Matthias Zepper
Date: Wed, 5 Oct 2022 17:04:29 +0200
Subject: [PATCH 14/63] Add -t / --tower option to 'nf-core download'.

---
 nf_core/__main__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nf_core/__main__.py b/nf_core/__main__.py
index 735eb99e04..725bcfc895 100644
--- a/nf_core/__main__.py
+++ b/nf_core/__main__.py
@@ -215,6 +215,7 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all
     "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type"
 )
 @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files")
+@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for sequeralabs® Nextflow Tower")
 @click.option(
     "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images"
 )
 @click.option(
     help="Don't / do copy images to the output directory and set 'singularity.cacheDir' in workflow",
 )
 @click.option("-p", "--parallel-downloads", type=int, default=4, help="Number of parallel image downloads")
-def download(pipeline, revision, outdir, compress, force, container, singularity_cache_only, parallel_downloads):
+def download(pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads):
     """
     Download a pipeline, nf-core/configs and pipeline singularity images.
@@ -233,7 +234,7 @@ def download(pipeline, revision, outdir, compress, force, container, singularity from nf_core.download import DownloadWorkflow dl = DownloadWorkflow( - pipeline, revision, outdir, compress, force, container, singularity_cache_only, parallel_downloads + pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads ) dl.download_workflow() From 4d7cfd9e053a6db7a644a0237a27d6c30bb9dd4e Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 5 Dec 2022 18:04:40 +0100 Subject: [PATCH 15/63] Intermediate commit --- nf_core/download.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nf_core/download.py b/nf_core/download.py index cd36c65c4a..8ecd2fd85c 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -73,6 +73,7 @@ class DownloadWorkflow: pipeline (str): A nf-core pipeline name. revision (str): The workflow revision to download, like `1.0`. Defaults to None. singularity (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. + tower (bool): Flag, to customize the download for Nextflow Tower (convert to git bare repo). Defaults to False. outdir (str): Path to the local download directory. Defaults to None. """ @@ -83,6 +84,7 @@ def __init__( outdir=None, compress_type=None, force=False, + tower=False, container=None, singularity_cache_only=False, parallel_downloads=4, @@ -93,6 +95,7 @@ def __init__( self.output_filename = None self.compress_type = compress_type self.force = force + self.tower = tower self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads From bc77266ce2c9227bafeb00a365a81a7aee7e44cd Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 15 Feb 2023 16:04:41 +0100 Subject: [PATCH 16/63] Implement logic for the Tower download in DownloadWorkflow:download_workflow() --- nf_core/download.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8ecd2fd85c..c3451818e8 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -135,7 +135,10 @@ def download_workflow(self): summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") # Set an output filename now that we have the outdir - if self.compress_type is not None: + if self.tower: + self.output_filename = f"{self.outdir}.git" + summary_log.append(f"Output file (Tower enabled): '{self.output_filename}'") + elif self.compress_type is not None: self.output_filename = f"{self.outdir}.{self.compress_type}" summary_log.append(f"Output file: '{self.output_filename}'") else: @@ -160,6 +163,13 @@ def download_workflow(self): # Summary log log.info("Saving '{}'\n {}".format(self.pipeline, "\n ".join(summary_log))) + # Actually download the workflow + if not self.tower: + self.download_workflow_classic() + else: + self.download_workflow_tower() + + def download_workflow_classic(self): # Download the pipeline files log.info("Downloading workflow files from GitHub") self.download_wf_files() @@ -188,6 +198,15 @@ def download_workflow(self): log.info("Compressing download..") self.compress_download() + def download_workflow_tower(self): + # Create a bare-cloned git repository of the workflow that includes the configs + log.info("Cloning workflow files from GitHub") + self.clone_wf_files() + + # Download the centralised configs + log.info("Downloading centralised configs from GitHub") + self.download_configs() + def 
prompt_pipeline_name(self):
        """Prompt for the pipeline name if not set with a flag"""

From 66cb1a0d8cd8af5e89ec5ec866f773997b3a821b Mon Sep 17 00:00:00 2001
From: Matthias Zepper
Date: Wed, 15 Feb 2023 18:40:22 +0100
Subject: [PATCH 17/63] Extend ModulesRepo:setup_local_repo() with a
 cache_only bool, so we can use ModulesRepo as superclass to the new
 WorkflowRepo.

---
 nf_core/modules/modules_repo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py
index 5f77148867..0f5db4bc52 100644
--- a/nf_core/modules/modules_repo.py
+++ b/nf_core/modules/modules_repo.py
@@ -11,7 +11,7 @@
 import nf_core.modules.modules_json
 import nf_core.modules.modules_utils
-from nf_core.utils import NFCORE_DIR, load_tools_config
+from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config
 
 log = logging.getLogger(__name__)
 
@@ -166,7 +166,7 @@ def verify_sha(self, prompt, sha):
 
         return True
 
-    def setup_local_repo(self, remote, branch, hide_progress=True):
+    def setup_local_repo(self, remote, branch, hide_progress=True, cache_only=False):
         """
         Sets up the local git repository. If the repository has been cloned previously, it
         returns a git.Repo object of that clone. Otherwise it tries to clone the repository from
         the provided remote URL and returns a git.Repo of the new clone.
@@ -177,7 +177,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True, cache_only=False)
             branch (str): name of branch to use
         Sets self.repo
         """
-        self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname)
+        self.local_repo_dir = os.path.join(NFCORE_DIR if not cache_only else NFCORE_CACHE_DIR, self.fullname)
         try:
             if not os.path.exists(self.local_repo_dir):
                 try:

From 60d8309437c5c9b8ccffab047c44efa00b53bf97 Mon Sep 17 00:00:00 2001
From: Matthias Zepper
Date: Wed, 15 Feb 2023 20:11:22 +0100
Subject: [PATCH 18/63] Create WorkflowRepo subclass of ModulesRepo and
 initialise local clone.
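
The idea is to reuse the clone/fetch machinery that ModulesRepo already
has, but redirect the clone into the cache directory. A minimal sketch of
the intended relationship (simplified; the names mirror the diff below,
but the real implementation also wires up progress bars and error
handling):

    import nf_core.modules.modules_utils
    from nf_core.modules.modules_repo import ModulesRepo

    class WorkflowRepo(ModulesRepo):
        """A locally cached clone of an entire pipeline repository."""

        def __init__(self, remote_url, branch=None, in_cache=True):
            self.remote_url = remote_url
            # e.g. "git@github.com:nf-core/rnaseq.git" -> "nf-core/rnaseq"
            self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(remote_url)
            # Inherited from ModulesRepo; with in_cache=True the clone is
            # placed under the cache directory instead of $HOME/.nfcore
            self.setup_local_repo(remote_url, branch, in_cache=in_cache)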
--- nf_core/__main__.py | 2 +- nf_core/download.py | 41 +++++++++++++++++++++++++++++---- nf_core/modules/modules_repo.py | 4 ++-- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 725bcfc895..521454eb99 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -215,7 +215,7 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") -@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for sequeralabs® Nextflow Tower") +@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for seqeralabs® Nextflow Tower") @click.option( "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images" ) diff --git a/nf_core/download.py b/nf_core/download.py index c3451818e8..5c2ff1607d 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,6 +3,7 @@ from __future__ import print_function import concurrent.futures +from git import Repo import io import logging import os @@ -23,6 +24,7 @@ import nf_core import nf_core.list import nf_core.utils +from nf_core.modules import ModulesRepo # to create subclass WorkflowRepo log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -137,13 +139,15 @@ def download_workflow(self): # Set an output filename now that we have the outdir if self.tower: self.output_filename = f"{self.outdir}.git" - summary_log.append(f"Output file (Tower enabled): '{self.output_filename}'") + summary_log.append(f"Output file: '{self.output_filename}'") elif self.compress_type is not None: self.output_filename = f"{self.outdir}.{self.compress_type}" summary_log.append(f"Output file: '{self.output_filename}'") else: summary_log.append(f"Output directory: '{self.outdir}'") + summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") + # Check that the outdir doesn't already exist if os.path.exists(self.outdir): if not self.force: @@ -170,6 +174,7 @@ def download_workflow(self): self.download_workflow_tower() def download_workflow_classic(self): + """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" # Download the pipeline files log.info("Downloading workflow files from GitHub") self.download_wf_files() @@ -199,9 +204,10 @@ def download_workflow_classic(self): self.compress_download() def download_workflow_tower(self): - # Create a bare-cloned git repository of the workflow that includes the configs + """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Cloning workflow files from GitHub") - self.clone_wf_files() + + self.workflow_repo = WorkflowRepo(remote_url=f"git@github.com:{self.pipeline}.git", branch=self.revision) # Download the centralised configs log.info("Downloading centralised configs from GitHub") @@ -816,5 +822,32 @@ def compress_download(self): log.debug(f"Deleting uncompressed files: '{self.outdir}'") shutil.rmtree(self.outdir) - # Caclualte md5sum for output file + # Calculate md5sum for output file log.info(f"MD5 checksum for '{self.output_filename}': [blue]{nf_core.utils.file_md5(self.output_filename)}[/]") + + +class WorkflowRepo(ModulesRepo): + """ + An object to store details about a locally 
cached workflow repository. + + Important Attributes: + fullname: The full name of the repository, ``nf-core/{self.pipelinename}``. + local_repo_dir (str): The local directory, where the workflow is cloned into. Defaults to ``$HOME/.cache/nf-core/nf-core/{self.pipeline}``. + + """ + + def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False, in_cache=True): + """ + Initializes the object and clones the workflows git repository if it is not already present + + Args: + remote_url (str, optional): The URL of the remote repository. Defaults to None. + branch (str, optional): The branch to clone. Defaults to None. + no_pull (bool, optional): Whether to skip the pull step. Defaults to False. + hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. + """ + self.remote_url = remote_url + self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) + + self.setup_local_repo(remote_url, branch, hide_progress, in_cache=in_cache) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 0f5db4bc52..23f62bdee2 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -166,7 +166,7 @@ def verify_sha(self, prompt, sha): return True - def setup_local_repo(self, remote, branch, hide_progress=True, cache_only=False): + def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -177,7 +177,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True, cache_only=False) branch (str): name of branch to use Sets self.repo """ - self.local_repo_dir = os.path.join(NFCORE_DIR if not cache_only else NFCORE_CACHE_DIR, self.fullname) + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) try: if not os.path.exists(self.local_repo_dir): try: From a431dfdbcd1121ad6110da36b75d02278f32b551 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 15 Feb 2023 23:03:02 +0100 Subject: [PATCH 19/63] TypeError: HEAD is a detached symbolic reference as it points to ... 
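
Checking the workflow clone out at a pinned revision leaves it in detached
HEAD state, and GitPython then refuses to resolve the active branch. The
failure can be reproduced with GitPython alone (the clone path is a
placeholder):

    import git

    repo = git.Repo("/path/to/any/local/clone")  # placeholder path
    repo.git.checkout(repo.head.commit.hexsha)   # pin HEAD to a commit -> detached
    print(repo.head.is_detached)                 # True
    try:
        repo.active_branch.tracking_branch()     # as called in setup_local_repo()
    except TypeError as e:
        print(e)  # "HEAD is a detached symbolic reference as it points to ..."

The property added below is a first attempt at working around that call in
the inherited setup_local_repo().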
--- nf_core/download.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 5c2ff1607d..74b8abf3d8 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -205,13 +205,13 @@ def download_workflow_classic(self): def download_workflow_tower(self): """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" - log.info("Cloning workflow files from GitHub") + log.info("Collecting workflow from GitHub") self.workflow_repo = WorkflowRepo(remote_url=f"git@github.com:{self.pipeline}.git", branch=self.revision) + import pbb - # Download the centralised configs + pdb.set_trace() log.info("Downloading centralised configs from GitHub") - self.download_configs() def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -851,3 +851,14 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) self.setup_local_repo(remote_url, branch, hide_progress, in_cache=in_cache) + + @property + def active_branch(self): + """ + In ModuleRepo.setup_local_repo(), self.repo.active_branch.tracking_branch() is called in line 227. + For a WorkflowRepo, this raises a TypeError ``HEAD is a detached symbolic reference as it points to {commit hash}`` + + This property shadows the call and seemed the cleanest solution to prevent excessive code duplication. + Otherwise, I would have needed to define a setup_local_repo() method for the WorkflowRepo class. + """ + pass # TODO From 3196e41ead669315b246f6f93f4489e053aba2f7 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:11:00 +0100 Subject: [PATCH 20/63] Split history ./modules/modules_repo.py to synced_repo.py --- nf_core/{modules/modules_repo.py => temp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename nf_core/{modules/modules_repo.py => temp} (100%) diff --git a/nf_core/modules/modules_repo.py b/nf_core/temp similarity index 100% rename from nf_core/modules/modules_repo.py rename to nf_core/temp From d0dea842908f37e2bad994de4a6d49c76e43afa1 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:11:00 +0100 Subject: [PATCH 21/63] Split history ./modules/modules_repo.py to synced_repo.py --- nf_core/synced_repo.py | 498 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 498 insertions(+) create mode 100644 nf_core/synced_repo.py diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py new file mode 100644 index 0000000000..23f62bdee2 --- /dev/null +++ b/nf_core/synced_repo.py @@ -0,0 +1,498 @@ +import filecmp +import logging +import os +import shutil +from pathlib import Path + +import git +import rich +import rich.progress +from git.exc import GitCommandError, InvalidGitRepositoryError + +import nf_core.modules.modules_json +import nf_core.modules.modules_utils +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config + +log = logging.getLogger(__name__) + +# Constants for the nf-core/modules repo used throughout the module files +NF_CORE_MODULES_NAME = "nf-core" +NF_CORE_MODULES_REMOTE = "https://github.com/nf-core/modules.git" +NF_CORE_MODULES_DEFAULT_BRANCH = "master" + + +class RemoteProgressbar(git.RemoteProgress): + """ + An object to create a progressbar for when doing an operation with the remote. 
+ Note that an initialized rich Progress (progress bar) object must be past + during initialization. + """ + + def __init__(self, progress_bar, repo_name, remote_url, operation): + """ + Initializes the object and adds a task to the progressbar passed as 'progress_bar' + + Args: + progress_bar (rich.progress.Progress): A rich progress bar object + repo_name (str): Name of the repository the operation is performed on + remote_url (str): Git URL of the repository the operation is performed on + operation (str): The operation performed on the repository, i.e. 'Pulling', 'Cloning' etc. + """ + super().__init__() + self.progress_bar = progress_bar + self.tid = self.progress_bar.add_task( + f"{operation} from [bold green]'{repo_name}'[/bold green] ([link={remote_url}]{remote_url}[/link])", + start=False, + state="Waiting for response", + ) + + def update(self, op_code, cur_count, max_count=None, message=""): + """ + Overrides git.RemoteProgress.update. + Called every time there is a change in the remote operation + """ + if not self.progress_bar.tasks[self.tid].started: + self.progress_bar.start_task(self.tid) + self.progress_bar.update( + self.tid, total=max_count, completed=cur_count, state=f"{cur_count / max_count * 100:.1f}%" + ) + + +class ModulesRepo: + """ + An object to store details about the repository being used for modules. + + Used by the `nf-core modules` top-level command with -r and -b flags, + so that this can be used in the same way by all sub-commands. + + We keep track of the pull-status of the different installed repos in + the static variable local_repo_status. This is so we don't need to + pull a remote several times in one command. + """ + + local_repo_statuses = {} + no_pull_global = False + + @staticmethod + def local_repo_synced(repo_name): + """ + Checks whether a local repo has been cloned/pull in the current session + """ + return ModulesRepo.local_repo_statuses.get(repo_name, False) + + @staticmethod + def update_local_repo_status(repo_name, up_to_date): + """ + Updates the clone/pull status of a local repo + """ + ModulesRepo.local_repo_statuses[repo_name] = up_to_date + + @staticmethod + def get_remote_branches(remote_url): + """ + Get all branches from a remote repository + + Args: + remote_url (str): The git url to the remote repository + + Returns: + (set[str]): All branches found in the remote + """ + try: + unparsed_branches = git.Git().ls_remote(remote_url) + except git.GitCommandError: + raise LookupError(f"Was unable to fetch branches from '{remote_url}'") + else: + branches = {} + for branch_info in unparsed_branches.split("\n"): + sha, name = branch_info.split("\t") + if name != "HEAD": + # The remote branches are shown as 'ref/head/branch' + branch_name = Path(name).stem + branches[sha] = branch_name + return set(branches.values()) + + def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False): + """ + Initializes the object and clones the git repository if it is not already present + """ + + # This allows us to set this one time and then keep track of the user's choice + ModulesRepo.no_pull_global |= no_pull + + # Check if the remote seems to be well formed + if remote_url is None: + remote_url = NF_CORE_MODULES_REMOTE + + self.remote_url = remote_url + + self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) + + self.setup_local_repo(remote_url, branch, hide_progress) + + config_fn, repo_config = load_tools_config(self.local_repo_dir) + try: + self.repo_path = repo_config["org_path"] + 
except KeyError: + raise UserWarning(f"'org_path' key not present in {config_fn.name}") + + # Verify that the repo seems to be correctly configured + if self.repo_path != NF_CORE_MODULES_NAME or self.branch: + self.verify_branch() + + # Convenience variable + self.modules_dir = os.path.join(self.local_repo_dir, "modules", self.repo_path) + self.subworkflows_dir = os.path.join(self.local_repo_dir, "subworkflows", self.repo_path) + + self.avail_module_names = None + + def verify_sha(self, prompt, sha): + """ + Verify that 'sha' and 'prompt' arguments are not provided together. + Verify that the provided SHA exists in the repo. + + Arguments: + prompt (bool): prompt asking for SHA + sha (str): provided sha + """ + if prompt and sha is not None: + log.error("Cannot use '--sha' and '--prompt' at the same time!") + return False + + if sha: + if not self.sha_exists_on_branch(sha): + log.error(f"Commit SHA '{sha}' doesn't exist in '{self.remote_url}'") + return False + + return True + + def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): + """ + Sets up the local git repository. If the repository has been cloned previously, it + returns a git.Repo object of that clone. Otherwise it tries to clone the repository from + the provided remote URL and returns a git.Repo of the new clone. + + Args: + remote (str): git url of remote + branch (str): name of branch to use + Sets self.repo + """ + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) + try: + if not os.path.exists(self.local_repo_dir): + try: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) + ModulesRepo.update_local_repo_status(self.fullname, True) + except GitCommandError: + raise LookupError(f"Failed to clone from the remote: `{remote}`") + # Verify that the requested branch exists by checking it out + self.setup_branch(branch) + else: + self.repo = git.Repo(self.local_repo_dir) + + if ModulesRepo.no_pull_global: + ModulesRepo.update_local_repo_status(self.fullname, True) + # If the repo is already cloned, fetch the latest changes from the remote + if not ModulesRepo.local_repo_synced(self.fullname): + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo.remotes.origin.fetch( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) + ModulesRepo.update_local_repo_status(self.fullname, True) + + # Before verifying the branch, fetch the changes + # Verify that the requested branch exists by checking it out + self.setup_branch(branch) + + # Now merge the changes + tracking_branch = self.repo.active_branch.tracking_branch() + if tracking_branch is None: + raise LookupError(f"There is no remote tracking branch '{self.branch}' in '{self.remote_url}'") + self.repo.git.merge(tracking_branch.name) + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") + if 
rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"):
+                log.info(f"Removing '{self.local_repo_dir}'")
+                shutil.rmtree(self.local_repo_dir)
+                self.setup_local_repo(remote, branch, hide_progress)
+            else:
+                raise LookupError("Exiting due to error with local modules git repo")
+
+    def setup_branch(self, branch):
+        """
+        Verify that we have a branch and otherwise use the default one.
+        The branch is then checked out to verify that it exists in the repo.
+
+        Args:
+            branch (str): Name of branch
+        """
+        if branch is None:
+            # Don't bother fetching default branch if we're using nf-core
+            if self.remote_url == NF_CORE_MODULES_REMOTE:
+                self.branch = "master"
+            else:
+                self.branch = self.get_default_branch()
+        else:
+            self.branch = branch
+
+        # Verify that the branch exists by checking it out
+        self.branch_exists()
+
+    def get_default_branch(self):
+        """
+        Gets the default branch for the repo (the branch origin/HEAD is pointing to)
+        """
+        origin_head = next(ref for ref in self.repo.refs if ref.name == "origin/HEAD")
+        _, branch = origin_head.ref.name.split("/")
+        return branch
+
+    def branch_exists(self):
+        """
+        Verifies that the branch exists in the repository by trying to check it out
+        """
+        try:
+            self.checkout_branch()
+        except GitCommandError:
+            raise LookupError(f"Branch '{self.branch}' not found in '{self.remote_url}'")
+
+    def verify_branch(self):
+        """
+        Verifies the active branch conforms to the correct directory structure
+        """
+        dir_names = os.listdir(self.local_repo_dir)
+        if "modules" not in dir_names:
+            err_str = f"Repository '{self.remote_url}' ({self.branch}) does not contain the 'modules/' directory"
+            if "software" in dir_names:
+                err_str += (
+                    ".\nAs of nf-core/tools version 2.0, the 'software/' directory should be renamed to 'modules/'"
+                )
+            raise LookupError(err_str)
+
+    def checkout_branch(self):
+        """
+        Checks out the specified branch of the repository
+        """
+        self.repo.git.checkout(self.branch)
+
+    def checkout(self, commit):
+        """
+        Checks out the repository at the requested commit
+
+        Args:
+            commit (str): Git SHA of the commit
+        """
+        self.repo.git.checkout(commit)
+
+    def component_exists(self, component_name, component_type, checkout=True, commit=None):
+        """
+        Check if a module/subworkflow exists in the branch of the repo
+
+        Args:
+            component_name (str): The name of the module/subworkflow
+
+        Returns:
+            (bool): Whether the module/subworkflow exists in this branch of the repository
+        """
+        return component_name in self.get_avail_components(component_type, checkout=checkout, commit=commit)
+
+    def get_component_dir(self, component_name, component_type):
+        """
+        Returns the file path of a module/subworkflow directory in the repo.
+        Does not verify that the path exists.
+        Args:
+            component_name (str): The name of the module/subworkflow
+
+        Returns:
+            component_path (str): The path of the module/subworkflow in the local copy of the repository
+        """
+        if component_type == "modules":
+            return os.path.join(self.modules_dir, component_name)
+        elif component_type == "subworkflows":
+            return os.path.join(self.subworkflows_dir, component_name)
+
+    def install_component(self, component_name, install_dir, commit, component_type):
+        """
+        Install the module/subworkflow files into a pipeline at the given commit
+
+        Args:
+            component_name (str): The name of the module/subworkflow
+            install_dir (str): The path where the module/subworkflow should be installed
+            commit (str): The git SHA for the version of the module/subworkflow to be installed
+
+        Returns:
+            (bool): Whether the operation was successful or not
+        """
+        # Check out the repository at the requested ref
+        try:
+            self.checkout(commit)
+        except git.GitCommandError:
+            return False
+
+        # Check if the module/subworkflow exists in the branch
+        if not self.component_exists(component_name, component_type, checkout=False):
+            log.error(
+                f"The requested {component_type[:-1]} does not exist in the branch '{self.branch}' of '{self.remote_url}'"
+            )
+            return False
+
+        # Copy the files from the repo to the install folder
+        shutil.copytree(self.get_component_dir(component_name, component_type), Path(install_dir, component_name))
+
+        # Switch back to the tip of the branch
+        self.checkout_branch()
+        return True
+
+    def module_files_identical(self, module_name, base_path, commit):
+        """
+        Checks whether the module files in a pipeline are identical to the ones in the remote
+        Args:
+            module_name (str): The name of the module
+            base_path (str): The path to the module in the pipeline
+
+        Returns:
+            (bool): Whether the pipeline files are identical to the repo files
+        """
+        if commit is None:
+            self.checkout_branch()
+        else:
+            self.checkout(commit)
+        module_files = ["main.nf", "meta.yml"]
+        files_identical = {file: True for file in module_files}
+        module_dir = self.get_component_dir(module_name, "modules")
+        for file in module_files:
+            try:
+                files_identical[file] = filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file))
+            except FileNotFoundError:
+                log.debug(f"Could not open file: {os.path.join(module_dir, file)}")
+                continue
+        self.checkout_branch()
+        return files_identical
+
+    def get_component_git_log(self, component_name, component_type, depth=None):
+        """
+        Fetches the commit history of the requested module/subworkflow since a given date. The default value is
+        not arbitrary - it is the last time the structure of the nf-core/modules repository had an
+        update breaking backwards compatibility.
+ Args: + component_name (str): Name of module/subworkflow + modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question + + Returns: + ( dict ): Iterator of commit SHAs and associated (truncated) message + """ + self.checkout_branch() + component_path = os.path.join(component_type, self.repo_path, component_name) + commits_new = self.repo.iter_commits(max_count=depth, paths=component_path) + commits_new = [ + {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_new + ] + commits_old = [] + if component_type == "modules": + # Grab commits also from previous modules structure + component_path = os.path.join("modules", component_name) + commits_old = self.repo.iter_commits(max_count=depth, paths=component_path) + commits_old = [ + {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_old + ] + commits = iter(commits_new + commits_old) + return commits + + def get_latest_component_version(self, component_name, component_type): + """ + Returns the latest commit in the repository + """ + return list(self.get_component_git_log(component_name, component_type, depth=1))[0]["git_sha"] + + def sha_exists_on_branch(self, sha): + """ + Verifies that a given commit sha exists on the branch + """ + self.checkout_branch() + return sha in (commit.hexsha for commit in self.repo.iter_commits()) + + def get_commit_info(self, sha): + """ + Fetches metadata about the commit (dates, message, etc.) + Args: + commit_sha (str): The SHA of the requested commit + Returns: + message (str): The commit message for the requested commit + date (str): The commit date for the requested commit + Raises: + LookupError: If the search for the commit fails + """ + self.checkout_branch() + for commit in self.repo.iter_commits(): + if commit.hexsha == sha: + message = commit.message.partition("\n")[0] + date_obj = commit.committed_datetime + date = str(date_obj.date()) + return message, date + raise LookupError(f"Commit '{sha}' not found in the '{self.remote_url}'") + + def get_avail_components(self, component_type, checkout=True, commit=None): + """ + Gets the names of the modules/subworkflows in the repository. 
They are detected by + checking which directories have a 'main.nf' file + + Returns: + ([ str ]): The module/subworkflow names + """ + if checkout: + self.checkout_branch() + if commit is not None: + self.checkout(commit) + # Get directory + if component_type == "modules": + directory = self.modules_dir + elif component_type == "subworkflows": + directory = self.subworkflows_dir + # Module/Subworkflow directories are characterized by having a 'main.nf' file + avail_component_names = [ + os.path.relpath(dirpath, start=directory) + for dirpath, _, file_names in os.walk(directory) + if "main.nf" in file_names + ] + return avail_component_names + + def get_meta_yml(self, component_type, module_name): + """ + Returns the contents of the 'meta.yml' file of a module + + Args: + module_name (str): The name of the module + + Returns: + (str): The contents of the file in text format + """ + self.checkout_branch() + if component_type == "modules": + path = Path(self.modules_dir, module_name, "meta.yml") + elif component_type == "subworkflows": + path = Path(self.subworkflows_dir, module_name, "meta.yml") + else: + raise ValueError(f"Invalid component type: {component_type}") + if not path.exists(): + return None + with open(path) as fh: + contents = fh.read() + return contents From 4fcdcfd3e786de26e68a0da1f07b41c7dc534b8d Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:11:01 +0100 Subject: [PATCH 22/63] Split history ./modules/modules_repo.py to synced_repo.py --- nf_core/{temp => modules/modules_repo.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename nf_core/{temp => modules/modules_repo.py} (100%) diff --git a/nf_core/temp b/nf_core/modules/modules_repo.py similarity index 100% rename from nf_core/temp rename to nf_core/modules/modules_repo.py From fb6a6e687ea1e8e6dd8070dd8e4b949a78920699 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:53:19 +0100 Subject: [PATCH 23/63] Duplication of ModulesRepo to SyncedRepo done. --- nf_core/download.py | 15 ++------------- nf_core/modules/modules_repo.py | 3 ++- nf_core/synced_repo.py | 31 +++++++++++-------------------- 3 files changed, 15 insertions(+), 34 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 74b8abf3d8..e92e50164f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -24,7 +24,7 @@ import nf_core import nf_core.list import nf_core.utils -from nf_core.modules import ModulesRepo # to create subclass WorkflowRepo +from nf_core.synced_repo import SyncedRepo # to create subclass WorkflowRepo log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -826,7 +826,7 @@ def compress_download(self): log.info(f"MD5 checksum for '{self.output_filename}': [blue]{nf_core.utils.file_md5(self.output_filename)}[/]") -class WorkflowRepo(ModulesRepo): +class WorkflowRepo(SyncedRepo): """ An object to store details about a locally cached workflow repository. @@ -851,14 +851,3 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) self.setup_local_repo(remote_url, branch, hide_progress, in_cache=in_cache) - - @property - def active_branch(self): - """ - In ModuleRepo.setup_local_repo(), self.repo.active_branch.tracking_branch() is called in line 227. 
- For a WorkflowRepo, this raises a TypeError ``HEAD is a detached symbolic reference as it points to {commit hash}`` - - This property shadows the call and seemed the cleanest solution to prevent excessive code duplication. - Otherwise, I would have needed to define a setup_local_repo() method for the WorkflowRepo class. - """ - pass # TODO diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 23f62bdee2..5e4d80be16 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -12,6 +12,7 @@ import nf_core.modules.modules_json import nf_core.modules.modules_utils from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config +from nf_core.synced_repo import SyncedRepo log = logging.getLogger(__name__) @@ -58,7 +59,7 @@ def update(self, op_code, cur_count, max_count=None, message=""): ) -class ModulesRepo: +class ModulesRepo(SyncedRepo): """ An object to store details about the repository being used for modules. diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 23f62bdee2..89d2f894b1 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -9,8 +9,6 @@ import rich.progress from git.exc import GitCommandError, InvalidGitRepositoryError -import nf_core.modules.modules_json -import nf_core.modules.modules_utils from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config log = logging.getLogger(__name__) @@ -58,16 +56,9 @@ def update(self, op_code, cur_count, max_count=None, message=""): ) -class ModulesRepo: +class SyncedRepo: """ - An object to store details about the repository being used for modules. - - Used by the `nf-core modules` top-level command with -r and -b flags, - so that this can be used in the same way by all sub-commands. - - We keep track of the pull-status of the different installed repos in - the static variable local_repo_status. This is so we don't need to - pull a remote several times in one command. + An object to store details about a locally cached code repository. 
""" local_repo_statuses = {} @@ -78,14 +69,14 @@ def local_repo_synced(repo_name): """ Checks whether a local repo has been cloned/pull in the current session """ - return ModulesRepo.local_repo_statuses.get(repo_name, False) + return SyncedRepo.local_repo_statuses.get(repo_name, False) @staticmethod def update_local_repo_status(repo_name, up_to_date): """ Updates the clone/pull status of a local repo """ - ModulesRepo.local_repo_statuses[repo_name] = up_to_date + SyncedRepo.local_repo_statuses[repo_name] = up_to_date @staticmethod def get_remote_branches(remote_url): @@ -118,7 +109,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa """ # This allows us to set this one time and then keep track of the user's choice - ModulesRepo.no_pull_global |= no_pull + SyncedRepo.no_pull_global |= no_pull # Check if the remote seems to be well formed if remote_url is None: @@ -194,7 +185,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): self.local_repo_dir, progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), ) - ModulesRepo.update_local_repo_status(self.fullname, True) + SyncedRepo.update_local_repo_status(self.fullname, True) except GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") # Verify that the requested branch exists by checking it out @@ -202,10 +193,10 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): else: self.repo = git.Repo(self.local_repo_dir) - if ModulesRepo.no_pull_global: - ModulesRepo.update_local_repo_status(self.fullname, True) + if SyncedRepo.no_pull_global: + SyncedRepo.update_local_repo_status(self.fullname, True) # If the repo is already cloned, fetch the latest changes from the remote - if not ModulesRepo.local_repo_synced(self.fullname): + if not SyncedRepo.local_repo_synced(self.fullname): pbar = rich.progress.Progress( "[bold blue]{task.description}", rich.progress.BarColumn(bar_width=None), @@ -217,7 +208,7 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): self.repo.remotes.origin.fetch( progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") ) - ModulesRepo.update_local_repo_status(self.fullname, True) + SyncedRepo.update_local_repo_status(self.fullname, True) # Before verifying the branch, fetch the changes # Verify that the requested branch exists by checking it out @@ -394,7 +385,7 @@ def get_component_git_log(self, component_name, component_type, depth=None): update breaking backwards compatibility. Args: component_name (str): Name of module/subworkflow - modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question + modules_repo (SyncedRepo): A SyncedRepo object configured for the repository in question Returns: ( dict ): Iterator of commit SHAs and associated (truncated) message From a3ccffa784034c1bf32a2225056f5543f9c86e37 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 15:59:03 +0100 Subject: [PATCH 24/63] Strip ModulesRepo class of the methods moved to new superclass. 
--- nf_core/modules/modules_repo.py | 358 +------------------------------- 1 file changed, 1 insertion(+), 357 deletions(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 5e4d80be16..20d581af84 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -12,7 +12,7 @@ import nf_core.modules.modules_json import nf_core.modules.modules_utils from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config -from nf_core.synced_repo import SyncedRepo +from nf_core.synced_repo import RemoteProgressbar, SyncedRepo log = logging.getLogger(__name__) @@ -22,43 +22,6 @@ NF_CORE_MODULES_DEFAULT_BRANCH = "master" -class RemoteProgressbar(git.RemoteProgress): - """ - An object to create a progressbar for when doing an operation with the remote. - Note that an initialized rich Progress (progress bar) object must be past - during initialization. - """ - - def __init__(self, progress_bar, repo_name, remote_url, operation): - """ - Initializes the object and adds a task to the progressbar passed as 'progress_bar' - - Args: - progress_bar (rich.progress.Progress): A rich progress bar object - repo_name (str): Name of the repository the operation is performed on - remote_url (str): Git URL of the repository the operation is performed on - operation (str): The operation performed on the repository, i.e. 'Pulling', 'Cloning' etc. - """ - super().__init__() - self.progress_bar = progress_bar - self.tid = self.progress_bar.add_task( - f"{operation} from [bold green]'{repo_name}'[/bold green] ([link={remote_url}]{remote_url}[/link])", - start=False, - state="Waiting for response", - ) - - def update(self, op_code, cur_count, max_count=None, message=""): - """ - Overrides git.RemoteProgress.update. - Called every time there is a change in the remote operation - """ - if not self.progress_bar.tasks[self.tid].started: - self.progress_bar.start_task(self.tid) - self.progress_bar.update( - self.tid, total=max_count, completed=cur_count, state=f"{cur_count / max_count * 100:.1f}%" - ) - - class ModulesRepo(SyncedRepo): """ An object to store details about the repository being used for modules. 
@@ -74,45 +37,6 @@ class ModulesRepo(SyncedRepo): local_repo_statuses = {} no_pull_global = False - @staticmethod - def local_repo_synced(repo_name): - """ - Checks whether a local repo has been cloned/pull in the current session - """ - return ModulesRepo.local_repo_statuses.get(repo_name, False) - - @staticmethod - def update_local_repo_status(repo_name, up_to_date): - """ - Updates the clone/pull status of a local repo - """ - ModulesRepo.local_repo_statuses[repo_name] = up_to_date - - @staticmethod - def get_remote_branches(remote_url): - """ - Get all branches from a remote repository - - Args: - remote_url (str): The git url to the remote repository - - Returns: - (set[str]): All branches found in the remote - """ - try: - unparsed_branches = git.Git().ls_remote(remote_url) - except git.GitCommandError: - raise LookupError(f"Was unable to fetch branches from '{remote_url}'") - else: - branches = {} - for branch_info in unparsed_branches.split("\n"): - sha, name = branch_info.split("\t") - if name != "HEAD": - # The remote branches are shown as 'ref/head/branch' - branch_name = Path(name).stem - branches[sha] = branch_name - return set(branches.values()) - def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False): """ Initializes the object and clones the git repository if it is not already present @@ -147,26 +71,6 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=Fa self.avail_module_names = None - def verify_sha(self, prompt, sha): - """ - Verify that 'sha' and 'prompt' arguments are not provided together. - Verify that the provided SHA exists in the repo. - - Arguments: - prompt (bool): prompt asking for SHA - sha (str): provided sha - """ - if prompt and sha is not None: - log.error("Cannot use '--sha' and '--prompt' at the same time!") - return False - - if sha: - if not self.sha_exists_on_branch(sha): - log.error(f"Commit SHA '{sha}' doesn't exist in '{self.remote_url}'") - return False - - return True - def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): """ Sets up the local git repository. If the repository has been cloned previously, it @@ -237,263 +141,3 @@ def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): self.setup_local_repo(remote, branch, hide_progress) else: raise LookupError("Exiting due to error with local modules git repo") - - def setup_branch(self, branch): - """ - Verify that we have a branch and otherwise use the default one. - The branch is then checked out to verify that it exists in the repo. 
- - Args: - branch (str): Name of branch - """ - if branch is None: - # Don't bother fetching default branch if we're using nf-core - if self.remote_url == NF_CORE_MODULES_REMOTE: - self.branch = "master" - else: - self.branch = self.get_default_branch() - else: - self.branch = branch - - # Verify that the branch exists by checking it out - self.branch_exists() - - def get_default_branch(self): - """ - Gets the default branch for the repo (the branch origin/HEAD is pointing to) - """ - origin_head = next(ref for ref in self.repo.refs if ref.name == "origin/HEAD") - _, branch = origin_head.ref.name.split("/") - return branch - - def branch_exists(self): - """ - Verifies that the branch exists in the repository by trying to check it out - """ - try: - self.checkout_branch() - except GitCommandError: - raise LookupError(f"Branch '{self.branch}' not found in '{self.remote_url}'") - - def verify_branch(self): - """ - Verifies the active branch conforms do the correct directory structure - """ - dir_names = os.listdir(self.local_repo_dir) - if "modules" not in dir_names: - err_str = f"Repository '{self.remote_url}' ({self.branch}) does not contain the 'modules/' directory" - if "software" in dir_names: - err_str += ( - ".\nAs of nf-core/tools version 2.0, the 'software/' directory should be renamed to 'modules/'" - ) - raise LookupError(err_str) - - def checkout_branch(self): - """ - Checks out the specified branch of the repository - """ - self.repo.git.checkout(self.branch) - - def checkout(self, commit): - """ - Checks out the repository at the requested commit - - Args: - commit (str): Git SHA of the commit - """ - self.repo.git.checkout(commit) - - def component_exists(self, component_name, component_type, checkout=True, commit=None): - """ - Check if a module/subworkflow exists in the branch of the repo - - Args: - component_name (str): The name of the module/subworkflow - - Returns: - (bool): Whether the module/subworkflow exists in this branch of the repository - """ - return component_name in self.get_avail_components(component_type, checkout=checkout, commit=commit) - - def get_component_dir(self, component_name, component_type): - """ - Returns the file path of a module/subworkflow directory in the repo. - Does not verify that the path exists. 
- Args: - component_name (str): The name of the module/subworkflow - - Returns: - component_path (str): The path of the module/subworkflow in the local copy of the repository - """ - if component_type == "modules": - return os.path.join(self.modules_dir, component_name) - elif component_type == "subworkflows": - return os.path.join(self.subworkflows_dir, component_name) - - def install_component(self, component_name, install_dir, commit, component_type): - """ - Install the module/subworkflow files into a pipeline at the given commit - - Args: - component_name (str): The name of the module/subworkflow - install_dir (str): The path where the module/subworkflow should be installed - commit (str): The git SHA for the version of the module/subworkflow to be installed - - Returns: - (bool): Whether the operation was successful or not - """ - # Check out the repository at the requested ref - try: - self.checkout(commit) - except git.GitCommandError: - return False - - # Check if the module/subworkflow exists in the branch - if not self.component_exists(component_name, component_type, checkout=False): - log.error( - f"The requested {component_type[:-1]} does not exists in the branch '{self.branch}' of {self.remote_url}'" - ) - return False - - # Copy the files from the repo to the install folder - shutil.copytree(self.get_component_dir(component_name, component_type), Path(install_dir, component_name)) - - # Switch back to the tip of the branch - self.checkout_branch() - return True - - def module_files_identical(self, module_name, base_path, commit): - """ - Checks whether the module files in a pipeline are identical to the ones in the remote - Args: - module_name (str): The name of the module - base_path (str): The path to the module in the pipeline - - Returns: - (bool): Whether the pipeline files are identical to the repo files - """ - if commit is None: - self.checkout_branch() - else: - self.checkout(commit) - module_files = ["main.nf", "meta.yml"] - files_identical = {file: True for file in module_files} - module_dir = self.get_component_dir(module_name, "modules") - for file in module_files: - try: - files_identical[file] = filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)) - except FileNotFoundError: - log.debug(f"Could not open file: {os.path.join(module_dir, file)}") - continue - self.checkout_branch() - return files_identical - - def get_component_git_log(self, component_name, component_type, depth=None): - """ - Fetches the commit history the of requested module/subworkflow since a given date. The default value is - not arbitrary - it is the last time the structure of the nf-core/modules repository was had an - update breaking backwards compatibility. 
- Args: - component_name (str): Name of module/subworkflow - modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question - - Returns: - ( dict ): Iterator of commit SHAs and associated (truncated) message - """ - self.checkout_branch() - component_path = os.path.join(component_type, self.repo_path, component_name) - commits_new = self.repo.iter_commits(max_count=depth, paths=component_path) - commits_new = [ - {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_new - ] - commits_old = [] - if component_type == "modules": - # Grab commits also from previous modules structure - component_path = os.path.join("modules", component_name) - commits_old = self.repo.iter_commits(max_count=depth, paths=component_path) - commits_old = [ - {"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits_old - ] - commits = iter(commits_new + commits_old) - return commits - - def get_latest_component_version(self, component_name, component_type): - """ - Returns the latest commit in the repository - """ - return list(self.get_component_git_log(component_name, component_type, depth=1))[0]["git_sha"] - - def sha_exists_on_branch(self, sha): - """ - Verifies that a given commit sha exists on the branch - """ - self.checkout_branch() - return sha in (commit.hexsha for commit in self.repo.iter_commits()) - - def get_commit_info(self, sha): - """ - Fetches metadata about the commit (dates, message, etc.) - Args: - commit_sha (str): The SHA of the requested commit - Returns: - message (str): The commit message for the requested commit - date (str): The commit date for the requested commit - Raises: - LookupError: If the search for the commit fails - """ - self.checkout_branch() - for commit in self.repo.iter_commits(): - if commit.hexsha == sha: - message = commit.message.partition("\n")[0] - date_obj = commit.committed_datetime - date = str(date_obj.date()) - return message, date - raise LookupError(f"Commit '{sha}' not found in the '{self.remote_url}'") - - def get_avail_components(self, component_type, checkout=True, commit=None): - """ - Gets the names of the modules/subworkflows in the repository. 
They are detected by - checking which directories have a 'main.nf' file - - Returns: - ([ str ]): The module/subworkflow names - """ - if checkout: - self.checkout_branch() - if commit is not None: - self.checkout(commit) - # Get directory - if component_type == "modules": - directory = self.modules_dir - elif component_type == "subworkflows": - directory = self.subworkflows_dir - # Module/Subworkflow directories are characterized by having a 'main.nf' file - avail_component_names = [ - os.path.relpath(dirpath, start=directory) - for dirpath, _, file_names in os.walk(directory) - if "main.nf" in file_names - ] - return avail_component_names - - def get_meta_yml(self, component_type, module_name): - """ - Returns the contents of the 'meta.yml' file of a module - - Args: - module_name (str): The name of the module - - Returns: - (str): The contents of the file in text format - """ - self.checkout_branch() - if component_type == "modules": - path = Path(self.modules_dir, module_name, "meta.yml") - elif component_type == "subworkflows": - path = Path(self.subworkflows_dir, module_name, "meta.yml") - else: - raise ValueError(f"Invalid component type: {component_type}") - if not path.exists(): - return None - with open(path) as fh: - contents = fh.read() - return contents From 1f0e08adc62a3054777a8b32a0da1e0234e2c8b0 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 17:18:38 +0100 Subject: [PATCH 25/63] Rebase to current dev. --- nf_core/synced_repo.py | 75 ++---------------------------------------- 1 file changed, 2 insertions(+), 73 deletions(-) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 89d2f894b1..715f6d77bb 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -7,9 +7,9 @@ import git import rich import rich.progress -from git.exc import GitCommandError, InvalidGitRepositoryError +from git.exc import GitCommandError -from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config +from nf_core.utils import load_tools_config log = logging.getLogger(__name__) @@ -157,77 +157,6 @@ def verify_sha(self, prompt, sha): return True - def setup_local_repo(self, remote, branch, hide_progress=True, in_cache=False): - """ - Sets up the local git repository. If the repository has been cloned previously, it - returns a git.Repo object of that clone. Otherwise it tries to clone the repository from - the provided remote URL and returns a git.Repo of the new clone. 
- - Args: - remote (str): git url of remote - branch (str): name of branch to use - Sets self.repo - """ - self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) - try: - if not os.path.exists(self.local_repo_dir): - try: - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, - ) - with pbar: - self.repo = git.Repo.clone_from( - remote, - self.local_repo_dir, - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), - ) - SyncedRepo.update_local_repo_status(self.fullname, True) - except GitCommandError: - raise LookupError(f"Failed to clone from the remote: `{remote}`") - # Verify that the requested branch exists by checking it out - self.setup_branch(branch) - else: - self.repo = git.Repo(self.local_repo_dir) - - if SyncedRepo.no_pull_global: - SyncedRepo.update_local_repo_status(self.fullname, True) - # If the repo is already cloned, fetch the latest changes from the remote - if not SyncedRepo.local_repo_synced(self.fullname): - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, - ) - with pbar: - self.repo.remotes.origin.fetch( - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") - ) - SyncedRepo.update_local_repo_status(self.fullname, True) - - # Before verifying the branch, fetch the changes - # Verify that the requested branch exists by checking it out - self.setup_branch(branch) - - # Now merge the changes - tracking_branch = self.repo.active_branch.tracking_branch() - if tracking_branch is None: - raise LookupError(f"There is no remote tracking branch '{self.branch}' in '{self.remote_url}'") - self.repo.git.merge(tracking_branch.name) - except (GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") - if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): - log.info(f"Removing '{self.local_repo_dir}'") - shutil.rmtree(self.local_repo_dir) - self.setup_local_repo(remote, branch, hide_progress) - else: - raise LookupError("Exiting due to error with local modules git repo") - def setup_branch(self, branch): """ Verify that we have a branch and otherwise use the default one. From 62d25d002bccdaf7e511d7ff039ef5f989d78c5b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 21 Feb 2023 21:40:40 +0100 Subject: [PATCH 26/63] Local caching of the repo works now. 
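
For reference, usage now looks roughly like this, with placeholder values
for the pipeline, revision and commit SHA:

    from nf_core.download import WorkflowRepo

    wf = WorkflowRepo(
        remote_url="git@github.com:nf-core/rnaseq.git",  # example pipeline
        revision="3.10.1",                               # example revision
        commit="0123456789abcdef...",                    # SHA that revision resolves to (placeholder)
    )
    # The clone ends up in the cache directory (per the class docstring,
    # e.g. $HOME/.cache/nf-core/nf-core/rnaseq), and the finally: block in
    # setup_local_repo() checks HEAD out at the requested commit.
    print(wf.local_repo_dir)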
--- nf_core/download.py | 99 ++++++++++++++++++++++++++++++++++++------ nf_core/synced_repo.py | 2 +- 2 files changed, 86 insertions(+), 15 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index e92e50164f..67c987bb44 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,7 +3,8 @@ from __future__ import print_function import concurrent.futures -from git import Repo +import git +from git.exc import GitCommandError, InvalidGitRepositoryError import io import logging import os @@ -24,7 +25,8 @@ import nf_core import nf_core.list import nf_core.utils -from nf_core.synced_repo import SyncedRepo # to create subclass WorkflowRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR +from nf_core.synced_repo import RemoteProgressbar, SyncedRepo log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -127,7 +129,8 @@ def download_workflow(self): self.prompt_container_download() self.prompt_use_singularity_cachedir() self.prompt_singularity_cachedir_only() - self.prompt_compression_type() + if not self.tower: + self.prompt_compression_type() except AssertionError as e: log.critical(e) sys.exit(1) @@ -207,10 +210,10 @@ def download_workflow_tower(self): """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") - self.workflow_repo = WorkflowRepo(remote_url=f"git@github.com:{self.pipeline}.git", branch=self.revision) - import pbb + self.workflow_repo = WorkflowRepo( + remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision, commit=self.wf_sha + ) - pdb.set_trace() log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): @@ -625,12 +628,12 @@ def singularity_image_filenames(self, container): """Check Singularity cache for image, copy to destination folder if found. Args: - container (str): A pipeline's container name. Can be direct download URL - or a Docker Hub repository ID. + container (str): A pipeline's container name. Can be direct download URL + or a Docker Hub repository ID. Returns: - results (bool, str): Returns True if we have the image in the target location. - Returns a download path if not. + results (bool, str): Returns True if we have the image in the target location. + Returns a download path if not. """ # Generate file paths @@ -836,18 +839,86 @@ class WorkflowRepo(SyncedRepo): """ - def __init__(self, remote_url=None, branch=None, no_pull=False, hide_progress=False, in_cache=True): + def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=False, in_cache=True): """ Initializes the object and clones the workflows git repository if it is not already present Args: - remote_url (str, optional): The URL of the remote repository. Defaults to None. - branch (str, optional): The branch to clone. Defaults to None. + remote_url (str): The URL of the remote repository. Defaults to None. + commit (str): The commit to clone. Defaults to None. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
""" self.remote_url = remote_url + self.revision = revision + self.commit = commit self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.setup_local_repo(remote_url, branch, hide_progress, in_cache=in_cache) + self.setup_local_repo(remote_url, revision, commit, hide_progress, in_cache=in_cache) + + def setup_local_repo(self, remote, revision, commit, hide_progress=True, in_cache=True): + """ + Sets up the local git repository. If the repository has been cloned previously, it + returns a git.Repo object of that clone. Otherwise it tries to clone the repository from + the provided remote URL and returns a git.Repo of the new clone. + + Args: + remote (str): git url of remote + branch (str): name of branch to use + hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. + Sets self.repo + """ + + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) + try: + if not os.path.exists(self.local_repo_dir): + try: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) + super().update_local_repo_status(self.fullname, True) + except GitCommandError: + raise LookupError(f"Failed to clone from the remote: `{remote}`") + else: + self.repo = git.Repo(self.local_repo_dir) + + if super().no_pull_global: + super().update_local_repo_status(self.fullname, True) + # If the repo is already cloned, fetch the latest changes from the remote + if not super().local_repo_synced(self.fullname): + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.repo.remotes.origin.fetch( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) + super().update_local_repo_status(self.fullname, True) + + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") + if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): + log.info(f"Removing '{self.local_repo_dir}'") + shutil.rmtree(self.local_repo_dir) + self.setup_local_repo(remote, revision, commit, hide_progress) + else: + raise LookupError("Exiting due to error with local modules git repo") + + finally: + self.repo.git.checkout(commit) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 715f6d77bb..f04ef8e0c7 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -9,7 +9,7 @@ import rich.progress from git.exc import GitCommandError -from nf_core.utils import load_tools_config +from nf_core.utils import load_tools_config log = logging.getLogger(__name__) From 621a8e185480a90bf65de645eca2555562227496 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 Feb 2023 20:13:07 +0100 Subject: [PATCH 27/63] Started implementing the config download. 
--- nf_core/download.py | 70 +++++++++++++++++++++++++++++++++++++----- nf_core/synced_repo.py | 2 +- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 67c987bb44..0d33b601f0 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -100,6 +100,7 @@ def __init__( self.compress_type = compress_type self.force = force self.tower = tower + self.include_config self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads @@ -210,10 +211,13 @@ def download_workflow_tower(self): """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") + self.workflow_repo = WorkflowRepo( remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision, commit=self.wf_sha ) + import pdb + pdb.set_trace() log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): @@ -374,6 +378,12 @@ def prompt_compression_type(self): if self.compress_type == "none": self.compress_type = None + def prompt_config_inclusion(self): + """Prompt for inclusion of institutional configurations""" + self.include_configs = questionary.confirm( + "Include the institutional configuration files into the download?" + ).ask() + def download_wf_files(self): """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" log.debug(f"Downloading {self.wf_download_url}") @@ -853,11 +863,24 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa self.remote_url = remote_url self.revision = revision self.commit = commit + self.hide_progress = hide_progress self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.setup_local_repo(remote_url, revision, commit, hide_progress, in_cache=in_cache) + self.setup_local_repo(remote_url, commit, hide_progress, in_cache=in_cache) + + def __repr__(self): + """Called by print, creates representation of object""" + return f"" + + def retry_setup_local_repo(self): + if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): + log.info(f"Removing '{self.local_repo_dir}'") + shutil.rmtree(self.local_repo_dir) + self.setup_local_repo(self.remote, self.commit, self.hide_progress) + else: + raise LookupError("Exiting due to error with local modules git repo") - def setup_local_repo(self, remote, revision, commit, hide_progress=True, in_cache=True): + def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from @@ -913,12 +936,43 @@ def setup_local_repo(self, remote, revision, commit, hide_progress=True, in_cach except (GitCommandError, InvalidGitRepositoryError) as e: log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") - if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): - log.info(f"Removing '{self.local_repo_dir}'") - shutil.rmtree(self.local_repo_dir) - self.setup_local_repo(remote, revision, commit, hide_progress) - else: - raise LookupError("Exiting due to error with local modules git repo") + self.retry_setup_local_repo() + finally: + self.repo.git.checkout(commit) + + def add_nfcore_configs(self, commit, hide_progress=False): + """ + Pulls the configuration profiles from the nf-core/config repository on GitHub. + + Args: + commit: The config version to pull + hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + Sets self.repo + """ + try: + if os.path.exists(self.local_repo_dir): + try: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with pbar: + self.configs = git.Submodule.add( + self.repo, + "nf-core configuration", + "./conf_institutional", + f"git@github.com:nf-core/configs.git", + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Adding configuration"), + ) + except GitCommandError: + raise LookupError(f"Failed to retrieve configuration: `{remote}`") + + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") + self.retry_setup_local_repo() finally: self.repo.git.checkout(commit) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index f04ef8e0c7..4bbd4f8443 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -22,7 +22,7 @@ class RemoteProgressbar(git.RemoteProgress): """ An object to create a progressbar for when doing an operation with the remote. - Note that an initialized rich Progress (progress bar) object must be past + Note that an initialized rich Progress (progress bar) object must be passed during initialization. """ From c478d9af341ee3351977c5887d60ea0efe826fa4 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 24 Feb 2023 18:42:12 +0100 Subject: [PATCH 28/63] Started to implement the multiple revision selection for the Tower download. --- nf_core/download.py | 78 ++++++++++++++++++++++----------------------- nf_core/utils.py | 10 ++++-- 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 0d33b601f0..ff06faecdc 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -75,7 +75,7 @@ class DownloadWorkflow: Args: pipeline (str): A nf-core pipeline name. - revision (str): The workflow revision to download, like `1.0`. Defaults to None. + revision (List[str]): The workflow revision to download, like `1.0`. Defaults to None. singularity (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. tower (bool): Flag, to customize the download for Nextflow Tower (convert to git bare repo). Defaults to False. outdir (str): Path to the local download directory. Defaults to None. 
@@ -94,20 +94,19 @@ def __init__( parallel_downloads=4, ): self.pipeline = pipeline - self.revision = revision + self.revision = [].extend(revision) if revision else [] self.outdir = outdir self.output_filename = None self.compress_type = compress_type self.force = force self.tower = tower - self.include_config self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads self.wf_revisions = {} self.wf_branches = {} - self.wf_sha = None + self.wf_sha = {} self.wf_download_url = None self.nf_config = {} self.containers = [] @@ -136,7 +135,7 @@ def download_workflow(self): log.critical(e) sys.exit(1) - summary_log = [f"Pipeline revision: '{self.revision}'", f"Pull containers: '{self.container}'"] + summary_log = [f"Pipeline revision: '{','.join(self.revision)}'", f"Pull containers: '{self.container}'"] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") @@ -213,11 +212,10 @@ def download_workflow_tower(self): log.info("Collecting workflow from GitHub") self.workflow_repo = WorkflowRepo( - remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision, commit=self.wf_sha + remote_url=f"git@github.com:{self.pipeline}.git", + revision=self.revision[0] if self.revision else None, + commit=list(self.wf_sha.values())[0] if bool(self.wf_sha) else "", ) - import pdb - - pdb.set_trace() log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): @@ -230,39 +228,44 @@ def prompt_pipeline_name(self): def prompt_revision(self): """Prompt for pipeline revision / branch""" # Prompt user for revision tag if '--revision' was not set - if self.revision is None: - self.revision = nf_core.utils.prompt_pipeline_release_branch(self.wf_revisions, self.wf_branches) + # If --tower is specified, allow to select multiple revisions + + if not bool(self.revision): + self.revision.extend( + nf_core.utils.prompt_pipeline_release_branch(self.wf_revisions, self.wf_branches, multiple=self.tower) + ) def get_revision_hash(self): """Find specified revision / branch hash""" - # Branch - if self.revision in self.wf_branches.keys(): - self.wf_sha = self.wf_branches[self.revision] - - # Revision - else: - for r in self.wf_revisions: - if r["tag_name"] == self.revision: - self.wf_sha = r["tag_sha"] - break + for revision in self.revision: # revision is a list of strings, but may be of length 1 + # Branch + if revision in self.wf_branches.keys(): + self.wf_sha[revision].append(self.wf_branches[revision]) - # Can't find the revisions or branch - throw an error + # Revision else: - log.info( - "Available {} revisions: '{}'".format( - self.pipeline, "', '".join([r["tag_name"] for r in self.wf_revisions]) + for r in self.wf_revisions: + if r["tag_name"] == revision: + self.wf_sha[revision].append(r["tag_sha"]) + break + + # Can't find the revisions or branch - throw an error + else: + log.info( + "Available {} revisions: '{}'".format( + self.pipeline, "', '".join([r["tag_name"] for r in self.wf_revisions]) + ) ) - ) - log.info("Available {} branches: '{}'".format(self.pipeline, "', '".join(self.wf_branches.keys()))) - raise AssertionError(f"Not able to find revision / branch '{self.revision}' for {self.pipeline}") + log.info("Available {} branches: '{}'".format(self.pipeline, "', '".join(self.wf_branches.keys()))) + raise AssertionError(f"Not able to find revision / branch '{revision}' for 
{self.pipeline}") # Set the outdir if not self.outdir: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}-{self.revision}" + self.outdir = f"{self.pipeline.replace('/', '-').lower()}-{self.revision[0] if self.revision else ''}" # Set the download URL and return - self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{self.wf_sha}.zip" + self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}.zip" def prompt_container_download(self): """Prompt whether to download container images or not""" @@ -378,12 +381,6 @@ def prompt_compression_type(self): if self.compress_type == "none": self.compress_type = None - def prompt_config_inclusion(self): - """Prompt for inclusion of institutional configurations""" - self.include_configs = questionary.confirm( - "Include the institutional configuration files into the download?" - ).ask() - def download_wf_files(self): """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" log.debug(f"Downloading {self.wf_download_url}") @@ -394,7 +391,7 @@ def download_wf_files(self): zipfile.extractall(self.outdir) # Rename the internal directory name to be more friendly - gh_name = f"{self.pipeline}-{self.wf_sha}".split("/")[-1] + gh_name = f"{self.pipeline}-{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}".split("/")[-1] os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, "workflow")) # Make downloaded files executable @@ -795,7 +792,7 @@ def singularity_pull_image(self, container, out_path, cache_path, progress): if lines: # something went wrong with the container retrieval if any("FATAL: " in line for line in lines): - log.info("Singularity container retrieval fialed with the following error:") + log.info("Singularity container retrieval failed with the following error:") log.info("".join(lines)) raise FileNotFoundError(f'The container "{container}" is unavailable.\n{"".join(lines)}') @@ -855,14 +852,15 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa Args: remote_url (str): The URL of the remote repository. Defaults to None. + self.revision (list): The revision to use. A list of strings. commit (str): The commit to clone. Defaults to None. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
""" self.remote_url = remote_url - self.revision = revision - self.commit = commit + self.revision = [].extend(revision) if revision else [] + self.commit = [].extend(commit) if commit else [] self.hide_progress = hide_progress self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) diff --git a/nf_core/utils.py b/nf_core/utils.py index 3cd09397e3..4b35deac94 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -836,12 +836,13 @@ def prompt_remote_pipeline_name(wfs): raise AssertionError(f"Not able to find pipeline '{pipeline}'") -def prompt_pipeline_release_branch(wf_releases, wf_branches): +def prompt_pipeline_release_branch(wf_releases, wf_branches, multiple=False): """Prompt for pipeline release / branch Args: wf_releases (array): Array of repo releases as returned by the GitHub API wf_branches (array): Array of repo branches, as returned by the GitHub API + multiple (bool): Allow selection of multiple releases & branches (for Tower) Returns: choice (str): Selected release / branch name @@ -863,7 +864,12 @@ def prompt_pipeline_release_branch(wf_releases, wf_branches): if len(choices) == 0: return False - return questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask() + if multiple: + return questionary.checkbox( + "Select release / branch:", choices=choices, style=nfcore_question_style + ).unsafe_ask() + else: + return questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask() def get_repo_releases_branches(pipeline, wfs): From 67664daf6b61122d13ce7381eb110eb2183a0608 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 28 Feb 2023 15:19:53 +0100 Subject: [PATCH 29/63] Rewrite get_revision_hash() function to accomodate multiple revisions. 
--- nf_core/download.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index ff06faecdc..87a618cee0 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -241,13 +241,13 @@ def get_revision_hash(self): for revision in self.revision: # revision is a list of strings, but may be of length 1 # Branch if revision in self.wf_branches.keys(): - self.wf_sha[revision].append(self.wf_branches[revision]) + self.wf_sha = {**self.wf_sha, revision: self.wf_branches[revision]} # Revision else: for r in self.wf_revisions: if r["tag_name"] == revision: - self.wf_sha[revision].append(r["tag_sha"]) + self.wf_sha = {**self.wf_sha, revision: r["tag_sha"]} break # Can't find the revisions or branch - throw an error @@ -262,10 +262,13 @@ def get_revision_hash(self): # Set the outdir if not self.outdir: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}-{self.revision[0] if self.revision else ''}" + self.outdir = ( + f"{self.pipeline.replace('/', '-').lower()}-{'_'.join(self.revision) if self.revision else ''}" + ) - # Set the download URL and return - self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}.zip" + if not self.tower and bool(self.wf_sha): + # Set the download URL and return - only applicable for classic downloads + self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0]}.zip" def prompt_container_download(self): """Prompt whether to download container images or not""" From c9cd858b1b3929dfd144077bb00c34b025d89436 Mon Sep 17 00:00:00 2001 From: Matthias Zepper  Date: Tue, 28 Feb 2023 15:40:02 +0100 Subject: [PATCH 30/63] The 2nd revival of the config choice. Now available for archives with only one revision. --- nf_core/download.py | 55 ++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 87a618cee0..41b1fe1458 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -100,6 +100,7 @@ def __init__( self.compress_type = compress_type self.force = force self.tower = tower + self.include_configs = True self.container = container self.singularity_cache_only = singularity_cache_only self.parallel_downloads = parallel_downloads @@ -126,6 +127,9 @@ def download_workflow(self): ) self.prompt_revision() self.get_revision_hash() + # inclusion of configs is unsuitable for multi-revision repositories. + if len(self.revision) == 1: + self.prompt_config_inclusion() self.prompt_container_download() self.prompt_use_singularity_cachedir() self.prompt_singularity_cachedir_only() @@ -149,6 +153,10 @@ def download_workflow(self): else: summary_log.append(f"Output directory: '{self.outdir}'") + if len(self.revision) == 1: + # Only show entry, if option was prompted.
+ summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") + summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") # Check that the outdir doesn't already exist @@ -183,14 +191,15 @@ def download_workflow_classic(self): self.download_wf_files() # Download the centralised configs - log.info("Downloading centralised configs from GitHub") - self.download_configs() - try: - self.wf_use_local_configs() - except FileNotFoundError as e: - log.error("Error editing pipeline config file to use local configs!") - log.critical(e) - sys.exit(1) + if self.include_configs: + log.info("Downloading centralised configs from GitHub") + self.download_configs() + try: + self.wf_use_local_configs() + except FileNotFoundError as e: + log.error("Error editing pipeline config file to use local configs!") + log.critical(e) + sys.exit(1) # Download the singularity images if self.container == "singularity": @@ -213,10 +222,12 @@ def download_workflow_tower(self): self.workflow_repo = WorkflowRepo( remote_url=f"git@github.com:{self.pipeline}.git", - revision=self.revision[0] if self.revision else None, - commit=list(self.wf_sha.values())[0] if bool(self.wf_sha) else "", + revision=self.revision if self.revision else None, + commit=self.wf_sha.values if bool(self.wf_sha) else None, ) - log.info("Downloading centralised configs from GitHub") + + if self.include_configs: + log.info("Downloading centralised configs from GitHub") def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -231,10 +242,13 @@ def prompt_revision(self): # If --tower is specified, allow to select multiple revisions if not bool(self.revision): - self.revision.extend( - nf_core.utils.prompt_pipeline_release_branch(self.wf_revisions, self.wf_branches, multiple=self.tower) + temp = nf_core.utils.prompt_pipeline_release_branch( + self.wf_revisions, self.wf_branches, multiple=self.tower ) + # have to make sure that self.revision is a list of strings, regardless if temp is str or list of strings. + self.revision.append(temp) if isinstance(temp, str) else self.revision.extend(temp) + def get_revision_hash(self): """Find specified revision / branch hash""" @@ -270,6 +284,13 @@ def get_revision_hash(self): # Set the download URL and return - only applicable for classic downloads self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0]}.zip" + def prompt_config_inclusion(self): + """Prompt for inclusion of institutional configurations""" + self.include_configs = questionary.confirm( + "Include the nf-core's default institutional configuration files into the download?", + style=nf_core.utils.nfcore_question_style, + ).ask() + def prompt_container_download(self): """Prompt whether to download container images or not""" @@ -855,12 +876,16 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa Args: remote_url (str): The URL of the remote repository. Defaults to None. - self.revision (list): The revision to use. A list of strings. - commit (str): The commit to clone. Defaults to None. + self.revision (list of str): The revision to use. A list of strings. + commit (dict of str): The commit to clone. Defaults to None. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
""" + import pdb + + pdb.set_trace() + self.remote_url = remote_url self.revision = [].extend(revision) if revision else [] self.commit = [].extend(commit) if commit else [] From 2cfd1599eb8bccb434d6371cba4a2987f3cd9049 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Sat, 4 Mar 2023 15:23:43 +0100 Subject: [PATCH 31/63] Inclusion of the revision in the output file name is problematic with the new ability to download multiple revisions at once. This resulted in loooooooong filenames. --- nf_core/download.py | 37 ++++++++++++++++++++++++++++--------- nf_core/utils.py | 18 +++++++++++++----- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 41b1fe1458..f6646d29ab 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,6 +3,7 @@ from __future__ import print_function import concurrent.futures +from datetime import datetime import git from git.exc import GitCommandError, InvalidGitRepositoryError import io @@ -139,7 +140,10 @@ def download_workflow(self): log.critical(e) sys.exit(1) - summary_log = [f"Pipeline revision: '{','.join(self.revision)}'", f"Pull containers: '{self.container}'"] + summary_log = [ + f"Pipeline revision: '{','.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", + f"Pull containers: '{self.container}'", + ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") @@ -242,12 +246,29 @@ def prompt_revision(self): # If --tower is specified, allow to select multiple revisions if not bool(self.revision): - temp = nf_core.utils.prompt_pipeline_release_branch( + (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( self.wf_revisions, self.wf_branches, multiple=self.tower ) - # have to make sure that self.revision is a list of strings, regardless if temp is str or list of strings. - self.revision.append(temp) if isinstance(temp, str) else self.revision.extend(temp) + # The checkbox() prompt unfortunately does not support passing a Validator, + # so a user who keeps pressing Enter will bump through the selection + + # bool(choice), bool(tag_set): + ############################# + # True, True: A choice was made and revisions were available. + # False, True: No selection was made, but revisions were available -> defaults to all available. + # False, False: No selection was made because no revisions were available -> raise AssertionError. + # True, False: Congratulations, you found a bug! That combo shouldn't happen. + + if bool(choice): + # have to make sure that self.revision is a list of strings, regardless if temp is str or list of strings. 
+ self.revision.append(choice) if isinstance(choice, str) else self.revision.extend(choice) + else: + if bool(tag_set): + self.revision = tag_set + log.info("No particular revision was selected, all available will be downloaded.") + else: + raise AssertionError(f"No revisions of {self.pipeline} available for download.") def get_revision_hash(self): """Find specified revision / branch hash""" @@ -276,9 +297,7 @@ def get_revision_hash(self): # Set the outdir if not self.outdir: - self.outdir = ( - f"{self.pipeline.replace('/', '-').lower()}-{'_'.join(self.revision) if self.revision else ''}" - ) + self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" if not self.tower and bool(self.wf_sha): # Set the download URL and return - only applicable for classic downloads @@ -876,8 +895,8 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa Args: remote_url (str): The URL of the remote repository. Defaults to None. - self.revision (list of str): The revision to use. A list of strings. - commit (dict of str): The commit to clone. Defaults to None. + self.revision (list of str): The revisions to include. A list of strings. + commits (dict of str): The checksums to linked with the revisions. no_pull (bool, optional): Whether to skip the pull step. Defaults to False. hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. diff --git a/nf_core/utils.py b/nf_core/utils.py index 4b35deac94..3ddce9b870 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -847,29 +847,37 @@ def prompt_pipeline_release_branch(wf_releases, wf_branches, multiple=False): Returns: choice (str): Selected release / branch name """ - # Prompt user for release tag + # Prompt user for release tag, tag_set will contain all available. choices = [] + tag_set = [] # Releases if len(wf_releases) > 0: for tag in map(lambda release: release.get("tag_name"), wf_releases): tag_display = [("fg:ansiblue", f"{tag} "), ("class:choice-default", "[release]")] choices.append(questionary.Choice(title=tag_display, value=tag)) + tag_set.append(tag) # Branches for branch in wf_branches.keys(): branch_display = [("fg:ansiyellow", f"{branch} "), ("class:choice-default", "[branch]")] choices.append(questionary.Choice(title=branch_display, value=branch)) + tag_set.append(branch) if len(choices) == 0: return False if multiple: - return questionary.checkbox( - "Select release / branch:", choices=choices, style=nfcore_question_style - ).unsafe_ask() + return ( + questionary.checkbox("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask(), + tag_set, + ) + else: - return questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask() + return ( + questionary.select("Select release / branch:", choices=choices, style=nfcore_question_style).unsafe_ask(), + tag_set, + ) def get_repo_releases_branches(pipeline, wfs): From 3b7019fcd0ff274e1e7c45e7e246d0cda79f9e3f Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 8 Mar 2023 18:33:17 +0100 Subject: [PATCH 32/63] Allow multiple instances of the -r argument. Needed for scripted download. Ultimately, this now means that I also have to implement multiple version downloads for the classic download. Just downloading the first doesn't seem to make sense from a UX perspective. 
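The repeatable -r flag announced in the commit message above works because click gathers repeated options into a tuple. A small self-contained sketch; the command name and help text are illustrative:

import click


@click.command()
@click.option("-r", "--revision", multiple=True, help="May be given several times.")
def download(revision):
    # click hands repeated options over as a tuple, e.g. ("1.0", "dev")
    revisions = list(revision)  # normalise to a list of strings
    click.echo(f"Requested revisions: {revisions}")


if __name__ == "__main__":
    download()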
--- nf_core/__main__.py | 7 ++++++- nf_core/download.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 521454eb99..056242aac2 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -209,7 +209,12 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all # nf-core download @nf_core_cli.command() @click.argument("pipeline", required=False, metavar="") -@click.option("-r", "--revision", type=str, help="Pipeline release") +@click.option( + "-r", + "--revision", + multiple=True, + help="Pipeline release to download. Multiple invocations are possible.", +) @click.option("-o", "--outdir", type=str, help="Output directory") @click.option( "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" diff --git a/nf_core/download.py b/nf_core/download.py index f6646d29ab..61d4f2c1c8 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -95,7 +95,12 @@ def __init__( parallel_downloads=4, ): self.pipeline = pipeline - self.revision = [].extend(revision) if revision else [] + if isinstance(revision, str): + self.revision = [revision] + elif isinstance(revision, tuple): + self.revision = [*revision] + else: + self.revision = [] self.outdir = outdir self.output_filename = None self.compress_type = compress_type From a92c3f41a7d8d1f88cb4d1c9c3c5c289f3cc1ee2 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 28 Mar 2023 16:02:23 +0200 Subject: [PATCH 33/63] Finished updating the prompts for the dialogues. --- nf_core/download.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 61d4f2c1c8..ad8da971d1 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -133,8 +133,8 @@ def download_workflow(self): ) self.prompt_revision() self.get_revision_hash() - # inclusion of configs is unsuitable for multi-revision repositories. - if len(self.revision) == 1: + # inclusion of configs is unnecessary for Tower. + if not self.tower: self.prompt_config_inclusion() self.prompt_container_download() self.prompt_use_singularity_cachedir() @@ -146,7 +146,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{','.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -187,12 +187,6 @@ def download_workflow(self): # Summary log log.info("Saving '{}'\n {}".format(self.pipeline, "\n ".join(summary_log))) - # Actually download the workflow - if not self.tower: - self.download_workflow_classic() - else: - self.download_workflow_tower() - def download_workflow_classic(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" # Download the pipeline files @@ -256,7 +250,7 @@ def prompt_revision(self): ) # The checkbox() prompt unfortunately does not support passing a Validator, - # so a user who keeps pressing Enter will bump through the selection + # so a user who keeps pressing Enter will bump through the selection without choice. 
# bool(choice), bool(tag_set): ############################# @@ -266,7 +260,7 @@ def prompt_revision(self): # True, False: Congratulations, you found a bug! That combo shouldn't happen. if bool(choice): - # have to make sure that self.revision is a list of strings, regardless if temp is str or list of strings. + # have to make sure that self.revision is a list of strings, regardless if choice is str or list of strings. self.revision.append(choice) if isinstance(choice, str) else self.revision.extend(choice) else: if bool(tag_set): @@ -302,7 +296,10 @@ def get_revision_hash(self): # Set the outdir if not self.outdir: - self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" + if len(self.wf_sha) > 1: + self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}" + else: + self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{self.revision[0]}" if not self.tower and bool(self.wf_sha): # Set the download URL and return - only applicable for classic downloads From cee66b519b2c243f4cbb2c78b3ef75030a7674f8 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 13 Apr 2023 17:37:20 +0200 Subject: [PATCH 34/63] Converted the self.wf_download_url into a dict. --- nf_core/download.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index ad8da971d1..8e2ead0f86 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -114,7 +114,7 @@ def __init__( self.wf_revisions = {} self.wf_branches = {} self.wf_sha = {} - self.wf_download_url = None + self.wf_download_url = {} self.nf_config = {} self.containers = [] @@ -162,7 +162,7 @@ def download_workflow(self): else: summary_log.append(f"Output directory: '{self.outdir}'") - if len(self.revision) == 1: + if not self.tower: # Only show entry, if option was prompted. summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") @@ -187,8 +187,20 @@ def download_workflow(self): # Summary log log.info("Saving '{}'\n {}".format(self.pipeline, "\n ".join(summary_log))) + # Perform the actual download + if self.tower: + # self.download_workflow_tower() + pass + else: + self.download_workflow_classic() + def download_workflow_classic(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" + + import pdb + + pdb.set_trace() + # Download the pipeline files log.info("Downloading workflow files from GitHub") self.download_wf_files() @@ -301,9 +313,13 @@ def get_revision_hash(self): else: self.outdir = f"{self.pipeline.replace('/', '-').lower()}_{self.revision[0]}" - if not self.tower and bool(self.wf_sha): - # Set the download URL and return - only applicable for classic downloads - self.wf_download_url = f"https://github.com/{self.pipeline}/archive/{list(self.wf_sha.values())[0]}.zip" + if not self.tower: + for revision, wf_sha in self.wf_sha.items(): + # Set the download URL and return - only applicable for classic downloads + self.wf_download_url = { + **self.wf_download_url, + revision: f"https://github.com/{self.pipeline}/archive/{wf_sha}.zip", + } def prompt_config_inclusion(self): """Prompt for inclusion of institutional configurations""" @@ -903,9 +919,6 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. 
Defaults to False. """ - import pdb - - pdb.set_trace() self.remote_url = remote_url self.revision = [].extend(revision) if revision else [] From 8708d315c65917237d3746446347a03205b451a1 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 14 Apr 2023 17:08:09 +0200 Subject: [PATCH 35/63] Enable multi-revision classic download. --- nf_core/download.py | 135 ++++++++++++++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 42 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8e2ead0f86..3327f7580d 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -197,28 +197,32 @@ def download_workflow(self): def download_workflow_classic(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" - import pdb - - pdb.set_trace() - - # Download the pipeline files - log.info("Downloading workflow files from GitHub") - self.download_wf_files() - - # Download the centralised configs + # Download the centralised configs first if self.include_configs: log.info("Downloading centralised configs from GitHub") self.download_configs() - try: - self.wf_use_local_configs() - except FileNotFoundError as e: - log.error("Error editing pipeline config file to use local configs!") - log.critical(e) - sys.exit(1) + + # Download the pipeline files for each selected revision + log.info("Downloading workflow files from GitHub") + + for item in zip(self.revision, self.wf_sha.values(), self.wf_download_url.values()): + revision_dirname = self.download_wf_files(revision=item[0], wf_sha=item[1], download_url=item[2]) + + if self.include_configs: + try: + self.wf_use_local_configs(revision_dirname) + except FileNotFoundError as e: + log.error("Error editing pipeline config file to use local configs!") + log.critical(e) + sys.exit(1) + + # Collect all required singularity images + if self.container == "singularity": + self.find_container_images(revision_dirname) # Download the singularity images if self.container == "singularity": - self.find_container_images() + log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") try: self.get_singularity_images() except OSError as e: @@ -442,24 +446,29 @@ def prompt_compression_type(self): if self.compress_type == "none": self.compress_type = None - def download_wf_files(self): + def download_wf_files(self, revision, wf_sha, download_url): """Downloads workflow files from GitHub to the :attr:`self.outdir`.""" - log.debug(f"Downloading {self.wf_download_url}") + log.debug(f"Downloading {download_url}") # Download GitHub zip file into memory and extract - url = requests.get(self.wf_download_url) + url = requests.get(download_url) with ZipFile(io.BytesIO(url.content)) as zipfile: zipfile.extractall(self.outdir) + # create a filesystem-safe version of the revision name for the directory + revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision) + # Rename the internal directory name to be more friendly - gh_name = f"{self.pipeline}-{list(self.wf_sha.values())[0] if bool(self.wf_sha) else ''}".split("/")[-1] - os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, "workflow")) + gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1] + os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, revision_dirname)) # Make downloaded files executable - for dirpath, _, filelist in os.walk(os.path.join(self.outdir, "workflow")): + for dirpath, _, filelist in os.walk(os.path.join(self.outdir, revision_dirname)): for 
fname in filelist: os.chmod(os.path.join(dirpath, fname), 0o775) + return revision_dirname + def download_configs(self): """Downloads the centralised config profiles from nf-core/configs to :attr:`self.outdir`.""" configs_zip_url = "https://github.com/nf-core/configs/archive/master.zip" @@ -479,9 +488,9 @@ def download_configs(self): for fname in filelist: os.chmod(os.path.join(dirpath, fname), 0o775) - def wf_use_local_configs(self): + def wf_use_local_configs(self, revision_dirname): """Edit the downloaded nextflow.config file to use the local config files""" - nfconfig_fn = os.path.join(self.outdir, "workflow", "nextflow.config") + nfconfig_fn = os.path.join(self.outdir, revision_dirname, "nextflow.config") find_str = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" repl_str = "${projectDir}/../configs/" log.debug(f"Editing 'params.custom_config_base' in '{nfconfig_fn}'") @@ -507,7 +516,7 @@ def wf_use_local_configs(self): with open(nfconfig_fn, "w") as nfconfig_fh: nfconfig_fh.write(nfconfig) - def find_container_images(self): + def find_container_images(self, revision_dirname): """Find container image names for workflow. Starts by using `nextflow config` to pull out any process.container @@ -533,15 +542,23 @@ def find_container_images(self): 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : 'biocontainers/fastqc:0.11.9--0' }" + Later DSL2, variable is being used: + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + "https://depot.galaxyproject.org/singularity/${container_id}" : + "quay.io/biocontainers/${container_id}" }" + + container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' + DSL1 / Special case DSL2: container "nfcore/cellranger:6.0.2" """ log.debug("Fetching container names for workflow") - containers_raw = [] + # since this is run for multiple versions now, account for previous invocations + containers_raw = [] if not self.containers else self.containers # Use linting code to parse the pipeline nextflow config - self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, "workflow")) + self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, revision_dirname)) # Find any config variables that look like a container for k, v in self.nf_config.items(): @@ -549,7 +566,7 @@ def find_container_images(self): containers_raw.append(v.strip('"').strip("'")) # Recursive search through any DSL2 module files for container spec lines. - for subdir, _, files in os.walk(os.path.join(self.outdir, "workflow", "modules")): + for subdir, _, files in os.walk(os.path.join(self.outdir, revision_dirname, "modules")): for file in files: if file.endswith(".nf"): file_path = os.path.join(subdir, file) @@ -569,18 +586,54 @@ def find_container_images(self): break # Prioritise http, exit loop as soon as we find it # No https download, is the entire container string a docker URI? - else: - # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/39672069/713980 - docker_regex = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(? 1 else ''}") - def get_singularity_images(self): """Loop through container names and download Singularity images""" From e545df529dd8816789cbe7928df914e481f2d3f9 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 17 Apr 2023 15:47:00 +0200 Subject: [PATCH 36/63] Small tweaks to ensure that tools doesn't bail out if there is no symlink from singularity to apptainer on the system. 
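The reworked find_container_images() above recursively scans DSL2 module files for container declarations. A deliberately simplified sketch of that scan, assuming a workflow directory containing modules/**/*.nf; the real implementation also handles docker URIs, DSL1 syntax and variable interpolation:

import os
import re


def scan_for_containers(workflow_dir):
    """Collect candidate Singularity image URLs from DSL2 module files."""
    containers = []
    url_regex = re.compile(r"https://depot\.galaxyproject\.org/singularity/[^'\"\s]+")
    for subdir, _, files in os.walk(os.path.join(workflow_dir, "modules")):
        for fname in files:
            if fname.endswith(".nf"):
                with open(os.path.join(subdir, fname)) as fh:
                    containers.extend(url_regex.findall(fh.read()))
    return sorted(set(containers))  # de-duplicate across modules and revisions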
--- nf_core/download.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 3327f7580d..a09e457e36 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -689,8 +689,11 @@ def get_singularity_images(self): containers_pull.append([container, out_path, cache_path]) # Exit if we need to pull images and Singularity is not installed - if len(containers_pull) > 0 and shutil.which("singularity") is None: - raise OSError("Singularity is needed to pull images, but it is not installed") + if len(containers_pull) > 0: + if not shutil.which("singularity") or not shutil.which("apptainer"): + raise OSError( + "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" + ) # Go through each method of fetching containers in order for container in containers_exist: @@ -881,7 +884,12 @@ def singularity_pull_image(self, container, out_path, cache_path, progress): # Pull using singularity address = f"docker://{container.replace('docker://', '')}" - singularity_command = ["singularity", "pull", "--name", output_path, address] + if shutil.which("singularity"): + singularity_command = ["singularity", "pull", "--name", output_path, address] + elif shutil.which("apptainer"): + singularity_command = ["apptainer", "pull", "--name", output_path, address] + else: + raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH") log.debug(f"Building singularity image: {address}") log.debug(f"Singularity command: {' '.join(singularity_command)}") From 23cfbf64915593674a1315c2e3cfeaded51ecfff Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 18 Apr 2023 18:33:49 +0200 Subject: [PATCH 37/63] Initialise the Git repo clone of the workflow. 
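A compact sketch of the container-engine detection introduced in the patch above; the function name is illustrative. Note that this sketch only fails when neither binary is found, i.e. the two shutil.which() checks are effectively combined with `and` rather than `or`, which matches the stated intent of accepting either engine:

import shutil


def container_pull_command(output_path, address):
    """Return a pull command for whichever container engine is available."""
    if shutil.which("singularity"):
        return ["singularity", "pull", "--name", output_path, address]
    if shutil.which("apptainer"):
        return ["apptainer", "pull", "--name", output_path, address]
    raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH")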
--- nf_core/download.py | 80 +++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 50 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index a09e457e36..8c66517972 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -189,8 +189,7 @@ def download_workflow(self): # Perform the actual download if self.tower: - # self.download_workflow_tower() - pass + self.download_workflow_tower() else: self.download_workflow_classic() @@ -242,9 +241,14 @@ def download_workflow_tower(self): self.workflow_repo = WorkflowRepo( remote_url=f"git@github.com:{self.pipeline}.git", revision=self.revision if self.revision else None, - commit=self.wf_sha.values if bool(self.wf_sha) else None, + commit=self.wf_sha.values() if bool(self.wf_sha) else None, + in_cache=False, ) + import pdb + + pdb.set_trace() + if self.include_configs: log.info("Downloading centralised configs from GitHub") @@ -457,6 +461,9 @@ def download_wf_files(self, revision, wf_sha, download_url): # create a filesystem-safe version of the revision name for the directory revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision) + # account for name collisions, if there is a branch / release named "configs" or "singularity-images" + if revision_dirname in ["configs", "singularity-images"]: + revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", self.pipeline + revision_dirname) # Rename the internal directory name to be more friendly gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1] @@ -980,16 +987,25 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa """ self.remote_url = remote_url - self.revision = [].extend(revision) if revision else [] - self.commit = [].extend(commit) if commit else [] - self.hide_progress = hide_progress + if isinstance(revision, str): + self.revision = [revision] + elif isinstance(revision, list): + self.revision = [*revision] + else: + self.revision = [] + if isinstance(commit, str): + self.commit = [commit] + elif isinstance(revision, list): + self.commit = [*commit] + else: + self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.setup_local_repo(remote_url, commit, hide_progress, in_cache=in_cache) + self.setup_local_repo(remote_url, commit=None, in_cache=in_cache) def __repr__(self): """Called by print, creates representation of object""" - return f"" + return f"" def retry_setup_local_repo(self): if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): @@ -999,7 +1015,7 @@ def retry_setup_local_repo(self): else: raise LookupError("Exiting due to error with local modules git repo") - def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): + def setup_local_repo(self, remote, commit=None, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -1007,7 +1023,7 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): Args: remote (str): git url of remote - branch (str): name of branch to use + commit (str): name of branch to checkout from (optional) hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
Sets self.repo @@ -1022,7 +1038,7 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None, ) with pbar: self.repo = git.Repo.clone_from( @@ -1045,7 +1061,7 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None, ) with pbar: self.repo.remotes.origin.fetch( @@ -1057,41 +1073,5 @@ def setup_local_repo(self, remote, commit, hide_progress=False, in_cache=True): log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") self.retry_setup_local_repo() finally: - self.repo.git.checkout(commit) - - def add_nfcore_configs(self, commit, hide_progress=False): - """ - Pulls the configuration profiles from the nf-core/config repository on GitHub. - - Args: - commit: The config version to pull - hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. - Sets self.repo - """ - - try: - if os.path.exists(self.local_repo_dir): - try: - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - disable=hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, - ) - with pbar: - self.configs = git.Submodule.add( - self.repo, - "nf-core configuration", - "./conf_institutional", - f"git@github.com:nf-core/configs.git", - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Adding configuration"), - ) - except GitCommandError: - raise LookupError(f"Failed to retrieve configuration: `{remote}`") - - except (GitCommandError, InvalidGitRepositoryError) as e: - log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") - self.retry_setup_local_repo() - finally: - self.repo.git.checkout(commit) + if commit: + self.repo.git.checkout(commit) From d94354f3bc175d7c014e2a91726c4ee9944fd4a4 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 19 Apr 2023 18:25:01 +0200 Subject: [PATCH 38/63] WorkflowRepo attributes and functions. 
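Patches 35 and 37 above derive a filesystem-safe directory name for every downloaded revision and avoid collisions with the reserved configs/ and singularity-images/ folders. A short sketch of that sanitisation; the function name is illustrative:

import re


def safe_revision_dirname(pipeline, revision):
    """Turn an arbitrary revision name into a filesystem-safe directory name."""
    dirname = re.sub("[^0-9a-zA-Z]+", "_", revision)  # e.g. "feat/x-1.0" -> "feat_x_1_0"
    # avoid clashing with the sibling folders created by the download itself
    if dirname in ["configs", "singularity-images"]:
        dirname = re.sub("[^0-9a-zA-Z]+", "_", pipeline + dirname)
    return dirname


print(safe_revision_dirname("nf-core/rnaseq", "3.10.1"))  # -> "3_10_1"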
--- nf_core/download.py | 68 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8c66517972..43a27e2f51 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -245,12 +245,29 @@ def download_workflow_tower(self): in_cache=False, ) - import pdb + # Remove tags for those revisions that had not been selected + self.workflow_repo.tidy_tags() - pdb.set_trace() + # extract the required containers + if self.container == "singularity": + for commit in self.wf_sha.values(): + # Checkout the repo in the current revision + self.workflow_repo.checkout(commit) + # Collect all required singularity images + self.find_container_images(self.workflow_repo.access()) + + # Download the singularity images + log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") + try: + self.get_singularity_images() + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) - if self.include_configs: - log.info("Downloading centralised configs from GitHub") + # Compress into an archive + if self.compress_type is not None: + log.info("Compressing images") + self.compress_download() def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -973,7 +990,15 @@ class WorkflowRepo(SyncedRepo): """ - def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=False, in_cache=True): + def __init__( + self, + remote_url, + revision, + commit, + no_pull=False, + hide_progress=False, + in_cache=True, + ): """ Initializes the object and clones the workflows git repository if it is not already present @@ -985,7 +1010,6 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. """ - self.remote_url = remote_url if isinstance(revision, str): self.revision = [revision] @@ -1001,11 +1025,23 @@ def __init__(self, remote_url, revision, commit, no_pull=False, hide_progress=Fa self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) - self.setup_local_repo(remote_url, commit=None, in_cache=in_cache) + self.setup_local_repo(remote_url, in_cache=in_cache) + + # expose some instance attributes + self.tags = self.repo.tags def __repr__(self): """Called by print, creates representation of object""" - return f"" + return f"" + + def access(self): + if os.path.exists(self.local_repo_dir): + return self.local_repo_dir + else: + return None + + def checkout(self, commit): + return super().checkout(commit) def retry_setup_local_repo(self): if rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): @@ -1015,7 +1051,7 @@ def retry_setup_local_repo(self): else: raise LookupError("Exiting due to error with local modules git repo") - def setup_local_repo(self, remote, commit=None, in_cache=True): + def setup_local_repo(self, remote, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from @@ -1072,6 +1108,14 @@ def setup_local_repo(self, remote, commit=None, in_cache=True): except (GitCommandError, InvalidGitRepositoryError) as e: log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") self.retry_setup_local_repo() - finally: - if commit: - self.repo.git.checkout(commit) + + def tidy_tags(self): + """ + Function to delete all tags that point to revisions that are not of interest to the downloader. + This allows a clutter-free experience in Tower. The commits are evidently still available. + """ + if self.revision and self.repo and self.repo.tags: + for tag in self.repo.tags: + if tag.name not in self.revision: + self.repo.delete_tag(tag) + self.tags = self.repo.tags From 7dbcca43aa8dd69a638a93dd354b7b725b280006 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 21 Apr 2023 16:06:57 +0200 Subject: [PATCH 39/63] Finished the Tower download branch. --- nf_core/download.py | 76 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 43a27e2f51..e8fafa5b1a 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -133,12 +133,13 @@ def download_workflow(self): ) self.prompt_revision() self.get_revision_hash() - # inclusion of configs is unnecessary for Tower. + # Inclusion of configs is unnecessary for Tower. if not self.tower: self.prompt_config_inclusion() self.prompt_container_download() self.prompt_use_singularity_cachedir() self.prompt_singularity_cachedir_only() + # Nothing meaningful to compress here. if not self.tower: self.prompt_compression_type() except AssertionError as e: @@ -230,7 +231,7 @@ def download_workflow_classic(self): # Compress into an archive if self.compress_type is not None: - log.info("Compressing download..") + log.info("Compressing output into archive") self.compress_download() def download_workflow_tower(self): @@ -248,6 +249,9 @@ def download_workflow_tower(self): # Remove tags for those revisions that had not been selected self.workflow_repo.tidy_tags() + # create a bare clone of the modified repository needed for Tower + self.workflow_repo.bare_clone(os.path.join(self.outdir, self.output_filename)) + # extract the required containers if self.container == "singularity": for commit in self.wf_sha.values(): @@ -264,10 +268,9 @@ def download_workflow_tower(self): log.critical(f"[red]{e}[/]") sys.exit(1) - # Compress into an archive - if self.compress_type is not None: - log.info("Compressing images") - self.compress_download() + # Justify why compression is skipped for Tower downloads (Prompt is not shown, but CLI argument could have been set) + if self.compress_type is not None: + log.info("Compression choice is ignored for Tower downloads since nothing can be reasonably compressed.") def prompt_pipeline_name(self): """Prompt for the pipeline name if not set with a flag""" @@ -1019,11 +1022,12 @@ def __init__( self.revision = [] if isinstance(commit, str): self.commit = [commit] - elif isinstance(revision, list): + elif isinstance(commit, list): self.commit = [*commit] else: self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) + self.retries = 0 # retries for setting up the locally cached repository self.setup_local_repo(remote_url, in_cache=in_cache) @@ -1043,13 +1047,24 @@ def access(self): def checkout(self, commit): return super().checkout(commit) - def retry_setup_local_repo(self): - if 
rich.prompt.Confirm.ask(f"[violet]Delete local cache '{self.local_repo_dir}' and try again?"): - log.info(f"Removing '{self.local_repo_dir}'") + def retry_setup_local_repo(self, skip_confirm=False): + self.retries += 1 + if skip_confirm or rich.prompt.Confirm.ask( + f"[violet]Delete local cache '{self.local_repo_dir}' and try again?" + ): + if ( + self.retries > 1 + ): # One unconfirmed retry is acceptable, but prevent infinite loops without user interaction. + log.error( + f"Errors with locally cached repository of '{self.fullname}'. Please delete '{self.local_repo_dir}' manually and try again." + ) + sys.exit(1) + if not skip_confirm: # Feedback to user for manual confirmation. + log.info(f"Removing '{self.local_repo_dir}'") shutil.rmtree(self.local_repo_dir) - self.setup_local_repo(self.remote, self.commit, self.hide_progress) + self.setup_local_repo(self.remote_url, in_cache=False) else: - raise LookupError("Exiting due to error with local modules git repo") + raise LookupError("Exiting due to error with locally cached Git repository.") def setup_local_repo(self, remote, in_cache=True): """ @@ -1113,9 +1128,38 @@ def tidy_tags(self): """ Function to delete all tags that point to revisions that are not of interest to the downloader. This allows a clutter-free experience in Tower. The commits are evidently still available. + + However, due to local caching, the downloader might also want access to revisions that had been deleted before. + In that case, don't bother with re-adding the tags and rather download anew from Github. """ if self.revision and self.repo and self.repo.tags: - for tag in self.repo.tags: - if tag.name not in self.revision: - self.repo.delete_tag(tag) - self.tags = self.repo.tags + desired_tags = self.revision.copy() + try: + for tag in self.repo.tags: + if tag.name not in self.revision: + self.repo.delete_tag(tag) + else: + desired_tags.remove(tag.name) + self.tags = self.repo.tags + if len(desired_tags) > 0: + log.info( + f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_tags)}. Downloading anew from GitHub..." + ) + self.retry_setup_local_repo(skip_confirm=True) + self.tidy_tags() + except (GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Adapting your pipeline download unfortunately failed:[/]\n{e}\n") + self.retry_setup_local_repo(skip_confirm=True) + sys.exit(1) + + def bare_clone(self, destination): + if self.repo: + try: + destfolder = os.path.abspath(destination) + if not os.path.exists(destfolder): + os.makedirs(destfolder) + if os.path.exists(destination): + shutil.rmtree(os.path.abspath(destination)) + self.repo.clone(os.path.abspath(destination), bare=True) + except (OSError, GitCommandError, InvalidGitRepositoryError) as e: + log.error(f"[red]Failure to create the pipeline download[/]\n{e}\n") From faac11b0de7ba1a148e26f24688ee2045ddcb2bd Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 24 Apr 2023 14:55:24 +0200 Subject: [PATCH 40/63] Minor tweaks to the container download functionality. 
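tidy_tags() and bare_clone() above are the heart of the Tower branch: prune every tag the user did not select, then emit a bare repository that Tower can launch via the file:/ prefix. A hedged sketch of both steps with GitPython; names and arguments are simplified relative to the patch:

import os
import shutil

import git


def prune_tags(repo: git.Repo, keep):
    """Delete tags that were not selected; the tagged commits remain reachable."""
    for tag in list(repo.tags):
        if tag.name not in keep:
            repo.delete_tag(tag)


def bare_clone(repo: git.Repo, destination):
    """Write a bare clone of `repo` to `destination`, replacing any stale copy."""
    destination = os.path.abspath(destination)
    if os.path.exists(destination):
        shutil.rmtree(destination)  # drop a previous download with the same name
    return repo.clone(destination, bare=True)  # GitPython forwards bare=True to `git clone`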
--- nf_core/download.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index e8fafa5b1a..cba7eb7dfb 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -235,7 +235,7 @@ def download_workflow_classic(self): self.compress_download() def download_workflow_tower(self): - """Create a bare-cloned git repository of the workflow that includes the configurations, such it can be launched with `tw launch` as file:/ pipeline""" + """Create a bare-cloned git repository of the workflow, such it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") @@ -261,12 +261,12 @@ def download_workflow_tower(self): self.find_container_images(self.workflow_repo.access()) # Download the singularity images - log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") - try: - self.get_singularity_images() - except OSError as e: - log.critical(f"[red]{e}[/]") - sys.exit(1) + log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") + try: + self.get_singularity_images() + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) # Justify why compression is skipped for Tower downloads (Prompt is not shown, but CLI argument could have been set) if self.compress_type is not None: @@ -581,7 +581,7 @@ def find_container_images(self, revision_dirname): """ log.debug("Fetching container names for workflow") - # since this is run for multiple versions now, account for previous invocations + # since this is run for multiple revisions now, account for previously detected containers. containers_raw = [] if not self.containers else self.containers # Use linting code to parse the pipeline nextflow config From 9ca946db597773dbb46fbd4a6883194103b1dd31 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 24 Apr 2023 16:19:27 +0200 Subject: [PATCH 41/63] Updating docs and changelog, fixing linting errors. --- CHANGELOG.md | 4 ++++ README.md | 7 +++++++ nf_core/download.py | 8 ++++---- nf_core/modules/modules_repo.py | 2 +- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b063983ab4..aec3d9360e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,10 @@ - Removed `quay.io` from all module Docker container references as this is now supplied at pipeline level. ([#2249](https://github.com/nf-core/tools/pull/2249)) - Remove `CITATION.cff` file from pipeline template, to avoid that pipeline Zenodo entries reference the nf-core publication instead of the pipeline ([#2059](https://github.com/nf-core/tools/pull/2059)). +### Download + +- Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). + ### Linting - Update modules lint test to fail if enable_conda is found ([#2213](https://github.com/nf-core/tools/pull/2213)) diff --git a/README.md b/README.md index 0de42e86e8..13d8b381a3 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ A python package with helper tools for the nf-core community. 
- [`nf-core list` - List available pipelines](#listing-pipelines) - [`nf-core launch` - Run a pipeline with interactive parameter prompts](#launch-a-pipeline) - [`nf-core download` - Download pipeline for offline use](#downloading-pipelines-for-offline-use) +- [`nf-core download --tower` - Download pipeline for Tower](#downloading-pipelines-for-tower) - [`nf-core licences` - List software licences in a pipeline](#pipeline-software-licences) - [`nf-core create` - Create a new pipeline with the nf-core template](#creating-a-new-pipeline) - [`nf-core lint` - Check pipeline code against nf-core guidelines](#linting-a-workflow) @@ -401,6 +402,12 @@ Note that compressing many GBs of binary files can be slow, so specifying `--com If the download speeds are much slower than your internet connection is capable of, you can set `--parallel-downloads` to a large number to download loads of images at once. +### Adapting downloads to Nextflow Tower + +[seqeralabs® Nextflow Tower](https://cloud.tower.nf/) provides a graphical user interface to oversee pipeline runs, gather statistics and configure compute resources. While pipelines added to _Tower_ are preferably hosted at a Git service, providing them as disconnected, self-reliant repositories is also possible for premises with restricted network access. Choosing the `--tower` flag will download the pipeline in an appropriate form. + +Subsequently, the `*.git` folder can be moved to it's final destination and linked with a pipeline in _Tower_ using the `file:/` prefix. + ## Pipeline software licences Sometimes it's useful to see the software licences of the tools used in a pipeline. diff --git a/nf_core/download.py b/nf_core/download.py index cba7eb7dfb..9fe9c29c9f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -3,9 +3,6 @@ from __future__ import print_function import concurrent.futures -from datetime import datetime -import git -from git.exc import GitCommandError, InvalidGitRepositoryError import io import logging import os @@ -15,19 +12,22 @@ import sys import tarfile import textwrap +from datetime import datetime from zipfile import ZipFile +import git import questionary import requests import requests_cache import rich import rich.progress +from git.exc import GitCommandError, InvalidGitRepositoryError import nf_core import nf_core.list import nf_core.utils -from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR from nf_core.synced_repo import RemoteProgressbar, SyncedRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR log = logging.getLogger(__name__) stderr = rich.console.Console( diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 20d581af84..152ed7b0c0 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -11,8 +11,8 @@ import nf_core.modules.modules_json import nf_core.modules.modules_utils -from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config from nf_core.synced_repo import RemoteProgressbar, SyncedRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR, load_tools_config log = logging.getLogger(__name__) From 7204862e605959fcddaf4e10e4e5d3b2a6555f04 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 24 Apr 2023 21:44:04 +0200 Subject: [PATCH 42/63] Hopefully fixed the existing tests. New ones still need to be written. 
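The first hunk of this commit fixes an operator-precedence slip in the Singularity check: `not a or not b` aborts whenever *either* tool is missing, while the intended condition is to abort only when *both* are. A minimal standalone illustration, using only the standard library:

import shutil


def can_pull_images():
    """True if either Singularity or Apptainer is available on $PATH."""
    # Buggy original: `not shutil.which("singularity") or not shutil.which("apptainer")`
    # raises unless *both* tools are installed (De Morgan: not a or not b == not (a and b)).
    # Fixed version below raises only when *neither* tool is found.
    return bool(shutil.which("singularity") or shutil.which("apptainer"))


if __name__ == "__main__":
    if not can_pull_images():
        raise OSError("Singularity/Apptainer is needed to pull images, but neither is in $PATH")
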
--- nf_core/download.py | 2 +- tests/test_cli.py | 4 +++- tests/test_download.py | 38 +++++++++++++++++++++----------------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 9fe9c29c9f..53bb744184 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -717,7 +717,7 @@ def get_singularity_images(self): # Exit if we need to pull images and Singularity is not installed if len(containers_pull) > 0: - if not shutil.which("singularity") or not shutil.which("apptainer"): + if not (shutil.which("singularity") or shutil.which("apptainer")): raise OSError( "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0a6b37144d..6f51fe1025 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -165,6 +165,7 @@ def test_cli_download(self, mock_dl): "outdir": "/path/outdir", "compress": "tar.gz", "force": None, + "tower": None, "container": "singularity", "singularity-cache-only": None, "parallel-downloads": 2, @@ -177,10 +178,11 @@ def test_cli_download(self, mock_dl): mock_dl.assert_called_once_with( cmd[-1], - params["revision"], + (params["revision"],), params["outdir"], params["compress"], "force" in params, + "tower" in params, params["container"], "singularity-cache-only" in params, params["parallel-downloads"], diff --git a/tests/test_download.py b/tests/test_download.py index e2ae882394..d1a770a630 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -32,10 +32,10 @@ def test_get_release_hash_release(self): download_obj.wf_branches, ) = nf_core.utils.get_repo_releases_branches(pipeline, wfs) download_obj.get_revision_hash() - assert download_obj.wf_sha == "b3e5e3b95aaf01d98391a62a10a3990c0a4de395" - assert download_obj.outdir == "nf-core-methylseq-1.6" + assert download_obj.wf_sha[download_obj.revision[0]] == "b3e5e3b95aaf01d98391a62a10a3990c0a4de395" + assert download_obj.outdir == "nf-core-methylseq_1.6" assert ( - download_obj.wf_download_url + download_obj.wf_download_url[download_obj.revision[0]] == "https://github.com/nf-core/methylseq/archive/b3e5e3b95aaf01d98391a62a10a3990c0a4de395.zip" ) @@ -51,10 +51,10 @@ def test_get_release_hash_branch(self): download_obj.wf_branches, ) = nf_core.utils.get_repo_releases_branches(pipeline, wfs) download_obj.get_revision_hash() - assert download_obj.wf_sha == "819cbac792b76cf66c840b567ed0ee9a2f620db7" - assert download_obj.outdir == "nf-core-exoseq-dev" + assert download_obj.wf_sha[download_obj.revision[0]] == "819cbac792b76cf66c840b567ed0ee9a2f620db7" + assert download_obj.outdir == "nf-core-exoseq_dev" assert ( - download_obj.wf_download_url + download_obj.wf_download_url[download_obj.revision[0]] == "https://github.com/nf-core/exoseq/archive/819cbac792b76cf66c840b567ed0ee9a2f620db7.zip" ) @@ -78,12 +78,16 @@ def test_get_release_hash_non_existent_release(self): def test_download_wf_files(self, outdir): download_obj = DownloadWorkflow(pipeline="nf-core/methylseq", revision="1.6") download_obj.outdir = outdir - download_obj.wf_sha = "b3e5e3b95aaf01d98391a62a10a3990c0a4de395" - download_obj.wf_download_url = ( - "https://github.com/nf-core/methylseq/archive/b3e5e3b95aaf01d98391a62a10a3990c0a4de395.zip" + download_obj.wf_sha = {"1.6": "b3e5e3b95aaf01d98391a62a10a3990c0a4de395"} + download_obj.wf_download_url = { + "1.6": "https://github.com/nf-core/methylseq/archive/b3e5e3b95aaf01d98391a62a10a3990c0a4de395.zip" + } + rev = download_obj.download_wf_files( + 
download_obj.revision[0], + download_obj.wf_sha[download_obj.revision[0]], + download_obj.wf_download_url[download_obj.revision[0]], ) - download_obj.download_wf_files() - assert os.path.exists(os.path.join(outdir, "workflow", "main.nf")) + assert os.path.exists(os.path.join(outdir, rev, "main.nf")) # # Tests for 'download_configs' @@ -118,7 +122,7 @@ def test_wf_use_local_configs(self, tmp_path): download_obj.download_configs() # Test the function - download_obj.wf_use_local_configs() + download_obj.wf_use_local_configs("workflow") wf_config = nf_core.utils.fetch_wf_config(os.path.join(test_outdir, "workflow"), cache_config=False) assert wf_config["params.custom_config_base"] == f"'{test_outdir}/workflow/../configs/'" @@ -133,14 +137,14 @@ def test_find_container_images(self, tmp_path, mock_fetch_wf_config): "process.mapping.container": "cutting-edge-container", "process.nocontainer": "not-so-cutting-edge", } - download_obj.find_container_images() + download_obj.find_container_images("workflow") assert len(download_obj.containers) == 1 assert download_obj.containers[0] == "cutting-edge-container" # # Tests for 'singularity_pull_image' # - # If Singularity is installed, but the container can't be accessed because it does not exist or there are aceess + # If Singularity is installed, but the container can't be accessed because it does not exist or there are access # restrictions, a FileNotFoundError is raised due to the unavailability of the image. @pytest.mark.skipif( shutil.which("singularity") is None, @@ -153,16 +157,16 @@ def test_singularity_pull_image_singularity_installed(self, tmp_dir, mock_rich_p with pytest.raises(FileNotFoundError): download_obj.singularity_pull_image("a-container", tmp_dir, None, mock_rich_progress) - # If Singularity is not installed, it raises a FileNotFoundError because the singularity command can't be found. + # If Singularity is not installed, it raises a OSError because the singularity command can't be found. 
@pytest.mark.skipif( shutil.which("singularity") is not None, - reason="Can't test how the code behaves when sungularity is not installed if it is.", + reason="Can't test how the code behaves when singularity is not installed if it is.", ) @with_temporary_folder @mock.patch("rich.progress.Progress.add_task") def test_singularity_pull_image_singularity_not_installed(self, tmp_dir, mock_rich_progress): download_obj = DownloadWorkflow(pipeline="dummy", outdir=tmp_dir) - with pytest.raises(FileNotFoundError): + with pytest.raises(OSError): download_obj.singularity_pull_image("a-container", tmp_dir, None, mock_rich_progress) # From 3a60a54fe8f9371bf5fe9a61272f8024276bff1b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 25 Apr 2023 14:18:58 +0200 Subject: [PATCH 43/63] Refactor the CLI commands for the Singularity Cache Dir --- nf_core/__main__.py | 36 +++++- nf_core/download.py | 287 +++++++++++++++++++++++++++++--------------- 2 files changed, 220 insertions(+), 103 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 056242aac2..46e9ac0988 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -225,11 +225,30 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images" ) @click.option( - "--singularity-cache-only/--singularity-cache-copy", - help="Don't / do copy images to the output directory and set 'singularity.cacheDir' in workflow", + "-s", + "--singularity-cache", + type=click.Choice(["amend", "copy", "remote"]), + help="Utilize the 'singularity.cacheDir' in the download process, if applicable.", +) +@click.option( + "-i", + "--singularity-cache-index", + type=str, + help="List of images already available in a remote 'singularity.cacheDir', imposes --singularity-cache=remote", ) @click.option("-p", "--parallel-downloads", type=int, default=4, help="Number of parallel image downloads") -def download(pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads): +def download( + pipeline, + revision, + outdir, + compress, + force, + tower, + container, + singularity_cache, + singularity_cache_index, + parallel_downloads, +): """ Download a pipeline, nf-core/configs and pipeline singularity images. @@ -239,7 +258,16 @@ def download(pipeline, revision, outdir, compress, force, tower, container, sing from nf_core.download import DownloadWorkflow dl = DownloadWorkflow( - pipeline, revision, outdir, compress, force, tower, container, singularity_cache_only, parallel_downloads + pipeline, + revision, + outdir, + compress, + force, + tower, + container, + singularity_cache, + singularity_cache_index, + parallel_downloads, ) dl.download_workflow() diff --git a/nf_core/download.py b/nf_core/download.py index 53bb744184..59e7f47d3f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -77,7 +77,7 @@ class DownloadWorkflow: Args: pipeline (str): A nf-core pipeline name. revision (List[str]): The workflow revision to download, like `1.0`. Defaults to None. - singularity (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. + container (bool): Flag, if the Singularity container should be downloaded as well. Defaults to False. tower (bool): Flag, to customize the download for Nextflow Tower (convert to git bare repo). Defaults to False. outdir (str): Path to the local download directory. Defaults to None. 
""" @@ -91,7 +91,8 @@ def __init__( force=False, tower=False, container=None, - singularity_cache_only=False, + singularity_cache=None, + singularity_cache_index=None, parallel_downloads=4, ): self.pipeline = pipeline @@ -106,9 +107,12 @@ def __init__( self.compress_type = compress_type self.force = force self.tower = tower - self.include_configs = True + self.include_configs = None self.container = container - self.singularity_cache_only = singularity_cache_only + self.singularity_cache = ( + singularity_cache if not singularity_cache_index else "remote" + ) # if a singularity_cache_index is given, use the file and overrule choice. + self.singularity_cache_index = singularity_cache_index self.parallel_downloads = parallel_downloads self.wf_revisions = {} @@ -117,6 +121,7 @@ def __init__( self.wf_download_url = {} self.nf_config = {} self.containers = [] + self.containers_remote = [] # stores the remote images provided in the file. # Fetch remote workflows self.wfs = nf_core.list.Workflows() @@ -134,11 +139,16 @@ def download_workflow(self): self.prompt_revision() self.get_revision_hash() # Inclusion of configs is unnecessary for Tower. - if not self.tower: + if not self.tower and self.include_configs is None: self.prompt_config_inclusion() - self.prompt_container_download() - self.prompt_use_singularity_cachedir() - self.prompt_singularity_cachedir_only() + if not self.singularity_cache == "remote": + self.prompt_container_download() + self.prompt_singularity_cachedir_creation() + else: + self.container = "singularity" + self.prompt_singularity_cachedir_utilization() + self.prompt_singularity_cachedir_remote(retry=False) + self.read_remote_containers() # Nothing meaningful to compress here. if not self.tower: self.prompt_compression_type() @@ -220,9 +230,6 @@ def download_workflow_classic(self): if self.container == "singularity": self.find_container_images(revision_dirname) - # Download the singularity images - if self.container == "singularity": - log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") try: self.get_singularity_images() except OSError as e: @@ -260,8 +267,6 @@ def download_workflow_tower(self): # Collect all required singularity images self.find_container_images(self.workflow_repo.access()) - # Download the singularity images - log.info(f"Found {len(self.containers)} container{'s' if len(self.containers) > 1 else ''}") try: self.get_singularity_images() except OSError as e: @@ -280,24 +285,27 @@ def prompt_pipeline_name(self): self.pipeline = nf_core.utils.prompt_remote_pipeline_name(self.wfs) def prompt_revision(self): - """Prompt for pipeline revision / branch""" - # Prompt user for revision tag if '--revision' was not set - # If --tower is specified, allow to select multiple revisions - + """ + Prompt for pipeline revision / branch + Prompt user for revision tag if '--revision' was not set + If --tower is specified, allow to select multiple revisions + Also the classic download allows for multiple revisions, but + """ if not bool(self.revision): (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( self.wf_revisions, self.wf_branches, multiple=self.tower ) + """ + The checkbox() prompt unfortunately does not support passing a Validator, + so a user who keeps pressing Enter will flounder past the selection without choice. - # The checkbox() prompt unfortunately does not support passing a Validator, - # so a user who keeps pressing Enter will bump through the selection without choice. 
- - # bool(choice), bool(tag_set): + bool(choice), bool(tag_set): ############################# - # True, True: A choice was made and revisions were available. - # False, True: No selection was made, but revisions were available -> defaults to all available. - # False, False: No selection was made because no revisions were available -> raise AssertionError. - # True, False: Congratulations, you found a bug! That combo shouldn't happen. + True, True: A choice was made and revisions were available. + False, True: No selection was made, but revisions were available -> defaults to all available. + False, False: No selection was made because no revisions were available -> raise AssertionError. + True, False: Congratulations, you found a bug! That combo shouldn't happen. + """ if bool(choice): # have to make sure that self.revision is a list of strings, regardless if choice is str or list of strings. @@ -351,10 +359,14 @@ def get_revision_hash(self): def prompt_config_inclusion(self): """Prompt for inclusion of institutional configurations""" - self.include_configs = questionary.confirm( - "Include the nf-core's default institutional configuration files into the download?", - style=nf_core.utils.nfcore_question_style, - ).ask() + if stderr.is_interactive: # Use rich auto-detection of interactive shells + self.include_configs = questionary.confirm( + "Include the nf-core's default institutional configuration files into the download?", + style=nf_core.utils.nfcore_question_style, + ).ask() + else: + self.include_configs = False + # do not include by default. def prompt_container_download(self): """Prompt whether to download container images or not""" @@ -367,7 +379,7 @@ def prompt_container_download(self): style=nf_core.utils.nfcore_question_style, ).unsafe_ask() - def prompt_use_singularity_cachedir(self): + def prompt_singularity_cachedir_creation(self): """Prompt about using $NXF_SINGULARITY_CACHEDIR if not already set""" if ( self.container == "singularity" @@ -381,6 +393,7 @@ def prompt_use_singularity_cachedir(self): if rich.prompt.Confirm.ask( "[blue bold]?[/] [bold]Define [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] for a shared Singularity image download folder?[/]" ): + self.singularity_cache == "amend" # Prompt user for a cache directory path cachedir_path = None while cachedir_path is None: @@ -425,25 +438,89 @@ def prompt_use_singularity_cachedir(self): "You will need reload your terminal after the download completes for this to take effect." ) - def prompt_singularity_cachedir_only(self): + def prompt_singularity_cachedir_utilization(self): """Ask if we should *only* use $NXF_SINGULARITY_CACHEDIR without copying into target""" if ( - self.singularity_cache_only is None + self.singularity_cache is None # no choice regarding singularity cache has been made. and self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None ): stderr.print( - "\nIf you are working on the same system where you will run Nextflow, you can leave the downloaded images in the " - "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them. " + "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the" + "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them." "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." 
) - self.singularity_cache_only = rich.prompt.Confirm.ask( - "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder?[/]" + self.singularity_cache = rich.prompt.Prompt.ask( + "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the collection?[/]", + choices=["amend", "copy"], ) - # Sanity check, for when passed as a cli flag - if self.singularity_cache_only and self.container != "singularity": - raise AssertionError("Command has '--singularity-cache-only' set, but '--container' is not 'singularity'") + def prompt_singularity_cachedir_remote(self, retry): + """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" + if ( + self.container == "singularity" + and self.singularity_cache == "remote" + and self.singularity_cache_index is None + and stderr.is_interactive # Use rich auto-detection of interactive shells + ): + stderr.print( + "\nNextflow and nf-core can use an environment variable called [blue]$NXF_SINGULARITY_CACHEDIR[/] that is a path to a directory where remote Singularity images are stored. " + "This allows downloaded images to be cached in a central location." + ) + # Prompt user for a file listing the contents of the remote cache directory + cachedir_index = None + while cachedir_index is None: + prompt_cachedir_index = questionary.path( + "Specify a list of the remote images already present in the remote system :", + file_filter="*.txt", + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() + cachedir_index = os.path.abspath(os.path.expanduser(prompt_cachedir_index)) + if prompt_cachedir_index == "": + log.error("Will disregard contents of a remote [blue]$NXF_SINGULARITY_CACHEDIR[/]") + self.singularity_cache_index = None + self.singularity_cache = "copy" + elif not os.access(cachedir_index, os.R_OK): + log.error(f"'{cachedir_index}' is not a valid, readable file.") + cachedir_index = None + if cachedir_index: + self.singularity_cache_index = cachedir_index + if retry: # invoke parsing the file again. + self.read_remote_containers() + + def read_remote_containers(self): + """Reads the file specified as index for the remote Singularity cache dir""" + if ( + self.container == "singularity" + and self.singularity_cache == "remote" + and self.singularity_cache_index is not None + ): + n_total_images = 0 + try: + with open(self.singularity_cache_index) as indexfile: + for line in indexfile.readlines(): + match = re.search(r"([^\/\\]+\.img)", line, re.S) + if match: + n_total_images += 1 + self.containers_remote.append(match.group(0)) + if n_total_images == 0: + raise LookupError("Could not find valid container names in the index file.") + else: + log.info( + f"Successfully read {n_total_images} containers from the remote $NXF_SINGULARITY_CACHE contents." + ) + self.containers_remote = sorted(list(set(self.containers_remote))) + except (FileNotFoundError, LookupError) as e: + log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") + if rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): + self.prompt_singularity_cachedir_remote(retry=True) + else: + log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") + self.singularity_cache_index = None + if os.environ.get("NXF_SINGULARITY_CACHEDIR"): + self.singularity_cache = "copy" # default to copy if possible, otherwise skip. 
+ else: + self.singularity_cache = None def prompt_compression_type(self): """Ask user if we should compress the downloaded files""" @@ -531,7 +608,7 @@ def wf_use_local_configs(self, revision_dirname): nfconfig = nfconfig.replace(find_str, repl_str) # Append the singularity.cacheDir to the end if we need it - if self.container == "singularity" and not self.singularity_cache_only: + if self.container == "singularity" and self.singularity_cache == "copy": nfconfig += ( f"\n\n// Added by `nf-core download` v{nf_core.__version__} //\n" + 'singularity.cacheDir = "${projectDir}/../singularity-images/"' @@ -674,8 +751,14 @@ def get_singularity_images(self): if len(self.containers) == 0: log.info("No container names found in workflow") else: + log.info( + f"Found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in workflow." + ) + with DownloadProgress() as progress: - task = progress.add_task("all_containers", total=len(self.containers), progress_type="summary") + task = progress.add_task( + "Collecting container images", total=len(self.containers), progress_type="summary" + ) # Organise containers based on what we need to do with them containers_exist = [] @@ -697,8 +780,8 @@ def get_singularity_images(self): log.debug(f"Cache directory not found, creating: {cache_path_dir}") os.makedirs(cache_path_dir) - # We already have the target file in place, return - if os.path.exists(out_path): + # We already have the target file in place or in remote cache, return + if os.path.exists(out_path) or os.path.basename(out_path) in self.containers_remote: containers_exist.append(container) continue @@ -722,56 +805,62 @@ def get_singularity_images(self): "Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH" ) - # Go through each method of fetching containers in order - for container in containers_exist: - progress.update(task, description="Image file exists") - progress.update(task, advance=1) - - for container in containers_cache: - progress.update(task, description="Copying singularity images from cache") - self.singularity_copy_cache_image(*container) - progress.update(task, advance=1) - - with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallel_downloads) as pool: - progress.update(task, description="Downloading singularity images") - - # Kick off concurrent downloads - future_downloads = [ - pool.submit(self.singularity_download_image, *container, progress) - for container in containers_download - ] - - # Make ctrl-c work with multi-threading - self.kill_with_fire = False - - try: - # Iterate over each threaded download, waiting for them to finish - for future in concurrent.futures.as_completed(future_downloads): - future.result() - try: - progress.update(task, advance=1) - except Exception as e: - log.error(f"Error updating progress bar: {e}") - - except KeyboardInterrupt: - # Cancel the future threads that haven't started yet - for future in future_downloads: - future.cancel() - # Set the variable that the threaded function looks for - # Will trigger an exception from each thread - self.kill_with_fire = True - # Re-raise exception on the main thread - raise - - for container in containers_pull: - progress.update(task, description="Pulling singularity images") - try: - self.singularity_pull_image(*container, progress) - except RuntimeWarning as r: - # Raise exception if this is not possible - log.error("Not able to pull image. 
Service might be down or internet connection is dead.") - raise r - progress.update(task, advance=1) + if containers_exist: + if self.singularity_cache_index is not None: + log.info(f"{len(containers_exist)} are already cached remotely and won't be retrieved.") + # Go through each method of fetching containers in order + for container in containers_exist: + progress.update(task, description="Image file exists at destination") + progress.update(task, advance=1) + + if containers_cache: + for container in containers_cache: + progress.update(task, description="Copying singularity images from cache") + self.singularity_copy_cache_image(*container) + progress.update(task, advance=1) + + if containers_download or containers_pull: + # if clause gives slightly better UX, because Download is no longer displayed if nothing is left to be downloaded. + with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallel_downloads) as pool: + progress.update(task, description="Downloading singularity images") + + # Kick off concurrent downloads + future_downloads = [ + pool.submit(self.singularity_download_image, *container, progress) + for container in containers_download + ] + + # Make ctrl-c work with multi-threading + self.kill_with_fire = False + + try: + # Iterate over each threaded download, waiting for them to finish + for future in concurrent.futures.as_completed(future_downloads): + future.result() + try: + progress.update(task, advance=1) + except Exception as e: + log.error(f"Error updating progress bar: {e}") + + except KeyboardInterrupt: + # Cancel the future threads that haven't started yet + for future in future_downloads: + future.cancel() + # Set the variable that the threaded function looks for + # Will trigger an exception from each thread + self.kill_with_fire = True + # Re-raise exception on the main thread + raise + + for container in containers_pull: + progress.update(task, description="Pulling singularity images") + try: + self.singularity_pull_image(*container, progress) + except RuntimeWarning as r: + # Raise exception if this is not possible + log.error("Not able to pull image. Service might be down or internet connection is dead.") + raise r + progress.update(task, advance=1) def singularity_image_filenames(self, container): """Check Singularity cache for image, copy to destination folder if found. 
@@ -810,11 +899,11 @@ def singularity_image_filenames(self, container): if os.environ.get("NXF_SINGULARITY_CACHEDIR"): cache_path = os.path.join(os.environ["NXF_SINGULARITY_CACHEDIR"], out_name) # Use only the cache - set this as the main output path - if self.singularity_cache_only: + if self.singularity_cache == "amend": out_path = cache_path cache_path = None - elif self.singularity_cache_only: - raise FileNotFoundError("'--singularity-cache' specified but no '$NXF_SINGULARITY_CACHEDIR' set!") + elif self.singularity_cache in ["amend", "copy"]: + raise FileNotFoundError("Singularity cache is required but no '$NXF_SINGULARITY_CACHEDIR' set!") return (out_path, cache_path) @@ -998,7 +1087,6 @@ def __init__( remote_url, revision, commit, - no_pull=False, hide_progress=False, in_cache=True, ): @@ -1028,6 +1116,7 @@ def __init__( self.commit = [] self.fullname = nf_core.modules.modules_utils.repo_full_name_from_remote(self.remote_url) self.retries = 0 # retries for setting up the locally cached repository + self.hide_progress = hide_progress self.setup_local_repo(remote_url, in_cache=in_cache) @@ -1089,7 +1178,7 @@ def setup_local_repo(self, remote, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, ) with pbar: self.repo = git.Repo.clone_from( @@ -1112,7 +1201,7 @@ def setup_local_repo(self, remote, in_cache=True): rich.progress.BarColumn(bar_width=None), "[bold yellow]{task.fields[state]}", transient=True, - disable=os.environ.get("HIDE_PROGRESS", None) is not None, + disable=os.environ.get("HIDE_PROGRESS", None) is not None or self.hide_progress, ) with pbar: self.repo.remotes.origin.fetch( From b53c9fd8cce0e3abc4d7594049ac5b892ea54442 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 26 Apr 2023 21:05:55 +0200 Subject: [PATCH 44/63] Readme updates for the new remote Singularity cache feature. --- README.md | 18 ++++++++++-------- nf_core/download.py | 22 +++++++++++----------- tests/test_cli.py | 6 ++++-- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 13d8b381a3..28c764a09a 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ A python package with helper tools for the nf-core community. - [`nf-core list` - List available pipelines](#listing-pipelines) - [`nf-core launch` - Run a pipeline with interactive parameter prompts](#launch-a-pipeline) - [`nf-core download` - Download pipeline for offline use](#downloading-pipelines-for-offline-use) -- [`nf-core download --tower` - Download pipeline for Tower](#downloading-pipelines-for-tower) +- [`nf-core download --tower` - Adapting downloads to Nextflow Tower](#adapting-downloads-to-nextflow-tower) - [`nf-core licences` - List software licences in a pipeline](#pipeline-software-licences) - [`nf-core create` - Create a new pipeline with the nf-core template](#creating-a-new-pipeline) - [`nf-core lint` - Check pipeline code against nf-core guidelines](#linting-a-workflow) @@ -349,13 +349,13 @@ nextflow run /path/to/download/nf-core-rnaseq-dev/workflow/ --input mydata.csv - ### Downloaded nf-core configs The pipeline files are automatically updated (`params.custom_config_base` is set to `../configs`), so that the local copy of institutional configs are available when running the pipeline. 
-So using `-profile ` should work if available within [nf-core/configs](https://github.com/nf-core/configs). +So using `-profile ` should work if available within [nf-core/configs](https://github.com/nf-core/configs). This option is not available when downloading a pipeline for use with [Nextflow Tower](#adapting-downloads-to-nextflow-tower) because the application manages all configurations separately. ### Downloading singularity containers If you're using Singularity, the `nf-core download` command can also fetch the required Singularity container images for you. To do this, select `singularity` in the prompt or specify `--container singularity` in the command. -Your archive / target output directory will then include three folders: `workflow`, `configs` and also `singularity-containers`. +Your archive / target output directory will then also include a separate folder `singularity-containers`. The downloaded workflow files are again edited to add the following line to the end of the pipeline's `nextflow.config` file: @@ -373,11 +373,13 @@ We highly recommend setting the `$NXF_SINGULARITY_CACHEDIR` environment variable If found, the tool will fetch the Singularity images to this directory first before copying to the target output archive / directory. Any images previously fetched will be found there and copied directly - this includes images that may be shared with other pipelines or previous pipeline version downloads or download attempts. -If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache-only` / `--singularity-cache-copy`. +If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache amend`. This instructs `nf-core download` to fetch all Singularity images to the `$NXF_SINGULARITY_CACHEDIR` directory but does _not_ copy them to the workflow archive / directory. The workflow config file is _not_ edited. This means that when you later run the workflow, Nextflow will just use the cache folder directly. +If you are downloading a workflow for a different system, you can provide information about its image cache to `nf-core download`. To avoid unnecessary downloads, choose `--singularity-cache remote` and provide a list of already available images as plain text file to `--singularity-cache-index my_list_of_remotely_available_images.txt`. To generate this list on the remote system, run `find $NXF_SINGULARITY_CACHEDIR -name "*.img" > my_list_of_remotely_available_images.txt`. + #### How the Singularity image downloads work The Singularity image download finds containers using two methods: @@ -392,13 +394,13 @@ Where both are found, the download URL is preferred. Once a full list of containers is found, they are processed in the following order: -1. If the target image already exists, nothing is done (eg. with `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache-only` specified) -2. If found in `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache-only` is _not_ specified, they are copied to the output directory +1. If the target image already exists, nothing is done (eg. with `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache amend` specified) +2. 
If found in `$NXF_SINGULARITY_CACHEDIR` and `--singularity-cache copy` is specified, they are copied to the output directory 3. If they start with `http` they are downloaded directly within Python (default 4 at a time, you can customise this with `--parallel-downloads`) 4. If they look like a Docker image name, they are fetched using a `singularity pull` command - - This requires Singularity to be installed on the system and is substantially slower + - This requires Singularity/Apptainer to be installed on the system and is substantially slower -Note that compressing many GBs of binary files can be slow, so specifying `--compress none` is recommended when downloading Singularity images. +Note that compressing many GBs of binary files can be slow, so specifying `--compress none` is recommended when downloading Singularity images that are copied to the output directory. If the download speeds are much slower than your internet connection is capable of, you can set `--parallel-downloads` to a large number to download loads of images at once. diff --git a/nf_core/download.py b/nf_core/download.py index 59e7f47d3f..8297eb3f5c 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -141,11 +141,12 @@ def download_workflow(self): # Inclusion of configs is unnecessary for Tower. if not self.tower and self.include_configs is None: self.prompt_config_inclusion() + # If a remote cache is specified, it is safe to assume images should be downloaded. if not self.singularity_cache == "remote": self.prompt_container_download() - self.prompt_singularity_cachedir_creation() else: self.container = "singularity" + self.prompt_singularity_cachedir_creation() self.prompt_singularity_cachedir_utilization() self.prompt_singularity_cachedir_remote(retry=False) self.read_remote_containers() @@ -371,7 +372,7 @@ def prompt_config_inclusion(self): def prompt_container_download(self): """Prompt whether to download container images or not""" - if self.container is None: + if self.container is None and stderr.is_interactive: stderr.print("\nIn addition to the pipeline code, this tool can download software containers.") self.container = questionary.select( "Download software container images:", @@ -393,7 +394,8 @@ def prompt_singularity_cachedir_creation(self): if rich.prompt.Confirm.ask( "[blue bold]?[/] [bold]Define [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] for a shared Singularity image download folder?[/]" ): - self.singularity_cache == "amend" + if not self.singularity_cache_index: + self.singularity_cache == "amend" # retain "remote" choice. # Prompt user for a cache directory path cachedir_path = None while cachedir_path is None: @@ -419,7 +421,7 @@ def prompt_singularity_cachedir_creation(self): if bashrc_path: stderr.print( f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(bashrc_path)}[/] file ." - "This will then be autmoatically set every time you open a new terminal. We can add the following line to this file for you: \n" + "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n" f'[blue]export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"[/]' ) append_to_file = rich.prompt.Confirm.ask( @@ -444,16 +446,18 @@ def prompt_singularity_cachedir_utilization(self): self.singularity_cache is None # no choice regarding singularity cache has been made. 
and self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None + and stderr.is_interactive ): stderr.print( "\nIf you are working on the same system where you will run Nextflow, you can amend the downloaded images to the ones in the" "[blue not bold]$NXF_SINGULARITY_CACHEDIR[/] folder, Nextflow will automatically find them." "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." ) - self.singularity_cache = rich.prompt.Prompt.ask( - "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the collection?[/]", + self.singularity_cache = questionary.select( + "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the cache?[/]", choices=["amend", "copy"], - ) + style=nf_core.utils.nfcore_question_style, + ).unsafe_ask() def prompt_singularity_cachedir_remote(self, retry): """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" @@ -463,10 +467,6 @@ def prompt_singularity_cachedir_remote(self, retry): and self.singularity_cache_index is None and stderr.is_interactive # Use rich auto-detection of interactive shells ): - stderr.print( - "\nNextflow and nf-core can use an environment variable called [blue]$NXF_SINGULARITY_CACHEDIR[/] that is a path to a directory where remote Singularity images are stored. " - "This allows downloaded images to be cached in a central location." - ) # Prompt user for a file listing the contents of the remote cache directory cachedir_index = None while cachedir_index is None: diff --git a/tests/test_cli.py b/tests/test_cli.py index 6f51fe1025..873b7d4b0c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -167,7 +167,8 @@ def test_cli_download(self, mock_dl): "force": None, "tower": None, "container": "singularity", - "singularity-cache-only": None, + "singularity-cache": "copy", + "singularity-cache-index": "/path/index.txt", "parallel-downloads": 2, } @@ -184,7 +185,8 @@ def test_cli_download(self, mock_dl): "force" in params, "tower" in params, params["container"], - "singularity-cache-only" in params, + params["singularity-cache"], + params["singularity-cache-index"], params["parallel-downloads"], ) From 2c6764dd8bb3d75f79d97a7d0102e3de2b5b443a Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 27 Apr 2023 11:49:21 +0200 Subject: [PATCH 45/63] Add interactive check in retry for parsing the index. 
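For reference, the index parsing that `read_remote_containers()` performs above boils down to a regex scan for `*.img` basenames in a free-form listing (e.g. `find` output). A standalone sketch with invented sample data; the pattern mirrors the one in the patch:

import re


def parse_cache_index(lines):
    """Return sorted, de-duplicated image basenames found in an index listing."""
    images = set()
    for line in lines:
        match = re.search(r"([^\/\\]+\.img)", line)  # basename ending in .img
        if match:
            images.add(match.group(0))
    return sorted(images)


sample = [
    "/cache/depot.galaxyproject.org-singularity-fastqc-0.11.9--0.img",
    "/cache/depot.galaxyproject.org-singularity-multiqc-1.14--pyhdfd78af_0.img",
    "/cache/README.txt",  # ignored: no .img suffix
]
print(parse_cache_index(sample))  # two image names, sorted alphabetically
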
--- nf_core/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8297eb3f5c..76a3f00054 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -512,7 +512,7 @@ def read_remote_containers(self): self.containers_remote = sorted(list(set(self.containers_remote))) except (FileNotFoundError, LookupError) as e: log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") - if rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): + if stderr.is_interactive and rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): self.prompt_singularity_cachedir_remote(retry=True) else: log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") From 2e8d0d2c17408ab499fb081f499a92130929b5b5 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 27 Apr 2023 12:54:16 +0200 Subject: [PATCH 46/63] Incorporating some suggestions by @mashehu. --- nf_core/download.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 76a3f00054..b8ce5a1607 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -148,8 +148,7 @@ def download_workflow(self): self.container = "singularity" self.prompt_singularity_cachedir_creation() self.prompt_singularity_cachedir_utilization() - self.prompt_singularity_cachedir_remote(retry=False) - self.read_remote_containers() + self.prompt_singularity_cachedir_remote() # Nothing meaningful to compress here. if not self.tower: self.prompt_compression_type() @@ -177,8 +176,8 @@ def download_workflow(self): if not self.tower: # Only show entry, if option was prompted. summary_log.append(f"Include default institutional configuration: '{self.include_configs}'") - - summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") + else: + summary_log.append(f"Enabled for seqeralabs® Nextflow Tower: '{self.tower}'") # Check that the outdir doesn't already exist if os.path.exists(self.outdir): @@ -203,9 +202,9 @@ def download_workflow(self): if self.tower: self.download_workflow_tower() else: - self.download_workflow_classic() + self.download_workflow_static() - def download_workflow_classic(self): + def download_workflow_static(self): """Downloads a nf-core workflow from GitHub to the local file system in a self-contained manner.""" # Download the centralised configs first @@ -290,7 +289,8 @@ def prompt_revision(self): Prompt for pipeline revision / branch Prompt user for revision tag if '--revision' was not set If --tower is specified, allow to select multiple revisions - Also the classic download allows for multiple revisions, but + Also the static download allows for multiple revisions, but + we do not prompt this option interactively. """ if not bool(self.revision): (choice, tag_set) = nf_core.utils.prompt_pipeline_release_branch( @@ -459,7 +459,7 @@ def prompt_singularity_cachedir_utilization(self): style=nf_core.utils.nfcore_question_style, ).unsafe_ask() - def prompt_singularity_cachedir_remote(self, retry): + def prompt_singularity_cachedir_remote(self): """Prompt about the index of a remote $NXF_SINGULARITY_CACHEDIR""" if ( self.container == "singularity" @@ -485,8 +485,8 @@ def prompt_singularity_cachedir_remote(self, retry): cachedir_index = None if cachedir_index: self.singularity_cache_index = cachedir_index - if retry: # invoke parsing the file again. 
- self.read_remote_containers() + # in any case read the remote containers, even if no prompt was shown. + self.read_remote_containers() def read_remote_containers(self): """Reads the file specified as index for the remote Singularity cache dir""" From c86df5ba1db616d3605438d15a806fb599bbc298 Mon Sep 17 00:00:00 2001 From: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> Date: Thu, 27 Apr 2023 12:58:46 +0200 Subject: [PATCH 47/63] Apply suggestions from code review @mashehu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- nf_core/__main__.py | 4 ++-- nf_core/download.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 46e9ac0988..8b94e64715 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -213,14 +213,14 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-r", "--revision", multiple=True, - help="Pipeline release to download. Multiple invocations are possible.", + help="Pipeline release to download. Multiple invocations are possible, e.g. `-r 1.1 -r 1.2.", ) @click.option("-o", "--outdir", type=str, help="Output directory") @click.option( "-x", "--compress", type=click.Choice(["tar.gz", "tar.bz2", "zip", "none"]), help="Archive compression type" ) @click.option("-f", "--force", is_flag=True, default=False, help="Overwrite existing files") -@click.option("-t", "--tower", is_flag=True, default=False, help="Customize download for seqeralabs® Nextflow Tower") +@click.option("-t", "--tower", is_flag=True, default=False, help="Download for seqeralabs® Nextflow Tower") @click.option( "-c", "--container", type=click.Choice(["none", "singularity"]), help="Download software container images" ) diff --git a/nf_core/download.py b/nf_core/download.py index b8ce5a1607..8832eca5d3 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -242,7 +242,7 @@ def download_workflow_static(self): self.compress_download() def download_workflow_tower(self): - """Create a bare-cloned git repository of the workflow, such it can be launched with `tw launch` as file:/ pipeline""" + """Create a bare-cloned git repository of the workflow, so it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") @@ -481,7 +481,7 @@ def prompt_singularity_cachedir_remote(self): self.singularity_cache_index = None self.singularity_cache = "copy" elif not os.access(cachedir_index, os.R_OK): - log.error(f"'{cachedir_index}' is not a valid, readable file.") + log.error(f"'{cachedir_index}' is not a readable file.") cachedir_index = None if cachedir_index: self.singularity_cache_index = cachedir_index From e21f2a23800f3f7b293e2ff07842bf9ead560ac4 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 27 Apr 2023 20:12:15 +0200 Subject: [PATCH 48/63] Writing additional tests for the --tower download functionality. 
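One way such a test could look: point `read_remote_containers()` at a throwaway index file and assert on the parsed list. This is a sketch written against the constructor signature and attribute names visible in the hunks above, not the test that this commit actually adds:

from nf_core.download import DownloadWorkflow


def test_read_remote_containers(tmp_path):
    index = tmp_path / "remote_containers.txt"
    index.write_text("/cache/nf-core-ubuntu-20.04.img\n/cache/not_an_image.txt\n")

    # Passing singularity_cache_index implies container="singularity" and
    # singularity_cache="remote" per the constructor logic in the patches above.
    dl = DownloadWorkflow(
        pipeline="dummy",
        outdir=str(tmp_path / "out"),
        singularity_cache_index=str(index),
    )

    dl.read_remote_containers()
    assert dl.containers_remote == ["nf-core-ubuntu-20.04.img"]
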
--- .../workflows/pytest-frozen-ubuntu-20.04.yml | 2 +- CHANGELOG.md | 1 + nf_core/__main__.py | 2 +- nf_core/download.py | 95 +++++++++++-------- tests/data/testdata_remote_containers.txt | 37 ++++++++ tests/test_download.py | 91 +++++++++++++++++- 6 files changed, 188 insertions(+), 40 deletions(-) create mode 100644 tests/data/testdata_remote_containers.txt diff --git a/.github/workflows/pytest-frozen-ubuntu-20.04.yml b/.github/workflows/pytest-frozen-ubuntu-20.04.yml index b015376633..5faf8ce605 100644 --- a/.github/workflows/pytest-frozen-ubuntu-20.04.yml +++ b/.github/workflows/pytest-frozen-ubuntu-20.04.yml @@ -15,7 +15,7 @@ concurrency: cancel-in-progress: true jobs: - pytest: + pytest-frozen: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index aec3d9360e..94f013d7a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ ### Download - Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). +- Refactored the CLI for `--singularity-cache` in `nf-core download` from a flag to an argument. The prior options were renamed to `amend` (container images are only saved in the `$NXF_SINGULARITY_CACHEDIR`) and `copy` (a copy of the image is saved with the download). `remote` was newly introduced and allows to provide a table of contents of a remote cache via an additional argument `--singularity-cache-index` ([#2247](https://github.com/nf-core/tools/pull/2247)). ### Linting diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 8b94e64715..6d6ded471a 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -213,7 +213,7 @@ def launch(pipeline, id, revision, command_only, params_in, params_out, save_all "-r", "--revision", multiple=True, - help="Pipeline release to download. Multiple invocations are possible, e.g. `-r 1.1 -r 1.2.", + help="Pipeline release to download. Multiple invocations are possible, e.g. `-r 1.1 -r 1.2`", ) @click.option("-o", "--outdir", type=str, help="Output directory") @click.option( diff --git a/nf_core/download.py b/nf_core/download.py index 8832eca5d3..db98b17f22 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -108,7 +108,7 @@ def __init__( self.force = force self.tower = tower self.include_configs = None - self.container = container + self.container = container if not singularity_cache_index else "singularity" self.singularity_cache = ( singularity_cache if not singularity_cache_index else "remote" ) # if a singularity_cache_index is given, use the file and overrule choice. 
@@ -157,7 +157,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...['+str(len(self.revision)-2)+' more revisions]...,'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -228,28 +228,29 @@ def download_workflow_static(self): # Collect all required singularity images if self.container == "singularity": - self.find_container_images(revision_dirname) + self.find_container_images(os.path.join(self.outdir, revision_dirname)) - try: - self.get_singularity_images() - except OSError as e: - log.critical(f"[red]{e}[/]") - sys.exit(1) + try: + self.get_singularity_images(current_revision=item[0]) + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) # Compress into an archive if self.compress_type is not None: log.info("Compressing output into archive") self.compress_download() - def download_workflow_tower(self): + def download_workflow_tower(self, location=None): """Create a bare-cloned git repository of the workflow, so it can be launched with `tw launch` as file:/ pipeline""" log.info("Collecting workflow from GitHub") self.workflow_repo = WorkflowRepo( - remote_url=f"git@github.com:{self.pipeline}.git", + remote_url=f"https://github.com/{self.pipeline}.git", revision=self.revision if self.revision else None, commit=self.wf_sha.values() if bool(self.wf_sha) else None, + location=location if location else None, # manual location is required for the tests to work in_cache=False, ) @@ -261,17 +262,17 @@ def download_workflow_tower(self): # extract the required containers if self.container == "singularity": - for commit in self.wf_sha.values(): + for revision, commit in self.wf_sha.items(): # Checkout the repo in the current revision self.workflow_repo.checkout(commit) # Collect all required singularity images self.find_container_images(self.workflow_repo.access()) - try: - self.get_singularity_images() - except OSError as e: - log.critical(f"[red]{e}[/]") - sys.exit(1) + try: + self.get_singularity_images(current_revision=revision) + except OSError as e: + log.critical(f"[red]{e}[/]") + sys.exit(1) # Justify why compression is skipped for Tower downloads (Prompt is not shown, but CLI argument could have been set) if self.compress_type is not None: @@ -412,30 +413,47 @@ def prompt_singularity_cachedir_creation(self): if cachedir_path: os.environ["NXF_SINGULARITY_CACHEDIR"] = cachedir_path - # Ask if user wants this set in their .bashrc - bashrc_path = os.path.expanduser("~/.bashrc") - if not os.path.isfile(bashrc_path): - bashrc_path = os.path.expanduser("~/.bash_profile") - if not os.path.isfile(bashrc_path): - bashrc_path = False - if bashrc_path: + """ + Optionally, create a permanent entry for the NXF_SINGULARITY_CACHEDIR in the terminal profile. + Currently support for bash and zsh. + ToDo: "sh", "bash", "dash", "ash","csh", "tcsh", "ksh", "zsh", "fish", "cmd", "powershell", "pwsh"? 
+ """ + + if os.environ["SHELL"] == "/bin/bash": + shellprofile_path = os.path.expanduser("~/~/.bash_profile") + if not os.path.isfile(shellprofile_path): + shellprofile_path = os.path.expanduser("~/.bashrc") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + elif os.environ["SHELL"] == "/bin/zsh": + shellprofile_path = os.path.expanduser("~/.zprofile") + if not os.path.isfile(shellprofile_path): + shellprofile_path = os.path.expanduser("~/.zshenv") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + else: + shellprofile_path = os.path.expanduser("~/.profile") + if not os.path.isfile(shellprofile_path): + shellprofile_path = False + + if shellprofile_path: stderr.print( - f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(bashrc_path)}[/] file ." + f"\nSo that [blue]$NXF_SINGULARITY_CACHEDIR[/] is always defined, you can add it to your [blue not bold]~/{os.path.basename(shellprofile_path)}[/] file ." "This will then be automatically set every time you open a new terminal. We can add the following line to this file for you: \n" f'[blue]export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"[/]' ) append_to_file = rich.prompt.Confirm.ask( - f"[blue bold]?[/] [bold]Add to [blue not bold]~/{os.path.basename(bashrc_path)}[/] ?[/]" + f"[blue bold]?[/] [bold]Add to [blue not bold]~/{os.path.basename(shellprofile_path)}[/] ?[/]" ) if append_to_file: - with open(os.path.expanduser(bashrc_path), "a") as f: + with open(os.path.expanduser(shellprofile_path), "a") as f: f.write( "\n\n#######################################\n" f"## Added by `nf-core download` v{nf_core.__version__} ##\n" + f'export NXF_SINGULARITY_CACHEDIR="{cachedir_path}"' + "\n#######################################\n" ) - log.info(f"Successfully wrote to [blue]{bashrc_path}[/]") + log.info(f"Successfully wrote to [blue]{shellprofile_path}[/]") log.warning( "You will need reload your terminal after the download completes for this to take effect." ) @@ -620,7 +638,7 @@ def wf_use_local_configs(self, revision_dirname): with open(nfconfig_fn, "w") as nfconfig_fh: nfconfig_fh.write(nfconfig) - def find_container_images(self, revision_dirname): + def find_container_images(self, workflow_directory): """Find container image names for workflow. Starts by using `nextflow config` to pull out any process.container @@ -662,7 +680,7 @@ def find_container_images(self, revision_dirname): containers_raw = [] if not self.containers else self.containers # Use linting code to parse the pipeline nextflow config - self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, revision_dirname)) + self.nf_config = nf_core.utils.fetch_wf_config(workflow_directory) # Find any config variables that look like a container for k, v in self.nf_config.items(): @@ -670,7 +688,7 @@ def find_container_images(self, revision_dirname): containers_raw.append(v.strip('"').strip("'")) # Recursive search through any DSL2 module files for container spec lines. 
- for subdir, _, files in os.walk(os.path.join(self.outdir, revision_dirname, "modules")): + for subdir, _, files in os.walk(os.path.join(workflow_directory, "modules")): for file in files: if file.endswith(".nf"): file_path = os.path.join(subdir, file) @@ -745,14 +763,14 @@ def find_container_images(self, revision_dirname): # Remove duplicates and sort self.containers = sorted(list(set(containers_raw))) - def get_singularity_images(self): + def get_singularity_images(self, current_revision=""): """Loop through container names and download Singularity images""" if len(self.containers) == 0: log.info("No container names found in workflow") else: log.info( - f"Found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in workflow." + f"Processing workflow revision {current_revision}, found {len(self.containers)} container image{'s' if len(self.containers) > 1 else ''} in total." ) with DownloadProgress() as progress: @@ -1087,6 +1105,7 @@ def __init__( remote_url, revision, commit, + location=None, hide_progress=False, in_cache=True, ): @@ -1118,7 +1137,7 @@ def __init__( self.retries = 0 # retries for setting up the locally cached repository self.hide_progress = hide_progress - self.setup_local_repo(remote_url, in_cache=in_cache) + self.setup_local_repo(remote=remote_url, location=location, in_cache=in_cache) # expose some instance attributes self.tags = self.repo.tags @@ -1155,7 +1174,7 @@ def retry_setup_local_repo(self, skip_confirm=False): else: raise LookupError("Exiting due to error with locally cached Git repository.") - def setup_local_repo(self, remote, in_cache=True): + def setup_local_repo(self, remote, location=None, in_cache=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -1163,13 +1182,15 @@ def setup_local_repo(self, remote, in_cache=True): Args: remote (str): git url of remote - commit (str): name of branch to checkout from (optional) - hide_progress (bool, optional): Whether to hide the progress bar. Defaults to False. + location (Path): location where the clone should be created/cached. in_cache (bool, optional): Whether to clone the repository from the cache. Defaults to False. 
Sets self.repo """ + if location: + self.local_repo_dir = os.path.join(location, self.fullname) + else: + self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) - self.local_repo_dir = os.path.join(NFCORE_DIR if not in_cache else NFCORE_CACHE_DIR, self.fullname) try: if not os.path.exists(self.local_repo_dir): try: diff --git a/tests/data/testdata_remote_containers.txt b/tests/data/testdata_remote_containers.txt new file mode 100644 index 0000000000..93cf46f2f6 --- /dev/null +++ b/tests/data/testdata_remote_containers.txt @@ -0,0 +1,37 @@ +./depot.galaxyproject.org-singularity-bbmap-38.93--he522d1c_0.img +./depot.galaxyproject.org-singularity-bedtools-2.30.0--hc088bd4_0.img +./depot.galaxyproject.org-singularity-bioconductor-dupradar-1.18.0--r40_1.img +./depot.galaxyproject.org-singularity-bioconductor-summarizedexperiment-1.20.0--r40_0.img +./depot.galaxyproject.org-singularity-bioconductor-tximeta-1.8.0--r40_0.img +./depot.galaxyproject.org-singularity-fastqc-0.11.9--0.img +./depot.galaxyproject.org-singularity-gffread-0.12.1--h8b12597_0.img +./depot.galaxyproject.org-singularity-hisat2-2.2.1--h1b792b2_3.img +./depot.galaxyproject.org-singularity-mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2-59cdd445419f14abac76b31dd0d71217994cbcc9-0.img +./depot.galaxyproject.org-singularity-mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2-afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0.img +./depot.galaxyproject.org-singularity-mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91-ab110436faf952a33575c64dd74615a84011450b-0.img +./depot.galaxyproject.org-singularity-mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1-0e773bb207600fcb4d38202226eb20a33c7909b6-0.img +./depot.galaxyproject.org-singularity-mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1-38aed4501da19db366dc7c8d52d31d94e760cfaf-0.img +./depot.galaxyproject.org-singularity-mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b-64aad4a4e144878400649e71f42105311be7ed87-0.img +./depot.galaxyproject.org-singularity-multiqc-1.11--pyhdfd78af_0.img +./depot.galaxyproject.org-singularity-multiqc-1.13--pyhdfd78af_0.img +./depot.galaxyproject.org-singularity-perl-5.26.2.img +./depot.galaxyproject.org-singularity-picard-2.26.10--hdfd78af_0.img +./depot.galaxyproject.org-singularity-picard-2.27.4--hdfd78af_0.img +./depot.galaxyproject.org-singularity-preseq-3.1.2--h445547b_2.img +./depot.galaxyproject.org-singularity-python-3.9--1.img +./depot.galaxyproject.org-singularity-qualimap-2.2.2d--1.img +./depot.galaxyproject.org-singularity-rseqc-3.0.1--py37h516909a_1.img +./depot.galaxyproject.org-singularity-salmon-1.5.2--h84f40af_0.img +./depot.galaxyproject.org-singularity-samtools-1.15.1--h1170115_0.img +./depot.galaxyproject.org-singularity-sortmerna-4.3.4--h9ee0642_0.img +./depot.galaxyproject.org-singularity-stringtie-2.2.1--hecb563c_2.img +./depot.galaxyproject.org-singularity-subread-2.0.1--hed695b0_0.img +./depot.galaxyproject.org-singularity-trim-galore-0.6.7--hdfd78af_0.img +./depot.galaxyproject.org-singularity-ubuntu-20.04.img +./depot.galaxyproject.org-singularity-ucsc-bedclip-377--h0b8a92a_2.img +./depot.galaxyproject.org-singularity-ucsc-bedgraphtobigwig-377--h446ed27_1.img +./depot.galaxyproject.org-singularity-umi_tools-1.1.2--py38h4a8c8d9_0.img +These entries should not be used: +On October 5, 2011, the 224-meter containership MV Rena struck a reef close to New Zealand’s coast and broke apart. That spells disaster, no? 
+MV Rena + diff --git a/tests/test_download.py b/tests/test_download.py index d1a770a630..41fb9c625f 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -3,16 +3,20 @@ import hashlib import os +import re import shutil import tempfile import unittest +from pathlib import Path from unittest import mock import pytest import nf_core.create import nf_core.utils -from nf_core.download import DownloadWorkflow +from nf_core.download import DownloadWorkflow, WorkflowRepo +from nf_core.synced_repo import SyncedRepo +from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR from .utils import with_temporary_file, with_temporary_folder @@ -169,6 +173,32 @@ def test_singularity_pull_image_singularity_not_installed(self, tmp_dir, mock_ri with pytest.raises(OSError): download_obj.singularity_pull_image("a-container", tmp_dir, None, mock_rich_progress) + # + # Test for '--singularity-cache remote --singularity-cache-index'. Provide a list of containers already available in a remote location. + # + @with_temporary_folder + def test_remote_container_functionality(self, tmp_dir): + os.environ["NXF_SINGULARITY_CACHEDIR"] = "foo" + + download_obj = DownloadWorkflow( + pipeline="nf-core/rnaseq", + outdir=os.path.join(tmp_dir, "new"), + revision="3.9", + compress_type="none", + singularity_cache_index=Path(__file__).resolve().parent / "data/testdata_remote_containers.txt", + ) + + download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't. + + # test if settings are changed accordingly. + assert download_obj.singularity_cache == "remote" and download_obj.container == "singularity" + assert isinstance(download_obj.containers_remote, list) and len(download_obj.containers_remote) == 0 + # read in the file + download_obj.read_remote_containers() + assert len(download_obj.containers_remote) == 33 + assert "depot.galaxyproject.org-singularity-salmon-1.5.2--h84f40af_0.img" in download_obj.containers_remote + assert "MV Rena" not in download_obj.containers_remote # decoy in test file + # # Tests for the main entry method 'download_workflow' # @@ -184,6 +214,65 @@ def test_download_workflow_with_success(self, tmp_dir, mock_download_image, mock container="singularity", revision="1.6", compress_type="none", + singularity_cache="copy", ) + download_obj.include_configs = True # suppress prompt, because stderr.is_interactive doesn't. download_obj.download_workflow() + + # + # Test Download for Tower + # + @with_temporary_folder + def test_download_workflow_for_tower(self, tmp_dir): + download_obj = DownloadWorkflow( + pipeline="nf-core/rnaseq", + revision=("3.7", "3.9"), + compress_type="none", + tower=True, + ) + + download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't. 
+ + assert isinstance(download_obj.revision, list) and len(download_obj.revision) == 2 + assert isinstance(download_obj.wf_sha, dict) and len(download_obj.wf_sha) == 0 + assert isinstance(download_obj.wf_download_url, dict) and len(download_obj.wf_download_url) == 0 + + wfs = nf_core.list.Workflows() + wfs.get_remote_workflows() + ( + download_obj.pipeline, + download_obj.wf_revisions, + download_obj.wf_branches, + ) = nf_core.utils.get_repo_releases_branches(download_obj.pipeline, wfs) + + download_obj.get_revision_hash() + + # download_obj.wf_download_url is not set for tower downloads, but the sha values are + assert isinstance(download_obj.wf_sha, dict) and len(download_obj.wf_sha) == 2 + assert isinstance(download_obj.wf_download_url, dict) and len(download_obj.wf_download_url) == 0 + + # The outdir for multiple revisions is the pipeline name and date: e.g. nf-core-rnaseq_2023-04-27_18-54 + assert bool(re.search(r"nf-core-rnaseq_\d{4}-\d{2}-\d{1,2}_\d{1,2}-\d{1,2}", download_obj.outdir, re.S)) + + download_obj.output_filename = f"{download_obj.outdir}.git" + download_obj.download_workflow_tower(location=tmp_dir) + + assert download_obj.workflow_repo + assert isinstance(download_obj.workflow_repo, WorkflowRepo) + assert issubclass(type(download_obj.workflow_repo), SyncedRepo) + # corroborate that the other revisions are inaccessible to the user. + assert len(download_obj.workflow_repo.tags) == len(download_obj.revision) + + # manually test container image detection for 3.7 revision + download_obj.workflow_repo.checkout(download_obj.wf_sha["3.7"]) + assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 0 + download_obj.find_container_images(download_obj.workflow_repo.access()) + assert len(download_obj.containers) == 30 # 30 containers for 3.7 + assert ( + "https://depot.galaxyproject.org/singularity/bbmap:38.93--he522d1c_0" in download_obj.containers + ) # direct definition + assert ( + "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0" + in download_obj.containers + ) # indirect definition via $container variable. From 3c62b4d31a5a8b23ccbfadadd4834e92ceae786e Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 2 May 2023 15:52:01 +0200 Subject: [PATCH 49/63] Move alterations from Version 2.8 (which this PR did not make it into) to Version 2.9dev. --- CHANGELOG.md | 10 +++++----- README.md | 7 ++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94f013d7a1..dfecb7347f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,11 @@ - Remove schema validation from `lib` folder and use Nextflow nf-validation plugin instead ([#1771](https://github.com/nf-core/tools/pull/1771/)) - Generate input channel from input file using Nextflow nf-validation plugin ([#1771](https://github.com/nf-core/tools/pull/1771/)) +### Download + +- Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). +- Refactored the CLI for `--singularity-cache` in `nf-core download` from a flag to an argument. The prior options were renamed to `amend` (container images are only saved in the `$NXF_SINGULARITY_CACHEDIR`) and `copy` (a copy of the image is saved with the download).
`remote` was newly introduced and allows you to provide a table of contents of a remote cache via an additional argument `--singularity-cache-index` ([#2247](https://github.com/nf-core/tools/pull/2247)). + ### Linting - Warn if container access is denied ([#2270](https://github.com/nf-core/tools/pull/2270)) @@ -51,11 +56,6 @@ - Removed `quay.io` from all module Docker container references as this is now supplied at pipeline level. ([#2249](https://github.com/nf-core/tools/pull/2249)) - Remove `CITATION.cff` file from pipeline template, to avoid that pipeline Zenodo entries reference the nf-core publication instead of the pipeline ([#2059](https://github.com/nf-core/tools/pull/2059)). -### Download - -- Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)). -- Refactored the CLI for `--singularity-cache` in `nf-core download` from a flag to an argument. The prior options were renamed to `amend` (container images are only saved in the `$NXF_SINGULARITY_CACHEDIR`) and `copy` (a copy of the image is saved with the download). `remote` was newly introduced and allows you to provide a table of contents of a remote cache via an additional argument `--singularity-cache-index` ([#2247](https://github.com/nf-core/tools/pull/2247)). - ### Linting - Update modules lint test to fail if enable_conda is found ([#2213](https://github.com/nf-core/tools/pull/2213)) diff --git a/README.md b/README.md index 28c764a09a..06cae66c8e 100644 --- a/README.md +++ b/README.md @@ -373,12 +373,9 @@ We highly recommend setting the `$NXF_SINGULARITY_CACHEDIR` environment variable If found, the tool will fetch the Singularity images to this directory first before copying to the target output archive / directory. Any images previously fetched will be found there and copied directly - this includes images that may be shared with other pipelines or previous pipeline version downloads or download attempts. -If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache amend`. +If you are running the download on the same system where you will be running the pipeline (eg. a shared filesystem where Nextflow won't have an internet connection at a later date), you can choose to _only_ use the cache via a prompt or cli options `--singularity-cache amend`. This instructs `nf-core download` to fetch all Singularity images to the `$NXF_SINGULARITY_CACHEDIR` directory but does _not_ copy them to the workflow archive / directory. The workflow config file is _not_ edited. This means that when you later run the workflow, Nextflow will just use the cache folder directly. -This instructs `nf-core download` to fetch all Singularity images to the `$NXF_SINGULARITY_CACHEDIR` directory but does _not_ copy them to the workflow archive / directory. -The workflow config file is _not_ edited. This means that when you later run the workflow, Nextflow will just use the cache folder directly. - -If you are downloading a workflow for a different system, you can provide information about its image cache to `nf-core download`.
To avoid unnecessary downloads, choose `--singularity-cache remote` and provide a list of already available images as plain text file to `--singularity-cache-index my_list_of_remotely_available_images.txt`. To generate this list on the remote system, run `find $NXF_SINGULARITY_CACHEDIR -name "*.img" > my_list_of_remotely_available_images.txt`. +If you are downloading a workflow for a different system, you can provide information about its image cache to `nf-core download`. To avoid unnecessary container image downloads, choose `--singularity-cache remote` and provide a list of already available images as a plain text file to `--singularity-cache-index my_list_of_remotely_available_images.txt`. To generate this list on the remote system, run `find $NXF_SINGULARITY_CACHEDIR -name "*.img" > my_list_of_remotely_available_images.txt`. The tool will then only download and copy into your output directory those images that are missing on the remote system. #### How the Singularity image downloads work From 3e0e24afc89fe5d98cce2baebfbe90c96208dd86 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 2 May 2023 16:12:54 +0200 Subject: [PATCH 50/63] Adding the info about remote containers to the summary log rather than showing it separately. --- nf_core/download.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index db98b17f22..cfccf2a235 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -162,6 +162,10 @@ def download_workflow(self): ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") + if self.containers_remote: + summary_log.append( + f"Successfully read {len(self.containers_remote)} containers from the remote '[blue]$NXF_SINGULARITY_CACHEDIR[/]' contents." + ) # Set an output filename now that we have the outdir if self.tower: @@ -523,10 +527,6 @@ def read_remote_containers(self): self.containers_remote.append(match.group(0)) if n_total_images == 0: raise LookupError("Could not find valid container names in the index file.") - else: - log.info( - f"Successfully read {n_total_images} containers from the remote $NXF_SINGULARITY_CACHE contents." - ) self.containers_remote = sorted(list(set(self.containers_remote))) except (FileNotFoundError, LookupError) as e: log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") @@ -825,7 +825,9 @@ def get_singularity_images(self, current_revision=""): if containers_exist: if self.singularity_cache_index is not None: - log.info(f"{len(containers_exist)} are already cached remotely and won't be retrieved.") + log.info( + f"{len(containers_exist)} containers are already cached remotely and won't be retrieved." + ) # Go through each method of fetching containers in order for container in containers_exist: progress.update(task, description="Image file exists at destination") From ec2b8c095560c82b12bfb6e47f251208b49da413 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 5 May 2023 11:48:07 +0200 Subject: [PATCH 51/63] Moved the notification about remote containers to summary_log.
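The README instructions and the `read_remote_containers()` hunk above describe the same workflow: an index file produced with `find $NXF_SINGULARITY_CACHEDIR -name "*.img"` is parsed and reduced to a sorted, de-duplicated list of image names. A hedged sketch of such a parser — the function name and the regex are illustrative, not necessarily what `nf_core/download.py` uses:

```python
import re


def read_remote_container_index(index_path):
    """Collect *.img basenames from a remote cache index file."""
    containers = []
    # Keep only tokens that end in ".img" and contain no path separators
    # or whitespace, i.e. plausible image file basenames.
    image_pattern = re.compile(r"[^/\\\s]+\.img$")
    with open(index_path) as fh:
        for line in fh:
            match = image_pattern.search(line.strip())
            if match:
                containers.append(match.group(0))
    if not containers:
        raise LookupError("Could not find valid container names in the index file.")
    return sorted(set(containers))
```

Run against `tests/data/testdata_remote_containers.txt`, a parser like this would keep the 33 image names and skip the decoy lines at the bottom of the file ("MV Rena" and the accompanying sentence), which is exactly what `test_remote_container_functionality` asserts.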
--- nf_core/download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index cfccf2a235..8a65252e3b 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -161,10 +161,10 @@ def download_workflow(self): f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: - summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}") + summary_log.append(f"Using [blue]$NXF_SINGULARITY_CACHEDIR[/]': {os.environ['NXF_SINGULARITY_CACHEDIR']}'") if self.containers_remote: summary_log.append( - f"Successfully read {len(self.containers_remote)} containers from the remote '[blue]$NXF_SINGULARITY_CACHEDIR[/]' contents." + f"Successfully read {len(self.containers_remote)} containers from the remote '$NXF_SINGULARITY_CACHEDIR' contents." ) # Set an output filename now that we have the outdir From 6d82c53bea1f693a450bcdfd101d1860cb4f486b Mon Sep 17 00:00:00 2001 From: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> Date: Tue, 9 May 2023 15:33:40 +0200 Subject: [PATCH 52/63] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- nf_core/download.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 8a65252e3b..8705add039 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -157,7 +157,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...['+str(len(self.revision)-2)+' more revisions]...,'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...,['+str(len(self.revision)-2)+' more revisions],...,'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -420,7 +420,7 @@ def prompt_singularity_cachedir_creation(self): """ Optionally, create a permanent entry for the NXF_SINGULARITY_CACHEDIR in the terminal profile. Currently support for bash and zsh. - ToDo: "sh", "bash", "dash", "ash","csh", "tcsh", "ksh", "zsh", "fish", "cmd", "powershell", "pwsh"? + ToDo: "sh", "dash", "ash","csh", "tcsh", "ksh", "fish", "cmd", "powershell", "pwsh"? """ if os.environ["SHELL"] == "/bin/bash": @@ -476,7 +476,7 @@ def prompt_singularity_cachedir_utilization(self): "However if you will transfer the downloaded files to a different system then they should be copied to the target folder." ) self.singularity_cache = questionary.select( - "[blue bold]?[/] [bold]Copy singularity images from [blue not bold]$NXF_SINGULARITY_CACHEDIR[/] to the target folder or amend new images to the cache?[/]", + "Copy singularity images from $NXF_SINGULARITY_CACHEDIR to the target folder or amend new images to the cache?", choices=["amend", "copy"], style=nf_core.utils.nfcore_question_style, ).unsafe_ask() From b52e8e120cec210eb62e7d0cc62dc8f04ff09647 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 9 May 2023 15:46:26 +0200 Subject: [PATCH 53/63] Fixes suggested by @mirpedrol during review. Thanks! 
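The first hunk of PATCH 52 adjusts how long revision lists are abbreviated in the summary log. A standalone sketch of the resulting formatting rule — `summarise_revisions` is our name for it, and `revisions` is assumed to be a list of revision strings:

```python
# Illustrative helper mirroring the summary_log f-string above; lists of
# fewer than five revisions are printed in full, longer lists are elided.
def summarise_revisions(revisions):
    if len(revisions) < 5:
        return ", ".join(revisions)
    return f"{revisions[0]},...,[{len(revisions) - 2} more revisions],...,{revisions[-1]}"


assert summarise_revisions(["3.7", "3.9"]) == "3.7, 3.9"
assert summarise_revisions(["1.0", "1.1", "1.2", "1.3", "1.4"]) == "1.0,...,[3 more revisions],...,1.4"
```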
--- README.md | 3 +-- nf_core/download.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 06cae66c8e..dacb50ebc4 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,7 @@ A python package with helper tools for the nf-core community. - [`nf-core` tools update](#update-tools) - [`nf-core list` - List available pipelines](#listing-pipelines) - [`nf-core launch` - Run a pipeline with interactive parameter prompts](#launch-a-pipeline) -- [`nf-core download` - Download pipeline for offline use](#downloading-pipelines-for-offline-use) -- [`nf-core download --tower` - Adapting downloads to Nextflow Tower](#adapting-downloads-to-nextflow-tower) +- [`nf-core download` - Download a pipeline for offline use](#downloading-pipelines-for-offline-use) - [`nf-core licences` - List software licences in a pipeline](#pipeline-software-licences) - [`nf-core create` - Create a new pipeline with the nf-core template](#creating-a-new-pipeline) - [`nf-core lint` - Check pipeline code against nf-core guidelines](#linting-a-workflow) diff --git a/nf_core/download.py b/nf_core/download.py index 8705add039..5d214d2aaf 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -423,13 +423,13 @@ def prompt_singularity_cachedir_creation(self): ToDo: "sh", "dash", "ash","csh", "tcsh", "ksh", "fish", "cmd", "powershell", "pwsh"? """ - if os.environ["SHELL"] == "/bin/bash": + if os.getenv("SHELL", "") == "/bin/bash": shellprofile_path = os.path.expanduser("~/.bash_profile") if not os.path.isfile(shellprofile_path): shellprofile_path = os.path.expanduser("~/.bashrc") if not os.path.isfile(shellprofile_path): shellprofile_path = False - elif os.environ["SHELL"] == "/bin/zsh": + elif os.getenv("SHELL", "") == "/bin/zsh": shellprofile_path = os.path.expanduser("~/.zprofile") if not os.path.isfile(shellprofile_path): shellprofile_path = os.path.expanduser("~/.zshenv") From 02c10609f6f963523f1194cd14374d305db7b273 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 9 May 2023 17:00:47 +0200 Subject: [PATCH 54/63] @mashehu suggested that downloading the containers should not be optional for Tower downloads. Given that there is the option to provide the list of remote containers to skip their download, I agree that this is reasonable. --- CHANGELOG.md | 1 + nf_core/download.py | 12 ++++++------ nf_core/modules/lint/main_nf.py | 2 +- tests/test_download.py | 8 +++++--- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfecb7347f..f466ab040e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - Remove schema validation from `lib` folder and use Nextflow nf-validation plugin instead ([#1771](https://github.com/nf-core/tools/pull/1771/)) - Generate input channel from input file using Nextflow nf-validation plugin ([#1771](https://github.com/nf-core/tools/pull/1771/)) + ### Download - Introduce a `--tower` flag for `nf-core download` to obtain pipelines in an offline format suited for [seqeralabs® Nextflow Tower](https://cloud.tower.nf/) ([#2247](https://github.com/nf-core/tools/pull/2247)).
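PATCH 53 above swaps the brittle `os.environ["SHELL"]` lookups for `os.getenv("SHELL", "")`, so a missing `SHELL` variable no longer raises a `KeyError`. A condensed sketch of the resulting profile-file lookup — only bash and zsh are special-cased, per the ToDo in the docstring, and `find_shell_profile` is our name for the helper, not the actual method:

```python
import os


# Condensed, hypothetical version of the shell-profile detection in
# prompt_singularity_cachedir_creation(); the first existing file wins.
def find_shell_profile():
    shell = os.getenv("SHELL", "")  # empty default avoids a KeyError
    if shell == "/bin/bash":
        candidates = ["~/.bash_profile", "~/.bashrc"]
    elif shell == "/bin/zsh":
        candidates = ["~/.zprofile", "~/.zshenv"]
    else:
        candidates = ["~/.profile"]
    for candidate in candidates:
        path = os.path.expanduser(candidate)
        if os.path.isfile(path):
            return path
    return None  # no suitable profile file found
```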
diff --git a/nf_core/download.py b/nf_core/download.py index 5d214d2aaf..274132f02b 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -108,10 +108,10 @@ def __init__( self.force = force self.tower = tower self.include_configs = None - self.container = container if not singularity_cache_index else "singularity" - self.singularity_cache = ( - singularity_cache if not singularity_cache_index else "remote" - ) # if a singularity_cache_index is given, use the file and overrule choice. + # force download of containers if a cache index is given or download is meant to be used for Tower. + self.container = "singularity" if singularity_cache_index or bool(tower) else container + # if a singularity_cache_index is given, use the file and overrule choice. + self.singularity_cache = "remote" if singularity_cache_index else singularity_cache self.singularity_cache_index = singularity_cache_index self.parallel_downloads = parallel_downloads @@ -377,7 +377,7 @@ def prompt_config_inclusion(self): def prompt_container_download(self): """Prompt whether to download container images or not""" - if self.container is None and stderr.is_interactive: + if self.container is None and stderr.is_interactive and not self.tower: stderr.print("\nIn addition to the pipeline code, this tool can download software containers.") self.container = questionary.select( "Download software container images:", @@ -722,7 +722,7 @@ def find_container_images(self, workflow_directory): Therefore, we need to repeat the search over the contents, extract the variable name, and use it inside a new regex. To get the variable name ( ${container_id} in above example ), we match the literal word "container" and use lookbehind (reset the match). - Then we skip [^\${}]+ everything that is not $ or curly braces. The next capture group is + Then we skip [^${}]+ everything that is not $ or curly braces. The next capture group is ${ followed by any characters that are not curly braces [^{}]+ and ended by a closing curly brace (}), but only if it's not followed by any other curly braces (?![^{]*}). The latter ensures we capture the innermost variable name. diff --git a/nf_core/modules/lint/main_nf.py b/nf_core/modules/lint/main_nf.py index 8150e7e839..31b8adca3a 100644 --- a/nf_core/modules/lint/main_nf.py +++ b/nf_core/modules/lint/main_nf.py @@ -283,7 +283,7 @@ def check_process_section(self, lines, fix_version, progress_bar): self.failed.append(("docker_tag", "Unable to parse docker tag", self.main_nf)) docker_tag = None if l.startswith("quay.io/"): - l_stripped = re.sub("\W+$", "", l) + l_stripped = re.sub(r"\W+$", "", l) self.failed.append( ( "container_links", diff --git a/tests/test_download.py b/tests/test_download.py index 41fb9c625f..aa2e959f3d 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -190,7 +190,7 @@ def test_remote_container_functionality(self, tmp_dir): download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't. - # test if settings are changed accordingly. + # test if the settings are changed to mandatory defaults if an external cache index is used. assert download_obj.singularity_cache == "remote" and download_obj.container == "singularity" assert isinstance(download_obj.containers_remote, list) and len(download_obj.containers_remote) == 0 # read in the file @@ -264,9 +264,11 @@ def test_download_workflow_for_tower(self, tmp_dir): # corroborate that the other revisions are inaccessible to the user.
assert len(download_obj.workflow_repo.tags) == len(download_obj.revision) - # manually test container image detection for 3.7 revision + # download_obj.download_workflow_tower(location=tmp_dir) will run container image detection for all requested revisions + assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 33 + # manually test container image detection for 3.7 revision only + download_obj.containers = [] # empty container list for the test download_obj.workflow_repo.checkout(download_obj.wf_sha["3.7"]) - assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 0 download_obj.find_container_images(download_obj.workflow_repo.access()) assert len(download_obj.containers) == 30 # 30 containers for 3.7 assert ( From 178121b88eae0f97a9b2f9e171f3b1717dfcc2cd Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 26 May 2023 23:52:43 +0200 Subject: [PATCH 55/63] Bugfix: WorkflowRepo.tidy_tags() did indeed only tidy tags. However, revisions may also be branches. Therefore, I rewrote this function to account for revisions that are not releases. --- nf_core/download.py | 51 +++++++++++++++++++++++++++++++----------- nf_core/synced_repo.py | 2 +- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 274132f02b..70f61f35a4 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -259,7 +259,7 @@ def download_workflow_tower(self, location=None): ) # Remove tags for those revisions that had not been selected - self.workflow_repo.tidy_tags() + self.workflow_repo.tidy_tags_and_branches() # create a bare clone of the modified repository needed for Tower self.workflow_repo.bare_clone(os.path.join(self.outdir, self.output_filename)) @@ -1157,6 +1157,9 @@ def access(self): def checkout(self, commit): return super().checkout(commit) + def get_remote_branches(self, remote_url): + return super().get_remote_branches(remote_url) + def retry_setup_local_repo(self, skip_confirm=False): self.retries += 1 if skip_confirm or rich.prompt.Confirm.ask( @@ -1236,29 +1239,51 @@ def setup_local_repo(self, remote, location=None, in_cache=True): log.error(f"[red]Could not set up local cache of modules repository:[/]\n{e}\n") self.retry_setup_local_repo() - def tidy_tags(self): + def tidy_tags_and_branches(self): """ - Function to delete all tags that point to revisions that are not of interest to the downloader. - This allows a clutter-free experience in Tower. The commits are evidently still available. + Function to delete all tags and branches that are not of interest to the downloader. + This allows a clutter-free experience in Tower. The untagged commits are evidently still available. However, due to local caching, the downloader might also want access to revisions that had been deleted before. In that case, don't bother with re-adding the tags and rather download anew from Github. 
""" if self.revision and self.repo and self.repo.tags: - desired_tags = self.revision.copy() + # create a set to keep track of the revisions to process & check + desired_revisions = set(self.revision) + + # determine what needs pruning + tags_to_remove = {tag for tag in self.repo.tags if tag.name not in desired_revisions} + heads_to_remove = {head for head in self.repo.heads if head.name not in desired_revisions} + try: - for tag in self.repo.tags: - if tag.name not in self.revision: - self.repo.delete_tag(tag) - else: - desired_tags.remove(tag.name) + # delete unwanted tags from repository + for tag in tags_to_remove: + self.repo.delete_tag(tag) self.tags = self.repo.tags - if len(desired_tags) > 0: + + # switch to a revision that should be kept, because deleting heads fails, if they are checked out (e.g. "master") + self.checkout(self.revision[0]) + + # delete unwanted heads/branches from repository + for head in heads_to_remove: + self.repo.delete_head(head) + + # ensure all desired branches are available + for revision in desired_revisions: + self.checkout(revision) + self.heads = self.repo.heads + + # get all tags and available remote_branches + completed_revisions = {revision.name for revision in self.repo.heads + self.repo.tags} + + # verify that all requested revisions are available. + # a local cache might lack revisions that were deleted during a less comprehensive previous download. + if bool(desired_revisions - completed_revisions): log.info( - f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_tags)}. Downloading anew from GitHub..." + f"Locally cached version of the pipeline lacks selected revisions {', '.join(desired_revisions - completed_revisions)}. Downloading anew from GitHub..." ) self.retry_setup_local_repo(skip_confirm=True) - self.tidy_tags() + self.tidy_tags_and_branches() except (GitCommandError, InvalidGitRepositoryError) as e: log.error(f"[red]Adapting your pipeline download unfortunately failed:[/]\n{e}\n") self.retry_setup_local_repo(skip_confirm=True) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index 4bbd4f8443..f78142c031 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -196,7 +196,7 @@ def branch_exists(self): def verify_branch(self): """ - Verifies the active branch conforms do the correct directory structure + Verifies the active branch conforms to the correct directory structure """ dir_names = os.listdir(self.local_repo_dir) if "modules" not in dir_names: From 141ab7da29de8d6992c942fd28b0e9c34c6bd2c9 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 18 May 2023 14:57:30 +0200 Subject: [PATCH 56/63] rm dockerfile + add citations --- nf_core/pipeline-template/.github/CONTRIBUTING.md | 1 - nf_core/pipeline-template/CITATIONS.md | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/nf_core/pipeline-template/.github/CONTRIBUTING.md b/nf_core/pipeline-template/.github/CONTRIBUTING.md index 9afdd2987b..ecdda0f86b 100644 --- a/nf_core/pipeline-template/.github/CONTRIBUTING.md +++ b/nf_core/pipeline-template/.github/CONTRIBUTING.md @@ -124,4 +124,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/nf_core/pipeline-template/CITATIONS.md b/nf_core/pipeline-template/CITATIONS.md index 740b045103..ceaba0cb5f 100644 --- a/nf_core/pipeline-template/CITATIONS.md +++ b/nf_core/pipeline-template/CITATIONS.md @@ -12,7 +12,10 @@ - 
[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. ## Software packaging/containerisation tools @@ -31,5 +34,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. From 6dcf9618f996aa42d288592a27f166fd3a22557b Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 30 May 2023 10:18:09 +0200 Subject: [PATCH 57/63] rm outdated Dockerfile --- .github/CONTRIBUTING.md | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index fc73294b5a..a90c06f9a9 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -142,4 +142,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) From 2590eb5b9efbbe3fc95690a68668d01ad450b5e6 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 1 Jun 2023 13:45:37 +0200 Subject: [PATCH 58/63] add version and subpath to multiqc report comment link --- nf_core/pipeline-template/assets/multiqc_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/pipeline-template/assets/multiqc_config.yml b/nf_core/pipeline-template/assets/multiqc_config.yml index 440b0b9a3a..c3cf9647db 100644 --- a/nf_core/pipeline-template/assets/multiqc_config.yml +++ b/nf_core/pipeline-template/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > This report has been generated by the {{ name }} analysis pipeline.{% if branded %} For information about how to interpret these results, please see the - documentation.{% endif %} + documentation.{% endif %} report_section_order: "{{ name_noslash }}-methods-description": order: -1000 From 1340d424e5be6ee0b08e5bc2c4abf2dc55888253 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 1 Jun 2023 13:47:36 +0200 Subject: [PATCH 59/63] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f466ab040e..89c293bc0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Move registry definitions out of profile scope ([#2286])(https://github.com/nf-core/tools/pull/2286) - Remove `aws_tower` profile ([#2287])(https://github.com/nf-core/tools/pull/2287) - Fixed the Slack report to include the pipeline name ([#2291](https://github.com/nf-core/tools/pull/2291)) +- Fix link in the MultiQC report to point to exact version of output docs ([#2298](https://github.com/nf-core/tools/pull/2298)) - Remove shcema validation from `lib` folder and use Nextflow nf-validation plugin instead ([#1771](https://github.com/nf-core/tools/pull/1771/)) - Generate 
input channel from input file using Nextflow nf-validation plugin ([#1771](https://github.com/nf-core/tools/pull/1771/)) From 5473fd68d95a54f360e03c8e155842820efd52db Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 1 Jun 2023 13:49:12 +0200 Subject: [PATCH 60/63] also add version to main link --- nf_core/pipeline-template/assets/multiqc_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/pipeline-template/assets/multiqc_config.yml b/nf_core/pipeline-template/assets/multiqc_config.yml index c3cf9647db..570ed3d8e5 100644 --- a/nf_core/pipeline-template/assets/multiqc_config.yml +++ b/nf_core/pipeline-template/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the {{ name }} + This report has been generated by the {{ name }} analysis pipeline.{% if branded %} For information about how to interpret these results, please see the documentation.{% endif %} report_section_order: From b2c92f1e85fafe137bef0c77f2cde0fb3a2706ce Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 1 Jun 2023 14:03:31 +0200 Subject: [PATCH 61/63] fix linting test --- nf_core/lint/multiqc_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nf_core/lint/multiqc_config.py b/nf_core/lint/multiqc_config.py index 3378efce5f..9eff60091f 100644 --- a/nf_core/lint/multiqc_config.py +++ b/nf_core/lint/multiqc_config.py @@ -71,12 +71,13 @@ def multiqc_config(self): if "report_comment" not in ignore_configs: # Check that the minimum plugins exist and are coming first in the summary try: + version = self.nf_config.get("manifest.version", "").strip(" '\"") if "report_comment" not in mqc_yml: raise AssertionError() if mqc_yml["report_comment"].strip() != ( - f'This report has been generated by the nf-core/{self.pipeline_name} analysis pipeline. For information about how to ' - f'interpret these results, please see the documentation.' 
): raise AssertionError() From db4774adf2b4108190f00130796591eae164a703 Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 1 Jun 2023 14:27:08 +0200 Subject: [PATCH 62/63] update multiqc_config during version bump --- nf_core/bump_version.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/nf_core/bump_version.py b/nf_core/bump_version.py index 129016fa38..2ab75f26bc 100644 --- a/nf_core/bump_version.py +++ b/nf_core/bump_version.py @@ -44,6 +44,17 @@ def bump_pipeline_version(pipeline_obj, new_version): ) ], ) + # multiqc_config.yaml + update_file_version( + "multiqc_config.yaml", + pipeline_obj, + [ + ( + rf"version\s*=\s*[\'\"]?{re.escape(current_version)}[\'\"]?", + f"version = '{new_version}'", + ) + ], + ) def bump_nextflow_version(pipeline_obj, new_version): From d53a975bd81bf9f8c1a67810fa611c070d903ddc Mon Sep 17 00:00:00 2001 From: mashehu Date: Thu, 1 Jun 2023 15:07:43 +0200 Subject: [PATCH 63/63] bump version in multiqc_config correctly --- nf_core/bump_version.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nf_core/bump_version.py b/nf_core/bump_version.py index 2ab75f26bc..b462ee1377 100644 --- a/nf_core/bump_version.py +++ b/nf_core/bump_version.py @@ -3,8 +3,8 @@ """ import logging -import os import re +from pathlib import Path import rich.console @@ -46,12 +46,12 @@ def bump_pipeline_version(pipeline_obj, new_version): ) # multiqc_config.yaml update_file_version( - "multiqc_config.yaml", + Path("assets", "multiqc_config.yml"), pipeline_obj, [ ( - rf"version\s*=\s*[\'\"]?{re.escape(current_version)}[\'\"]?", - f"version = '{new_version}'", + rf"{re.escape(current_version)}", + f"{new_version}", ) ], ) @@ -88,7 +88,7 @@ def bump_nextflow_version(pipeline_obj, new_version): # .github/workflows/ci.yml - Nextflow version matrix update_file_version( - os.path.join(".github", "workflows", "ci.yml"), + Path(".github", "workflows", "ci.yml"), pipeline_obj, [ (