diff --git a/README.md b/README.md index 5f2c074..37692cf 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# nf-core/hic +# ![nf-core/hic](docs/images/nfcore-hic_logo.png) **Analysis of Chromosome Conformation Capture data (Hi-C)**. diff --git a/Singularity b/Singularity deleted file mode 100644 index 927866f..0000000 --- a/Singularity +++ /dev/null @@ -1,18 +0,0 @@ -From:nfcore/base -Bootstrap:docker - -%labels - MAINTAINER Nicolas Servant - DESCRIPTION Singularity image containing all requirements for the nf-core/hic pipeline - VERSION 1.0dev - -%environment - PATH=/opt/conda/envs/nf-core-hic-1.0dev/bin:$PATH - export PATH - -%files - environment.yml / - -%post - /opt/conda/bin/conda env create -f /environment.yml - /opt/conda/bin/conda clean -a diff --git a/docs/images/nfcore-hic_logo.png b/docs/images/nfcore-hic_logo.png index 713ca9c..d75e44b 100644 Binary files a/docs/images/nfcore-hic_logo.png and b/docs/images/nfcore-hic_logo.png differ diff --git a/docs/images/nfcore-hic_logo.svg b/docs/images/nfcore-hic_logo.svg new file mode 100644 index 0000000..7a20869 --- /dev/null +++ b/docs/images/nfcore-hic_logo.svg @@ -0,0 +1,205 @@ + +image/svg+xmlnf- +core/ +hic + \ No newline at end of file diff --git a/docs/usage.md b/docs/usage.md index 9b2bb6a..d166cf6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -51,6 +51,11 @@ * [`--splitFastq`](#--splitFastq) * [`--saveReference`](#--saveReference) * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates) +* [Skip options](#skip-options) + * [--skip_maps](#--skip_maps) + * [--skip_ice](#--skip_ice) + * [--skip_cool](#--skip_cool) + * [--skip_multiqc](#--skip_multiqc) * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) * [Custom resource requests](#custom-resource-requests) @@ -457,6 +462,40 @@ If specified, all intermediate mapping files are saved and exported in the resul --saveReference ``` +## Skip options + +#### `--skip_maps` + +If defined, the workflow stops with the 
list of valid interactions, and the genome-wide maps are not built. Useful for capture-C analysis. Default: false + +```bash +--skip_maps +``` + +#### `--skip_ice` + +If defined, the ICE normalization is not run on the raw contact maps. Default: false + +```bash +--skip_ice +``` + +#### `--skip_cool` + +If defined, cooler files are not generated. Default: false + +```bash +--skip_cool +``` + +#### `--skip_multiqc` + +If defined, the MultiQC report is not generated. Default: false + +```bash +--skip_multiqc +``` + ## Job resources ### Automatic resubmission Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. diff --git a/main.nf b/main.nf index eeb6923..ce29fd5 100644 --- a/main.nf +++ b/main.nf @@ -67,7 +67,7 @@ def helpMessage() { Step options: --skip_maps Skip generation of contact maps. 
Useful for capture-C --skip_ice Skip ICE normalization - --skip_cool Skip generation of cool files + --skip_cool Skip generation of cooler files --skip_multiQC Skip MultiQC AWSBatch options: @@ -666,12 +666,17 @@ process remove_duplicates { if ( params.rm_dup ){ """ mkdir -p stats/${sample} + + ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) sort -T /tmp/ -S 50% -k2,2V -k3,3n -k5,5V -k6,6n -m ${vpairs} | \ awk -F"\\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=\$2 || c2!=\$5 || s1!=\$3 || s2!=\$6){print;c1=\$2;c2=\$5;s1=\$3;s2=\$6}' > ${sample}.allValidPairs + echo -n "valid_interaction\t" > stats/${sample}/${sample}_allValidPairs.mergestat cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat cat ${sample}.allValidPairs | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat + + ## Count short range (<20000) vs long range contacts awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> stats/${sample}/${sample}_allValidPairs.mergestat """ @@ -683,6 +688,8 @@ process remove_duplicates { cat ${vpairs} | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat echo -n "valid_interaction_rmdup\t" >> stats/${sample}/${sample}_allValidPairs.mergestat cat ${sample}.allValidPairs | wc -l >> stats/${sample}/${sample}_allValidPairs.mergestat + + ## Count short range (<20000) vs long range contacts awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} \$2 == \$5{cis=cis+1; d=\$6>\$3?\$6-\$3:\$3-\$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} \$2!=\$5{trans=trans+1}END{print "trans_interaction\\t"trans"\\ncis_interaction\\t"cis"\\ncis_shortRange\\t"sr"\\ncis_longRange\\t"lr}' ${sample}.allValidPairs >> 
stats/${sample}/${sample}_allValidPairs.mergestat """ }