nf-core · nservant · Oct 16, 2019 · May 6, 2019 · May 6, 2019 · Sep 14, 2019
diff --git a/.travis.yml b/.travis.yml
@@ -8,13 +8,12 @@ matrix:
   fast_finish: true
 
 before_install:
-  # PRs to master are only ok if coming from dev branch
-  - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])'
+  - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && ([ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ] || [ $TRAVIS_PULL_REQUEST_BRANCH = "patch" ]))'
   # Pull the docker image first so the test doesn't wait for this
   - docker pull nfcore/hic:dev
   # Fake the tag locally so that the pipeline runs properly
   # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1)
-  - docker tag nfcore/hic:dev nfcore/hic:1.0.0
+  - docker tag nfcore/hic:dev nfcore/hic:1.1.0
 
 install:
   # Install Nextflow
@@ -30,7 +29,7 @@ install:
   - sudo apt-get install npm && npm install -g markdownlint-cli
 
 env:
-  - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work
+  - NXF_VER='19.04.0' # Specify a minimum NF version that should be tested and work
   - NXF_VER='' # Plus: get the latest NF version and check that it works
 
 script:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,14 +1,26 @@
 # nf-core/hic: Changelog
 
+## v1.1.0 - 2019-10-15
+
+* Support 'N' base motif in restriction/ligation sites
+* Support multiple restriction enzymes/ligation sites (comma separated) ([#31](https://github.com/nf-core/hic/issues/31))
+* Add --saveInteractionBAM option
+* Add DOI ([#29](https://github.com/nf-core/hic/issues/29))
+* Fix bug for reads extension _1/_2 ([#30](https://github.com/nf-core/hic/issues/30))
+* Update manual ([#28](https://github.com/nf-core/hic/issues/28))
+
 ## v1.0 - 2019-05-06
 
-First version of nf-core Hi-C pipeline which is a Nextflow implementation of the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/).
+First version of nf-core Hi-C pipeline which is a Nextflow implementation of
+the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/).
 Note that all HiC-Pro functionalities are not yet all implemented.
-The current version supports most protocols including Hi-C, in situ Hi-C, DNase Hi-C, Micro-C, capture-C or HiChip data.
+The current version supports most protocols including Hi-C, in situ Hi-C,
+DNase Hi-C, Micro-C, capture-C or HiChip data.
 
 In summary, this version allows :
 
-* Automatic detection and generation of annotation files based on igenomes if not provided.
+* Automatic detection and generation of annotation files based on igenomes
+if not provided.
 * Two-steps alignment of raw sequencing reads
 * Reads filtering and detection of valid interaction products
 * Generation of raw contact matrices for a set of resolutions

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
@@ -2,11 +2,17 @@
 
 ## Our Pledge
 
-In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project
+and our community a harassment-free experience for everyone, regardless of
+age, body size, disability, ethnicity, gender identity and expression, level
+of experience, nationality, personal appearance, race, religion, or sexual
+identity and orientation.
 
 ## Our Standards
 
-Examples of behavior that contributes to creating a positive environment include:
+Examples of behavior that contributes to creating a positive environment
+include:
 
 * Using welcoming and inclusive language
 * Being respectful of differing viewpoints and experiences
@@ -16,31 +22,55 @@ Examples of behavior that contributes to creating a positive environment include
 
 Examples of unacceptable behavior by participants include:
 
-* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* The use of sexualized language or imagery and unwelcome sexual attention
+or advances
 * Trolling, insulting/derogatory comments, and personal or political attacks
 * Public or private harassment
-* Publishing others' private information, such as a physical or electronic address, without explicit permission
-* Other conduct which could reasonably be considered inappropriate in a professional setting
+* Publishing others' private information, such as a physical or electronic
+address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+professional setting
 
 ## Our Responsibilities
 
-Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
 
-Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
 
 ## Scope
 
-This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an
+appointed representative at an online or offline event. Representation of a
+project may be further defined and clarified by project maintainers.
 
 ## Enforcement
 
-Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team on
+[Slack](https://nf-core-invite.herokuapp.com/). The project team will review
+and investigate all complaints, and will respond in a way that it deems
+appropriate to the circumstances. The project team is obligated to maintain
+confidentiality with regard to the reporter of an incident. Further details
+of specific enforcement policies may be posted separately.
 
-Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
 
 ## Attribution
 
-This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 1.4, available at
+[http://contributor-covenant.org/version/1/4][version]
 
 [homepage]: http://contributor-covenant.org
 [version]: http://contributor-covenant.org/version/1/4/
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM nfcore/base
+FROM nfcore/base:1.7
 LABEL authors="Nicolas Servant" \
       description="Docker image containing all requirements for nf-core/hic pipeline"
 
@@ -7,4 +7,4 @@ RUN apt-get update && apt-get install -y gcc g++ && apt-get clean -y
 
 COPY environment.yml /
 RUN conda env create -f /environment.yml && conda clean -a
-ENV PATH /opt/conda/envs/nf-core-hic-1.0.0/bin:$PATH
+ENV PATH /opt/conda/envs/nf-core-hic-1.1.0/bin:$PATH
diff --git a/README.md b/README.md
@@ -3,41 +3,99 @@
 **Analysis of Chromosome Conformation Capture data (Hi-C)**.
 
 [![Build Status](https://travis-ci.com/nf-core/hic.svg?branch=master)](https://travis-ci.com/nf-core/hic)
-[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.04.0-brightgreen.svg)](https://www.nextflow.io/)
 
 [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/)
 [![Docker](https://img.shields.io/docker/automated/nfcore/hic.svg)](https://hub.docker.com/r/nfcore/hic)
-![Singularity Container available](
-https://img.shields.io/badge/singularity-available-7E4C74.svg)
+![Singularity Container available](https://img.shields.io/badge/singularity-available-7E4C74.svg)
 
-### Introduction
-This pipeline is based on the [HiC-Pro workflow](https://github.com/nservant/HiC-Pro).
-It was designed to process Hi-C data from raw fastq files (paired-end Illumina data) to normalized contact maps.
-The current version supports most protocols, including digestion protocols as well as protocols that do not require restriction enzymes such as DNase Hi-C.
-In practice, this workflow was successfully applied to many data-sets including dilution Hi-C, in situ Hi-C, DNase Hi-C, Micro-C, capture-C, capture Hi-C or HiChip data.
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2669513.svg)](https://doi.org/10.5281/zenodo.2669513)
 
-The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker / singularity containers making installation trivial and results highly reproducible.
+## Introduction
 
-### Pipeline summary
-1. Mapping using a two steps strategy to rescue reads spanning the ligation sites (bowtie2)
+This pipeline is based on the
+[HiC-Pro workflow](https://github.com/nservant/HiC-Pro).
+It was designed to process Hi-C data from raw fastq files (paired-end Illumina
+data) to normalized contact maps.
+The current version supports most protocols, including digestion protocols as
+well as protocols that do not require restriction enzymes such as DNase Hi-C.
+In practice, this workflow was successfully applied to many data-sets including
+dilution Hi-C, in situ Hi-C, DNase Hi-C, Micro-C, capture-C, capture Hi-C or
+HiChip data.
+
+The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
+to run tasks across multiple compute infrastructures in a very portable manner.
+It comes with docker / singularity containers making installation trivial and
+results highly reproducible.
+
+## Pipeline summary
+
+1. Mapping using a two steps strategy to rescue reads spanning the ligation
+sites (bowtie2)
 2. Detection of valid interaction products
 3. Duplicates removal
 4. Create genome-wide contact maps at various resolution
 5. Contact maps normalization using the ICE algorithm (iced)
 6. Quality controls and report (MultiQC)
 7. Addition export for visualisation and downstream analysis (cooler)
 
-### Documentation
-The nf-core/hic pipeline comes with documentation about the pipeline, found in the `docs/` directory:
+## Quick Start
+
+i. Install [`nextflow`](https://nf-co.re/usage/installation)
+
+ii. Install one of [`docker`](https://docs.docker.com/engine/installation/),
+[`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or
+[`conda`](https://conda.io/miniconda.html)
+
+iii. Download the pipeline and test it on a minimal dataset with a single command
+
+```bash
+nextflow run nf-core/hic -profile test,<docker/singularity/conda>
+```
+
+iv. Start running your own analysis!
+
+```bash
+nextflow run nf-core/hic -profile <docker/singularity/conda> --reads '*_R{1,2}.fastq.gz' --genome GRCh37
+```
+
+See [usage docs](docs/usage.md) for all of the available options when running the pipeline.
 
-1. [Installation](docs/installation.md)
+## Documentation
+
+The nf-core/hic pipeline comes with documentation about the pipeline, found in
+the `docs/` directory:
+
+1. [Installation](https://nf-co.re/usage/installation)
 2. Pipeline configuration
-    * [Local installation](docs/configuration/local.md)
-    * [Adding your own system](docs/configuration/adding_your_own.md)
-    * [Reference genomes](docs/configuration/reference_genomes.md)  
+    * [Local installation](https://nf-co.re/usage/local_installation)
+    * [Adding your own system config](https://nf-co.re/usage/adding_own_config)
+    * [Reference genomes](https://nf-co.re/usage/reference_genomes)
 3. [Running the pipeline](docs/usage.md)
 4. [Output and how to interpret the results](docs/output.md)
-5. [Troubleshooting](docs/troubleshooting.md)
+5. [Troubleshooting](https://nf-co.re/usage/troubleshooting)
+
+## Contributions and Support
+
+If you would like to contribute to this pipeline, please see the
+[contributing guidelines](.github/CONTRIBUTING.md).
+
+For further information or help, don't hesitate to get in touch on
+[Slack](https://nfcore.slack.com/channels/hic).
+You can join with [this invite](https://nf-co.re/join/slack).
+
+
+## Credits
 
-### Credits
 nf-core/hic was originally written by Nicolas Servant.
+
+## Citation
+
+If you use nf-core/hic for your analysis, please cite it using the following
+doi: [10.5281/zenodo.2669513](https://doi.org/10.5281/zenodo.2669513)
+
+You can cite the `nf-core` pre-print as follows:
+Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di
+Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**.
+*bioRxiv*. 2019. p. 610741.
+[doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1).
diff --git a/bin/digest_genome.py b/bin/digest_genome.py
@@ -47,6 +47,7 @@ def find_re_sites(filename, sequences, offset):
                 indices.sort()
                 all_indices.append(indices)
                 indices = []
+
             # This is a new chromosome. Empty the sequence string, and add the
             # correct chrom id
             big_str = ""
@@ -67,6 +68,7 @@ def find_re_sites(filename, sequences, offset):
                     for m in re.finditer(pattern, big_str)]
     indices.sort()
     all_indices.append(indices)
+
     return contig_names, all_indices
 
 
@@ -87,6 +89,22 @@ def find_chromsomose_lengths(reference_filename):
     return chromosome_names, np.array(chromosome_lengths)
 
 
+def replaceN(cs):  # Expand each 'N' in motif string cs into A/C/G/T; returns a list of strings
+    npos = int(cs.find('N'))  # index of the first 'N' in cs, or -1 when there is none
+    cseql = []  # accumulates the expanded motifs
+    if npos!= -1:
+        for nuc in ["A","C","G","T"]:
+            tmp = cs.replace('N', nuc, 1)  # substitute only the first 'N'
+            tmpl = replaceN(tmp)  # recurse to expand any remaining 'N'
+            if type(tmpl)==list:  # replaceN always returns a list, so this branch is always taken
+                cseql = cseql + tmpl
+            else:  # not reached: the recursive call never returns a non-list
+                cseql.append(tmpl)
+    else:
+        cseql.append(cs)  # base case: no 'N' left, return the motif unchanged as a one-item list
+    return cseql
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('fastafile')
@@ -102,8 +120,13 @@ def find_chromsomose_lengths(reference_filename):
 
     filename = args.fastafile
     out = args.out
-    cutsites = args.res_sites
-
+
+    # Split restriction sites if comma-separated
+    cutsites=[]
+    for s in args.res_sites:
+        for m in s.split(','):
+            cutsites.append(m)
+
     # process args and get restriction enzyme sequences
     sequences = []
     offset = []
@@ -112,15 +135,34 @@ def find_chromsomose_lengths(reference_filename):
             cseq = ''.join(RE_cutsite[cs.lower()])
         else:
             cseq = cs
+
         offpos = int(cseq.find('^'))
         if offpos == -1:
             print "Unable to detect offset for", cseq
             print "Please, use '^' to specified the cutting position,",
             print "i.e A^GATCT for HindIII digestion"
             sys.exit(-1)
+
+        for nuc in list(set(cs)):
+            if nuc != 'A' and nuc != 'C' and nuc != 'G' and nuc != 'T' and nuc != 'N' and nuc != '^':
+                print "Find unexpected character ['",nuc,"']in restriction motif"
+                print "Note that multiple motifs should be separated by a space (not a comma !)"
+                sys.exit(-1)
+
         offset.append(offpos)
         sequences.append(re.sub('\^', '', cseq))
 
+    # replace all N in restriction motif
+    sequences_without_N = []
+    offset_without_N = []
+    for rs in range(len(sequences)):
+        nrs = replaceN(sequences[rs])
+        sequences_without_N = sequences_without_N + nrs
+        offset_without_N = offset_without_N + [offset[rs]] * len(nrs)
+
+    sequences = sequences_without_N
+    offset = offset_without_N
+
     if out is None:
         out = os.path.splitext(filename)[0] + "_fragments.bed"
 
@@ -129,8 +171,7 @@ def find_chromsomose_lengths(reference_filename):
     print "Offset(s)",  ','.join(str(x) for x in offset)
 
     # Read fasta file and look for rs per chromosome
-    contig_names, all_indices = find_re_sites(filename, sequences,
-                                              offset=offset)
+    contig_names, all_indices = find_re_sites(filename, sequences,  offset=offset)
     _, lengths = find_chromsomose_lengths(filename)
 
     valid_fragments = []