From 4d7c502974a6ed0bb558cc23d5aad24f990f9020 Mon Sep 17 00:00:00 2001 From: Austyn Trull Date: Thu, 5 Oct 2023 09:20:02 -0500 Subject: [PATCH] Committing reformatting performed by prettier --- .nf-core.yml | 2 +- CITATIONS.md | 3 +- README.md | 16 ++--- assets/multiqc_config.yml | 146 +++++++++++++++++++------------------- 4 files changed, 83 insertions(+), 84 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index c834f7d..37d9e32 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,4 +1,4 @@ repository_type: pipeline lint: - template_strings: False # "Jinja string found in" bin/create_regex.py and bin/seurat_qc.R \ No newline at end of file + template_strings: False # "Jinja string found in" bin/create_regex.py and bin/seurat_qc.R diff --git a/CITATIONS.md b/CITATIONS.md index 563d225..7cd2465 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -19,7 +19,7 @@ > You Y, Prawer Y D, De Paoli-Iseppi R, Hunt C P, Parish C L, Shim H, Clark M B. Identification of cell barcodes from long-read single-cell RNA-seq with BLAZE. bioRxiv 2022 Aug .08.16.504056; doi: 10.1101/2022.08.16.504056. - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. - [IsoQuant](https://pubmed.ncbi.nlm.nih.gov/36593406/) @@ -87,6 +87,7 @@ > Wickham H, Averick M, Bryan J, Winston C, McGowan LD, François R, Grolemund G, Hayes A , Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H. Welcome to the Tidyverse. Journal of Open Source Software 2019, 4(43), 1686, doi:10.21105/joss.01686 ## Python libraries + - [Biopython](https://pubmed.ncbi.nlm.nih.gov/19304878/) > Cock PJ, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A, Friedberg I, Hamelryck T, Kauff F, Wilczynski B, de Hoon MJ. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics 2009 Jun 1; 25(11):1422-3 doi:10.1093/bioinformatics/btp163. PubMed PMID: 19304878; PubMed Central PMCID: PMC2682512. diff --git a/README.md b/README.md index 33e9886..7046ab6 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ ## Introduction -**nf-core/scnanoseq** is a bioinformatics best-practice analysis pipeline for 10X Genomics single-cell/nuclei RNA-seq for data derived from Oxford Nanopore Q20+ chemistry ([R10.4 flow cells (>Q20)](https://nanoporetech.com/about-us/news/oxford-nanopore-announces-technology-updates-nanopore-community-meeting)). Due to the expectation of >Q20 quality, the input data for the pipeline is not dependent on Illumina paired data. +**nf-core/scnanoseq** is a bioinformatics best-practice analysis pipeline for 10X Genomics single-cell/nuclei RNA-seq for data derived from Oxford Nanopore Q20+ chemistry ([R10.4 flow cells (>Q20)](https://nanoporetech.com/about-us/news/oxford-nanopore-announces-technology-updates-nanopore-community-meeting)). Due to the expectation of >Q20 quality, the input data for the pipeline is not dependent on Illumina paired data. @@ -35,8 +35,8 @@ On release, automated continuous integration tests run the pipeline on a full-si 5. Pre-extraction QC in the R2 reads ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot)) 6. Barcode detection using a custom whitelist or 10X whitelist. [`BLAZE`](https://github.com/shimlab/BLAZE) 7. Extract barcodes. Consists of the following steps: - 1. Parse FASTQ files into R1 reads containing barcode and UMI and R2 reads containing sequencing without barcode and UMI (custom script `./bin/pre_extract_barcodes.py`) - 2. Re-zip FASTQs ([`pigz`](https://github.com/madler/pigz)) + 1. Parse FASTQ files into R1 reads containing barcode and UMI and R2 reads containing sequencing without barcode and UMI (custom script `./bin/pre_extract_barcodes.py`) + 2. Re-zip FASTQs ([`pigz`](https://github.com/madler/pigz)) 8. Post-extraction QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot)) 9. Alignment ([`minimap2`](https://github.com/lh3/minimap2)) 10. SAMtools processing including ([`SAMtools`](http://www.htslib.org/doc/samtools.html)): @@ -74,9 +74,9 @@ Each row represents a fastq file (single-end) or a pair of fastq files (paired e --> - ```console - nextflow run nf-core/scnanoseq --input samplesheet.csv --outdir --genome GRCh37 -profile - ``` +```console +nextflow run nf-core/scnanoseq --input samplesheet.csv --outdir --genome GRCh37 -profile +``` ```bash nextflow run nf-core/scnanoseq \ @@ -106,8 +106,8 @@ We thank the following people for their extensive assistance in the development We would also like to thank the following people and groups for their support, including financial support: -* Dr. Elizabeth Worthey -* University of Alabama at Birmingham Biological Data Science Core (U-BDS), RRID:SCR_021766, https://github.com/U-BDS +- Dr. Elizabeth Worthey +- University of Alabama at Birmingham Biological Data Science Core (U-BDS), RRID:SCR_021766, https://github.com/U-BDS diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2c87ce5..fe36cdb 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -4,90 +4,88 @@ report_comment: > documentation. report_section_order: - "nf-core-scnanoseq-methods-description": - order: -1000 - software_versions: - order: -1001 - "nf-core-scnanoseq-summary": - order: -1002 - fastqc_postextract_fastqc_per_base_sequence_quality: remove - fastqc_postextract_fastqc_per_sequence_quality_scores: remove - fastqc_postextract_fastqc_per_base_sequence_content: remove - fastqc_postextract_fastqc_per_sequence_gc_content: remove - fastqc_postextract_fastqc_per_base_n_content: remove - fastqc_postextract_fastqc_sequence_length_distribution: remove - fastqc_postextract_fastqc_sequence_duplication_levels: remove - fastqc_postextract_fastqc_overrepresented_sequences: remove - fastqc_postextract_fastqc_adapter_content: remove - fastqc_postextract_fastqc_status_checks: remove + "nf-core-scnanoseq-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-scnanoseq-summary": + order: -1002 + fastqc_postextract_fastqc_per_base_sequence_quality: remove + fastqc_postextract_fastqc_per_sequence_quality_scores: remove + fastqc_postextract_fastqc_per_base_sequence_content: remove + fastqc_postextract_fastqc_per_sequence_gc_content: remove + fastqc_postextract_fastqc_per_base_n_content: remove + fastqc_postextract_fastqc_sequence_length_distribution: remove + fastqc_postextract_fastqc_sequence_duplication_levels: remove + fastqc_postextract_fastqc_overrepresented_sequences: remove + fastqc_postextract_fastqc_adapter_content: remove + fastqc_postextract_fastqc_status_checks: remove export_plots: true top_modules: - - general_stats - - custom_content - - samtools - - fastqc: - name: "FastQC (raw)" - anchor: "fastqc" - info: "This section of the report shows FastQC results of the raw data" - path_filters: - - "*raw*fastqc*" - - fastqc: - name: "FastQC (post trimmed)" - anchor: "fastqc_posttrim" - info: "This section of the report shows FastQC results of the trimmed - data" - path_filters: - - "*trimmed*fastqc*" - - fastqc: - name: "FastQC (post extract)" - anchor: "fastqc_postextract" - info: "This section of the report shows FastQC results of the trimmed - and umi extracted data" - path_filters: - - "*extracted*fastqc*" + - general_stats + - custom_content + - samtools + - fastqc: + name: "FastQC (raw)" + anchor: "fastqc" + info: "This section of the report shows FastQC results of the raw data" + path_filters: + - "*raw*fastqc*" + - fastqc: + name: "FastQC (post trimmed)" + anchor: "fastqc_posttrim" + info: "This section of the report shows FastQC results of the trimmed + data" + path_filters: + - "*trimmed*fastqc*" + - fastqc: + name: "FastQC (post extract)" + anchor: "fastqc_postextract" + info: "This section of the report shows FastQC results of the trimmed + and umi extracted data" + path_filters: + - "*extracted*fastqc*" section_comments: - fastqc_posttrim: "Please note that if 'prowler' is selected as the trimming - method, these qc's should be looked at closely. Prowler does not - always remove reads, as it will replace low quality windows with - N's, which can have notable effects on the qc's below. For more - information on the prowler algorithm, please refer to the - [Prowler paper](https://doi.org/10.1093/bioinformatics/btab630)" + fastqc_posttrim: "Please note that if 'prowler' is selected as the trimming + method, these qc's should be looked at closely. Prowler does not + always remove reads, as it will replace low quality windows with + N's, which can have notable effects on the qc's below. For more + information on the prowler algorithm, please refer to the + [Prowler paper](https://doi.org/10.1093/bioinformatics/btab630)" custom_content: - seurat_section: - parent_id: seurat_section - order: - - transcript_seurat_stats_module - - gene_seurat_stats_module + seurat_section: + parent_id: seurat_section + order: + - transcript_seurat_stats_module + - gene_seurat_stats_module custom_data: + gene_seurat_stats_module: + parent_id: seurat_section + parent_name: "Seurat Section" + parent_description: "Preliminary expression analysis summary completed with + Seurat. Note that these numbers are generated + without any filtering done on the dataset" + section_name: "Gene Seurat Stats" + file_format: "tsv" + plot_type: "table" - gene_seurat_stats_module: - parent_id: seurat_section - parent_name: "Seurat Section" - parent_description: "Preliminary expression analysis summary completed with - Seurat. Note that these numbers are generated - without any filtering done on the dataset" - section_name: "Gene Seurat Stats" - file_format: "tsv" - plot_type: "table" - - transcript_seurat_stats_module: - parent_id: seurat_section - parent_name: "Seurat Section" - parent_description: "Preliminary expression analysis summary completed with - Seurat. Note that these numbers are generated - without any filtering done on the dataset" - section_name: "Transcript Seurat Stats" - file_format: "tsv" - plot_type: "table" + transcript_seurat_stats_module: + parent_id: seurat_section + parent_name: "Seurat Section" + parent_description: "Preliminary expression analysis summary completed with + Seurat. Note that these numbers are generated + without any filtering done on the dataset" + section_name: "Transcript Seurat Stats" + file_format: "tsv" + plot_type: "table" sp: - gene_seurat_stats_module: - fn: "gene.*.tsv" - transcript_seurat_stats_module: - fn: "transcript.*.tsv" - + gene_seurat_stats_module: + fn: "gene.*.tsv" + transcript_seurat_stats_module: + fn: "transcript.*.tsv"