From 51d0dfaec160c5fe56634fe051776dfc7688f70a Mon Sep 17 00:00:00 2001 From: Trestan Pillonel Date: Tue, 22 Feb 2022 17:24:30 +0100 Subject: [PATCH 01/11] use af wrapper --- mess/envs/af.yml | 5 +++++ mess/scripts/Snakefile | 23 ++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 mess/envs/af.yml diff --git a/mess/envs/af.yml b/mess/envs/af.yml new file mode 100644 index 0000000..7a694da --- /dev/null +++ b/mess/envs/af.yml @@ -0,0 +1,5 @@ +name: assembly_finder +channels: +- metagenlab +dependencies: +- assembly_finder \ No newline at end of file diff --git a/mess/scripts/Snakefile b/mess/scripts/Snakefile index cb9a2b6..cd09825 100644 --- a/mess/scripts/Snakefile +++ b/mess/scripts/Snakefile @@ -1,5 +1,7 @@ include : "simulate_reads.rules" -include : "../assembly_finder/Snakefile" + + + replicates = list(range(1, config["replicates"] + 1)) community_name = config["community_name"] @@ -11,6 +13,25 @@ if config['seq_tech'] == 'illumina' and config['read_status'] == 'single': if config['seq_tech'] == 'illumina' and config['read_status'] == 'paired': read_direction = ['R1','R2'] + +checkpoint download_assemblies: + conda: + "../envs/af.yml" + output: + f"{community_name}-assemblies-summary.tsv" + params: + input_table = config["input_table_path"], + conda_prefix = workflow.conda_prefix, + ncbi_key = config["NCBI_key"], + ncbi_email = config["NCBI_email"], + complete_assemblies = config["complete_assemblies"], + filter_rank = config["Rank_to_filter_by"], + shell: + """ + af run --nolock --input-table {params.input_table} --output_prefix genomes --conda-prefix {params.conda_prefix} --ncbi_key {params.ncbi_key} --ncbi_email {params.ncbi_email} --complete_assemblies {params.complete_assemblies} --filter_rank {params.filter_rank} + """ + + def assembly_and_bam_list(wildcards): try: directory = config['assemblies_dir'] From 9aeabdc88c314becc4111c2a2ba02bd9ea8afbf3 Mon Sep 17 00:00:00 2001 From: Trestan Pillonel Date: Tue, 22 Feb 2022 17:25:00 +0100 Subject: [PATCH 02/11] removed assembly_finder folder --- mess/assembly_finder/Snakefile | 13 - mess/assembly_finder/envs/Assembly_finder.yml | 9 - mess/assembly_finder/envs/download.yml | 5 - .../envs/singularity/aspera.Dockerfile | 27 -- .../singularity/assembly_finder.Dockerfile | 30 --- mess/assembly_finder/rules/assembly_table.py | 248 ------------------ mess/assembly_finder/rules/combine_tables.py | 13 - .../assembly_finder/rules/concat-ftp-links.py | 15 -- .../rules/find_assemblies.rules | 95 ------- mess/assembly_finder/rules/update-ete3.py | 24 -- 10 files changed, 479 deletions(-) delete mode 100644 mess/assembly_finder/Snakefile delete mode 100644 mess/assembly_finder/envs/Assembly_finder.yml delete mode 100644 mess/assembly_finder/envs/download.yml delete mode 100644 mess/assembly_finder/envs/singularity/aspera.Dockerfile delete mode 100644 mess/assembly_finder/envs/singularity/assembly_finder.Dockerfile delete mode 100644 mess/assembly_finder/rules/assembly_table.py delete mode 100644 mess/assembly_finder/rules/combine_tables.py delete mode 100644 mess/assembly_finder/rules/concat-ftp-links.py delete mode 100644 mess/assembly_finder/rules/find_assemblies.rules delete mode 100644 mess/assembly_finder/rules/update-ete3.py diff --git a/mess/assembly_finder/Snakefile b/mess/assembly_finder/Snakefile deleted file mode 100644 index ff3e995..0000000 --- a/mess/assembly_finder/Snakefile +++ /dev/null @@ -1,13 +0,0 @@ -import os -include: 'rules/find_assemblies.rules' -community_name=config['community_name'] -def downloaded_list(wildcards): - checkpoint_output = checkpoints.download_assemblies.get(**wildcards).output[0] - directory = '/'.join((checkpoint_output.split('/')[0:2])) - return expand(f'assembly_gz/{community_name}/{{i}}_genomic.fna.gz', - i=glob_wildcards(os.path.join(directory, '{i}_genomic.fna.gz')).i) - -rule all_download: - input: f"{community_name}-assemblies-summary.tsv", - downloaded_list, - f"assembly_gz/{community_name}/{community_name}.done" \ No newline at end of file diff --git a/mess/assembly_finder/envs/Assembly_finder.yml b/mess/assembly_finder/envs/Assembly_finder.yml deleted file mode 100644 index 336fe17..0000000 --- a/mess/assembly_finder/envs/Assembly_finder.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: Assembly_finder -channels: - - bioconda - - conda-forge - -dependencies: - - biopython = 1.78 - - pandas = 1.2.2 - - ete3 = 3.1.2 \ No newline at end of file diff --git a/mess/assembly_finder/envs/download.yml b/mess/assembly_finder/envs/download.yml deleted file mode 100644 index ace0629..0000000 --- a/mess/assembly_finder/envs/download.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: download -channels: - - hcc -dependencies: - - aspera-cli = 3.9.1 \ No newline at end of file diff --git a/mess/assembly_finder/envs/singularity/aspera.Dockerfile b/mess/assembly_finder/envs/singularity/aspera.Dockerfile deleted file mode 100644 index 09ece05..0000000 --- a/mess/assembly_finder/envs/singularity/aspera.Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM continuumio/miniconda3:4.7.12 - - -################## METADATA ###################### - -LABEL base.image="miniconda3:4.7.12" -LABEL version="v.1.0" -LABEL software="aspera" -LABEL software.version="3.9.1" -LABEL description="IBM aspera CLI (https://downloads.asperasoft.com/en/documentation/62) but install from a Conda environment " -LABEL tags="Genomics" - - -################## INSTALLATION ###################### -ENV DEBIAN_FRONTEND noninteractive - -COPY ./envs/download.yml ./download.yml - -RUN conda update conda && \ - conda env create -f download.yml && \ - conda clean --all --yes - - -RUN conda init bash -ENTRYPOINT ["/bin/bash"] -ENV PATH /opt/conda/envs/download/bin:$PATH -ENV CONDA_PREFIX "/opt/conda/envs/download" \ No newline at end of file diff --git a/mess/assembly_finder/envs/singularity/assembly_finder.Dockerfile b/mess/assembly_finder/envs/singularity/assembly_finder.Dockerfile deleted file mode 100644 index d6bbbde..0000000 --- a/mess/assembly_finder/envs/singularity/assembly_finder.Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM continuumio/miniconda3:4.7.12 - - -################## METADATA ###################### - -LABEL base.image="miniconda3:4.7.12" -LABEL version="v.1.1" -LABEL software="assembly_finder" -LABEL tags="Genomics" - -################## MAINTAINER ###################### - -MAINTAINER Valentin Scherz - -################## INSTALLATION ###################### -ENV DEBIAN_FRONTEND noninteractive - -COPY ./envs/Assembly_finder.yml ./Assembly_finder.yml - -RUN conda config --add channels defaults && \ - conda config --add channels bioconda && \ - conda config --add channels conda-forge && \ - conda update conda && \ - conda env create -f Assembly_finder.yml && \ - conda clean --all --yes - - -RUN conda init bash -ENTRYPOINT ["/bin/bash"] -ENV PATH /opt/conda/envs/Assembly_finder/bin:$PATH \ No newline at end of file diff --git a/mess/assembly_finder/rules/assembly_table.py b/mess/assembly_finder/rules/assembly_table.py deleted file mode 100644 index d99bc76..0000000 --- a/mess/assembly_finder/rules/assembly_table.py +++ /dev/null @@ -1,248 +0,0 @@ -from Bio import Entrez -import pandas as pd -import warnings -import numpy as np -import logging -from ete3 import NCBITaxa -ncbi = NCBITaxa() - - -class AssemblyFinder: - def __init__(self, name, isassembly=False, genbank=False, refseq=True, representative=True, reference=True, - complete=True, - exclude_metagenomes=True, nb=1, rank_to_select='None', outf='f.tsv', outnf='nf.tsv'): - self.name = name - self.assembly = isassembly - self.genbank = genbank - self.refseq = refseq - self.representative = representative - self.reference = reference - self.complete = complete - self.exclude_metagenomes = exclude_metagenomes - self.target_ranks = ['strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom'] - self.nchunks = 10000 - self.rank_to_select = rank_to_select - self.nb = nb - self.outf = outf - self.outnf = outnf - logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', datefmt='%d %b %Y %H:%M:%S', - filename=snakemake.log[0], level=logging.DEBUG) - - # Static methods to apply functions on assembly summary table - def get_stat(self, meta, stat): - """ - function to extract assembly Meta stats (contig count, assembly length) - """ - return meta.split(f' ')[1].split('')[0] - - def get_names(self, gbftp): - """ - function to extract assembly file names - """ - return gbftp.split('/')[-1] - - def get_lin_tax(self, lineages): - """ - function to get lineages from a list of taxids - """ - ranks = ncbi.get_rank(lineages).values() - ranknames = ncbi.get_taxid_translator(lineages).values() - return dict(zip(ranks, ranknames)) - - def replace_nans(self, tb): - """ - function to replace unknown taxonomic rank with placeholder names - """ - tb = tb.replace(np.nan, 'unknown') - for i in range(len(tb)): - for n, col in enumerate(tb.columns): - if tb.iloc[i, n] == 'unknown' and col != 'superkingdom': - tmpname = tb.iloc[i, n - 1] + '_' + col[0] - if col == 'species': - tmpname = tb.iloc[i, n - 1] + '_' + col[0:1] - tb.iloc[i, n] = tmpname - return tb - - def chunks(self, ls, n): - """ - function to split assembly list into chunks - """ - return [ls[i:i + n] for i in range(0, len(ls), n)] - - def taxid_find(self): - """ - Function to parse input_list to convert scientific names to taxid - returns dictionary with taxid found - """ - logging.info(f'> Searching for taxIDs {self.name} ...') - try: - int(self.name) - logging.info('Query is a taxID') - taxid = self.name - - except ValueError: - logging.warning('Query is not a taxID, enter taxID to be more precise') - logging.info(f'Search term: {self.name}[all Names]') - taxid_list = Entrez.read(Entrez.esearch(db='taxonomy', term=f'{self.name}[all Names]', retmax=100))[ - 'IdList'] - if len(taxid_list) == 1: - taxid = taxid_list[0] - logging.info(f'TaxID:{taxid} found') - if len(taxid_list) > 1: - taxid = taxid_list[0] - logging.warning(f'{len(taxid_list)} TaxIDs found, change query (taking first one : {taxid})') - if len(taxid_list) == 0: - raise Exception('TaxID not found! Change search term!') - return taxid - - def search_assemblies(self): - if self.assembly: # If the input is an assembly name or Gbuid use it as a search term - search_term = f'{self.name}' - else: # If not, search ncbi taxonomy for the taxid - taxid = self.taxid_find() - search_term = f'txid{taxid}[Organism:exp] ' - if self.refseq and not self.genbank: - search_term += 'AND ("latest refseq"[filter] ' - if self.genbank and not self.refseq: - search_term += 'AND ("latest genbank"[filter] ' - if self.genbank and self.refseq: - search_term += 'AND (latest[filter] ' - if self.complete and not self.representative and not self.reference: - search_term += 'AND "complete genome"[filter] ' - if self.complete and self.representative and not self.reference: - search_term += 'AND "complete genome"[filter] OR "representative genome"[filter] ' - if self.complete and self.representative and self.reference: - search_term += 'AND "complete genome"[filter] OR "representative genome"[filter] OR ' \ - '"reference genome"[filter] ' - if self.representative and not self.reference: - search_term += 'AND "representative genome"[filter] ' - if self.reference and not self.representative: - search_term += 'AND "reference genome"[filter] ' - if self.representative and self.reference: - search_term += 'AND "representative genome"[filter] OR "reference genome"[filter] ' - if self.exclude_metagenomes: - search_term += 'AND all[filter] NOT metagenome[filter])' - assembly_ids = Entrez.read(Entrez.esearch(db='assembly', term=search_term, retmax=500000))['IdList'] - logging.info(f'> Search term: {search_term}') - logging.info(f'found {len(assembly_ids)} assemblies') - if not assembly_ids: - raise Exception('No assemblies found ! Change search term!') - return assembly_ids - - def generate_assembly_table(self, assemblies): - assembly_list = ','.join(assemblies) - assembly_summary = Entrez.read(Entrez.esummary(db='assembly', id=assembly_list), validate=False) - summaries = assembly_summary['DocumentSummarySet']['DocumentSummary'] - tb = pd.DataFrame.from_records(summaries) - columns = ['GbUid', 'RefSeq_category', 'AssemblyStatus', 'FtpPath_GenBank', 'FtpPath_RefSeq', 'Meta', - 'AsmReleaseDate_GenBank', 'ContigN50', 'ScaffoldN50', 'Coverage', 'Taxid'] - subset = tb[columns] - lens = subset.apply(lambda x: self.get_stat(x['Meta'], stat='total_length'), axis=1) - contigs = subset.apply(lambda x: self.get_stat(x['Meta'], stat='contig_count'), axis=1) - subset.insert(loc=subset.shape[1] - 1, value=lens, column='Assembly_length') - subset.insert(loc=subset.shape[1] - 1, value=contigs, column='Contig_count') - subset.insert(loc=1, value=subset['FtpPath_GenBank'].apply(self.get_names), column='AssemblyNames') - subset = subset.rename(columns={'Coverage': 'Assembly_coverage'}) - subset = subset.drop('Meta', axis=1) - return subset - - def add_lineage(self, assembly_tb): - unique_taxids = list(set(assembly_tb['Taxid'])) - taxid2lineage = ncbi.get_lineage_translator(unique_taxids) - tax = {taxid: self.get_lin_tax(lineage) for taxid, lineage in taxid2lineage.items()} - lineage_tb = pd.DataFrame.from_dict(tax, orient='index') - lineage_tb.index.set_names('Taxid', inplace=True) - lineage_tb.reset_index(inplace=True) - ordered_ranks = self.target_ranks[::-1] - ordered_ranks.append('Taxid') - lin_cols = list(lineage_tb.columns) - all_cols = list(set().union(lin_cols, ordered_ranks)) - lineage_tb = lineage_tb.reindex(columns=all_cols, fill_value=np.nan) - lineage_tb = lineage_tb[ordered_ranks] - lineage_tb = self.replace_nans(lineage_tb) - lineage_tb = lineage_tb.astype({'Taxid': 'string'}) - merged_table = assembly_tb.merge(lineage_tb, on='Taxid') - return merged_table - - def select_assemblies(self, table): - fact_table = table.replace({'RefSeq_category': {'reference genome': 0, 'representative genome': 1, 'na': 6}, - 'AssemblyStatus': {'Complete Genome': 2, 'Chromosome': 3, 'Scaffold': 4, - 'Contig': 5, 'na': 6}}) - sorted_table = fact_table.sort_values(['RefSeq_category', 'AssemblyStatus', 'Contig_count', - 'ScaffoldN50', 'ContigN50', 'AsmReleaseDate_GenBank'], - ascending=[True, True, True, False, False, False]) - if self.rank_to_select != 'None': - logging.info(f'Filtering according to {self.rank_to_select}, Refseq categories, assembly status, ' - f'contig count and release date') - select_index = [] - unique_list = list(set(sorted_table[self.rank_to_select])) - if len(unique_list) > 1: - for i in unique_list: - select_index.append(sorted_table[sorted_table[self.rank_to_select] == i].sample(1).index[0]) - # randomly select one assembly ID for each unique selected rank (species for example) - sorted_table = sorted_table.loc[select_index, :] - if len(unique_list) == 1: - logging.info(f'Same {self.rank_to_select} for all assemblies, Filtering according to Refseq ' - f'categories, assembly status,contig count and release date') - if len(unique_list) == 0: - logging.error(f'{self.rank_to_select} is not a target rank') - else: - logging.info('No taxonomic rank specified, sorting according to Refseq category, ' - 'assembly status, contig count and release date') - if len(sorted_table) >= self.nb: - logging.info(f'Selecting {self.nb} sorted assemblies out of {len(sorted_table)}') - sorted_table = sorted_table[0:self.nb] - if len(sorted_table) < self.nb: - logging.warning(f'Found less than {self.nb} assemblies in total, returning {len(sorted_table)} instead') - sorted_table = sorted_table.replace({'RefSeq_category': {0: 'reference genome', 1: 'representative genome', - 6: 'na'}, - 'AssemblyStatus': {2: 'Complete Genome', 3: 'Chromosome', 4: 'Scaffold', - 5: 'Contig', 6: 'na'}}) - return sorted_table - - def run(self): - assemblies_found = self.search_assemblies() - if len(assemblies_found) > self.nchunks: - warnings.warn(f'{len(assemblies_found)} assemblies found, restrict search term to find less assemblies') - assemblies_chunks = self.chunks(assemblies_found, - self.nchunks) # Divide assembly lists by chunks of 10000 - logging.info(f'Parsing assemblies by chucks of {self.nchunks}') - table_chunks = [] - for n, chunk in enumerate(assemblies_chunks): - logging.info(f'chunk n°{n}') - assembly_tb = self.generate_assembly_table(chunk) - tb = self.add_lineage(assembly_tb) - table_chunks.append(tb) - non_filtered_tb = pd.concat(table_chunks, sort=False) - else: - assembly_tb = self.generate_assembly_table(assemblies_found) - non_filtered_tb = self.add_lineage(assembly_tb) - - non_filtered_tb.to_csv(self.outnf, sep='\t', index=None) - filtered_tb = self.select_assemblies(non_filtered_tb) - filtered_tb.to_csv(self.outf, sep='\t', index=None) - return filtered_tb - - -''' -Main -''' -Entrez.email = snakemake.params['ncbi_email'] -Entrez.api_key = snakemake.params['ncbi_key'] -comp = snakemake.params['comp'] -ref = snakemake.params['ref'] -rep = snakemake.params['rep'] -met = snakemake.params['met'] -gb = snakemake.params['gb'] -rs = snakemake.params['rs'] -entry = snakemake.wildcards.entry -assembly = snakemake.params['assembly'] -column = snakemake.params['column'] -rank = snakemake.params['rank_filter'] -intb = pd.read_csv(snakemake.input[0], sep='\t', dtype={f'{column}': 'str'}) -intb.set_index(f'{column}', inplace=True) -nb = int(intb.loc[entry]['nb_genomes']) -find_assemblies = AssemblyFinder(name=entry, isassembly=assembly, genbank=gb, refseq=rs, representative=rep, - reference=ref, complete=comp, exclude_metagenomes=met, nb=nb, rank_to_select=rank, - outnf=snakemake.output.all, outf=snakemake.output.filtered) -find_assemblies.run() diff --git a/mess/assembly_finder/rules/combine_tables.py b/mess/assembly_finder/rules/combine_tables.py deleted file mode 100644 index 2418704..0000000 --- a/mess/assembly_finder/rules/combine_tables.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas as pd -""" -Main -""" -column = snakemake.params['column'] -df_list = [] -for file in snakemake.input: - entry = file.split('/')[1].split('-filtered.tsv')[0] - tb = pd.read_csv(file, sep='\t') - tb.insert(loc=0, column=f'{column}', value=[entry]*len(tb)) - df_list.append(tb) -df = pd.concat(df_list, sort=False) -df.to_csv(snakemake.output[0], sep='\t', index=None) diff --git a/mess/assembly_finder/rules/concat-ftp-links.py b/mess/assembly_finder/rules/concat-ftp-links.py deleted file mode 100644 index 7b88b0e..0000000 --- a/mess/assembly_finder/rules/concat-ftp-links.py +++ /dev/null @@ -1,15 +0,0 @@ -import pandas as pd -""" -Main -""" -ftplinks = pd.read_csv(snakemake.input[0], sep='\t')['FtpPath_GenBank'] -links = [] -for link in ftplinks: - link = link.replace('ftp://ftp.ncbi.nlm.nih.gov', '') - fna = '/' + link.split('/')[-1]+'_genomic.fna.gz\n' - link += fna - links.append(link) - -f = open(snakemake.output[0], "w") -f.writelines(links) -f.close() diff --git a/mess/assembly_finder/rules/find_assemblies.rules b/mess/assembly_finder/rules/find_assemblies.rules deleted file mode 100644 index 26ccad1..0000000 --- a/mess/assembly_finder/rules/find_assemblies.rules +++ /dev/null @@ -1,95 +0,0 @@ -import pandas as pd -community_name=config['community_name'] -try: - entries = list(pd.read_csv(config['input_table_path'],delimiter='\t')['TaxonomyInput']) - isassembly = False - col = 'TaxonomyInput' -except KeyError: - entries = list(pd.read_csv(config['input_table_path'],delimiter='\t')['AssemblyInput']) - isassembly = True - col = 'AssemblyInput' - -rule check_for_update_ete3: - conda: '../envs/Assembly_finder.yml' - - container: "docker://metagenlab/assemblyfinder:v.1.1" - - output: temp('ete3-update.txt') - - log: 'logs/ete3/update.log' - - script: 'update-ete3.py' - -rule get_assembly_tables: - conda: '../envs/Assembly_finder.yml' - - container: "docker://metagenlab/assemblyfinder:v.1.1" - - input: config["input_table_path"], - 'ete3-update.txt' - - output: all='tables/{entry}-all.tsv', - filtered='tables/{entry}-filtered.tsv' - - params: ncbi_key = config['NCBI_key'], ncbi_email = config['NCBI_email'], - comp=config['complete_assemblies'], ref=config['reference_assemblies'], - rep=config['representative_assemblies'], met=config['exclude_from_metagenomes'], - gb=config['Genbank_assemblies'], rs=config['Refseq_assemblies'], rank_filter=config['Rank_to_filter_by'], - assembly = isassembly, column = col - - resources: ncbi_requests=1 - - log: 'logs/find-assemblies/{entry}.log' - - benchmark: "benchmark/find-assemblies/{entry}.txt" - - script: 'assembly_table.py' - - -rule combine_assembly_tables: - conda: '../envs/Assembly_finder.yml' - - container: "docker://metagenlab/assemblyfinder:v.1.1" - - input: expand('tables/{entry}-filtered.tsv',entry=entries) - - output: f'{community_name}-assemblies-summary.tsv' - - params: column=col - - script: 'combine_tables.py' - - -rule get_ftp_links_list: - conda: '../envs/Assembly_finder.yml' - - container: "docker://metagenlab/assemblyfinder:v.1.1" - - input: f'{community_name}-assemblies-summary.tsv' - - output: temp(f"assembly_gz/{community_name}/{community_name}-ftp-links.txt") - - script: "concat-ftp-links.py" - - -checkpoint download_assemblies: - conda: "../envs/download.yml" - - container: "docker://metagenlab/aspera-cli-conda:v.1.0" - - input: f"assembly_gz/{community_name}/{community_name}-ftp-links.txt" - - output: f"assembly_gz/{community_name}/{community_name}.done" - - log: f"logs/downloads/{community_name}.log" - - benchmark: f"benchmark/downloads/{community_name}.txt" - - params: f"assembly_gz/{community_name}" - - shell: - """ - ascp -T -k 1 -i ${{CONDA_PREFIX}}/etc/asperaweb_id_dsa.openssh --mode=recv --user=anonftp \ - --host=ftp.ncbi.nlm.nih.gov --file-list={input} {params} &>> {log} - touch {output} - """ \ No newline at end of file diff --git a/mess/assembly_finder/rules/update-ete3.py b/mess/assembly_finder/rules/update-ete3.py deleted file mode 100644 index 11bce5b..0000000 --- a/mess/assembly_finder/rules/update-ete3.py +++ /dev/null @@ -1,24 +0,0 @@ -import sys -import os -from datetime import datetime -from ete3 import NCBITaxa -with open(snakemake.log[0], "w") as f: - sys.stderr = sys.stdout = f - ncbi = NCBITaxa() - sqldb = os.path.join(os.environ['HOME'], '.etetoolkit', 'taxa.sqlite') # path to ete sql db - db_modification_time = datetime.fromtimestamp(os.path.getctime(sqldb)) - database_age_days = abs((db_modification_time-datetime.now()).days) - if database_age_days >= 10: - ncbi.update_taxonomy_database() - comment = f'taxa.sqlite is more than {database_age_days} days old, updating database' - else: - comment = 'taxa.sqlite is up to date' - file = open(snakemake.output[0], 'w') - file.write(comment) - file.close() - - - - - - From f77da2a3250790a8b35939d3ad22b43e1b09c273 Mon Sep 17 00:00:00 2001 From: Trestan Pillonel Date: Tue, 22 Feb 2022 18:06:11 +0100 Subject: [PATCH 03/11] added ouput prefix --- mess/scripts/Snakefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mess/scripts/Snakefile b/mess/scripts/Snakefile index cd09825..e100fa0 100644 --- a/mess/scripts/Snakefile +++ b/mess/scripts/Snakefile @@ -26,9 +26,10 @@ checkpoint download_assemblies: ncbi_email = config["NCBI_email"], complete_assemblies = config["complete_assemblies"], filter_rank = config["Rank_to_filter_by"], + output_prefix = f"{community_name}" shell: """ - af run --nolock --input-table {params.input_table} --output_prefix genomes --conda-prefix {params.conda_prefix} --ncbi_key {params.ncbi_key} --ncbi_email {params.ncbi_email} --complete_assemblies {params.complete_assemblies} --filter_rank {params.filter_rank} + af run --nolock --input-table {params.input_table} --output_prefix {params.output_prefix} --conda-prefix {params.conda_prefix} --ncbi_key {params.ncbi_key} --ncbi_email {params.ncbi_email} --complete_assemblies {params.complete_assemblies} --filter_rank {params.filter_rank} """ From 11b91a788b44fc04b1bb75647911462102912ae5 Mon Sep 17 00:00:00 2001 From: Trestan Pillonel Date: Wed, 23 Feb 2022 10:08:43 +0100 Subject: [PATCH 04/11] removed individual conda envs --- mess/envs/af.yml | 5 ----- mess/envs/art.yml | 5 ----- mess/envs/bioconvert.yml | 5 ----- mess/envs/krona.yml | 5 ----- mess/envs/pbsim2.yml | 5 ----- mess/envs/seqkit.yml | 5 ----- mess/envs/utils.yml | 7 ------- mess/scripts/Snakefile | 7 +------ mess/scripts/simulate_reads.rules | 9 --------- messenv.yml | 12 +++++++++++- 10 files changed, 12 insertions(+), 53 deletions(-) delete mode 100644 mess/envs/af.yml delete mode 100644 mess/envs/art.yml delete mode 100644 mess/envs/bioconvert.yml delete mode 100644 mess/envs/krona.yml delete mode 100644 mess/envs/pbsim2.yml delete mode 100644 mess/envs/seqkit.yml delete mode 100644 mess/envs/utils.yml diff --git a/mess/envs/af.yml b/mess/envs/af.yml deleted file mode 100644 index 7a694da..0000000 --- a/mess/envs/af.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: assembly_finder -channels: -- metagenlab -dependencies: -- assembly_finder \ No newline at end of file diff --git a/mess/envs/art.yml b/mess/envs/art.yml deleted file mode 100644 index 08bf34b..0000000 --- a/mess/envs/art.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: art -channels: - - bioconda -dependencies: - - art = 2016.06.05 \ No newline at end of file diff --git a/mess/envs/bioconvert.yml b/mess/envs/bioconvert.yml deleted file mode 100644 index e41c916..0000000 --- a/mess/envs/bioconvert.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: samtools -channels: - - bioconda -dependencies: - - bioconvert = 0.4.3 \ No newline at end of file diff --git a/mess/envs/krona.yml b/mess/envs/krona.yml deleted file mode 100644 index cf24710..0000000 --- a/mess/envs/krona.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: krona -channels: - - bioconda -dependencies: - - krona = 2.8 \ No newline at end of file diff --git a/mess/envs/pbsim2.yml b/mess/envs/pbsim2.yml deleted file mode 100644 index 7bbf1b4..0000000 --- a/mess/envs/pbsim2.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: pbsim2 -channels: - - bioconda -dependencies: - - pbsim2 = 2.0.1 \ No newline at end of file diff --git a/mess/envs/seqkit.yml b/mess/envs/seqkit.yml deleted file mode 100644 index 7213592..0000000 --- a/mess/envs/seqkit.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: seqkit -channels: - - bioconda -dependencies: - - seqkit = 0.16.0 \ No newline at end of file diff --git a/mess/envs/utils.yml b/mess/envs/utils.yml deleted file mode 100644 index 537ca23..0000000 --- a/mess/envs/utils.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: pandas-numpy -channels: - - conda-forge -dependencies: - - pandas = 1.2.2 - - numpy = 1.20.1 - - biopython = 1.78 \ No newline at end of file diff --git a/mess/scripts/Snakefile b/mess/scripts/Snakefile index e100fa0..2d16c8e 100644 --- a/mess/scripts/Snakefile +++ b/mess/scripts/Snakefile @@ -1,8 +1,6 @@ include : "simulate_reads.rules" - - replicates = list(range(1, config["replicates"] + 1)) community_name = config["community_name"] read_direction = [] @@ -15,13 +13,10 @@ if config['seq_tech'] == 'illumina' and config['read_status'] == 'paired': checkpoint download_assemblies: - conda: - "../envs/af.yml" output: f"{community_name}-assemblies-summary.tsv" params: input_table = config["input_table_path"], - conda_prefix = workflow.conda_prefix, ncbi_key = config["NCBI_key"], ncbi_email = config["NCBI_email"], complete_assemblies = config["complete_assemblies"], @@ -29,7 +24,7 @@ checkpoint download_assemblies: output_prefix = f"{community_name}" shell: """ - af run --nolock --input-table {params.input_table} --output_prefix {params.output_prefix} --conda-prefix {params.conda_prefix} --ncbi_key {params.ncbi_key} --ncbi_email {params.ncbi_email} --complete_assemblies {params.complete_assemblies} --filter_rank {params.filter_rank} + af run --nolock --input-table {params.input_table} --output_prefix {params.output_prefix} --ncbi_key {params.ncbi_key} --ncbi_email {params.ncbi_email} --complete_assemblies {params.complete_assemblies} --filter_rank {params.filter_rank} """ diff --git a/mess/scripts/simulate_reads.rules b/mess/scripts/simulate_reads.rules index 918d536..445cd6c 100644 --- a/mess/scripts/simulate_reads.rules +++ b/mess/scripts/simulate_reads.rules @@ -82,7 +82,6 @@ rule decompress_assemblies: rule merge_contigs: - conda: "../envs/utils.yml" input: f'assembly_gz/{community_name}/{{assemblyname}}_genomic.fna' @@ -104,7 +103,6 @@ if config['seq_tech']=='illumina': if config['read_status']=='paired': rule generate_illumina_paired_reads: - conda: "../envs/art.yml" input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' @@ -135,7 +133,6 @@ if config['seq_tech']=='illumina': if config['read_status']=='single': rule generate_illumina_single_reads: - conda: "../envs/art.yml" input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' @@ -169,7 +166,6 @@ if config['seq_tech']=='illumina': rule convert_sam_to_bam_files: - conda: "../envs/bioconvert.yml" input: get_sam(config['read_status']) @@ -193,7 +189,6 @@ elif config['seq_tech']=='ont' or config['seq_tech']=='pacbio': return cov rule generate_long_reads: - conda: '../envs/pbsim2.yml' input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' @@ -226,7 +221,6 @@ elif config['seq_tech']=='ont' or config['seq_tech']=='pacbio': --length-sd {params.sd_read_len} {input.fa} &> {log} """ rule convert_maf_to_bam_files: - conda: "../envs/bioconvert.yml" input: 'simreads/{community}-{rep}-{assemblyname}_0001.maf' @@ -273,7 +267,6 @@ rule concat_fastqs: rule shuffle_fastqs: - conda: "../envs/seqkit.yml" input: 'simreads/{community}-{rep}_{rd}.unshuffled.fq' @@ -294,7 +287,6 @@ rule shuffle_fastqs: rule get_taxonomic_profile: - conda: "../envs/utils.yml" input: reads='simreads/{community}-{rep}_{rd}.fq.gz', table='readcounts-{community}-{rep}.tsv' @@ -310,7 +302,6 @@ rule get_taxonomic_profile: rule create_krona_chart: - conda: "../envs/krona.yml" input: 'krona/{community}-{rep}_{rd}.txt' diff --git a/messenv.yml b/messenv.yml index 5e01596..9119832 100644 --- a/messenv.yml +++ b/messenv.yml @@ -3,7 +3,17 @@ channels: - bioconda - conda-forge - metagenlab + - HCC dependencies: - - snakemake-minimal = 6.2.1 + - aspera-cli = 3.9.1 + - assembly_finder = 0.2.0 + - ete3 = 3.1.2 + - art = 2016.06.05 + - bioconvert = 0.4.3 - click = 7 + - krona = 2.8 - pandas = 1.2.2 + - pbsim2 = 2.0.1 + - snakemake-minimal = 6.2.1 + - seqkit = 0.16.0 + - biopython = 1.78 \ No newline at end of file From e7f1c34794c1f27a9e21f37b2a78d8b5e82ed881 Mon Sep 17 00:00:00 2001 From: rdchaabafa Date: Wed, 23 Feb 2022 13:44:00 +0100 Subject: [PATCH 05/11] added all dependencies in messenv.yml --- mess/scripts/Snakefile | 5 ----- mess/scripts/simulate_reads.rules | 18 ------------------ messenv.yml | 14 +++++++++++++- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/mess/scripts/Snakefile b/mess/scripts/Snakefile index e100fa0..77bec55 100644 --- a/mess/scripts/Snakefile +++ b/mess/scripts/Snakefile @@ -1,8 +1,5 @@ include : "simulate_reads.rules" - - - replicates = list(range(1, config["replicates"] + 1)) community_name = config["community_name"] read_direction = [] @@ -15,8 +12,6 @@ if config['seq_tech'] == 'illumina' and config['read_status'] == 'paired': checkpoint download_assemblies: - conda: - "../envs/af.yml" output: f"{community_name}-assemblies-summary.tsv" params: diff --git a/mess/scripts/simulate_reads.rules b/mess/scripts/simulate_reads.rules index 918d536..411263b 100644 --- a/mess/scripts/simulate_reads.rules +++ b/mess/scripts/simulate_reads.rules @@ -82,8 +82,6 @@ rule decompress_assemblies: rule merge_contigs: - conda: "../envs/utils.yml" - input: f'assembly_gz/{community_name}/{{assemblyname}}_genomic.fna' output: temp('assembly_gz/{community,[-_0-9a-zA-Z]+}-{assemblyname,[0-9a-zA-Z._-]+}.fa') @@ -104,8 +102,6 @@ if config['seq_tech']=='illumina': if config['read_status']=='paired': rule generate_illumina_paired_reads: - conda: "../envs/art.yml" - input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' @@ -135,8 +131,6 @@ if config['seq_tech']=='illumina': if config['read_status']=='single': rule generate_illumina_single_reads: - conda: "../envs/art.yml" - input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' @@ -169,8 +163,6 @@ if config['seq_tech']=='illumina': rule convert_sam_to_bam_files: - conda: "../envs/bioconvert.yml" - input: get_sam(config['read_status']) output: 'bam/{community,[-_0-9a-zA-Z]+}-{rep,[0-9]+}-{assemblyname,[0-9a-zA-Z._-]+}.bam' @@ -193,8 +185,6 @@ elif config['seq_tech']=='ont' or config['seq_tech']=='pacbio': return cov rule generate_long_reads: - conda: '../envs/pbsim2.yml' - input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' output: temp('simreads/{community,[-_0-9a-zA-Z]+}-{rep,[0-9]+}-{assemblyname,[0-9a-zA-Z._-]+}_0001.fastq'), @@ -226,8 +216,6 @@ elif config['seq_tech']=='ont' or config['seq_tech']=='pacbio': --length-sd {params.sd_read_len} {input.fa} &> {log} """ rule convert_maf_to_bam_files: - conda: "../envs/bioconvert.yml" - input: 'simreads/{community}-{rep}-{assemblyname}_0001.maf' output: sam=temp('bam/{community,[-_0-9a-zA-Z]+}-{rep,[0-9]+}-{assemblyname,[0-9a-zA-Z._-]+}.sam'), @@ -273,8 +261,6 @@ rule concat_fastqs: rule shuffle_fastqs: - conda: "../envs/seqkit.yml" - input: 'simreads/{community}-{rep}_{rd}.unshuffled.fq' output: 'simreads/{community}-{rep}_{rd}.fq.gz' @@ -294,8 +280,6 @@ rule shuffle_fastqs: rule get_taxonomic_profile: - conda: "../envs/utils.yml" - input: reads='simreads/{community}-{rep}_{rd}.fq.gz', table='readcounts-{community}-{rep}.tsv' @@ -310,8 +294,6 @@ rule get_taxonomic_profile: rule create_krona_chart: - conda: "../envs/krona.yml" - input: 'krona/{community}-{rep}_{rd}.txt' output: 'krona/{community}-{rep}_{rd}.html' diff --git a/messenv.yml b/messenv.yml index 5e01596..4e755e3 100644 --- a/messenv.yml +++ b/messenv.yml @@ -3,7 +3,19 @@ channels: - bioconda - conda-forge - metagenlab + - HCC dependencies: - - snakemake-minimal = 6.2.1 + - aspera-cli = 3.9.1 + - assembly_finder = 0.2.0 + - ete3 = 3.1.2 + - art = 2016.06.05 + - bioconvert = 0.4.3 - click = 7 + - krona = 2.8 - pandas = 1.2.2 + - pbsim2 = 2.0.1 + - snakemake-minimal = 6.2.1 + - seqkit = 0.16.0 + - biopython = 1.78 + + From 5fe68c237138c5ac08f6b2873bfd785c3cfec5b2 Mon Sep 17 00:00:00 2001 From: rdchaabafa Date: Wed, 23 Feb 2022 13:49:58 +0100 Subject: [PATCH 06/11] added hmp data templates --- .../buccal_mucosa/SRS013506-sim.tsv | 354 +++ .../buccal_mucosa/SRS013506-sim.yml | 20 + .../buccal_mucosa/SRS019352-sim.tsv | 120 + .../buccal_mucosa/SRS019352-sim.yml | 20 + .../buccal_mucosa/SRS050029-sim.tsv | 224 ++ .../buccal_mucosa/SRS050029-sim.yml | 20 + .../buccal_mucosa/SRS054569-sim.tsv | 305 +++ .../buccal_mucosa/SRS054569-sim.yml | 20 + .../buccal_mucosa/SRS056042-sim.tsv | 108 + .../buccal_mucosa/SRS056042-sim.yml | 20 + data/hmp_templates/gut/SRS1041031-sim.tsv | 990 ++++++++ data/hmp_templates/gut/SRS1041031-sim.yml | 20 + data/hmp_templates/gut/SRS1041129-sim.tsv | 719 ++++++ data/hmp_templates/gut/SRS1041129-sim.yml | 20 + data/hmp_templates/gut/SRS104912-sim.tsv | 1705 ++++++++++++++ data/hmp_templates/gut/SRS104912-sim.yml | 20 + data/hmp_templates/gut/SRS105082-sim.tsv | 2084 +++++++++++++++++ data/hmp_templates/gut/SRS105082-sim.yml | 20 + data/hmp_templates/gut/SRS147088-sim.tsv | 227 ++ data/hmp_templates/gut/SRS147088-sim.yml | 20 + data/hmp_templates/throat/SRS014475-sim.tsv | 72 + data/hmp_templates/throat/SRS014475-sim.yml | 20 + data/hmp_templates/throat/SRS014689-sim.tsv | 345 +++ data/hmp_templates/throat/SRS014689-sim.yml | 20 + data/hmp_templates/throat/SRS065335-sim.tsv | 150 ++ data/hmp_templates/throat/SRS065335-sim.yml | 20 + data/hmp_templates/throat/SRS143032-sim.tsv | 40 + data/hmp_templates/throat/SRS143032-sim.yml | 20 + data/hmp_templates/throat/SRS144378-sim.tsv | 86 + data/hmp_templates/throat/SRS144378-sim.yml | 20 + data/hmp_templates/vagina/SRS097885-sim.tsv | 27 + data/hmp_templates/vagina/SRS097885-sim.yml | 20 + data/hmp_templates/vagina/SRS098585-sim.tsv | 108 + data/hmp_templates/vagina/SRS098585-sim.yml | 20 + data/hmp_templates/vagina/SRS142957-sim.tsv | 32 + data/hmp_templates/vagina/SRS142957-sim.yml | 20 + data/hmp_templates/vagina/SRS143776-sim.tsv | 42 + data/hmp_templates/vagina/SRS143776-sim.yml | 20 + data/hmp_templates/vagina/SRS146847-sim.tsv | 40 + data/hmp_templates/vagina/SRS146847-sim.yml | 20 + 40 files changed, 8178 insertions(+) create mode 100644 data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv create mode 100644 data/hmp_templates/buccal_mucosa/SRS013506-sim.yml create mode 100644 data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv create mode 100644 data/hmp_templates/buccal_mucosa/SRS019352-sim.yml create mode 100644 data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv create mode 100644 data/hmp_templates/buccal_mucosa/SRS050029-sim.yml create mode 100644 data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv create mode 100644 data/hmp_templates/buccal_mucosa/SRS054569-sim.yml create mode 100644 data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv create mode 100644 data/hmp_templates/buccal_mucosa/SRS056042-sim.yml create mode 100644 data/hmp_templates/gut/SRS1041031-sim.tsv create mode 100644 data/hmp_templates/gut/SRS1041031-sim.yml create mode 100644 data/hmp_templates/gut/SRS1041129-sim.tsv create mode 100644 data/hmp_templates/gut/SRS1041129-sim.yml create mode 100644 data/hmp_templates/gut/SRS104912-sim.tsv create mode 100644 data/hmp_templates/gut/SRS104912-sim.yml create mode 100644 data/hmp_templates/gut/SRS105082-sim.tsv create mode 100644 data/hmp_templates/gut/SRS105082-sim.yml create mode 100644 data/hmp_templates/gut/SRS147088-sim.tsv create mode 100644 data/hmp_templates/gut/SRS147088-sim.yml create mode 100644 data/hmp_templates/throat/SRS014475-sim.tsv create mode 100644 data/hmp_templates/throat/SRS014475-sim.yml create mode 100644 data/hmp_templates/throat/SRS014689-sim.tsv create mode 100644 data/hmp_templates/throat/SRS014689-sim.yml create mode 100644 data/hmp_templates/throat/SRS065335-sim.tsv create mode 100644 data/hmp_templates/throat/SRS065335-sim.yml create mode 100644 data/hmp_templates/throat/SRS143032-sim.tsv create mode 100644 data/hmp_templates/throat/SRS143032-sim.yml create mode 100644 data/hmp_templates/throat/SRS144378-sim.tsv create mode 100644 data/hmp_templates/throat/SRS144378-sim.yml create mode 100644 data/hmp_templates/vagina/SRS097885-sim.tsv create mode 100644 data/hmp_templates/vagina/SRS097885-sim.yml create mode 100644 data/hmp_templates/vagina/SRS098585-sim.tsv create mode 100644 data/hmp_templates/vagina/SRS098585-sim.yml create mode 100644 data/hmp_templates/vagina/SRS142957-sim.tsv create mode 100644 data/hmp_templates/vagina/SRS142957-sim.yml create mode 100644 data/hmp_templates/vagina/SRS143776-sim.tsv create mode 100644 data/hmp_templates/vagina/SRS143776-sim.yml create mode 100644 data/hmp_templates/vagina/SRS146847-sim.tsv create mode 100644 data/hmp_templates/vagina/SRS146847-sim.yml diff --git a/data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv new file mode 100644 index 0000000..a930789 --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv @@ -0,0 +1,354 @@ +TaxonomyInput nb_genomes Reads +28037 1 1314603 +1313 1 505181 +735 1 426416 +729 1 246908 +726 1 223778 +1433513 1 215058 +712310 1 169487 +1303 1 157763 +727 1 138141 +2598453 1 125570 +1379 1 121449 +483 1 57209 +43675 1 32918 +1302 1 29915 +589873 1 27620 +113107 1 26690 +712633 1 26031 +1305 1 21915 +488 1 21836 +28131 1 20608 +487 1 19045 +1304 1 14434 +732 1 13518 +28132 1 11852 +84135 1 11484 +29391 1 11251 +1177574 1 10751 +486 1 9645 +95486 1 8493 +2133944 1 8340 +29466 1 8339 +739 1 7644 +34105 1 7619 +75985 1 7498 +28449 1 7427 +738 1 7349 +45634 1 7291 +189834 1 7076 +249188 1 6682 +85404 1 6502 +851 1 6439 +28135 1 6266 +2663009 1 6132 +489 1 6127 +1307 1 6040 +39777 1 5428 +2047 1 5108 +39778 1 4959 +495 1 4690 +485 1 4661 +837 1 4632 +714 1 4631 +2610896 1 4513 +78535 1 4423 +492670 1 4244 +731 1 4185 +2030797 1 4028 +747 1 3861 +2382163 1 3824 +1343 1 3785 +157687 1 3509 +199 1 3508 +817 1 3358 +76123 1 3352 +28129 1 3251 +2585118 1 3169 +715 1 3125 +644007 1 3047 +1432056 1 3006 +821 1 2915 +484 1 2831 +1156433 1 2774 +1338 1 2682 +1308 1 2642 +85402 1 2637 +12402 1 2594 +730 1 2573 +2679994 1 2483 +1328 1 2461 +1660 1 2380 +1796613 1 2373 +716 1 2238 +123824 1 1978 +28116 1 1938 +33040 1 1914 +28113 1 1903 +1156431 1 1863 +51049 1 1860 +718 1 1793 +1019 1 1735 +1351 1 1705 +28119 1 1684 +36874 1 1602 +1491 1 1596 +2576606 1 1556 +329854 1 1491 +1499308 1 1477 +1311 1 1476 +544580 1 1460 +720 1 1457 +1624 1 1451 +818 1 1379 +1335 1 1280 +1655 1 1202 +1906744 1 1200 +762 1 1196 +1566990 1 1184 +1314 1 1130 +28188 1 1128 +712710 1 1102 +34085 1 1096 +47678 1 1074 +1032623 1 1062 +728 1 1049 +754 1 1034 +47671 1 1000 +860 1 968 +712357 1 937 +187101 1 915 +1583098 1 912 +172042 1 894 +393921 1 862 +820 1 840 +1701814 1 817 +562 1 804 +2606626 1 799 +33033 1 780 +1839799 1 779 +357276 1 764 +996 1 751 +1496 1 747 +248315 1 710 +2528203 1 709 +1796646 1 705 +749 1 690 +1280 1 686 +246787 1 681 +253 1 676 +197575 1 653 +617123 1 653 +1852374 1 645 +1359 1 642 +1358 1 625 +655307 1 621 +856 1 614 +539 1 598 +1502 1 596 +114527 1 595 +712991 1 585 +1334 1 577 +371601 1 569 +2545799 1 569 +1585976 1 567 +28901 1 557 +1017 1 543 +823 1 540 +459526 1 528 +1642647 1 519 +51161 1 518 +1428 1 515 +327575 1 487 +1639 1 480 +584 1 479 +2708117 1 478 +43768 1 475 +76860 1 471 +1117645 1 469 +1642646 1 454 +587 1 452 +126385 1 452 +536441 1 449 +1492 1 448 +1825069 1 444 +1316596 1 434 +157691 1 432 +670 1 431 +1309 1 429 +254 1 428 +1396 1 423 +554406 1 421 +674 1 418 +2364787 1 416 +1433126 1 410 +446 1 403 +109328 1 402 +712368 1 386 +237258 1 386 +1701813 1 385 +36745 1 384 +79329 1 384 +28251 1 382 +1701837 1 376 +1352 1 372 +1598 1 372 +147802 1 371 +238 1 367 +1520 1 366 +204 1 365 +64104 1 363 +1656 1 363 +38293 1 362 +470 1 361 +1282 1 359 +157924 1 351 +2583851 1 349 +1257021 1 346 +2490633 1 345 +1562970 1 341 +1191459 1 341 +1813871 1 341 +45243 1 339 +756892 1 330 +889453 1 328 +210 1 325 +1241981 1 324 +1785996 1 324 +703 1 323 +157688 1 321 +28118 1 321 +859 1 320 +680 1 318 +157692 1 316 +2545455 1 314 +102684 1 313 +2681766 1 310 +1329 1 306 +1717717 1 305 +1400053 1 302 +2666100 1 301 +2516557 1 298 +120683 1 298 +1501 1 297 +672 1 293 +630 1 293 +861 1 291 +1307839 1 291 +2713414 1 290 +1346 1 290 +661488 1 289 +1218801 1 289 +2487065 1 283 +2259595 1 280 +1340 1 278 +2487072 1 278 +1397 1 278 +400065 1 278 +1349 1 278 +1290 1 278 +1345 1 277 +1561 1 276 +649196 1 276 +1238 1 275 +44008 1 272 +1448274 1 272 +2686361 1 272 +1150389 1 272 +1014 1 270 +82348 1 270 +1905730 1 269 +371142 1 268 +712538 1 265 +320787 1 265 +2751 1 265 +1852377 1 264 +1316593 1 262 +437897 1 261 +239935 1 261 +250 1 258 +1538644 1 253 +986 1 253 +172045 1 253 +33945 1 251 +182773 1 250 +1505 1 250 +96345 1 247 +588 1 246 +1701842 1 246 +2487074 1 245 +2494373 1 245 +51669 1 245 +93973 1 245 +48296 1 244 +2081703 1 243 +1254 1 242 +1288 1 241 +315405 1 241 +1404 1 239 +2489595 1 239 +1405 1 239 +511435 1 239 +1010 1 239 +1286 1 238 +2706887 1 237 +1542 1 236 +1493872 1 236 +1888195 1 235 +1936081 1 234 +651561 1 233 +689 1 233 +504 1 233 +996801 1 232 +1411621 1 231 +34062 1 230 +2175091 1 230 +1168034 1 230 +333965 1 229 +162 1 228 +394958 1 228 +1296 1 228 +585 1 228 +2093856 1 228 +1283 1 227 +1903686 1 226 +2698458 1 226 +1406 1 225 +266749 1 225 +363852 1 222 +188932 1 222 +2183896 1 222 +246 1 221 +663 1 221 +1848904 1 221 +46867 1 220 +52773 1 220 +1354 1 220 +2756 1 219 +1747 1 218 +33959 1 218 +2496028 1 218 +713030 1 217 +1790137 1 217 +480 1 216 +2748 1 215 +2173853 1 214 +28110 1 214 +2547394 1 214 +584609 1 213 +104268 1 211 +106654 1 211 +1727163 1 210 +1604 1 209 +2021971 1 208 +1513 1 207 +1855823 1 207 +313603 1 207 +2571750 1 205 +702745 1 205 +1421 1 204 +1608047 1 204 +29341 1 204 +1178516 1 203 +1705617 1 202 +79604 1 201 diff --git a/data/hmp_templates/buccal_mucosa/SRS013506-sim.yml b/data/hmp_templates/buccal_mucosa/SRS013506-sim.yml new file mode 100644 index 0000000..9950320 --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS013506-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS013506-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS013506-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 1 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv new file mode 100644 index 0000000..a0d82db --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv @@ -0,0 +1,120 @@ +TaxonomyInput nb_genomes Reads +729 1 219672 +1303 1 145134 +28037 1 107476 +726 1 55044 +712310 1 37632 +29466 1 35187 +1305 1 33435 +1313 1 31497 +727 1 27752 +1433513 1 19730 +1302 1 18874 +1019 1 16073 +851 1 15918 +544580 1 15015 +1655 1 13785 +488 1 13137 +1379 1 12100 +732 1 12017 +47671 1 11703 +1017 1 10214 +2598453 1 9379 +712357 1 7062 +113107 1 7054 +2718 1 6690 +172042 1 6224 +495 1 5757 +1750 1 4495 +29391 1 4237 +1656 1 4005 +2708117 1 3863 +249188 1 3805 +2663009 1 3586 +539 1 3311 +2047 1 3142 +487 1 3088 +483 1 3085 +1583098 1 3044 +739 1 2985 +712633 1 2950 +714 1 2883 +28449 1 2777 +39778 1 2706 +43675 1 2496 +1658672 1 2120 +28132 1 2079 +45634 1 1842 +28131 1 1642 +821 1 1305 +357276 1 1283 +84135 1 1228 +712710 1 1207 +1660 1 1197 +735 1 1197 +1316596 1 1178 +486 1 1121 +1304 1 1107 +837 1 1084 +485 1 1038 +1853278 1 971 +484 1 970 +39777 1 961 +1747 1 918 +28116 1 904 +2545799 1 752 +1705617 1 695 +248315 1 691 +817 1 688 +327575 1 632 +2528037 1 608 +1316593 1 606 +655307 1 588 +489 1 584 +326522 1 560 +1328 1 554 +78535 1 525 +1307 1 507 +712122 1 502 +43768 1 478 +157687 1 461 +820 1 448 +492 1 442 +2133944 1 440 +2666100 1 440 +860 1 439 +747 1 436 +2610896 1 413 +47678 1 409 +1343 1 404 +1338 1 403 +1853276 1 399 +2382163 1 385 +326523 1 379 +28129 1 375 +371601 1 372 +1156431 1 350 +157692 1 335 +712991 1 335 +109328 1 330 +1311 1 313 +1177574 1 300 +617123 1 294 +1906744 1 293 +818 1 293 +1308 1 272 +1211417 1 260 +76123 1 240 +1156433 1 235 +493 1 229 +28135 1 227 +33040 1 223 +1334 1 220 +36874 1 219 +199 1 218 +197575 1 213 +2528203 1 212 +738 1 209 +246787 1 206 +1282 1 202 +589873 1 201 diff --git a/data/hmp_templates/buccal_mucosa/SRS019352-sim.yml b/data/hmp_templates/buccal_mucosa/SRS019352-sim.yml new file mode 100644 index 0000000..384ae3a --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS019352-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS019352-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS019352-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 4 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv new file mode 100644 index 0000000..8fdccf0 --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv @@ -0,0 +1,224 @@ +TaxonomyInput nb_genomes Reads +729 1 4944831 +43675 1 382534 +28037 1 364409 +29466 1 284871 +726 1 223338 +727 1 183566 +1660 1 181707 +712633 1 135251 +1303 1 109737 +1313 1 98596 +249188 1 93822 +2663009 1 85108 +732 1 77349 +2610896 1 64167 +1433513 1 58217 +113107 1 57253 +1379 1 55130 +78535 1 49163 +2598453 1 44979 +488 1 38852 +2382163 1 36914 +739 1 36683 +712310 1 35328 +28449 1 31680 +39778 1 26357 +1156431 1 24006 +1156433 1 23065 +495 1 19666 +1852377 1 16418 +714 1 15952 +47671 1 15398 +84135 1 14120 +747 1 14055 +28132 1 13435 +1302 1 12108 +1304 1 11776 +1305 1 9802 +617123 1 9303 +29391 1 8933 +484 1 8455 +1906744 1 8271 +589873 1 7040 +837 1 6718 +483 1 6108 +487 1 5978 +45634 1 5596 +860 1 5494 +1017 1 5463 +39777 1 5197 +851 1 5081 +172042 1 4789 +735 1 3986 +712710 1 3907 +248315 1 3686 +544580 1 3423 +1307 1 3292 +1655 1 3155 +12402 1 3119 +712357 1 3019 +539 1 2885 +95486 1 2739 +2047 1 2636 +52773 1 2476 +28131 1 2408 +486 1 2389 +1328 1 2310 +1308 1 2306 +731 1 2265 +2490633 1 2085 +715 1 1978 +1583098 1 1967 +1338 1 1801 +36874 1 1732 +485 1 1728 +738 1 1718 +75985 1 1686 +1335 1 1676 +28129 1 1663 +1656 1 1657 +1177574 1 1656 +762 1 1613 +393921 1 1445 +2545799 1 1445 +1343 1 1417 +28188 1 1299 +327575 1 1242 +730 1 1219 +754 1 1203 +1019 1 1173 +28135 1 1140 +716 1 1094 +85404 1 990 +2718 1 958 +2133944 1 954 +1311 1 882 +584 1 879 +1314 1 832 +114527 1 816 +489 1 814 +1747 1 806 +1351 1 798 +2030797 1 793 +33033 1 753 +34085 1 746 +1334 1 735 +123824 1 725 +718 1 715 +1432056 1 688 +199 1 682 +189834 1 678 +492670 1 676 +2606626 1 676 +2576606 1 638 +51161 1 635 +1839799 1 626 +817 1 624 +197575 1 620 +82348 1 596 +85402 1 595 +749 1 593 +728 1 584 +33040 1 553 +2364787 1 550 +51049 1 549 +1888195 1 529 +1309 1 524 +28901 1 520 +47678 1 493 +2585119 1 486 +1310 1 486 +821 1 484 +76123 1 482 +1496 1 480 +1491 1 464 +1585976 1 448 +204 1 444 +818 1 438 +2173853 1 436 +1358 1 431 +2652177 1 430 +1796646 1 424 +126385 1 421 +28113 1 409 +28119 1 408 +2679994 1 404 +1340 1 393 +1642646 1 392 +2021971 1 388 +315405 1 388 +1624 1 385 +2585118 1 385 +1825069 1 380 +29394 1 380 +253 1 379 +1346 1 377 +1349 1 368 +1701813 1 361 +2528037 1 359 +1316596 1 348 +339 1 344 +79329 1 340 +562 1 339 +1639 1 334 +1750 1 327 +2571750 1 321 +1448274 1 320 +1280 1 317 +190893 1 312 +157687 1 309 +655307 1 307 +1642647 1 305 +1032623 1 295 +47878 1 294 +670 1 292 +1348 1 290 +149016 1 287 +1599 1 287 +2708117 1 284 +720 1 282 +1352 1 281 +820 1 277 +44008 1 276 +1903686 1 271 +1329 1 267 +29549 1 266 +157924 1 263 +1326 1 263 +326522 1 262 +1260 1 259 +587 1 257 +326523 1 257 +680 1 257 +237258 1 254 +400065 1 253 +640030 1 252 +536441 1 252 +28116 1 251 +43768 1 251 +1814128 1 250 +303 1 249 +1359 1 246 +45243 1 242 +712122 1 242 +585 1 241 +2291597 1 240 +56 1 240 +40324 1 236 +246787 1 235 +201 1 232 +573 1 231 +250 1 230 +33945 1 229 +1428 1 228 +28118 1 227 +1912897 1 225 +1502 1 224 +554406 1 221 +1316593 1 218 +2756 1 218 +823 1 217 +470 1 213 +38293 1 205 +254 1 201 diff --git a/data/hmp_templates/buccal_mucosa/SRS050029-sim.yml b/data/hmp_templates/buccal_mucosa/SRS050029-sim.yml new file mode 100644 index 0000000..437e63d --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS050029-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS050029-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS050029-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 5 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv new file mode 100644 index 0000000..2c84ba3 --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv @@ -0,0 +1,305 @@ +TaxonomyInput nb_genomes Reads +28037 1 2577789 +1313 1 1010632 +729 1 634149 +1379 1 494603 +1433513 1 417463 +2598453 1 273577 +1303 1 246869 +726 1 171165 +28449 1 165802 +735 1 162050 +712633 1 99967 +727 1 84234 +2663009 1 78779 +113107 1 57658 +712310 1 56495 +484 1 53395 +1305 1 53297 +39778 1 41033 +84135 1 40530 +29466 1 39900 +29391 1 29498 +172042 1 27333 +1660 1 26587 +47671 1 25874 +1302 1 24406 +1019 1 22832 +1304 1 19514 +495 1 17691 +45634 1 17377 +487 1 17143 +488 1 17081 +732 1 16597 +78535 1 14786 +712357 1 14108 +43675 1 12409 +28131 1 12196 +249188 1 11077 +739 1 11044 +851 1 10700 +1307 1 9594 +28132 1 8795 +2382163 1 8560 +860 1 8484 +33033 1 8357 +2047 1 7872 +39777 1 7381 +2545799 1 6273 +1343 1 6156 +2610896 1 5782 +1583098 1 5717 +1156431 1 5629 +327575 1 5628 +486 1 5395 +483 1 5350 +617123 1 5296 +1338 1 5259 +1328 1 5177 +837 1 5008 +43768 1 4861 +1309 1 4459 +1017 1 4359 +1156433 1 4157 +2708117 1 4140 +12402 1 4054 +712991 1 4035 +75985 1 3689 +589873 1 3676 +738 1 3582 +714 1 3450 +189834 1 3449 +1308 1 3351 +199 1 3348 +85404 1 3286 +485 1 3203 +712710 1 3200 +1750 1 3146 +28135 1 3094 +539 1 2680 +747 1 2634 +109328 1 2529 +157924 1 2463 +248315 1 2387 +2718 1 2358 +357276 1 2336 +1566990 1 2332 +1316596 1 2171 +1656 1 2161 +1334 1 2151 +1311 1 2068 +1825069 1 2067 +544580 1 2032 +489 1 1943 +1177574 1 1928 +1314 1 1926 +1655 1 1919 +644007 1 1893 +1585976 1 1863 +1345 1 1796 +2030797 1 1779 +33040 1 1762 +715 1 1741 +1335 1 1672 +29394 1 1656 +1432056 1 1579 +204 1 1520 +34085 1 1508 +730 1 1508 +85402 1 1463 +120683 1 1424 +28116 1 1373 +1852377 1 1284 +2679994 1 1265 +718 1 1255 +157688 1 1228 +655307 1 1199 +712122 1 1195 +820 1 1159 +716 1 1151 +1839799 1 1122 +51049 1 1084 +2572087 1 1070 +1853278 1 1051 +1539298 1 1040 +123824 1 1028 +52773 1 1011 +76860 1 1002 +1280 1 972 +28129 1 964 +157687 1 957 +731 1 955 +1496 1 941 +1491 1 924 +371601 1 877 +554406 1 860 +1701811 1 860 +36874 1 844 +817 1 832 +76123 1 819 +1316593 1 815 +720 1 805 +393921 1 803 +1906744 1 792 +1705617 1 752 +824 1 735 +1032623 1 734 +2572088 1 721 +95486 1 713 +1340 1 711 +1639 1 710 +1351 1 710 +818 1 709 +754 1 686 +47678 1 662 +504 1 656 +1701814 1 629 +1358 1 620 +762 1 620 +821 1 615 +326522 1 611 +2666100 1 590 +237258 1 585 +492 1 574 +33959 1 553 +713059 1 542 +157692 1 540 +28118 1 535 +2528037 1 531 +1624 1 529 +253 1 527 +82348 1 526 +1352 1 513 +1747 1 510 +1853276 1 502 +2133944 1 501 +1888195 1 500 +1349 1 499 +749 1 497 +1448274 1 491 +147802 1 488 +1211417 1 468 +254 1 461 +1598 1 455 +536441 1 453 +712411 1 450 +1396 1 448 +315405 1 446 +197575 1 441 +728 1 438 +1329 1 438 +250 1 434 +238 1 432 +996 1 426 +2490633 1 422 +2583851 1 422 +326523 1 415 +1852374 1 407 +1884263 1 407 +246787 1 401 +1359 1 399 +44008 1 396 +149016 1 396 +1310 1 394 +150055 1 392 +2528203 1 391 +859 1 386 +64104 1 385 +1428 1 381 +1785995 1 381 +1701842 1 375 +1701813 1 375 +562 1 375 +2585118 1 371 +2751 1 367 +2713414 1 348 +1117645 1 346 +51161 1 346 +1465 1 343 +1502 1 341 +197 1 337 +584 1 336 +1282 1 335 +1855823 1 333 +493 1 327 +1492 1 323 +640030 1 319 +1448273 1 314 +2487072 1 309 +28091 1 298 +157691 1 298 +1493872 1 294 +1290 1 291 +1286 1 289 +28113 1 288 +1701837 1 287 +2606626 1 286 +28188 1 285 +28119 1 284 +2021971 1 283 +28901 1 278 +247 1 277 +1326 1 276 +1701828 1 275 +856 1 274 +45243 1 273 +1903686 1 271 +1348 1 267 +2585119 1 267 +28251 1 264 +38293 1 263 +421525 1 262 +1288 1 262 +283734 1 261 +34105 1 257 +1296 1 257 +2015076 1 255 +1721091 1 255 +1520 1 252 +28035 1 251 +2487065 1 249 +46867 1 248 +1404 1 248 +2364787 1 246 +1346 1 246 +1421 1 241 +492670 1 240 +2686361 1 239 +2594269 1 236 +1283 1 236 +33945 1 233 +2571750 1 232 +2576606 1 230 +1796646 1 230 +47715 1 230 +1911587 1 229 +114527 1 229 +1814128 1 229 +585 1 229 +1685010 1 229 +37482 1 228 +1124835 1 228 +182773 1 228 +1354 1 225 +1785996 1 225 +246 1 223 +1701824 1 222 +470 1 219 +400065 1 219 +1540872 1 219 +861 1 219 +1561 1 218 +51669 1 217 +1796613 1 217 +2494373 1 216 +558152 1 215 +266749 1 215 +29388 1 214 +53346 1 213 +1260 1 212 +1397 1 208 +2748 1 207 +587 1 206 +1324352 1 206 +2562451 1 202 +1241979 1 200 diff --git a/data/hmp_templates/buccal_mucosa/SRS054569-sim.yml b/data/hmp_templates/buccal_mucosa/SRS054569-sim.yml new file mode 100644 index 0000000..0a85edb --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS054569-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS054569-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS054569-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 6 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv new file mode 100644 index 0000000..a6239be --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv @@ -0,0 +1,108 @@ +TaxonomyInput nb_genomes Reads +28037 1 83452 +729 1 82441 +1303 1 40384 +43675 1 40208 +29466 1 23250 +1302 1 20266 +199 1 19898 +1313 1 18263 +28449 1 18151 +544580 1 16268 +1655 1 13912 +851 1 12758 +1433513 1 11806 +39778 1 11208 +1305 1 9800 +1379 1 8460 +28132 1 8273 +2663009 1 8186 +43768 1 7524 +712633 1 7320 +1660 1 6656 +1017 1 6598 +2598453 1 5860 +739 1 5761 +2047 1 5514 +1656 1 4890 +113107 1 4533 +726 1 4460 +29391 1 4376 +732 1 3826 +1019 1 3814 +2610896 1 3811 +1304 1 3746 +47671 1 3543 +727 1 3417 +1177574 1 3408 +39777 1 3214 +45634 1 2884 +488 1 2635 +2708117 1 2603 +84135 1 2363 +327575 1 2230 +2545799 1 2169 +172042 1 2165 +157687 1 2081 +28131 1 1603 +860 1 1492 +249188 1 1421 +1747 1 1413 +1583098 1 1392 +712310 1 1373 +712357 1 1294 +617123 1 1212 +484 1 1155 +539 1 1005 +554406 1 1002 +821 1 987 +78535 1 983 +495 1 948 +712122 1 839 +204 1 832 +208479 1 782 +1343 1 772 +28116 1 733 +824 1 676 +853 1 644 +735 1 632 +1316596 1 610 +818 1 607 +487 1 592 +714 1 561 +2382163 1 550 +1338 1 539 +109328 1 449 +28129 1 448 +1328 1 443 +28135 1 442 +52773 1 435 +1852377 1 431 +1156433 1 428 +114527 1 427 +589873 1 422 +157688 1 393 +1156431 1 390 +2564099 1 372 +47678 1 356 +157692 1 356 +1316593 1 333 +1335 1 332 +248315 1 330 +712710 1 314 +1705617 1 308 +1834205 1 288 +111015 1 281 +1839799 1 271 +52774 1 260 +1308 1 258 +1307 1 257 +486 1 248 +712538 1 244 +747 1 238 +52771 1 236 +76123 1 232 +1851395 1 225 +2718 1 222 +2081702 1 222 +713030 1 207 diff --git a/data/hmp_templates/buccal_mucosa/SRS056042-sim.yml b/data/hmp_templates/buccal_mucosa/SRS056042-sim.yml new file mode 100644 index 0000000..09e3eda --- /dev/null +++ b/data/hmp_templates/buccal_mucosa/SRS056042-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS056042-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS056042-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 7 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS1041031-sim.tsv b/data/hmp_templates/gut/SRS1041031-sim.tsv new file mode 100644 index 0000000..2c3be07 --- /dev/null +++ b/data/hmp_templates/gut/SRS1041031-sim.tsv @@ -0,0 +1,990 @@ +TaxonomyInput nb_genomes Reads +47678 1 1160133 +246787 1 972403 +820 1 838086 +821 1 653905 +239935 1 525437 +853 1 258585 +2528203 1 245987 +28116 1 191034 +818 1 176450 +2585118 1 110448 +371601 1 101518 +2479767 1 94825 +2109691 1 91855 +301301 1 89628 +2093857 1 88027 +1680 1 84905 +823 1 78209 +1550024 1 75858 +817 1 75500 +2109688 1 75210 +562 1 74798 +649756 1 64652 +329854 1 53588 +357276 1 50067 +244366 1 43277 +44742 1 39343 +28901 1 38324 +292800 1 38036 +74426 1 37123 +28118 1 35821 +33025 1 33557 +1160721 1 31999 +39488 1 27698 +1496 1 24483 +1796613 1 21392 +1681 1 20764 +2109687 1 19110 +2610895 1 15663 +2585119 1 14759 +28026 1 14549 +2364787 1 14435 +2109690 1 12135 +2086584 1 12046 +1297617 1 11405 +2584943 1 11289 +33038 1 11131 +294 1 11051 +28113 1 8728 +84112 1 8528 +208479 1 8091 +2093856 1 7721 +2086585 1 6651 +29347 1 6576 +626937 1 5714 +1335613 1 5712 +1834198 1 5252 +1871021 1 4771 +33035 1 4526 +28131 1 4433 +2025876 1 4292 +216816 1 4254 +1686 1 4149 +1796616 1 3568 +2093742 1 3513 +1834196 1 3341 +1042156 1 3256 +1352 1 3247 +1491 1 3171 +1912897 1 2928 +53442 1 2784 +2109692 1 2750 +28132 1 2712 +2662363 1 2631 +1736 1 2587 +47466 1 2579 +1685 1 2450 +253239 1 2426 +2507162 1 2396 +2564099 1 2382 +2041044 1 2267 +1307 1 2084 +1572656 1 2030 +1834207 1 2024 +40324 1 1911 +1308 1 1901 +84030 1 1869 +56 1 1830 +28129 1 1810 +28135 1 1774 +54571 1 1772 +303 1 1655 +85698 1 1654 +197 1 1622 +2109685 1 1616 +28119 1 1613 +1502 1 1528 +2606626 1 1369 +2003188 1 1360 +837 1 1347 +2507160 1 1345 +185007 1 1342 +316 1 1336 +1492 1 1334 +84110 1 1330 +712991 1 1327 +192 1 1290 +1177574 1 1281 +384 1 1278 +1076 1 1273 +587753 1 1202 +305 1 1197 +1852373 1 1181 +34073 1 1134 +2576606 1 1129 +2490939 1 1101 +2144175 1 1071 +644 1 1069 +907 1 1063 +851 1 1053 +1229621 1 1012 +1197717 1 1005 +195 1 1003 +287 1 950 +2696063 1 944 +1702221 1 898 +300 1 892 +615 1 890 +2133944 1 879 +712710 1 878 +358 1 874 +1501 1 866 +29343 1 854 +185008 1 843 +1642647 1 835 +76123 1 830 +106590 1 814 +29549 1 812 +79329 1 811 +1542 1 798 +184914 1 787 +217203 1 774 +2582419 1 774 +712411 1 771 +1796646 1 766 +1304 1 760 +1513 1 760 +617123 1 759 +28095 1 758 +2161821 1 746 +856 1 745 +36745 1 738 +84032 1 734 +622 1 731 +901 1 729 +502558 1 726 +573 1 717 +1909395 1 715 +1520 1 712 +1561 1 685 +79604 1 672 +94132 1 671 +1428 1 663 +2094242 1 663 +1505 1 656 +103733 1 650 +47763 1 644 +1216932 1 644 +1534 1 643 +46867 1 642 +33033 1 636 +169679 1 632 +654 1 631 +1888 1 627 +876 1 623 +1679721 1 606 +1280 1 606 +34 1 601 +135487 1 600 +2681984 1 597 +1716143 1 597 +35 1 594 +2583823 1 594 +82996 1 593 +82633 1 583 +2656914 1 582 +2528023 1 580 +44251 1 577 +39950 1 569 +1270 1 568 +1396 1 562 +1969 1 554 +103855 1 553 +89152 1 551 +121719 1 548 +2663009 1 547 +1063 1 540 +37326 1 539 +48 1 537 +187327 1 532 +1642646 1 532 +1806508 1 531 +29341 1 529 +1903704 1 524 +317 1 523 +2674991 1 522 +562959 1 518 +69 1 515 +1531429 1 512 +2320868 1 509 +182773 1 509 +314722 1 508 +406548 1 507 +37329 1 502 +1683 1 501 +2583377 1 501 +1548 1 498 +1660 1 496 +1636152 1 495 +317577 1 494 +394958 1 492 +37332 1 492 +1653480 1 490 +114 1 490 +380021 1 489 +641491 1 488 +197614 1 487 +713030 1 486 +1884263 1 485 +87883 1 476 +382 1 474 +67304 1 473 +2599308 1 472 +210 1 469 +755731 1 468 +649831 1 466 +51515 1 465 +1841249 1 464 +860235 1 463 +2704462 1 462 +1398 1 461 +1712675 1 461 +1424294 1 460 +888845 1 460 +1834205 1 459 +2653857 1 458 +2018025 1 457 +32002 1 457 +1517 1 453 +2082193 1 452 +28025 1 452 +1351 1 452 +54006 1 450 +1117645 1 450 +861299 1 450 +408 1 449 +13690 1 447 +1586287 1 445 +496014 1 443 +2597769 1 443 +1299 1 442 +1275 1 440 +28448 1 439 +1768108 1 438 +83656 1 438 +1871025 1 435 +2487118 1 434 +1639 1 433 +1488 1 433 +253 1 432 +1465 1 431 +980427 1 430 +1761012 1 429 +2014 1 427 +35554 1 427 +95486 1 426 +433296 1 426 +80869 1 426 +2579971 1 426 +1406 1 425 +712538 1 424 +2321403 1 423 +1744 1 422 +292 1 422 +588932 1 421 +2697030 1 420 +114527 1 420 +645 1 419 +61647 1 417 +1791 1 415 +1411117 1 413 +1536773 1 413 +1393 1 412 +548 1 412 +96344 1 411 +1504 1 411 +43 1 410 +2483110 1 409 +52 1 408 +169760 1 407 +2576307 1 407 +1219491 1 407 +2202148 1 405 +1358 1 405 +404589 1 405 +2126346 1 404 +223967 1 404 +1725 1 404 +1694 1 402 +1624 1 401 +321983 1 399 +1423 1 399 +2068654 1 398 +49319 1 396 +1912 1 395 +1356852 1 395 +347534 1 394 +55518 1 394 +2694929 1 394 +528244 1 394 +946333 1 393 +337 1 392 +1134687 1 392 +116090 1 391 +2448483 1 391 +2507159 1 389 +2081703 1 388 +1297742 1 388 +1871034 1 388 +2571749 1 387 +1689 1 387 +2607656 1 386 +443143 1 386 +57704 1 385 +861 1 384 +179636 1 384 +2488560 1 383 +847 1 383 +1838286 1 383 +794903 1 383 +1710 1 383 +119219 1 381 +1836 1 380 +1282 1 380 +2303331 1 379 +1166950 1 379 +34105 1 379 +1658665 1 379 +1108595 1 378 +930166 1 378 +94 1 377 +2292766 1 377 +55583 1 376 +1823 1 374 +1498 1 373 +296842 1 373 +298654 1 372 +1716543 1 371 +1661 1 370 +879274 1 369 +217204 1 368 +553814 1 368 +927083 1 366 +2681766 1 366 +1536772 1 366 +53408 1 366 +2293838 1 365 +2681549 1 364 +1972068 1 363 +2220096 1 363 +1804986 1 363 +1433126 1 363 +2678688 1 362 +2507161 1 362 +35806 1 362 +1555112 1 359 +947516 1 358 +1334193 1 358 +29430 1 357 +2546351 1 357 +36809 1 356 +2014887 1 356 +446 1 356 +84531 1 355 +346 1 355 +2697032 1 355 +332101 1 354 +68570 1 354 +285 1 354 +1855912 1 353 +1421 1 353 +1703 1 352 +208962 1 350 +339 1 347 +1494 1 346 +571 1 346 +946334 1 346 +353 1 346 +550 1 345 +1889813 1 344 +2711231 1 344 +2704463 1 342 +2024580 1 342 +65741 1 341 +2509456 1 341 +1590 1 341 +926 1 340 +1182571 1 340 +1851544 1 340 +582 1 340 +78587 1 339 +859 1 339 +1462996 1 338 +2697043 1 338 +1915 1 337 +2010972 1 337 +1525 1 336 +1397 1 336 +128780 1 336 +29552 1 336 +1391654 1 335 +106592 1 332 +1575 1 331 +68895 1 331 +2604421 1 329 +2602070 1 328 +470934 1 328 +1408 1 328 +1126833 1 328 +1314 1 328 +1404 1 327 +542 1 327 +35760 1 326 +1311 1 326 +1670830 1 325 +2136401 1 325 +536 1 325 +1305 1 324 +1824 1 323 +110937 1 323 +1519 1 323 +709810 1 323 +1677857 1 321 +1896961 1 321 +2497861 1 321 +47671 1 320 +996 1 319 +2045452 1 317 +79263 1 317 +2072590 1 316 +1749 1 315 +1598 1 315 +1307763 1 315 +2548456 1 314 +1047172 1 314 +1842727 1 314 +623 1 313 +747 1 313 +189381 1 313 +945844 1 313 +2126319 1 313 +864828 1 313 +274 1 312 +1613 1 312 +28450 1 311 +652764 1 311 +55508 1 311 +1401 1 311 +380 1 311 +237610 1 311 +584 1 311 +2315862 1 310 +2109913 1 309 +2502781 1 309 +871742 1 308 +1404864 1 308 +1678028 1 307 +2319843 1 307 +477641 1 307 +1644131 1 307 +1484118 1 307 +321984 1 306 +426117 1 305 +1823759 1 305 +2560053 1 304 +158822 1 303 +1967 1 302 +1411621 1 302 +393921 1 301 +1909732 1 301 +83655 1 301 +1927 1 300 +2061 1 300 +463025 1 300 +1632864 1 299 +2653200 1 299 +455432 1 299 +1562970 1 299 +66425 1 299 +2583243 1 299 +408015 1 298 +1914461 1 298 +1758194 1 297 +57975 1 297 +1608957 1 297 +414771 1 297 +2233999 1 297 +1208324 1 296 +1079 1 296 +2074 1 295 +68270 1 294 +66871 1 294 +1779382 1 293 +1748 1 293 +2173034 1 293 +2026787 1 292 +311180 1 292 +68214 1 292 +1571 1 291 +661488 1 291 +69666 1 290 +47880 1 290 +1986146 1 289 +94624 1 289 +37928 1 289 +947919 1 289 +358220 1 289 +37927 1 289 +745310 1 288 +324057 1 288 +2585135 1 288 +1901 1 287 +1616117 1 287 +40318 1 286 +47885 1 286 +649196 1 286 +2116657 1 286 +1168034 1 286 +1077946 1 285 +2282738 1 285 +564 1 284 +93220 1 284 +259 1 283 +59814 1 283 +2560010 1 283 +1940610 1 282 +146919 1 282 +1907 1 281 +2502779 1 281 +75105 1 280 +1260 1 280 +1679497 1 280 +39645 1 279 +2686304 1 279 +511 1 279 +964 1 279 +1885 1 279 +1582259 1 278 +2283195 1 278 +2528008 1 278 +1497 1 278 +674703 1 277 +1851505 1 277 +120683 1 277 +76759 1 277 +147802 1 277 +2652443 1 277 +92947 1 276 +795665 1 275 +1250539 1 275 +396 1 275 +2712223 1 274 +2498135 1 274 +2602015 1 274 +157687 1 274 +580165 1 274 +38289 1 274 +2528009 1 274 +61435 1 273 +2599640 1 273 +436515 1 272 +1788 1 272 +1416803 1 272 +1536774 1 272 +218208 1 272 +1842534 1 271 +323450 1 270 +1141883 1 269 +76853 1 269 +169430 1 269 +2528010 1 269 +29449 1 268 +556288 1 268 +2059672 1 268 +1830 1 268 +470 1 268 +756892 1 268 +1603606 1 268 +1810504 1 267 +1940 1 267 +2597701 1 267 +254 1 267 +656366 1 267 +76731 1 267 +1658672 1 266 +1616788 1 266 +1765049 1 266 +1736675 1 266 +449461 1 266 +2496836 1 265 +33959 1 265 +1077935 1 264 +571177 1 264 +2027405 1 264 +443144 1 264 +38300 1 264 +68209 1 264 +1387353 1 264 +51669 1 263 +2377 1 263 +2005884 1 263 +1257021 1 263 +1870820 1 263 +511062 1 262 +123899 1 262 +33050 1 262 +1799 1 262 +1349819 1 261 +1806905 1 261 +748280 1 261 +2579935 1 261 +2483799 1 261 +2009329 1 260 +538381 1 260 +345632 1 260 +1612173 1 259 +1658671 1 259 +1385663 1 259 +2789776 1 259 +471827 1 259 +155892 1 259 +670 1 259 +549 1 258 +1335 1 258 +375 1 258 +652 1 258 +2055 1 257 +2006 1 257 +158847 1 257 +1850093 1 256 +1583098 1 255 +2571747 1 255 +1893 1 254 +1792 1 254 +279826 1 254 +196162 1 253 +463040 1 253 +1750 1 253 +67267 1 252 +530584 1 252 +85643 1 252 +1714373 1 252 +1639348 1 251 +1679444 1 251 +2584940 1 251 +1772 1 251 +29382 1 251 +1663591 1 251 +2066070 1 251 +1416806 1 251 +1173025 1 251 +147645 1 251 +288000 1 250 +2488809 1 250 +661481 1 250 +481743 1 250 +1484116 1 249 +47883 1 249 +2161816 1 249 +1870819 1 249 +400092 1 248 +43675 1 248 +536441 1 248 +1940612 1 248 +151783 1 247 +47850 1 247 +1463165 1 247 +31958 1 247 +193462 1 247 +236753 1 247 +244566 1 247 +1511 1 246 +68175 1 246 +682998 1 246 +1452 1 246 +2594003 1 246 +1390395 1 246 +2610896 1 245 +298653 1 245 +36818 1 245 +347 1 245 +51101 1 244 +84022 1 244 +2109914 1 244 +2233851 1 244 +2527982 1 244 +686597 1 244 +1620215 1 244 +1579979 1 243 +1930071 1 243 +41899 1 243 +208439 1 242 +83263 1 242 +257708 1 242 +1379270 1 242 +1795355 1 242 +392734 1 242 +1792508 1 242 +1589 1 242 +28035 1 242 +41977 1 242 +1331682 1 241 +47917 1 241 +28197 1 241 +36874 1 241 +1906741 1 240 +29542 1 240 +304207 1 239 +2598579 1 239 +1391653 1 239 +76758 1 239 +366602 1 239 +1867715 1 238 +60890 1 238 +1889 1 238 +1868589 1 238 +1592106 1 238 +556499 1 238 +28251 1 237 +2528018 1 237 +758793 1 237 +54291 1 237 +1548548 1 237 +2712222 1 237 +1850374 1 237 +29391 1 237 +2045208 1 237 +1850238 1 236 +266 1 236 +2703788 1 236 +1082851 1 236 +1552759 1 236 +1420917 1 236 +101571 1 236 +437897 1 236 +1178516 1 235 +2572036 1 235 +1849032 1 235 +82541 1 235 +1405 1 235 +1842539 1 235 +445710 1 234 +33945 1 234 +399497 1 234 +664962 1 234 +1290 1 234 +162496 1 234 +445576 1 234 +488447 1 234 +1882682 1 233 +2067960 1 233 +1134435 1 233 +1771 1 233 +274537 1 233 +2584944 1 233 +1977088 1 233 +1908 1 233 +162 1 233 +67780 1 232 +1905 1 232 +158836 1 232 +2478662 1 232 +2603206 1 232 +435897 1 232 +221822 1 232 +1756988 1 231 +216929 1 231 +42197 1 231 +2662361 1 230 +34004 1 230 +1248727 1 230 +2528035 1 230 +1332264 1 230 +564064 1 230 +1903186 1 230 +74829 1 230 +39152 1 230 +1191459 1 230 +1469502 1 229 +312306 1 228 +672 1 228 +1610493 1 228 +2060312 1 228 +33970 1 227 +1717717 1 227 +109328 1 227 +1541959 1 227 +2057025 1 227 +1355477 1 227 +2500532 1 227 +388950 1 226 +1690815 1 226 +748247 1 226 +2509675 1 226 +158500 1 226 +459526 1 225 +160799 1 225 +1176587 1 225 +2202826 1 225 +1389713 1 225 +2054 1 225 +571913 1 225 +392593 1 225 +35814 1 224 +180282 1 224 +2202254 1 223 +33940 1 223 +727 1 223 +1539298 1 223 +1549858 1 223 +362257 1 223 +1670831 1 222 +2213200 1 222 +2706887 1 222 +2202828 1 222 +1094 1 222 +1904616 1 222 +1309 1 222 +495 1 221 +2508168 1 221 +39695 1 221 +285570 1 221 +47421 1 221 +2561924 1 221 +33059 1 220 +33905 1 220 +1179673 1 220 +2665642 1 220 +2493672 1 220 +897 1 220 +134601 1 220 +2691571 1 219 +585455 1 219 +1932 1 219 +2057741 1 219 +33899 1 219 +2584122 1 219 +2171623 1 219 +296591 1 219 +262324 1 218 +68249 1 218 +1960 1 217 +2079536 1 217 +2490858 1 217 +2596828 1 217 +2304600 1 216 +450378 1 216 +1437453 1 216 +2211210 1 216 +168471 1 216 +959 1 216 +379347 1 216 +889453 1 216 +1061 1 216 +2293317 1 215 +698828 1 215 +2341117 1 215 +2675754 1 215 +540747 1 215 +2496837 1 215 +561061 1 215 +373 1 214 +329 1 214 +398053 1 214 +1560345 1 214 +319707 1 214 +61652 1 214 +1486262 1 214 +2174846 1 214 +1783515 1 214 +1340425 1 214 +178899 1 213 +1197460 1 213 +1778678 1 213 +479978 1 213 +2304594 1 212 +2045 1 212 +1697043 1 212 +1664069 1 212 +227605 1 212 +1858609 1 212 +69218 1 212 +2599293 1 211 +206506 1 211 +453783 1 211 +2591109 1 211 +2497860 1 211 +2184053 1 211 +2493669 1 210 +36813 1 210 +2704465 1 210 +2382161 1 210 +182710 1 210 +84135 1 210 +1426 1 210 +2707005 1 210 +28141 1 210 +2212991 1 209 +250 1 209 +146923 1 209 +2520506 1 209 +2599297 1 208 +1193713 1 208 +553510 1 208 +2562284 1 208 +335406 1 208 +1354 1 208 +66269 1 208 +238 1 207 +1176536 1 207 +1302 1 207 +2605945 1 207 +1482074 1 207 +477680 1 207 +2604832 1 207 +1357915 1 207 +1965282 1 207 +1912856 1 207 +2080419 1 207 +68203 1 207 +539 1 206 +162209 1 206 +484770 1 206 +2591145 1 206 +1690221 1 206 +2615203 1 205 +42253 1 205 +2033869 1 205 +442 1 205 +39960 1 205 +2672569 1 205 +1707785 1 204 +2651974 1 204 +2596949 1 204 +1617283 1 204 +216778 1 204 +2071627 1 203 +2571029 1 203 +482957 1 203 +53462 1 203 +2508882 1 203 +37919 1 203 +34085 1 203 +682798 1 203 +1916 1 203 +2057800 1 203 +158899 1 203 +1288 1 203 +364410 1 203 +2589074 1 202 +1404367 1 202 +45398 1 202 +279113 1 202 +414996 1 202 +1793 1 201 +1842536 1 201 +413882 1 201 +2686094 1 201 +2017484 1 201 +553 1 201 +2582905 1 201 +192812 1 201 +1276 1 201 +1842533 1 200 +1335048 1 200 +43767 1 200 +1659 1 200 +2259340 1 200 +189425 1 200 +1526658 1 200 diff --git a/data/hmp_templates/gut/SRS1041031-sim.yml b/data/hmp_templates/gut/SRS1041031-sim.yml new file mode 100644 index 0000000..07c8db3 --- /dev/null +++ b/data/hmp_templates/gut/SRS1041031-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS1041031-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS1041031-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 11 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS1041129-sim.tsv b/data/hmp_templates/gut/SRS1041129-sim.tsv new file mode 100644 index 0000000..eca3355 --- /dev/null +++ b/data/hmp_templates/gut/SRS1041129-sim.tsv @@ -0,0 +1,719 @@ +TaxonomyInput nb_genomes Reads +821 1 1575095 +820 1 741725 +817 1 607591 +818 1 551080 +853 1 434808 +28116 1 194573 +28118 1 166080 +823 1 146400 +2585119 1 128884 +239935 1 128802 +357276 1 127178 +371601 1 118565 +2528203 1 109742 +28901 1 87286 +2364787 1 85050 +2585118 1 72630 +301301 1 43970 +244366 1 42077 +1160721 1 39316 +1796613 1 39187 +2093857 1 30354 +1550024 1 29168 +292800 1 27242 +2093856 1 26388 +47678 1 20130 +2109688 1 20087 +1496 1 19090 +649756 1 17372 +329854 1 15130 +2479767 1 15108 +2086584 1 14664 +626937 1 14451 +2109691 1 13611 +33038 1 13402 +294 1 12606 +74426 1 12291 +2109687 1 11924 +2025876 1 11725 +2564099 1 8915 +2610895 1 8438 +246787 1 8321 +208479 1 7004 +39488 1 6876 +2109690 1 6719 +28113 1 5984 +33035 1 5612 +2499213 1 5247 +1352 1 4879 +1297617 1 4558 +2584943 1 4392 +2662363 1 4162 +29347 1 3745 +1871021 1 3563 +84030 1 3143 +2584944 1 3073 +1834198 1 2959 +1491 1 2853 +2086585 1 2645 +876 1 2608 +28119 1 2531 +28131 1 2423 +2606626 1 2394 +1912897 1 2393 +1834196 1 2280 +84112 1 2042 +2507162 1 2016 +1736 1 1918 +253239 1 1826 +2490939 1 1770 +2093742 1 1661 +1492 1 1595 +1335613 1 1484 +2109692 1 1456 +837 1 1446 +562 1 1438 +1834207 1 1379 +56 1 1373 +2041044 1 1355 +1796616 1 1297 +2576606 1 1273 +303 1 1258 +40324 1 1242 +54571 1 1226 +1572656 1 1201 +712710 1 1169 +1502 1 1167 +644 1 1151 +2696063 1 1132 +1307 1 1122 +85698 1 1100 +2494234 1 1093 +1796646 1 1070 +316 1 1054 +1679444 1 1044 +1076 1 1025 +851 1 1004 +1042156 1 989 +79329 1 981 +34073 1 967 +384 1 963 +192 1 957 +2173 1 948 +185007 1 946 +1642647 1 930 +185008 1 923 +28026 1 921 +28129 1 896 +1642646 1 887 +587753 1 854 +216816 1 849 +2109685 1 844 +53442 1 840 +28037 1 831 +305 1 829 +615 1 821 +29343 1 796 +1685 1 790 +1932 1 775 +28132 1 773 +358 1 763 +197 1 762 +29549 1 762 +29430 1 752 +1396 1 742 +1712675 1 723 +1358 1 715 +617123 1 700 +287 1 699 +28095 1 699 +2133944 1 693 +861299 1 693 +1501 1 693 +1702221 1 690 +1197717 1 670 +1280 1 670 +1911586 1 665 +2144175 1 657 +195 1 642 +2448483 1 642 +84110 1 638 +300 1 633 +907 1 625 +1852373 1 622 +1428 1 611 +847 1 601 +2003188 1 593 +253 1 584 +36745 1 583 +184914 1 582 +1216932 1 579 +1561 1 579 +901 1 577 +1542 1 570 +1229621 1 568 +1423 1 564 +46867 1 563 +79880 1 557 +2487118 1 539 +756892 1 535 +1356852 1 525 +2321403 1 523 +82633 1 519 +87883 1 513 +712991 1 513 +2583377 1 510 +1912 1 509 +47763 1 508 +1520 1 506 +459526 1 501 +542 1 499 +1351 1 493 +217203 1 489 +2094242 1 488 +1909395 1 488 +502558 1 487 +39152 1 486 +1513 1 479 +406548 1 479 +114 1 478 +95486 1 477 +1639 1 474 +1888 1 472 +382 1 471 +106590 1 470 +29552 1 469 +2604421 1 465 +2694929 1 464 +317 1 460 +1398 1 460 +28135 1 459 +1534 1 458 +2663009 1 456 +28448 1 455 +1282 1 454 +1624 1 452 +169679 1 451 +1484116 1 451 +2320868 1 449 +1406 1 448 +292 1 448 +1484118 1 447 +888845 1 443 +654 1 443 +755731 1 437 +1313 1 437 +103733 1 435 +856 1 435 +2014 1 435 +33033 1 434 +33959 1 434 +182773 1 433 +210 1 432 +1314 1 431 +44742 1 424 +1830 1 423 +2697030 1 421 +1686 1 420 +28251 1 419 +2656914 1 415 +1679721 1 413 +2596890 1 412 +996 1 410 +48 1 410 +2082193 1 409 +135487 1 408 +1575 1 407 +1838286 1 406 +1969 1 401 +84032 1 397 +2607656 1 394 +34 1 393 +1311 1 392 +29341 1 389 +37326 1 388 +1391654 1 388 +1517 1 385 +47671 1 385 +1393 1 384 +2507160 1 382 +1590 1 380 +1411621 1 378 +960 1 375 +2711231 1 374 +1680 1 373 +69 1 373 +2583823 1 372 +64104 1 371 +197614 1 370 +1452 1 370 +83656 1 369 +2599308 1 367 +2528023 1 366 +794903 1 365 +864828 1 365 +337 1 365 +394958 1 364 +1063 1 364 +2494373 1 364 +37329 1 362 +2598579 1 361 +13690 1 361 +2576307 1 358 +1117645 1 358 +1408 1 356 +1636152 1 351 +2496866 1 350 +1505 1 350 +2173034 1 350 +550 1 347 +28198 1 347 +1804986 1 346 +1648 1 346 +103855 1 345 +94132 1 343 +96344 1 343 +223967 1 343 +78587 1 342 +321983 1 342 +573 1 342 +713030 1 340 +1359 1 340 +2202148 1 338 +189381 1 337 +380021 1 337 +1541959 1 335 +1304 1 335 +528244 1 333 +446 1 333 +1376 1 332 +1884263 1 331 +121719 1 331 +1421 1 330 +1681 1 330 +633807 1 328 +712411 1 325 +1716143 1 324 +323450 1 323 +2528018 1 322 +76123 1 322 +661488 1 321 +119219 1 321 +582 1 320 +649831 1 319 +1433126 1 319 +2068654 1 318 +79263 1 318 +35 1 318 +1297742 1 317 +2653857 1 317 +1504 1 317 +51515 1 317 +1178516 1 316 +1586287 1 315 +128780 1 315 +1779382 1 315 +1411117 1 315 +34105 1 314 +2681766 1 314 +33970 1 313 +729 1 313 +43 1 312 +179636 1 312 +1424294 1 312 +29385 1 312 +433296 1 310 +51669 1 309 +1275 1 309 +75105 1 307 +147802 1 307 +93220 1 307 +2071627 1 306 +1308 1 306 +44251 1 306 +2502781 1 306 +1598 1 306 +182710 1 305 +641491 1 305 +329 1 305 +1177574 1 305 +871742 1 305 +55518 1 303 +29391 1 303 +408 1 302 +169760 1 301 +1141883 1 301 +1562970 1 300 +32002 1 299 +414771 1 299 +201 1 298 +147645 1 297 +1531429 1 297 +1850093 1 297 +67304 1 296 +1465 1 296 +1613 1 296 +1299 1 295 +1079 1 295 +1498 1 295 +1791 1 295 +861 1 294 +945844 1 294 +859 1 293 +1488 1 293 +2024580 1 293 +1841249 1 292 +496014 1 290 +54006 1 290 +1911683 1 289 +116090 1 289 +712538 1 289 +353 1 289 +61647 1 288 +1806508 1 288 +2045 1 288 +2602015 1 287 +2704462 1 287 +1679497 1 287 +187327 1 287 +298654 1 287 +558152 1 286 +34085 1 286 +1703 1 286 +33050 1 286 +1302 1 286 +158822 1 285 +1245 1 285 +436515 1 284 +49319 1 284 +1402861 1 283 +1270 1 283 +324057 1 283 +339 1 283 +927083 1 282 +392734 1 282 +1168034 1 281 +146919 1 281 +263 1 280 +110937 1 280 +55508 1 278 +2026787 1 277 +89152 1 277 +2126319 1 277 +553814 1 277 +1716543 1 276 +985762 1 276 +1219491 1 276 +155892 1 276 +443144 1 275 +529 1 275 +2081703 1 275 +2070347 1 274 +332101 1 274 +237258 1 274 +2662397 1 273 +75985 1 273 +1303 1 273 +2015076 1 273 +1710 1 272 +1707785 1 271 +66871 1 271 +36809 1 270 +1482 1 270 +1660 1 269 +1404 1 269 +1254 1 269 +2018025 1 269 +54005 1 269 +1397 1 268 +1482074 1 268 +571 1 268 +1903704 1 268 +53408 1 267 +2184053 1 267 +2315862 1 267 +1166950 1 267 +60890 1 266 +1761012 1 266 +2571746 1 265 +1927 1 265 +1548 1 265 +511 1 265 +314722 1 265 +470 1 264 +980427 1 264 +1911684 1 264 +2496028 1 264 +76853 1 264 +393921 1 264 +453783 1 264 +408015 1 263 +94 1 263 +1494 1 263 +2233999 1 263 +1608957 1 261 +254 1 261 +106592 1 261 +1644131 1 261 +1915 1 261 +1387353 1 261 +29382 1 261 +2072590 1 260 +1914461 1 260 +2520506 1 260 +1290 1 260 +784 1 259 +37332 1 259 +28450 1 259 +39960 1 258 +2045208 1 257 +79604 1 256 +68570 1 256 +47466 1 256 +2584940 1 256 +217204 1 255 +45398 1 255 +39645 1 255 +1855912 1 255 +470934 1 255 +1462996 1 255 +1677857 1 254 +255507 1 254 +47917 1 254 +264636 1 253 +380 1 253 +437897 1 252 +1727164 1 252 +2009329 1 252 +2674991 1 252 +162 1 252 +2572036 1 251 +2502779 1 251 +926 1 250 +562959 1 250 +2483110 1 250 +1847728 1 249 +68895 1 249 +28110 1 248 +61435 1 248 +2571749 1 248 +2582419 1 248 +1583098 1 248 +28025 1 247 +548 1 247 +346 1 247 +477641 1 247 +443143 1 246 +445710 1 246 +1905 1 246 +1211326 1 246 +2663022 1 245 +709810 1 245 +2583851 1 245 +1907 1 245 +2706887 1 245 +76759 1 244 +2602016 1 244 +274537 1 244 +2292766 1 244 +860235 1 244 +2282738 1 244 +1658665 1 244 +2507159 1 243 +1852022 1 243 +1328 1 243 +946333 1 243 +463040 1 242 +1525 1 242 +1509 1 241 +57975 1 241 +2057025 1 241 +1324352 1 241 +2584122 1 240 +2528009 1 240 +571177 1 240 +536441 1 240 +1736675 1 240 +31958 1 240 +2377 1 239 +2212991 1 238 +947919 1 238 +2014887 1 238 +1725 1 237 +879274 1 236 +1940610 1 236 +2057741 1 236 +109328 1 236 +173366 1 236 +666 1 235 +1896961 1 235 +1283 1 235 +1717717 1 235 +250 1 235 +2560053 1 235 +114527 1 235 +582702 1 235 +959 1 234 +1334193 1 234 +1288 1 233 +2579971 1 233 +1296 1 232 +455432 1 232 +2220096 1 232 +1744 1 232 +1852374 1 231 +52 1 231 +2583243 1 231 +2527962 1 230 +244566 1 230 +588932 1 229 +2303331 1 229 +2488809 1 229 +1126833 1 229 +1536774 1 229 +1571 1 229 +1901 1 228 +404589 1 228 +270351 1 228 +1836 1 228 +1823759 1 228 +463025 1 228 +2507161 1 228 +496057 1 228 +283686 1 227 +652764 1 227 +2528035 1 227 +1176533 1 226 +347 1 226 +1616117 1 226 +2602070 1 226 +1587 1 226 +1783515 1 226 +96345 1 225 +686597 1 225 +266 1 225 +1355477 1 225 +482462 1 225 +2713573 1 224 +74829 1 224 +57704 1 224 +157687 1 224 +1694 1 224 +1405 1 223 +2498135 1 223 +2074 1 223 +1960 1 223 +84531 1 223 +208439 1 222 +312306 1 222 +2419771 1 222 +1748 1 222 +1823 1 222 +101571 1 222 +2704463 1 221 +173 1 221 +1379909 1 221 +238 1 221 +158500 1 221 +1470176 1 220 +33932 1 220 +375 1 220 +1108595 1 220 +176102 1 220 +36874 1 219 +889453 1 219 +1257021 1 218 +538381 1 218 +29466 1 218 +2675754 1 217 +2497860 1 217 +584 1 217 +2548456 1 217 +281093 1 216 +738 1 216 +37482 1 216 +347534 1 216 +652 1 215 +1867715 1 215 +672 1 215 +285 1 215 +1400053 1 215 +795665 1 215 +862126 1 215 +60552 1 214 +2697032 1 214 +1511 1 214 +2055 1 214 +47885 1 214 +1889813 1 214 +564064 1 213 +2692425 1 213 +111015 1 212 +61652 1 212 +2061 1 212 +1870820 1 212 +36818 1 212 +174633 1 212 +1255 1 211 +645 1 211 +39950 1 211 +80869 1 211 +104087 1 211 +1930071 1 211 +682998 1 211 +1077935 1 211 +1664069 1 210 +2054 1 210 +158836 1 210 +1834205 1 210 +561879 1 210 +1653480 1 210 +1851544 1 209 +109790 1 209 +35623 1 209 +1493872 1 209 +1905847 1 208 +2269374 1 208 +1702325 1 208 +400770 1 208 +1539298 1 208 +1871025 1 208 +2567934 1 208 +426117 1 207 +2582905 1 207 +748247 1 207 +1176587 1 207 +2488560 1 207 +2652443 1 206 +76731 1 206 +82541 1 206 +2010972 1 205 +192812 1 205 +488447 1 205 +1519 1 204 +2718 1 204 +2594003 1 204 +41899 1 204 +134601 1 204 +123899 1 204 +68270 1 204 +342113 1 203 +1842727 1 203 +2508882 1 203 +2419841 1 202 +2209 1 202 +39778 1 202 +2583452 1 202 +1191459 1 202 +2686304 1 202 +2528010 1 202 +2027860 1 202 +83655 1 201 +1401 1 201 +1426 1 201 +298653 1 201 +492670 1 201 +1416806 1 200 +946334 1 200 +73044 1 200 diff --git a/data/hmp_templates/gut/SRS1041129-sim.yml b/data/hmp_templates/gut/SRS1041129-sim.yml new file mode 100644 index 0000000..62ab56f --- /dev/null +++ b/data/hmp_templates/gut/SRS1041129-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS1041129-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS1041129-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 12 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS104912-sim.tsv b/data/hmp_templates/gut/SRS104912-sim.tsv new file mode 100644 index 0000000..acaf0c5 --- /dev/null +++ b/data/hmp_templates/gut/SRS104912-sim.tsv @@ -0,0 +1,1705 @@ +TaxonomyInput nb_genomes Reads +821 1 4835490 +853 1 456702 +820 1 412536 +357276 1 307114 +1160721 1 283872 +817 1 200698 +371601 1 176642 +28116 1 107974 +2093856 1 103672 +818 1 98052 +28131 1 77401 +1796613 1 73270 +823 1 70993 +28026 1 58948 +28129 1 50181 +2093857 1 42975 +2364787 1 40265 +729 1 40024 +28118 1 39888 +2606626 1 37978 +2109688 1 37578 +2585118 1 37426 +28132 1 35423 +28135 1 35317 +2109691 1 34199 +2528203 1 33761 +2585119 1 33235 +2494234 1 30720 +329854 1 30132 +1550024 1 25629 +301301 1 24901 +292800 1 23683 +74426 1 23106 +2479767 1 22545 +76123 1 19364 +1496 1 18431 +1796646 1 17045 +2109687 1 16520 +2133944 1 16519 +1642646 1 15730 +2584944 1 14418 +1177574 1 14306 +246787 1 14191 +2576606 1 13806 +28113 1 12324 +28119 1 12310 +47678 1 11206 +589873 1 11126 +2610895 1 10216 +39488 1 10036 +2086584 1 9895 +1686 1 9581 +1297617 1 7948 +626937 1 7844 +2662363 1 7574 +239935 1 7488 +837 1 6766 +33035 1 6556 +2109690 1 6320 +208479 1 5974 +712710 1 5793 +649756 1 5633 +33038 1 5286 +1871021 1 4888 +1912897 1 4694 +2564099 1 4663 +216816 1 4592 +2025876 1 4387 +28251 1 4363 +33025 1 4220 +29347 1 4218 +1017 1 4132 +1642647 1 3618 +2584943 1 3530 +1433126 1 2960 +79329 1 2925 +303 1 2889 +84030 1 2862 +1834196 1 2828 +1834198 1 2821 +1352 1 2719 +253 1 2710 +492670 1 2698 +2086585 1 2637 +36874 1 2605 +1042156 1 2574 +56 1 2479 +1150389 1 2456 +40324 1 2452 +316 1 2387 +1491 1 2347 +393921 1 2338 +34073 1 2213 +1736 1 2210 +1562970 1 2190 +847 1 2158 +384 1 2103 +2041044 1 2075 +1076 1 1982 +197 1 1973 +654 1 1972 +1796616 1 1965 +294 1 1896 +54571 1 1870 +562 1 1864 +1681 1 1808 +2093742 1 1805 +85698 1 1796 +1685 1 1710 +1311 1 1689 +587753 1 1655 +2094242 1 1639 +1492 1 1586 +1173026 1 1567 +2607656 1 1546 +731 1 1546 +358 1 1543 +2507162 1 1542 +84112 1 1524 +28901 1 1518 +287 1 1496 +1396 1 1469 +1168034 1 1393 +253239 1 1352 +644 1 1340 +727 1 1338 +39778 1 1325 +1400053 1 1312 +192 1 1290 +305 1 1284 +2175091 1 1280 +185007 1 1261 +2694929 1 1258 +1411621 1 1245 +1356852 1 1221 +726 1 1211 +388950 1 1209 +1680 1 1199 +184914 1 1192 +2321403 1 1179 +2604421 1 1177 +1166950 1 1169 +536441 1 1161 +1639133 1 1155 +889453 1 1149 +28095 1 1146 +400092 1 1132 +119219 1 1123 +2696063 1 1119 +907 1 1109 +1117645 1 1107 +2582419 1 1105 +300 1 1105 +2161821 1 1088 +29549 1 1087 +2502781 1 1080 +1178516 1 1064 +2109692 1 1062 +45243 1 1057 +2003188 1 1054 +82633 1 1038 +615 1 1028 +1834207 1 1025 +95486 1 1025 +217203 1 1017 +851 1 1014 +2706887 1 1012 +649196 1 1011 +84110 1 1010 +2319843 1 1006 +1484118 1 1005 +1019 1 1005 +1850093 1 985 +1398 1 979 +1229621 1 974 +996 1 972 +661488 1 959 +2144175 1 959 +39950 1 954 +2057025 1 947 +512763 1 946 +1572656 1 945 +210 1 943 +2109685 1 937 +2615203 1 914 +382 1 909 +41977 1 906 +2201271 1 903 +1335613 1 901 +1484116 1 899 +2584940 1 899 +34 1 898 +195 1 894 +2259595 1 891 +1712675 1 891 +61435 1 889 +1358 1 888 +87883 1 886 +185008 1 877 +317 1 877 +1502 1 872 +323450 1 871 +47671 1 868 +1314 1 868 +861299 1 865 +1307839 1 864 +2496028 1 855 +28025 1 849 +1307 1 849 +223967 1 845 +888845 1 838 +1660 1 838 +1428 1 829 +1385664 1 828 +43675 1 827 +1758194 1 825 +337 1 819 +2315862 1 815 +1751286 1 810 +249188 1 806 +69 1 804 +1852373 1 795 +2681766 1 794 +285 1 791 +29466 1 783 +712991 1 783 +43 1 778 +1063 1 773 +573 1 772 +1834519 1 768 +901 1 767 +670 1 766 +1305 1 765 +53442 1 761 +470 1 758 +2502779 1 758 +2507160 1 756 +106590 1 755 +511 1 751 +946333 1 749 +1520 1 733 +2694930 1 730 +1197717 1 730 +78587 1 730 +1280 1 727 +1501 1 726 +2711231 1 724 +101571 1 724 +1639 1 722 +96344 1 721 +2520506 1 719 +2305508 1 719 +398053 1 719 +94132 1 717 +2282170 1 716 +459526 1 713 +93220 1 712 +48 1 711 +797291 1 706 +1855912 1 704 +1888 1 702 +1534 1 700 +1393 1 700 +496056 1 693 +964 1 691 +1909395 1 691 +2663009 1 691 +1747 1 690 +1391654 1 690 +2029983 1 688 +1903704 1 688 +2294119 1 685 +487 1 685 +564064 1 685 +106592 1 683 +1141883 1 683 +2527962 1 681 +1304 1 680 +135487 1 679 +617123 1 677 +1679444 1 676 +446 1 676 +292 1 675 +1499308 1 672 +408 1 668 +1703 1 667 +1379910 1 665 +470934 1 665 +238 1 664 +29449 1 663 +28188 1 661 +146919 1 661 +794903 1 660 +75105 1 658 +2488560 1 657 +2212991 1 657 +1191459 1 656 +188932 1 655 +549 1 653 +36745 1 650 +959 1 650 +375 1 648 +1299 1 646 +1406 1 645 +114 1 644 +329 1 644 +672 1 643 +1257021 1 643 +2583377 1 641 +561061 1 641 +1729720 1 641 +179636 1 639 +516051 1 639 +28448 1 637 +738 1 635 +1717717 1 635 +2590021 1 634 +274537 1 633 +495 1 633 +1823759 1 631 +2494373 1 629 +747 1 628 +2703787 1 627 +1590 1 627 +2713573 1 626 +1838286 1 626 +36809 1 624 +1385663 1 622 +380 1 622 +1351 1 622 +463040 1 621 +1469502 1 620 +2320868 1 617 +1702221 1 616 +1683 1 613 +2268026 1 613 +1893 1 613 +47763 1 609 +2547394 1 608 +29343 1 607 +1806508 1 607 +548 1 606 +1282 1 602 +57975 1 600 +1986952 1 598 +79263 1 598 +1624 1 596 +76731 1 595 +103855 1 595 +85643 1 594 +67304 1 594 +2005884 1 593 +2576891 1 590 +1891094 1 587 +661481 1 586 +2676868 1 583 +748247 1 583 +1531429 1 582 +2172098 1 579 +897 1 577 +34085 1 577 +28450 1 577 +80869 1 575 +550 1 574 +927083 1 574 +1513 1 573 +2303331 1 571 +84531 1 569 +38313 1 567 +32002 1 567 +2704465 1 567 +254 1 566 +1216932 1 563 +2027860 1 562 +477680 1 562 +2528023 1 559 +1313 1 556 +1707785 1 555 +1423 1 554 +582 1 554 +1644131 1 553 +1550579 1 553 +47885 1 552 +443143 1 552 +1421 1 552 +496057 1 552 +2578106 1 551 +55508 1 550 +371142 1 550 +168471 1 547 +35 1 546 +347534 1 546 +2292766 1 541 +2341117 1 541 +2094025 1 539 +392734 1 539 +1912 1 538 +502558 1 536 +862126 1 536 +250 1 535 +856 1 534 +121719 1 532 +1658665 1 531 +83771 1 530 +553814 1 529 +105219 1 529 +53408 1 529 +1675686 1 529 +162 1 528 +1930273 1 527 +2698458 1 527 +511435 1 526 +926 1 526 +443144 1 525 +463025 1 524 +616991 1 524 +1267423 1 522 +1211326 1 522 +2021234 1 521 +1636152 1 520 +103733 1 520 +576610 1 520 +2654982 1 517 +1379870 1 517 +864828 1 516 +1661 1 516 +1355477 1 514 +84032 1 514 +2162713 1 512 +406548 1 512 +1884263 1 511 +2518177 1 510 +2171623 1 510 +795665 1 510 +61647 1 509 +37332 1 509 +1969 1 508 +2718 1 508 +79604 1 505 +876 1 505 +2209 1 504 +1416627 1 503 +1813871 1 503 +320787 1 502 +74829 1 500 +37326 1 499 +2557994 1 499 +2045208 1 498 +1303 1 497 +1297742 1 496 +28037 1 496 +859 1 494 +945844 1 494 +1592106 1 493 +404589 1 493 +380021 1 493 +29341 1 491 +413882 1 490 +930166 1 490 +217204 1 489 +44251 1 488 +2704462 1 486 +321983 1 485 +1904640 1 484 +1416806 1 484 +2571746 1 484 +1492737 1 484 +1465 1 483 +584 1 482 +2704463 1 482 +1505 1 480 +1906741 1 480 +93218 1 477 +57704 1 476 +2015076 1 476 +1379909 1 476 +37329 1 476 +1316596 1 476 +221822 1 474 +756892 1 474 +433296 1 474 +29494 1 473 +339 1 473 +463014 1 473 +542 1 473 +61652 1 473 +2527975 1 472 +732 1 471 +1526658 1 470 +1404 1 469 +645 1 468 +2562284 1 467 +1637999 1 466 +94 1 465 +63186 1 465 +128780 1 464 +13690 1 463 +2487118 1 463 +346 1 463 +2109913 1 460 +2605747 1 460 +2653857 1 460 +1334193 1 459 +1536772 1 459 +2066070 1 459 +2656914 1 458 +314722 1 456 +2202148 1 455 +112234 1 455 +1538644 1 455 +1972068 1 454 +674703 1 453 +1736675 1 453 +2058175 1 452 +1492898 1 452 +980427 1 451 +1678028 1 450 +1648923 1 448 +430522 1 448 +2061 1 448 +1120045 1 447 +259 1 446 +1613 1 446 +28449 1 446 +1265445 1 446 +2599401 1 446 +529 1 445 +1608957 1 444 +2027857 1 444 +1234841 1 443 +1108595 1 443 +279058 1 442 +1179672 1 442 +1536773 1 442 +1176587 1 441 +2527990 1 441 +347 1 440 +1891926 1 440 +39777 1 440 +879274 1 440 +1416803 1 440 +1842727 1 439 +1586287 1 439 +2183896 1 439 +52 1 438 +1779382 1 438 +2027405 1 438 +114527 1 438 +198107 1 438 +1404864 1 437 +1296669 1 437 +1219491 1 436 +1745854 1 436 +713030 1 436 +947919 1 436 +28197 1 436 +655015 1 436 +65058 1 435 +2126319 1 434 +1082851 1 434 +2712222 1 434 +190721 1 434 +2528026 1 433 +172042 1 432 +353 1 432 +94624 1 432 +1735038 1 431 +980251 1 430 +1940762 1 430 +1632864 1 430 +436515 1 428 +324057 1 427 +2014 1 427 +1679721 1 426 +110937 1 425 +1748 1 425 +2069432 1 425 +445710 1 424 +2014887 1 424 +2571748 1 424 +1677857 1 424 +1079 1 424 +43306 1 423 +1575 1 423 +356837 1 423 +2601898 1 423 +748280 1 422 +281093 1 422 +1795355 1 422 +712538 1 422 +488447 1 421 +1744 1 420 +1493872 1 420 +83656 1 419 +712411 1 419 +180282 1 418 +996801 1 418 +169430 1 417 +321984 1 416 +1716143 1 416 +530584 1 415 +437900 1 415 +2585771 1 415 +2507161 1 415 +717785 1 414 +588932 1 414 +2496266 1 414 +2594003 1 414 +571177 1 413 +1548 1 412 +1014 1 412 +1447062 1 411 +1010 1 411 +2015316 1 411 +1571 1 410 +2026787 1 410 +2697032 1 409 +197614 1 409 +2231055 1 408 +158822 1 407 +173 1 407 +55518 1 407 +187327 1 405 +1850526 1 405 +363852 1 405 +237609 1 404 +1697043 1 404 +83655 1 404 +438 1 403 +1807691 1 403 +666 1 402 +62322 1 402 +1792508 1 401 +1761453 1 401 +2602016 1 400 +421525 1 400 +1324352 1 399 +755731 1 399 +1882682 1 399 +573737 1 398 +1940612 1 398 +1784714 1 398 +145261 1 398 +96345 1 397 +1868589 1 397 +60847 1 397 +279113 1 397 +2678688 1 396 +123899 1 396 +496014 1 396 +1401 1 396 +2528018 1 396 +1694 1 395 +546 1 395 +182773 1 395 +83263 1 394 +2233851 1 394 +169760 1 394 +199 1 394 +2589080 1 393 +1308 1 393 +2211140 1 392 +1580 1 392 +442 1 392 +76758 1 391 +1235591 1 391 +1678728 1 391 +1915 1 390 +1270 1 390 +33050 1 389 +2545455 1 389 +528244 1 389 +2697043 1 389 +1714848 1 388 +2598579 1 387 +536 1 387 +28189 1 387 +2371 1 386 +2072590 1 386 +104087 1 386 +521 1 386 +1841249 1 386 +172045 1 386 +1124835 1 385 +60890 1 385 +1585976 1 385 +1542 1 385 +712310 1 385 +1926494 1 385 +57320 1 385 +1848904 1 384 +2528009 1 384 +189381 1 383 +2047 1 383 +39695 1 383 +204773 1 383 +1328 1 382 +538381 1 382 +1134435 1 381 +1842540 1 381 +683124 1 380 +1605891 1 380 +2597769 1 380 +349221 1 379 +312306 1 379 +2662364 1 379 +1197460 1 379 +2703789 1 378 +173366 1 378 +189426 1 378 +649831 1 377 +266 1 377 +1603606 1 377 +1834205 1 376 +1804624 1 376 +1768242 1 376 +69666 1 376 +49319 1 376 +871742 1 375 +93221 1 375 +66871 1 374 +2548456 1 374 +1710 1 374 +1630693 1 374 +426117 1 374 +2057741 1 374 +1829 1 374 +1679497 1 373 +157687 1 373 +52773 1 372 +35760 1 372 +562959 1 372 +396 1 371 +237610 1 371 +1664069 1 371 +51101 1 371 +2303538 1 371 +82996 1 371 +682798 1 371 +1791 1 370 +65741 1 370 +2528008 1 370 +1411117 1 370 +1772 1 370 +1804986 1 369 +2183911 1 369 +2571747 1 369 +201 1 368 +93222 1 368 +2054 1 367 +48296 1 367 +2713414 1 366 +34004 1 366 +1851148 1 366 +2591634 1 366 +46680 1 366 +169765 1 366 +758793 1 365 +147645 1 365 +1609758 1 365 +2579971 1 364 +1938605 1 363 +2674991 1 362 +33905 1 362 +2583823 1 362 +2686365 1 362 +481743 1 362 +1940610 1 362 +2509459 1 362 +539 1 361 +2233999 1 360 +571 1 359 +288000 1 359 +54006 1 358 +1598 1 358 +1869241 1 358 +2489595 1 358 +449461 1 358 +1306519 1 358 +2487072 1 357 +43768 1 357 +1836 1 357 +860235 1 356 +76759 1 356 +82380 1 356 +1702325 1 356 +2478552 1 356 +345632 1 355 +455432 1 354 +311180 1 353 +342113 1 353 +1463165 1 353 +2528010 1 352 +2663022 1 352 +44742 1 352 +1749 1 351 +656179 1 351 +82541 1 351 +1387353 1 351 +242600 1 350 +2582905 1 350 +2496847 1 350 +2499144 1 350 +373994 1 349 +1851544 1 348 +435897 1 348 +46867 1 348 +1309 1 348 +1892855 1 348 +2528021 1 348 +1686310 1 347 +358220 1 347 +1094 1 347 +151783 1 346 +2126553 1 346 +1210884 1 346 +446679 1 345 +2059672 1 345 +627192 1 345 +340345 1 345 +482957 1 345 +31958 1 344 +1823 1 344 +986 1 344 +1218801 1 344 +1689 1 344 +1850374 1 344 +47880 1 343 +2183582 1 343 +1325107 1 343 +1176533 1 342 +1379270 1 342 +1391653 1 342 +1462996 1 342 +204042 1 342 +1725 1 342 +2599640 1 342 +1248727 1 341 +2109914 1 341 +651561 1 341 +2045 1 340 +1933220 1 340 +2697030 1 340 +2579250 1 339 +412690 1 339 +2021862 1 339 +1930071 1 339 +2011159 1 338 +2572036 1 338 +2651974 1 338 +89152 1 338 +861 1 337 +1896961 1 337 +68895 1 337 +1420917 1 337 +122 1 337 +680 1 336 +946334 1 336 +709810 1 336 +35554 1 336 +587 1 336 +1561 1 336 +1907 1 335 +2703788 1 335 +1221500 1 335 +1214604 1 335 +414771 1 335 +379684 1 335 +1870820 1 335 +263 1 334 +59814 1 334 +221027 1 333 +2067960 1 333 +1986146 1 332 +38293 1 332 +298653 1 332 +115808 1 332 +1357916 1 331 +2602015 1 331 +237258 1 331 +271865 1 331 +1851395 1 331 +225992 1 331 +35806 1 331 +387093 1 331 +1905 1 330 +1579979 1 330 +108981 1 330 +676 1 329 +2587853 1 328 +37919 1 328 +1286181 1 328 +2518645 1 328 +1457365 1 328 +2282738 1 327 +2487892 1 327 +317577 1 327 +169427 1 327 +714 1 327 +2082386 1 326 +170679 1 326 +1716543 1 326 +207745 1 326 +39645 1 326 +244366 1 325 +2602769 1 325 +417280 1 325 +2593958 1 325 +47883 1 324 +2018025 1 324 +169679 1 324 +1761016 1 324 +1525 1 324 +1250539 1 323 +2651334 1 323 +1909732 1 323 +2527978 1 322 +1842539 1 322 +2705547 1 322 +1914471 1 322 +34062 1 322 +1795631 1 322 +1727164 1 321 +1241981 1 321 +106649 1 320 +1486262 1 320 +158500 1 320 +158836 1 320 +2699395 1 320 +326522 1 320 +2220096 1 319 +1390 1 319 +47917 1 319 +67267 1 319 +111781 1 318 +779 1 318 +51515 1 318 +71999 1 318 +41899 1 318 +2527995 1 318 +162209 1 318 +656366 1 318 +1936003 1 318 +1080068 1 317 +1359 1 317 +47421 1 317 +451876 1 316 +641491 1 316 +585455 1 316 +2603277 1 316 +913109 1 316 +2024580 1 316 +1921087 1 316 +1238 1 315 +2489213 1 315 +1821621 1 315 +33010 1 314 +1978339 1 314 +33932 1 314 +2060726 1 314 +396595 1 314 +68270 1 313 +1867715 1 313 +2213200 1 313 +2009329 1 312 +43770 1 312 +255247 1 312 +2528020 1 312 +1778540 1 312 +2527972 1 312 +935222 1 312 +1761012 1 311 +696485 1 311 +444444 1 311 +1911175 1 310 +2377 1 310 +445709 1 310 +332411 1 309 +2170729 1 309 +1136941 1 309 +68214 1 309 +2341112 1 309 +1255 1 308 +159 1 308 +1719 1 307 +1536774 1 306 +342950 1 306 +1985873 1 306 +160799 1 306 +93219 1 306 +2478663 1 305 +1842533 1 304 +1803846 1 304 +674 1 304 +260552 1 304 +43263 1 304 +1727163 1 304 +1494 1 303 +475662 1 303 +1750 1 302 +1126833 1 302 +687 1 302 +39960 1 302 +2712223 1 302 +68209 1 302 +2010972 1 301 +1302 1 300 +33059 1 300 +558152 1 300 +2488809 1 300 +208962 1 300 +1870819 1 300 +1077946 1 299 +630 1 299 +75985 1 299 +2528035 1 299 +279826 1 299 +2211108 1 298 +947516 1 298 +2565555 1 298 +1712516 1 298 +60548 1 298 +2126346 1 298 +126385 1 298 +298654 1 298 +327575 1 298 +1537400 1 298 +2495582 1 298 +1241982 1 298 +1452 1 297 +669041 1 297 +666685 1 297 +1685010 1 297 +1075 1 297 +54291 1 297 +1355330 1 296 +2587844 1 296 +42256 1 296 +2702 1 296 +1335048 1 296 +1331682 1 296 +56448 1 296 +1426 1 295 +374606 1 295 +313603 1 295 +104268 1 295 +1486034 1 295 +56454 1 294 +119858 1 294 +1908 1 294 +1901 1 294 +218208 1 294 +2497861 1 293 +453783 1 293 +437897 1 293 +400770 1 293 +876364 1 293 +1517 1 293 +1871025 1 293 +53462 1 293 +33959 1 293 +67780 1 293 +1534348 1 292 +2789776 1 292 +652 1 292 +434009 1 292 +1408 1 292 +1967 1 292 +483547 1 292 +2055 1 291 +1405 1 291 +1275 1 291 +79880 1 291 +69964 1 291 +29484 1 291 +1842537 1 290 +2527982 1 290 +453304 1 290 +29495 1 290 +379347 1 290 +2692425 1 289 +1663591 1 289 +69218 1 288 +1830 1 288 +206506 1 288 +682998 1 288 +28108 1 287 +1940 1 287 +2487063 1 287 +1724 1 287 +247 1 287 +531844 1 287 +2599297 1 286 +2249356 1 286 +44008 1 286 +2614639 1 286 +484 1 286 +2021 1 286 +2516557 1 285 +174633 1 285 +584609 1 285 +1842534 1 285 +1482074 1 285 +244734 1 285 +1529068 1 285 +2074 1 284 +1488 1 284 +158899 1 284 +192812 1 283 +2518599 1 283 +2509455 1 283 +702745 1 283 +161899 1 283 +69539 1 282 +2490858 1 282 +190148 1 282 +198620 1 282 +283686 1 282 +2602070 1 282 +1402135 1 282 +134537 1 281 +273384 1 281 +1383885 1 281 +69665 1 281 +1389192 1 281 +2487064 1 281 +96773 1 280 +1938606 1 280 +1778678 1 280 +1424294 1 280 +1930275 1 279 +1089444 1 279 +67260 1 279 +190893 1 279 +38300 1 279 +2202828 1 279 +1236179 1 278 +640512 1 278 +270351 1 278 +1260 1 278 +582702 1 278 +1539298 1 278 +1286180 1 278 +76853 1 278 +1774273 1 278 +2293317 1 277 +1853278 1 277 +109328 1 277 +1510150 1 277 +38289 1 277 +296842 1 277 +274 1 277 +2654191 1 277 +2082188 1 276 +434008 1 276 +262209 1 276 +1885 1 276 +434010 1 276 +146017 1 276 +2599308 1 275 +366602 1 275 +362257 1 275 +2662397 1 275 +1825976 1 274 +2184053 1 274 +1927 1 274 +663 1 274 +2597701 1 273 +1938604 1 273 +69395 1 273 +1494608 1 273 +1178778 1 272 +2420509 1 272 +297 1 272 +47770 1 272 +1560345 1 272 +1824 1 272 +2591109 1 271 +2596828 1 271 +1316593 1 271 +1549858 1 271 +95485 1 271 +1307763 1 271 +2509614 1 271 +1765049 1 271 +636 1 270 +2547600 1 270 +454 1 270 +2496866 1 270 +2496837 1 270 +645517 1 270 +1670830 1 270 +2704467 1 269 +689 1 269 +2033869 1 269 +1842536 1 269 +2202254 1 269 +160699 1 269 +45398 1 269 +2017482 1 268 +492 1 268 +1571470 1 268 +2494375 1 268 +420953 1 268 +1836467 1 267 +2049881 1 267 +2211211 1 267 +2282656 1 267 +1670831 1 267 +414996 1 266 +2419842 1 266 +2576307 1 266 +394958 1 266 +2594795 1 266 +1827580 1 266 +2202825 1 266 +1897729 1 266 +1478 1 265 +2173034 1 265 +2497860 1 265 +1653480 1 265 +290335 1 265 +2507538 1 265 +863372 1 264 +1508404 1 264 +540747 1 264 +1497 1 264 +1376 1 264 +2605945 1 264 +1261031 1 264 +511062 1 264 +134601 1 264 +68175 1 264 +2496836 1 263 +182710 1 263 +1176536 1 263 +1241978 1 263 +40214 1 262 +68570 1 262 +75385 1 262 +111015 1 261 +1758178 1 261 +424800 1 261 +686597 1 261 +33033 1 261 +1047172 1 261 +2591145 1 261 +1678128 1 261 +48074 1 261 +1081866 1 261 +1610 1 260 +193462 1 260 +155892 1 260 +2419771 1 260 +2026199 1 260 +1504 1 259 +1315974 1 259 +2571749 1 259 +518 1 259 +285570 1 259 +1031542 1 259 +2293862 1 259 +381 1 258 +104336 1 258 +1349819 1 258 +2497863 1 258 +2662261 1 258 +2017483 1 257 +29391 1 257 +1286 1 257 +39152 1 256 +2601677 1 256 +1354 1 256 +29430 1 255 +137591 1 255 +702113 1 254 +407035 1 253 +66425 1 253 +56459 1 253 +1397 1 253 +80842 1 253 +553 1 253 +55211 1 252 +2708117 1 252 +2509456 1 252 +321846 1 252 +1850238 1 252 +588 1 251 +1134687 1 251 +1390395 1 251 +742013 1 251 +337191 1 251 +488 1 251 +1582 1 251 +2711215 1 251 +251701 1 250 +2173169 1 250 +2597660 1 250 +1325095 1 250 +553510 1 250 +1740163 1 249 +2478662 1 249 +1609977 1 249 +356322 1 249 +56458 1 249 +1903186 1 249 +2498135 1 249 +40216 1 249 +1758689 1 249 +265959 1 248 +465721 1 248 +79967 1 248 +2681549 1 248 +1173025 1 248 +158847 1 248 +1889813 1 248 +1332264 1 248 +1935379 1 247 +2545799 1 247 +2599293 1 247 +1670641 1 247 +1402861 1 247 +2584122 1 247 +196 1 247 +392593 1 247 +257708 1 246 +135719 1 246 +1784713 1 246 +2604832 1 246 +43661 1 246 +391905 1 246 +1936080 1 246 +376489 1 246 +1515612 1 246 +1529069 1 245 +1717 1 245 +913107 1 245 +2487065 1 245 +319707 1 245 +29314 1 245 +2070347 1 245 +1851505 1 245 +2006110 1 245 +1714373 1 245 +1658672 1 244 +93064 1 244 +1283 1 244 +1536770 1 244 +2686094 1 244 +2665642 1 244 +2594269 1 244 +1582259 1 244 +408139 1 244 +1282737 1 244 +1868325 1 243 +207340 1 243 +655307 1 243 +1335 1 243 +300019 1 243 +656178 1 243 +61646 1 243 +60552 1 243 +1555112 1 243 +56455 1 243 +2528642 1 242 +739141 1 242 +1860122 1 242 +2572594 1 242 +1536771 1 242 +1914525 1 242 +2282475 1 242 +364410 1 242 +665099 1 241 +1936081 1 241 +2565782 1 241 +1570 1 241 +2161816 1 241 +1792307 1 241 +1788 1 241 +2653200 1 241 +335406 1 241 +1974213 1 240 +2109915 1 240 +2025949 1 240 +40576 1 240 +1784719 1 240 +304207 1 240 +82367 1 240 +1030157 1 240 +1850246 1 240 +129921 1 239 +262324 1 239 +482462 1 239 +1705617 1 239 +2382161 1 239 +692036 1 239 +1543721 1 239 +28141 1 239 +2605946 1 239 +2082949 1 239 +1072256 1 239 +1641402 1 238 +1482 1 238 +2529032 1 238 +1578165 1 238 +504 1 238 +45658 1 238 +2082385 1 238 +568768 1 238 +983548 1 238 +1327635 1 237 +556288 1 237 +252967 1 237 +1616117 1 237 +2500547 1 237 +1208324 1 237 +1193713 1 237 +296591 1 237 +2082193 1 237 +33945 1 237 +1597 1 237 +2712698 1 237 +1325115 1 237 +1442136 1 237 +147802 1 236 +1670800 1 236 +2660750 1 236 +227605 1 236 +2494374 1 236 +662548 1 236 +1581557 1 236 +980254 1 235 +1915078 1 235 +2304600 1 235 +477641 1 235 +1938607 1 235 +35814 1 235 +1333996 1 234 +51669 1 234 +2560053 1 234 +28151 1 234 +1523415 1 234 +399497 1 234 +2161747 1 233 +1612173 1 233 +744985 1 233 +1182571 1 233 +63 1 233 +454601 1 232 +1650658 1 232 +1389713 1 232 +266779 1 232 +556499 1 232 +1453352 1 232 +53346 1 232 +1149133 1 232 +1620215 1 231 +70775 1 231 +1340425 1 231 +2202149 1 231 +1639348 1 230 +2507159 1 230 +1810504 1 230 +1061 1 230 +43767 1 229 +292462 1 229 +2010829 1 229 +673 1 229 +1396826 1 229 +1325564 1 229 +1583098 1 229 +488729 1 228 +42253 1 228 +2565556 1 228 +409322 1 228 +248903 1 228 +408015 1 228 +319706 1 228 +29542 1 228 +194963 1 228 +2304594 1 228 +745310 1 228 +614 1 228 +2483798 1 227 +499555 1 227 +2587163 1 227 +13689 1 227 +36818 1 226 +1871037 1 226 +39691 1 226 +1476901 1 226 +119981 1 226 +1470176 1 226 +56453 1 226 +1891279 1 226 +2698684 1 225 +1968541 1 225 +2220095 1 225 +2057800 1 225 +1338368 1 224 +368607 1 224 +1917485 1 224 +1922217 1 224 +1768108 1 224 +2490851 1 224 +1617283 1 224 +2652443 1 223 +2018065 1 223 +314281 1 223 +1914461 1 223 +1534349 1 223 +2528007 1 223 +45634 1 223 +2496867 1 222 +35841 1 222 +1241979 1 222 +1771309 1 222 +36822 1 222 +260554 1 222 +319236 1 222 +1446794 1 222 +1944646 1 222 +2686366 1 222 +2600320 1 222 +60550 1 221 +1756988 1 221 +248315 1 221 +314275 1 221 +712633 1 221 +86185 1 221 +2609252 1 220 +313598 1 220 +1771 1 220 +46679 1 220 +2163011 1 220 +28258 1 220 +2511995 1 220 +2058134 1 220 +2201350 1 220 +1516059 1 220 +120107 1 219 +692370 1 219 +1941349 1 219 +1434191 1 219 +960 1 219 +2509341 1 219 +48665 1 218 +1813879 1 218 +116090 1 218 +1904944 1 218 +2662361 1 218 +1763535 1 218 +2666100 1 218 +64104 1 217 +47960 1 217 +37928 1 217 +91604 1 217 +208439 1 217 +157783 1 217 +1281 1 217 +1325090 1 216 +2041 1 216 +2116657 1 216 +1600 1 216 +2708301 1 216 +1548548 1 216 +2509457 1 216 +93378 1 216 +2686361 1 215 +571913 1 215 +1833852 1 215 +546160 1 215 +1742359 1 215 +133448 1 215 +28172 1 215 +28894 1 215 +76936 1 215 +2483033 1 215 +1178482 1 215 +1473112 1 214 +1630135 1 214 +1930276 1 214 +2419774 1 214 +652764 1 214 +196162 1 214 +2067957 1 214 +2033033 1 214 +1580596 1 214 +332101 1 214 +2509675 1 214 +1173027 1 214 +233316 1 213 +1249553 1 213 +47715 1 213 +373 1 213 +881260 1 213 +2675225 1 213 +320497 1 213 +2005469 1 213 +216929 1 212 +1806905 1 212 +2493673 1 212 +2546351 1 212 +33899 1 212 +1402 1 212 +134 1 212 +2202827 1 212 +326523 1 212 +1648404 1 212 +99598 1 212 +1016987 1 211 +2603276 1 211 +157779 1 211 +664962 1 211 +279828 1 211 +1783515 1 211 +1736674 1 211 +1548547 1 211 +2583851 1 211 +266749 1 210 +703 1 210 +1604 1 210 +357808 1 210 +2048283 1 209 +2686304 1 209 +2594265 1 209 +2590016 1 209 +2589797 1 209 +2583243 1 209 +712357 1 209 +739 1 208 +1078471 1 208 +88688 1 208 +1411902 1 208 +2136401 1 208 +38310 1 208 +288004 1 208 +1982042 1 208 +84022 1 207 +116188 1 207 +106370 1 207 +2026785 1 207 +1965282 1 207 +2583587 1 207 +2587855 1 207 +1891644 1 207 +1498 1 206 +445576 1 206 +1867719 1 206 +1616788 1 206 +1223566 1 206 +758802 1 206 +74033 1 205 +1714849 1 205 +2676077 1 205 +1783501 1 205 +2558918 1 205 +2603206 1 205 +2169400 1 205 +1312183 1 204 +1708 1 204 +1535768 1 204 +183795 1 204 +359 1 204 +330214 1 204 +1855875 1 204 +1909293 1 204 +61645 1 204 +199591 1 204 +580165 1 204 +857417 1 203 +1348774 1 203 +1812935 1 203 +1599 1 203 +290400 1 203 +82348 1 202 +48664 1 202 +1323375 1 202 +2596890 1 202 +1458426 1 202 +2068654 1 201 +2028345 1 201 +2572577 1 201 +2579935 1 201 +1076596 1 201 +2518370 1 201 +2594004 1 201 +1302308 1 201 +1811807 1 201 +204038 1 201 +1288 1 201 +471827 1 201 +1229727 1 201 +670052 1 201 +2487074 1 200 +1932 1 200 +1808979 1 200 +1007105 1 200 +2293838 1 200 +145458 1 200 +2575375 1 200 +2495645 1 200 +1028416 1 200 +930805 1 200 diff --git a/data/hmp_templates/gut/SRS104912-sim.yml b/data/hmp_templates/gut/SRS104912-sim.yml new file mode 100644 index 0000000..ce15c9b --- /dev/null +++ b/data/hmp_templates/gut/SRS104912-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS104912-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS104912-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 13 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS105082-sim.tsv b/data/hmp_templates/gut/SRS105082-sim.tsv new file mode 100644 index 0000000..64a60c8 --- /dev/null +++ b/data/hmp_templates/gut/SRS105082-sim.tsv @@ -0,0 +1,2084 @@ +TaxonomyInput nb_genomes Reads +821 1 4203054 +853 1 633163 +818 1 478099 +47678 1 474680 +820 1 388871 +28116 1 334328 +817 1 273002 +357276 1 248530 +823 1 191643 +371601 1 166727 +28118 1 161698 +145261 1 153538 +2109688 1 152878 +589873 1 118388 +2364787 1 113803 +2528203 1 104898 +28132 1 101540 +2109691 1 101431 +2585118 1 98227 +28131 1 96749 +1796613 1 88721 +28135 1 68515 +2093856 1 63336 +246787 1 62362 +28129 1 59926 +2585119 1 53060 +2494234 1 51945 +329854 1 51151 +1177574 1 46499 +244366 1 43371 +1550024 1 42612 +301301 1 40687 +28113 1 36538 +292800 1 33842 +95486 1 33147 +74426 1 33022 +1681 1 32361 +28119 1 29481 +2093857 1 28266 +2584944 1 25195 +76123 1 24434 +1160721 1 24411 +2606626 1 22012 +562 1 20502 +1639133 1 20474 +2479767 1 19945 +239935 1 19729 +1680 1 19077 +901 1 18807 +39488 1 18786 +2025876 1 18682 +2109687 1 17806 +10847 1 16054 +216816 1 15263 +837 1 14236 +2133944 1 11975 +2662363 1 11621 +2576606 1 10835 +2610895 1 10699 +33038 1 10497 +208479 1 9322 +2109690 1 9319 +1496 1 9077 +1796646 1 8276 +1686 1 7948 +649756 1 7875 +712710 1 7683 +573 1 7033 +79329 1 6924 +1642647 1 6690 +1642646 1 6297 +2086584 1 5845 +2584943 1 5576 +33035 1 4860 +36874 1 4720 +729 1 4685 +1871021 1 4562 +661488 1 4207 +1400053 1 3922 +1685 1 3864 +29347 1 3799 +2093742 1 3745 +303 1 3576 +253 1 3339 +1834196 1 3319 +2706887 1 3299 +84030 1 3187 +1017 1 3176 +1168034 1 3093 +1562970 1 3069 +1297617 1 3035 +393921 1 3016 +512763 1 2957 +400092 1 2941 +40324 1 2941 +1491 1 2860 +2604421 1 2827 +85698 1 2772 +626937 1 2739 +316 1 2714 +1912897 1 2623 +2703787 1 2597 +1433126 1 2593 +294 1 2549 +743722 1 2540 +1117645 1 2514 +1834207 1 2503 +45243 1 2501 +28251 1 2483 +2315862 1 2465 +185007 1 2422 +1834198 1 2407 +550 1 2369 +1191459 1 2336 +2520506 1 2325 +56 1 2319 +28901 1 2276 +2341117 1 2239 +2086585 1 2231 +2305508 1 2230 +358 1 2226 +430522 1 2174 +34073 1 2144 +2704465 1 2109 +2578106 1 2108 +2713573 1 2087 +2057025 1 2084 +536441 1 2034 +2696063 1 2026 +1493872 1 2019 +477680 1 2009 +584 1 2009 +384 1 1995 +587753 1 1991 +1736 1 1991 +2268026 1 1978 +1076 1 1972 +1042156 1 1969 +84112 1 1968 +496057 1 1960 +238 1 1926 +847 1 1906 +1550579 1 1876 +2681766 1 1872 +1796616 1 1839 +250 1 1839 +363852 1 1834 +1411621 1 1833 +188932 1 1825 +192 1 1821 +2027860 1 1814 +54571 1 1807 +1484118 1 1795 +34085 1 1761 +797291 1 1747 +1265445 1 1740 +542 1 1729 +889453 1 1720 +996 1 1711 +2557994 1 1694 +259 1 1693 +1019 1 1689 +305 1 1685 +1352 1 1676 +1727164 1 1666 +907 1 1660 +2201271 1 1653 +87883 1 1639 +1257021 1 1629 +1406 1 1626 +254 1 1608 +446 1 1605 +661481 1 1599 +2175091 1 1598 +459526 1 1595 +986 1 1591 +2183896 1 1588 +287 1 1587 +1306519 1 1573 +1124835 1 1559 +1211326 1 1553 +2489595 1 1549 +2029983 1 1530 +83771 1 1511 +2502779 1 1498 +1834519 1 1489 +2694929 1 1488 +253239 1 1486 +388950 1 1481 +1010 1 1476 +1526658 1 1468 +185008 1 1467 +1385664 1 1450 +1492 1 1450 +371142 1 1445 +1617283 1 1445 +53442 1 1441 +2041044 1 1432 +561061 1 1413 +210 1 1410 +511 1 1407 +323450 1 1393 +1717717 1 1381 +1379909 1 1372 +38293 1 1371 +217203 1 1361 +1176587 1 1347 +644 1 1346 +1850093 1 1332 +2259595 1 1332 +2607656 1 1326 +2564099 1 1326 +158822 1 1314 +1396 1 1310 +651561 1 1307 +82633 1 1300 +582 1 1299 +1393 1 1293 +654 1 1286 +2321403 1 1283 +29549 1 1283 +300 1 1279 +398053 1 1278 +1986952 1 1266 +2676868 1 1257 +1850526 1 1250 +28095 1 1249 +1538644 1 1247 +1126833 1 1246 +2494373 1 1236 +28188 1 1234 +1316596 1 1234 +2502781 1 1229 +1229621 1 1228 +380021 1 1217 +511435 1 1213 +851 1 1212 +862126 1 1204 +496056 1 1197 +1813871 1 1184 +39778 1 1183 +1379910 1 1181 +1356852 1 1175 +2027857 1 1173 +1729720 1 1172 +959 1 1168 +1747 1 1166 +184914 1 1163 +1178516 1 1148 +2496028 1 1147 +28025 1 1145 +2545455 1 1142 +2172098 1 1137 +670 1 1132 +1679444 1 1126 +1834205 1 1125 +84110 1 1122 +2294119 1 1115 +2584940 1 1111 +756892 1 1107 +548 1 1100 +119219 1 1100 +1868325 1 1095 +317 1 1087 +106592 1 1084 +247 1 1081 +2583851 1 1076 +2507162 1 1073 +1398 1 1072 +1391654 1 1070 +2584122 1 1070 +179636 1 1064 +382 1 1058 +103855 1 1057 +712411 1 1044 +442 1 1043 +1484116 1 1042 +1678728 1 1034 +1307 1 1030 +197 1 1025 +112234 1 1022 +41977 1 1021 +2069432 1 1020 +615 1 1018 +2015076 1 1017 +2518177 1 1015 +1335613 1 1015 +93220 1 1014 +2694930 1 1014 +564064 1 1014 +2003121 1 1010 +492670 1 1009 +339 1 1004 +2126553 1 1002 +94132 1 1001 +2576891 1 1000 +121719 1 998 +2109692 1 994 +1644130 1 986 +1385663 1 985 +55508 1 984 +2698458 1 982 +649196 1 982 +630 1 978 +2602769 1 974 +946333 1 972 +1469502 1 966 +1761453 1 963 +189426 1 959 +2583377 1 958 +1234841 1 957 +1933220 1 955 +2144175 1 955 +1324352 1 936 +36745 1 936 +2319843 1 935 +2487118 1 934 +2094242 1 931 +2496866 1 930 +2686365 1 930 +2591634 1 929 +2605747 1 926 +1179672 1 925 +114 1 923 +2488560 1 919 +1870820 1 913 +1358 1 913 +470934 1 910 +313603 1 909 +1420917 1 905 +314275 1 905 +160799 1 905 +1331910 1 900 +1502 1 897 +1166950 1 895 +83655 1 891 +1450761 1 887 +1408 1 886 +44742 1 885 +1838286 1 880 +794903 1 879 +2058134 1 873 +980427 1 873 +1683 1 873 +2026785 1 872 +748247 1 871 +190893 1 870 +443144 1 869 +1351 1 868 +237610 1 867 +107401 1 867 +76731 1 866 +617123 1 865 +1453352 1 863 +28448 1 862 +1280 1 859 +106590 1 857 +2162713 1 856 +1141883 1 855 +1795355 1 853 +135487 1 853 +446679 1 849 +248903 1 847 +645 1 845 +292 1 843 +279058 1 842 +585 1 840 +553814 1 839 +1014 1 837 +1428 1 837 +78587 1 834 +2708117 1 834 +1379870 1 833 +672 1 830 +2003188 1 829 +2109685 1 823 +240427 1 821 +1307839 1 819 +1463165 1 818 +28026 1 818 +1355330 1 815 +285 1 812 +63186 1 812 +1305 1 810 +2615203 1 810 +584609 1 807 +1421 1 805 +2662364 1 804 +1534 1 802 +1774273 1 801 +571 1 798 +2547600 1 798 +2697043 1 793 +1913577 1 791 +516051 1 791 +75105 1 790 +878220 1 790 +1848904 1 789 +1585976 1 786 +84531 1 785 +481743 1 784 +2594004 1 782 +329 1 779 +34062 1 779 +190895 1 776 +2487064 1 773 +2608982 1 773 +2487063 1 773 +195 1 771 +1520 1 769 +1255 1 767 +1536772 1 766 +2583587 1 765 +2527978 1 764 +80869 1 763 +408 1 762 +321983 1 760 +52 1 759 +48296 1 758 +104268 1 754 +2478663 1 753 +1108595 1 751 +321984 1 742 +1903704 1 741 +28110 1 739 +438 1 738 +926 1 737 +80842 1 736 +1660 1 735 +1639 1 734 +84022 1 734 +2712698 1 732 +387093 1 730 +1768242 1 726 +1473112 1 725 +682998 1 723 +1302 1 723 +347 1 722 +676 1 721 +682956 1 721 +1637999 1 721 +2681984 1 720 +2201181 1 719 +1712675 1 718 +463014 1 718 +101571 1 716 +2711231 1 715 +96344 1 714 +1803846 1 714 +28108 1 713 +380 1 711 +1868589 1 711 +1481923 1 711 +856 1 710 +158899 1 709 +435897 1 709 +727 1 708 +2487065 1 708 +1492737 1 707 +2703789 1 707 +204042 1 707 +237609 1 705 +2233999 1 702 +1807691 1 701 +562959 1 701 +2231055 1 699 +1836467 1 699 +1492898 1 698 +1572656 1 698 +47917 1 697 +2565555 1 697 +1803865 1 697 +2591109 1 693 +76853 1 693 +1424294 1 692 +48 1 692 +2585771 1 692 +876 1 691 +2587862 1 691 +159 1 690 +1404 1 690 +1063 1 690 +2507160 1 690 +755731 1 689 +29343 1 689 +51515 1 687 +948519 1 685 +2282170 1 684 +192812 1 684 +38313 1 683 +1465 1 680 +1888 1 679 +28141 1 678 +871742 1 676 +2005884 1 675 +60847 1 674 +738 1 672 +28189 1 671 +2049305 1 671 +2528023 1 670 +1282 1 670 +1906741 1 670 +2027405 1 670 +123899 1 668 +2220095 1 668 +44251 1 667 +2704462 1 666 +1736674 1 665 +2496865 1 665 +2494375 1 665 +79880 1 665 +747 1 664 +1936081 1 663 +666 1 662 +1267423 1 662 +1756150 1 661 +947919 1 661 +82996 1 658 +84032 1 658 +888845 1 658 +158836 1 656 +221822 1 656 +616991 1 654 +433296 1 654 +47885 1 654 +1806508 1 653 +1784714 1 653 +379684 1 652 +470 1 652 +265959 1 652 +1779382 1 652 +40215 1 651 +1969 1 648 +2582419 1 647 +2516557 1 646 +2651334 1 644 +1134435 1 644 +2487072 1 643 +32002 1 643 +1842534 1 642 +564 1 641 +168471 1 641 +964 1 640 +1740262 1 639 +55518 1 638 +53408 1 637 +53346 1 635 +187327 1 634 +47763 1 632 +337 1 632 +217204 1 631 +404589 1 631 +1735038 1 631 +1636152 1 631 +2571746 1 631 +170679 1 627 +731 1 626 +1717 1 625 +1481663 1 624 +128780 1 623 +1784713 1 622 +56454 1 622 +85643 1 621 +795665 1 620 +2591633 1 620 +1644131 1 619 +979 1 618 +897 1 617 +29494 1 617 +1526571 1 617 +536 1 617 +43674 1 616 +1314 1 616 +1814290 1 615 +1940612 1 615 +45065 1 615 +320787 1 614 +79604 1 614 +2509675 1 613 +546 1 613 +347534 1 611 +1661 1 610 +2601898 1 610 +82987 1 608 +1616788 1 608 +2703788 1 607 +413882 1 606 +2058175 1 606 +1416806 1 605 +1136941 1 605 +207745 1 604 +1501 1 603 +66269 1 603 +223967 1 603 +2496266 1 601 +2583823 1 600 +2109914 1 599 +1622118 1 598 +1689 1 598 +502558 1 597 +33940 1 597 +935222 1 597 +1658665 1 597 +1823759 1 596 +2292766 1 595 +1716143 1 595 +29484 1 594 +2547394 1 593 +1561 1 592 +703 1 592 +1238 1 591 +1547445 1 591 +1936003 1 591 +531844 1 591 +673 1 590 +65058 1 590 +396 1 589 +1917485 1 589 +1613 1 589 +292913 1 587 +463040 1 586 +47715 1 585 +1702221 1 585 +1821621 1 585 +2109913 1 584 +463025 1 584 +1462996 1 584 +1416627 1 584 +1178778 1 584 +558152 1 583 +47880 1 582 +2654982 1 582 +375 1 581 +40214 1 581 +1457365 1 580 +36809 1 580 +1094 1 579 +2249356 1 578 +158500 1 577 +37332 1 576 +946334 1 576 +2704463 1 575 +1930273 1 575 +2478552 1 575 +1383885 1 574 +79263 1 573 +2528010 1 573 +487 1 573 +64104 1 572 +2545799 1 572 +1316593 1 572 +1328 1 570 +1891094 1 570 +2594003 1 570 +160453 1 570 +2498704 1 569 +246 1 569 +1331682 1 568 +443143 1 568 +1707785 1 568 +57975 1 567 +33069 1 567 +680 1 567 +1628392 1 566 +146919 1 566 +549 1 565 +1297742 1 565 +61647 1 564 +2590021 1 564 +69 1 564 +47883 1 564 +1619313 1 562 +13690 1 561 +190721 1 561 +702745 1 560 +1909395 1 559 +29341 1 558 +1716543 1 557 +571177 1 555 +1359 1 554 +2602015 1 554 +43 1 553 +1697053 1 553 +945844 1 552 +1579 1 551 +33050 1 551 +34 1 551 +1892855 1 551 +29466 1 549 +2707005 1 548 +588932 1 547 +2209 1 547 +1283 1 547 +1675686 1 545 +2575375 1 545 +37326 1 545 +674 1 545 +2570561 1 544 +2303331 1 544 +1891926 1 543 +160699 1 543 +714 1 543 +1286 1 542 +1679721 1 542 +2597701 1 541 +1842539 1 541 +1032623 1 540 +930166 1 539 +281093 1 539 +47671 1 539 +1705 1 538 +346 1 538 +2560057 1 537 +190148 1 535 +218284 1 535 +2698684 1 535 +264697 1 534 +198107 1 534 +2663009 1 533 +2507161 1 533 +189381 1 533 +576610 1 533 +1965282 1 532 +1978339 1 532 +1912 1 531 +1241978 1 529 +1842727 1 529 +1744 1 527 +1940610 1 525 +271865 1 525 +471827 1 524 +1909293 1 523 +1904640 1 523 +230089 1 522 +2183582 1 521 +1612173 1 521 +406548 1 520 +204 1 519 +1791 1 518 +669041 1 516 +35 1 515 +1531429 1 514 +57704 1 514 +683124 1 513 +364410 1 512 +1592106 1 511 +1334193 1 510 +54291 1 510 +1705617 1 510 +1792307 1 510 +68570 1 508 +324057 1 507 +1575 1 507 +2528026 1 506 +860235 1 506 +1537400 1 506 +877500 1 505 +180282 1 505 +29495 1 505 +2653857 1 505 +2697032 1 504 +1901 1 503 +1860122 1 503 +1432792 1 503 +414771 1 503 +314722 1 502 +2565558 1 502 +33059 1 501 +28172 1 501 +1763534 1 501 +1536771 1 501 +2507538 1 500 +2681549 1 500 +297 1 500 +2675298 1 500 +1763538 1 500 +1632864 1 500 +260554 1 499 +83656 1 499 +1927 1 497 +162209 1 497 +2320868 1 496 +1678129 1 496 +1703 1 494 +1871037 1 493 +1870819 1 492 +1921087 1 492 +666685 1 492 +1792508 1 490 +326522 1 490 +2213202 1 490 +2599640 1 489 +33959 1 488 +712991 1 487 +1410383 1 487 +353 1 486 +1218801 1 486 +2202148 1 485 +649831 1 484 +2291597 1 484 +1313 1 483 +321846 1 483 +437897 1 483 +1197717 1 483 +290335 1 482 +421525 1 481 +237258 1 481 +1405 1 480 +980251 1 480 +1896961 1 480 +2163011 1 479 +2527984 1 479 +930805 1 478 +221027 1 478 +1679497 1 478 +2592345 1 478 +716816 1 478 +39152 1 477 +1714849 1 477 +38300 1 475 +62322 1 475 +539 1 475 +1915 1 474 +1882682 1 474 +155077 1 474 +327575 1 473 +279113 1 473 +2608981 1 472 +1355477 1 472 +1340425 1 471 +46867 1 471 +1936080 1 471 +980254 1 471 +1719 1 470 +2233851 1 470 +40216 1 470 +40269 1 469 +454 1 468 +105219 1 467 +2183911 1 467 +2529032 1 466 +553 1 466 +29449 1 465 +2045208 1 465 +2021862 1 463 +1736675 1 463 +1452 1 463 +1478 1 462 +2593958 1 462 +1842533 1 462 +51665 1 462 +1727163 1 462 +656366 1 462 +260552 1 461 +55213 1 461 +82380 1 460 +1571470 1 460 +1758194 1 460 +2057741 1 460 +1296669 1 460 +650378 1 459 +342113 1 459 +2497861 1 459 +2018025 1 459 +1839801 1 459 +2602070 1 458 +76759 1 457 +1082851 1 457 +552 1 456 +37329 1 456 +712538 1 456 +2571748 1 456 +2705547 1 456 +1972068 1 455 +35554 1 454 +1423 1 454 +208962 1 454 +1571 1 454 +1397 1 453 +861 1 453 +1241981 1 453 +2656914 1 452 +1555112 1 451 +1598 1 450 +1748 1 450 +2126319 1 450 +713030 1 450 +1788 1 449 +726 1 449 +158847 1 448 +37928 1 448 +266779 1 446 +70775 1 445 +182710 1 445 +488447 1 445 +75588 1 445 +169760 1 443 +1938606 1 443 +401471 1 443 +436515 1 443 +1304 1 442 +1756988 1 442 +1426 1 441 +709810 1 440 +674703 1 440 +2014887 1 440 +1884263 1 440 +1652495 1 440 +67304 1 439 +82541 1 439 +1299 1 439 +332411 1 438 +2304594 1 438 +33074 1 438 +655015 1 437 +67780 1 437 +232537 1 437 +492 1 436 +2702 1 436 +614 1 436 +104087 1 436 +160660 1 436 +2028345 1 436 +1325107 1 435 +110937 1 435 +60890 1 435 +1853278 1 435 +507626 1 435 +1914471 1 435 +28450 1 434 +68895 1 434 +1476901 1 433 +61645 1 433 +379347 1 431 +61435 1 430 +927083 1 430 +266749 1 428 +137591 1 428 +1686310 1 428 +1590 1 428 +199 1 427 +2712222 1 427 +315405 1 426 +1416803 1 426 +1482074 1 425 +1830 1 425 +2571747 1 425 +53462 1 425 +1077946 1 424 +115808 1 424 +540747 1 424 +1340 1 424 +2213200 1 424 +1974213 1 423 +641491 1 422 +24 1 422 +2061 1 422 +1270 1 421 +2614693 1 421 +1308 1 420 +1725 1 420 +2173169 1 420 +28173 1 420 +169679 1 420 +1182571 1 419 +2527972 1 419 +2686366 1 419 +2072590 1 419 +1288 1 419 +1401 1 418 +1150389 1 418 +151783 1 418 +2605946 1 418 +861299 1 418 +2708301 1 418 +1420916 1 417 +665913 1 417 +1697043 1 417 +51669 1 416 +68214 1 416 +859 1 416 +1917421 1 416 +172713 1 416 +2014 1 415 +2507935 1 415 +2059672 1 415 +1843368 1 415 +1714860 1 415 +93219 1 414 +1387353 1 414 +173 1 414 +1309 1 414 +2565782 1 414 +356837 1 413 +196 1 413 +187493 1 413 +582702 1 412 +2220096 1 411 +2661553 1 411 +69666 1 411 +758793 1 409 +333962 1 409 +409322 1 409 +1303 1 409 +1219491 1 409 +298653 1 408 +1804986 1 408 +2590785 1 408 +2565781 1 408 +2572594 1 407 +844 1 407 +208223 1 407 +528244 1 407 +687 1 407 +652 1 407 +1335757 1 407 +1247 1 406 +580165 1 406 +1710 1 406 +83263 1 406 +61652 1 405 +94624 1 405 +82348 1 405 +1312183 1 405 +392734 1 405 +69964 1 404 +1790137 1 404 +2213194 1 403 +538381 1 403 +996801 1 403 +2662261 1 403 +366602 1 403 +519081 1 403 +2488809 1 403 +511062 1 402 +483547 1 402 +1987723 1 402 +1851544 1 401 +1823 1 401 +2662397 1 401 +1079 1 400 +93218 1 400 +2651974 1 399 +337191 1 399 +359370 1 399 +2599401 1 399 +1354 1 399 +1907578 1 399 +864828 1 398 +1842536 1 398 +2528018 1 398 +669 1 398 +2496837 1 398 +169430 1 396 +1176649 1 396 +279826 1 396 +270351 1 396 +274537 1 396 +1795631 1 395 +39691 1 395 +1407071 1 395 +79967 1 394 +758802 1 394 +748280 1 394 +312306 1 394 +2283194 1 394 +1752063 1 394 +46126 1 393 +2558361 1 393 +662548 1 393 +43657 1 392 +49319 1 392 +426117 1 392 +2211108 1 391 +1538553 1 391 +45972 1 391 +1855823 1 391 +455432 1 390 +2021234 1 390 +1310 1 390 +1867715 1 390 +652764 1 390 +2509614 1 389 +1938604 1 389 +67345 1 389 +1615909 1 388 +1851148 1 388 +587 1 388 +2587844 1 388 +879274 1 388 +1678028 1 387 +93221 1 387 +43770 1 387 +434009 1 386 +1670831 1 386 +434008 1 386 +74829 1 386 +675 1 385 +1241982 1 385 +2066070 1 385 +273384 1 385 +1513 1 384 +134537 1 384 +1702325 1 384 +2591463 1 383 +206506 1 383 +445710 1 383 +2580412 1 382 +1193713 1 381 +1915078 1 381 +330214 1 381 +1610 1 381 +1245 1 380 +1685010 1 380 +1536773 1 379 +2033033 1 379 +960 1 379 +480 1 379 +252514 1 378 +742013 1 378 +38289 1 377 +573737 1 377 +1149133 1 377 +1517 1 377 +1604 1 376 +55211 1 376 +1349819 1 375 +1249553 1 375 +39688 1 375 +2511069 1 374 +1884905 1 373 +2161821 1 373 +1855912 1 372 +2597769 1 372 +1721091 1 372 +107637 1 372 +266951 1 371 +29562 1 371 +108981 1 371 +1080068 1 370 +1761012 1 370 +1742359 1 370 +1250539 1 370 +484770 1 370 +1077935 1 370 +96942 1 369 +2590016 1 369 +39765 1 369 +2579935 1 368 +702967 1 367 +1564681 1 367 +57320 1 367 +2583243 1 366 +172045 1 366 +400770 1 366 +2341112 1 366 +2017483 1 365 +1404864 1 365 +93064 1 365 +68270 1 365 +65741 1 365 +2589080 1 365 +1411117 1 364 +2500547 1 363 +692036 1 362 +1806885 1 362 +2599297 1 362 +2562282 1 361 +2161816 1 361 +368607 1 361 +1505 1 360 +656519 1 360 +1543721 1 360 +2711156 1 359 +784 1 359 +1833852 1 359 +274 1 359 +2527962 1 358 +162 1 358 +245188 1 358 +1648923 1 357 +2528008 1 357 +1442136 1 357 +135719 1 357 +2010829 1 357 +103690 1 357 +374606 1 357 +95485 1 357 +35755 1 356 +2509456 1 356 +1714373 1 356 +106634 1 356 +298657 1 356 +453304 1 356 +114527 1 356 +450 1 356 +1836 1 356 +60552 1 355 +2579971 1 355 +1579979 1 355 +345632 1 355 +83683 1 354 +200451 1 354 +1587 1 354 +565033 1 354 +1911175 1 354 +486398 1 354 +204773 1 353 +655307 1 353 +1826607 1 353 +147645 1 352 +41899 1 352 +1327635 1 352 +718192 1 352 +103733 1 351 +2026199 1 351 +2010972 1 351 +2527995 1 351 +863372 1 351 +1391653 1 351 +985762 1 351 +1120045 1 350 +296591 1 350 +754409 1 350 +656179 1 349 +2686361 1 349 +1486034 1 349 +2082385 1 349 +2259340 1 349 +1930276 1 349 +1335048 1 348 +1853276 1 348 +1871025 1 348 +445709 1 347 +1926494 1 347 +1197460 1 346 +2599308 1 346 +358220 1 345 +495 1 345 +1712516 1 345 +2592655 1 344 +373994 1 343 +673372 1 343 +419475 1 343 +45668 1 343 +2528009 1 343 +2528021 1 343 +881260 1 343 +1332080 1 343 +1940 1 343 +1933880 1 342 +1275 1 342 +2528020 1 342 +1772 1 342 +2718 1 342 +1897729 1 342 +197614 1 342 +82985 1 341 +1542 1 341 +1954171 1 341 +1336794 1 341 +2293838 1 340 +1810504 1 340 +2109915 1 339 +39695 1 339 +304207 1 339 +685565 1 339 +283686 1 339 +392593 1 338 +2419771 1 338 +2420509 1 338 +2700081 1 338 +1379270 1 338 +2211140 1 337 +518 1 337 +2202254 1 337 +34004 1 337 +356322 1 337 +592316 1 336 +664426 1 336 +2594795 1 336 +94 1 336 +1751046 1 335 +947515 1 335 +2713414 1 334 +206 1 334 +1389004 1 334 +2080757 1 333 +1536769 1 333 +682798 1 332 +2591606 1 332 +2171623 1 332 +1016987 1 332 +1603606 1 332 +2070539 1 331 +29382 1 331 +2045 1 331 +115561 1 330 +320497 1 330 +2593973 1 329 +2067960 1 329 +2548456 1 329 +47421 1 328 +2011159 1 328 +640512 1 328 +119981 1 328 +1482 1 328 +1274631 1 327 +983548 1 327 +2594005 1 327 +1381081 1 327 +1608957 1 326 +1905730 1 326 +1581 1 326 +28087 1 326 +54006 1 326 +2756 1 326 +1229727 1 326 +521 1 325 +311230 1 325 +1624 1 325 +68175 1 325 +1763883 1 325 +2282475 1 325 +2026787 1 324 +735 1 324 +2699395 1 324 +2575699 1 324 +1986146 1 323 +2479546 1 323 +2419774 1 323 +2005462 1 323 +2304600 1 323 +1075 1 323 +39777 1 323 +651 1 322 +2107713 1 322 +2026189 1 322 +2518599 1 321 +1930593 1 321 +1858609 1 321 +2527968 1 320 +1720344 1 320 +311180 1 319 +2126346 1 319 +1583100 1 319 +255247 1 319 +2054 1 318 +2068654 1 318 +2478662 1 318 +137545 1 318 +2678688 1 317 +171865 1 317 +2605945 1 317 +1536774 1 316 +93466 1 316 +1670830 1 316 +1235591 1 316 +2509458 1 316 +46680 1 315 +1930557 1 315 +1590041 1 315 +2055 1 315 +2607911 1 315 +2268451 1 315 +2282309 1 315 +2496847 1 315 +1850238 1 314 +349221 1 314 +2377 1 314 +665914 1 314 +656178 1 314 +51101 1 314 +2596890 1 314 +696485 1 314 +1715989 1 313 +1504 1 313 +39692 1 313 +529 1 312 +2697030 1 312 +73010 1 312 +1640 1 312 +2489213 1 311 +1178515 1 311 +1390395 1 311 +1765049 1 310 +45398 1 310 +53336 1 310 +913107 1 310 +2683272 1 309 +254247 1 309 +28037 1 309 +35841 1 309 +373 1 309 +67267 1 309 +2498135 1 308 +2604047 1 308 +2604832 1 308 +1325115 1 308 +451 1 308 +48664 1 308 +1885 1 307 +1841249 1 306 +2303538 1 306 +2057808 1 306 +2493639 1 306 +2528035 1 306 +1960 1 305 +129921 1 305 +628 1 305 +2527982 1 305 +28898 1 305 +1805933 1 305 +1208324 1 305 +565 1 304 +699433 1 304 +1812935 1 304 +1710541 1 304 +54736 1 304 +1570 1 303 +2116657 1 303 +253703 1 303 +76761 1 303 +61648 1 302 +1389192 1 302 +1510150 1 302 +1337936 1 301 +1893 1 301 +2572577 1 301 +2072025 1 301 +475662 1 301 +1230341 1 300 +1889813 1 300 +96345 1 300 +1980001 1 300 +1338368 1 300 +2082386 1 300 +496014 1 300 +260364 1 299 +418699 1 299 +13689 1 299 +1307761 1 299 +594679 1 299 +2497863 1 298 +29391 1 298 +585455 1 298 +53409 1 298 +2382161 1 298 +2598579 1 298 +33011 1 298 +1458426 1 298 +122 1 298 +376489 1 297 +2267264 1 297 +2691571 1 297 +1525 1 297 +477641 1 296 +561879 1 296 +407035 1 296 +2518645 1 296 +219572 1 295 +158823 1 295 +298654 1 295 +1431246 1 295 +1072463 1 295 +1528099 1 295 +636 1 294 +471223 1 294 +1663591 1 294 +225848 1 294 +1509 1 294 +1495144 1 294 +75985 1 293 +1582259 1 293 +2528642 1 293 +1336795 1 293 +2202825 1 293 +33945 1 293 +717785 1 292 +36818 1 292 +28035 1 292 +1908 1 292 +111015 1 292 +1771309 1 292 +1315974 1 291 +1750 1 291 +2015316 1 291 +68202 1 291 +1617448 1 291 +42253 1 291 +2161747 1 290 +29379 1 290 +1515612 1 290 +317577 1 290 +465721 1 289 +82367 1 289 +408139 1 289 +267363 1 288 +1905 1 288 +42197 1 288 +288000 1 288 +157783 1 287 +1138822 1 287 +1536770 1 287 +1357916 1 287 +296842 1 287 +35814 1 287 +318683 1 287 +1246 1 287 +2527990 1 287 +482957 1 287 +420953 1 287 +2025949 1 286 +156978 1 286 +1789224 1 286 +283734 1 286 +627192 1 286 +2666100 1 285 +1597 1 285 +2664893 1 284 +1824 1 284 +1620215 1 284 +1752064 1 284 +187452 1 283 +1581011 1 283 +2674991 1 283 +1653480 1 283 +1750719 1 283 +2528007 1 283 +319236 1 283 +2547601 1 283 +1591409 1 283 +2602016 1 282 +556054 1 282 +139 1 282 +1047172 1 282 +1883414 1 282 +2024580 1 281 +1586287 1 281 +1891675 1 281 +46127 1 281 +2492396 1 281 +1883416 1 280 +2564040 1 280 +1618 1 279 +299767 1 279 +2598453 1 279 +1529068 1 278 +1158459 1 278 +1850374 1 278 +1334 1 278 +2006110 1 278 +76758 1 278 +135740 1 277 +1560345 1 277 +2057246 1 277 +408015 1 277 +195105 1 277 +69373 1 276 +1609966 1 276 +71997 1 276 +2033869 1 276 +2169583 1 276 +2058136 1 275 +1616117 1 275 +37919 1 275 +78327 1 275 +944547 1 275 +29486 1 275 +1434191 1 275 +2587850 1 274 +2082188 1 274 +2662028 1 274 +60548 1 274 +1768108 1 273 +2170729 1 273 +553239 1 273 +2211211 1 273 +546367 1 273 +1286180 1 273 +947516 1 273 +126385 1 273 +1778678 1 272 +639310 1 272 +1930071 1 272 +54005 1 272 +1767 1 271 +35806 1 271 +1495638 1 271 +1096243 1 271 +1348774 1 271 +180957 1 271 +2562284 1 270 +2080742 1 270 +451876 1 270 +1842540 1 270 +29552 1 270 +1261031 1 270 +1050174 1 270 +2047 1 270 +1582 1 269 +2202828 1 269 +2712223 1 269 +1945512 1 269 +2789776 1 269 +387092 1 269 +413503 1 269 +453783 1 269 +505249 1 269 +263819 1 269 +1348 1 268 +28264 1 268 +876364 1 268 +2026 1 268 +1968541 1 268 +1260 1 268 +1740163 1 268 +2371 1 268 +449461 1 268 +2005469 1 268 +1783515 1 267 +47878 1 267 +1842537 1 267 +2594269 1 267 +2048283 1 267 +2487074 1 266 +1179669 1 266 +2057800 1 266 +1325095 1 266 +1811807 1 266 +340345 1 265 +82688 1 265 +1336806 1 265 +2259620 1 265 +424800 1 265 +2172103 1 265 +116090 1 265 +96773 1 265 +2094025 1 264 +56455 1 264 +173366 1 264 +2560053 1 264 +1552759 1 264 +1664069 1 264 +157687 1 263 +2587861 1 263 +434010 1 263 +2490858 1 263 +174633 1 262 +1404768 1 262 +1223566 1 262 +84096 1 262 +2293862 1 262 +1634516 1 261 +2169540 1 261 +1759059 1 261 +244566 1 261 +2496836 1 261 +2172099 1 261 +1783501 1 261 +69395 1 261 +454601 1 261 +2033032 1 261 +553510 1 260 +386891 1 260 +59814 1 260 +294699 1 260 +1967 1 259 +553151 1 259 +241368 1 259 +29575 1 259 +2594265 1 259 +196162 1 258 +2572036 1 258 +93222 1 257 +665099 1 257 +218208 1 257 +698828 1 257 +1173027 1 257 +155892 1 256 +2067957 1 256 +2576307 1 255 +1497 1 255 +52773 1 255 +282217 1 255 +53345 1 255 +2567934 1 254 +2099789 1 254 +2527975 1 254 +1214604 1 254 +664962 1 254 +1402 1 254 +1078471 1 254 +68209 1 253 +2079575 1 253 +2080419 1 253 +257708 1 253 +2683284 1 253 +1763363 1 253 +2268087 1 253 +732 1 253 +2501295 1 252 +2587855 1 252 +1335 1 252 +66871 1 252 +39645 1 252 +914153 1 252 +859143 1 252 +1904616 1 251 +157692 1 251 +362257 1 251 +686597 1 251 +530584 1 251 +1580 1 251 +2017482 1 251 +2282738 1 251 +2071621 1 250 +251701 1 250 +247481 1 250 +1511 1 250 +2211210 1 250 +2571750 1 249 +2490851 1 249 +2051957 1 249 +1038856 1 249 +1932 1 248 +293387 1 248 +2074 1 248 +2672569 1 248 +2174846 1 248 +1938605 1 248 +543877 1 248 +1030157 1 247 +120683 1 247 +1737490 1 247 +2676077 1 247 +1286181 1 247 +201 1 247 +1769779 1 247 +1638 1 246 +680279 1 246 +225992 1 246 +216929 1 246 +1089444 1 246 +1642 1 246 +375286 1 246 +1694 1 246 +1827469 1 245 +2060726 1 245 +29542 1 245 +47490 1 245 +2587846 1 245 +28031 1 245 +670052 1 244 +290400 1 244 +55601 1 244 +1549858 1 244 +1031538 1 243 +39950 1 243 +702113 1 243 +1631871 1 243 +2060312 1 243 +486 1 242 +33033 1 242 +74033 1 242 +313598 1 242 +1938607 1 242 +2483799 1 241 +618 1 241 +1249552 1 241 +2498451 1 241 +75385 1 241 +162496 1 240 +708131 1 240 +1714848 1 240 +2652443 1 240 +691 1 240 +2511995 1 240 +2653200 1 240 +1054217 1 240 +571913 1 240 +1718 1 240 +1605891 1 239 +187491 1 238 +1879049 1 238 +1471761 1 238 +2686304 1 238 +134601 1 238 +1829 1 238 +1458355 1 238 +1302308 1 238 +1852373 1 238 +2009329 1 238 +43662 1 238 +2663022 1 238 +1641402 1 238 +1609758 1 238 +454662 1 237 +1850246 1 237 +212767 1 237 +2018065 1 237 +1486262 1 236 +1363 1 236 +1539298 1 236 +299262 1 236 +43767 1 236 +2493669 1 235 +1940762 1 235 +1940790 1 235 +254246 1 234 +1708 1 234 +755307 1 234 +1447062 1 234 +2675218 1 234 +171281 1 234 +68173 1 234 +1891644 1 234 +1978566 1 233 +134534 1 233 +1111760 1 232 +208439 1 232 +2587163 1 232 +262324 1 232 +33899 1 232 +266 1 232 +1182568 1 231 +61646 1 231 +2561924 1 231 +1458425 1 231 +263 1 231 +66821 1 230 +2589797 1 230 +56453 1 230 +2571749 1 230 +2562449 1 230 +2006 1 230 +1580596 1 230 +314282 1 229 +351671 1 229 +488729 1 229 +182773 1 228 +1376 1 228 +285570 1 228 +689 1 228 +1903686 1 228 +2653932 1 227 +395961 1 227 +261292 1 227 +2483798 1 227 +2596828 1 227 +1331258 1 227 +1827580 1 227 +1535768 1 227 +2599293 1 227 +1244531 1 227 +2558360 1 227 +2587865 1 227 +242600 1 227 +2587852 1 227 +1658672 1 226 +576611 1 226 +56460 1 226 +1758689 1 226 +1588 1 226 +37372 1 226 +29430 1 226 +66425 1 225 +2675219 1 225 +2546351 1 225 +93378 1 225 +2201350 1 225 +1704499 1 225 +1325564 1 224 +63 1 224 +1519 1 224 +2041 1 224 +28197 1 224 +1705310 1 224 +1677857 1 224 +712357 1 224 +200991 1 224 +153496 1 224 +1516059 1 224 +1795630 1 224 +2662361 1 223 +104336 1 223 +1548548 1 223 +414778 1 223 +1137606 1 223 +2610902 1 222 +571256 1 222 +1248727 1 222 +1920749 1 222 +2686363 1 222 +76258 1 222 +43990 1 222 +1670641 1 222 +2042057 1 222 +759620 1 221 +1069220 1 221 +48074 1 221 +904291 1 221 +317936 1 221 +1210884 1 221 +1390 1 221 +40576 1 221 +2234087 1 221 +571915 1 221 +152682 1 221 +152480 1 220 +2692425 1 220 +88688 1 220 +2610896 1 220 +391905 1 220 +1216932 1 220 +1884907 1 220 +1879050 1 219 +191292 1 219 +2173034 1 219 +2686094 1 219 +1601 1 219 +2210 1 219 +1766 1 219 +47716 1 219 +1932360 1 219 +2495582 1 219 +269673 1 218 +28084 1 218 +751944 1 218 +1470176 1 218 +141451 1 218 +658629 1 218 +1348114 1 218 +1581680 1 218 +199441 1 218 +28894 1 217 +1583 1 217 +663364 1 217 +1799 1 217 +1784719 1 217 +122355 1 217 +1945657 1 217 +2500532 1 217 +1333996 1 217 +1907 1 217 +2184053 1 216 +55583 1 216 +712122 1 216 +47850 1 216 +1630135 1 216 +1960083 1 216 +657387 1 216 +36813 1 215 +194963 1 215 +1338 1 215 +31958 1 214 +1109412 1 214 +1072256 1 214 +56458 1 214 +2603292 1 214 +33936 1 214 +2303332 1 214 +120107 1 213 +2600320 1 213 +1028416 1 213 +38301 1 213 +29447 1 213 +2496265 1 213 +2045452 1 213 +359 1 213 +2579977 1 213 +1389713 1 213 +262209 1 213 +109328 1 212 +29385 1 212 +1241979 1 212 +1548 1 212 +33905 1 212 +2582905 1 212 +1775 1 212 +588 1 212 +1523415 1 212 +1914461 1 211 +2358187 1 211 +92401 1 211 +37482 1 211 +1402861 1 211 +2202827 1 210 +2382163 1 210 +2493672 1 210 +2602067 1 209 +1632865 1 209 +2049881 1 209 +2601677 1 209 +1061 1 209 +2070347 1 209 +1503054 1 209 +86106 1 209 +2058625 1 209 +1134687 1 209 +2047966 1 208 +577 1 208 +1324350 1 208 +563 1 208 +1402135 1 208 +351679 1 208 +60481 1 208 +111781 1 208 +481146 1 208 +1678128 1 208 +298596 1 208 +1112 1 207 +81479 1 207 +877455 1 207 +119858 1 207 +28198 1 207 +240521 1 207 +44577 1 207 +1930275 1 207 +157779 1 207 +1658671 1 206 +1911587 1 206 +1444770 1 206 +79883 1 206 +1852377 1 206 +827 1 206 +1904441 1 205 +2419842 1 205 +2052837 1 205 +715 1 205 +1670800 1 205 +1763535 1 205 +1886 1 205 +363835 1 205 +2162 1 205 +139021 1 204 +222136 1 204 +106648 1 204 +1488 1 204 +308754 1 204 +160808 1 204 +182337 1 204 +1341 1 204 +1653831 1 203 +402297 1 203 +2607663 1 203 +1850250 1 203 +1749 1 203 +456327 1 203 +1396826 1 203 +246432 1 203 +47960 1 203 +244734 1 203 +1188319 1 203 +693444 1 202 +80852 1 202 +1941349 1 202 +375175 1 202 +2202826 1 202 +2004647 1 202 +1437453 1 202 +1808979 1 202 +582419 1 202 +1330330 1 202 +69539 1 201 +76832 1 201 +241244 1 201 +2020486 1 201 +930806 1 201 +2704467 1 201 +1307763 1 201 +645517 1 201 +2597659 1 201 +243899 1 200 +2560010 1 200 +2136401 1 200 +200 1 200 +342950 1 200 +1357915 1 200 +1589 1 200 +2003370 1 200 +72361 1 200 +399497 1 200 diff --git a/data/hmp_templates/gut/SRS105082-sim.yml b/data/hmp_templates/gut/SRS105082-sim.yml new file mode 100644 index 0000000..390737c --- /dev/null +++ b/data/hmp_templates/gut/SRS105082-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS105082-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS105082-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 14 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: False +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' diff --git a/data/hmp_templates/gut/SRS147088-sim.tsv b/data/hmp_templates/gut/SRS147088-sim.tsv new file mode 100644 index 0000000..80df8ed --- /dev/null +++ b/data/hmp_templates/gut/SRS147088-sim.tsv @@ -0,0 +1,227 @@ +TaxonomyInput nb_genomes Reads +1405 1 577345 +28116 1 501076 +28132 1 358855 +39778 1 260731 +145261 1 202977 +820 1 165607 +39777 1 94901 +488 1 93056 +246787 1 81882 +29466 1 79975 +729 1 61360 +821 1 57751 +371601 1 54708 +2610896 1 51949 +28449 1 50478 +1304 1 39916 +732 1 39709 +2528203 1 38069 +1177574 1 33830 +817 1 29707 +818 1 29376 +244366 1 27270 +2047 1 23242 +1660 1 23122 +43675 1 22501 +1796613 1 22111 +2663009 1 22071 +47678 1 21306 +199 1 20994 +357276 1 19951 +1305 1 19418 +1303 1 18558 +114527 1 16843 +495 1 16213 +712357 1 16147 +2109688 1 15145 +1017 1 14531 +712633 1 14138 +43768 1 14116 +484 1 13441 +550 1 13016 +823 1 11353 +1302 1 10943 +487 1 10817 +84135 1 10332 +28131 1 9721 +617123 1 9283 +1019 1 9228 +329854 1 7945 +589873 1 7239 +853 1 6671 +172042 1 6466 +544580 1 5015 +113107 1 5008 +860 1 4984 +33025 1 4733 +2093856 1 4594 +1655 1 4467 +28129 1 4410 +1335 1 4283 +851 1 4187 +95486 1 3904 +292800 1 3786 +28037 1 3579 +78535 1 3507 +28113 1 3474 +486 1 3278 +2109691 1 3220 +248315 1 3182 +539 1 2844 +1839799 1 2814 +1550024 1 2728 +1852377 1 2694 +2545799 1 2676 +1750 1 2672 +483 1 2550 +1313 1 2540 +109328 1 2415 +327575 1 2392 +28135 1 2322 +301301 1 2296 +28119 1 2233 +45634 1 2154 +1656 1 2144 +485 1 2111 +2382163 1 2106 +726 1 2038 +76123 1 1978 +712710 1 1887 +1583098 1 1886 +727 1 1877 +554406 1 1806 +2708117 1 1757 +47671 1 1725 +28118 1 1681 +1156433 1 1679 +68570 1 1529 +655307 1 1500 +739 1 1495 +1338 1 1472 +1526658 1 1429 +573 1 1330 +2598453 1 1315 +157687 1 1305 +1156431 1 1292 +489 1 1288 +52773 1 1275 +204 1 1252 +837 1 1224 +157688 1 1117 +33038 1 1097 +2572088 1 1075 +249188 1 1017 +2479767 1 991 +1496 1 971 +1316596 1 967 +216816 1 935 +1308 1 934 +1379 1 902 +1343 1 892 +29391 1 890 +2718 1 817 +649756 1 812 +39488 1 810 +2109690 1 784 +1307 1 773 +2585119 1 773 +208479 1 772 +2585118 1 764 +120683 1 759 +1433513 1 726 +504 1 703 +2025876 1 672 +1491 1 638 +713059 1 625 +1639133 1 593 +2086584 1 583 +1796646 1 583 +714 1 551 +1539298 1 543 +1328 1 538 +2662363 1 526 +1853278 1 515 +1912897 1 513 +712310 1 511 +157692 1 509 +29347 1 500 +33035 1 494 +1642647 1 483 +1834196 1 457 +2610895 1 449 +2093857 1 447 +2528037 1 446 +713030 1 421 +1871021 1 415 +824 1 414 +1352 1 412 +492 1 408 +33033 1 407 +562 1 400 +712991 1 399 +84030 1 399 +712538 1 398 +2666100 1 394 +2133944 1 379 +2606626 1 377 +239935 1 375 +326522 1 371 +1884263 1 364 +1316593 1 362 +185007 1 358 +1358 1 353 +1042156 1 352 +157691 1 350 +79329 1 347 +197 1 341 +1705617 1 332 +1351 1 329 +84112 1 325 +747 1 324 +2576606 1 324 +1853276 1 323 +2093742 1 322 +1796616 1 318 +1492 1 313 +1642646 1 308 +393921 1 306 +2584943 1 293 +1613 1 293 +326523 1 289 +2081702 1 287 +253 1 287 +76860 1 287 +493 1 269 +1297617 1 264 +1785996 1 262 +1314 1 261 +1340 1 256 +542 1 254 +36874 1 247 +1280 1 247 +221027 1 246 +1562970 1 245 +2696063 1 244 +45243 1 243 +28901 1 242 +1309 1 238 +1639 1 237 +1747 1 237 +1834207 1 235 +75985 1 233 +712122 1 233 +640030 1 231 +626937 1 222 +185008 1 216 +34085 1 215 +28091 1 214 +238 1 213 +1117645 1 213 +1624 1 212 +2109687 1 208 +57706 1 207 +1428 1 207 +1396 1 205 +1311 1 203 +1905 1 202 diff --git a/data/hmp_templates/gut/SRS147088-sim.yml b/data/hmp_templates/gut/SRS147088-sim.yml new file mode 100644 index 0000000..62c023e --- /dev/null +++ b/data/hmp_templates/gut/SRS147088-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS147088-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS147088-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 20 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS014475-sim.tsv b/data/hmp_templates/throat/SRS014475-sim.tsv new file mode 100644 index 0000000..2ee983e --- /dev/null +++ b/data/hmp_templates/throat/SRS014475-sim.tsv @@ -0,0 +1,72 @@ +TaxonomyInput nb_genomes Reads +487 1 183496 +727 1 88104 +729 1 15020 +28132 1 7641 +39778 1 6955 +1660 1 6904 +28037 1 5683 +2610896 1 5257 +712633 1 4542 +43675 1 4254 +39777 1 4247 +1177574 1 3514 +28449 1 3396 +589873 1 3244 +1304 1 3168 +2663009 1 3003 +1303 1 2499 +84135 1 2406 +28131 1 2330 +199 1 2314 +29466 1 2228 +486 1 2026 +1313 1 1942 +157687 1 1813 +655307 1 1615 +114527 1 1423 +43990 1 1409 +1379 1 1166 +485 1 1156 +484 1 1018 +113107 1 978 +483 1 906 +1433513 1 884 +495 1 832 +2047 1 806 +2598453 1 725 +1305 1 675 +28135 1 637 +617123 1 635 +489 1 635 +726 1 619 +78535 1 606 +52773 1 590 +851 1 589 +95486 1 582 +172042 1 574 +712357 1 537 +860 1 507 +488 1 507 +248315 1 504 +28129 1 492 +1302 1 446 +1852377 1 432 +45634 1 422 +33033 1 402 +539 1 392 +1747 1 380 +43768 1 353 +2382163 1 327 +712368 1 311 +735 1 295 +732 1 289 +249188 1 277 +40215 1 263 +29391 1 259 +712710 1 246 +1335 1 237 +712310 1 230 +1156431 1 226 +1839799 1 218 +1156433 1 216 diff --git a/data/hmp_templates/throat/SRS014475-sim.yml b/data/hmp_templates/throat/SRS014475-sim.yml new file mode 100644 index 0000000..e2432ca --- /dev/null +++ b/data/hmp_templates/throat/SRS014475-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS014475-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS014475-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 2 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS014689-sim.tsv b/data/hmp_templates/throat/SRS014689-sim.tsv new file mode 100644 index 0000000..81e300b --- /dev/null +++ b/data/hmp_templates/throat/SRS014689-sim.tsv @@ -0,0 +1,345 @@ +TaxonomyInput nb_genomes Reads +39778 1 888462 +28132 1 720463 +39777 1 516461 +1660 1 412854 +1177574 1 391624 +2663009 1 272429 +199 1 256635 +1304 1 227466 +29466 1 164345 +712633 1 141582 +2610896 1 132794 +28131 1 121362 +43675 1 109258 +860 1 101109 +114527 1 70255 +28037 1 52214 +1303 1 46609 +1852377 1 34681 +28135 1 34549 +729 1 33842 +84135 1 27456 +52773 1 25853 +851 1 23208 +1313 1 20308 +28129 1 20141 +1839799 1 17631 +113107 1 16979 +617123 1 15772 +76123 1 11464 +2598453 1 11198 +248315 1 10794 +1433513 1 9469 +1302 1 9185 +78535 1 8237 +157687 1 8196 +712538 1 8174 +1335 1 7862 +712357 1 6969 +1583098 1 6881 +713030 1 6698 +1379 1 6690 +2382163 1 6355 +1747 1 6247 +1308 1 5389 +2047 1 5114 +1156433 1 4377 +1156431 1 3977 +1305 1 3974 +29391 1 3819 +713059 1 3641 +28113 1 3295 +837 1 3134 +1343 1 3099 +1307 1 3077 +45634 1 2953 +33033 1 2827 +2144175 1 2766 +907 1 2651 +204 1 2644 +1328 1 2444 +2133944 1 2434 +1019 1 2257 +1338 1 2214 +1667168 1 2138 +712710 1 2138 +2572088 1 1949 +817 1 1858 +589873 1 1836 +726 1 1765 +727 1 1608 +712991 1 1586 +1017 1 1470 +1491 1 1398 +357276 1 1247 +28119 1 1185 +1884263 1 1179 +712310 1 1172 +43768 1 1161 +1661 1 1153 +853 1 1040 +1496 1 1019 +111015 1 1016 +1314 1 1007 +52774 1 983 +109328 1 980 +120683 1 948 +1309 1 945 +554406 1 938 +1351 1 909 +1659 1 889 +2708117 1 831 +84030 1 829 +859 1 820 +2606626 1 791 +1624 1 791 +1311 1 790 +39950 1 783 +34105 1 764 +824 1 753 +33035 1 720 +249188 1 712 +2109691 1 702 +185007 1 697 +2081702 1 693 +644007 1 682 +1334 1 675 +818 1 672 +1785996 1 671 +1358 1 658 +28188 1 647 +1656 1 644 +28264 1 625 +147802 1 618 +29347 1 608 +95486 1 604 +2576606 1 603 +1340 1 601 +44008 1 595 +1352 1 589 +172042 1 588 +821 1 576 +1539298 1 574 +1834196 1 569 +1639 1 567 +157691 1 558 +732 1 542 +52771 1 538 +87541 1 529 +1492 1 519 +157688 1 508 +544580 1 505 +2079536 1 502 +1042156 1 495 +2479767 1 494 +1280 1 492 +1428 1 487 +712122 1 486 +1502 1 479 +28116 1 479 +185008 1 478 +82348 1 477 +1655 1 474 +856 1 474 +33905 1 472 +1888195 1 470 +400065 1 469 +208479 1 463 +2057798 1 462 +562 1 462 +301301 1 458 +2560010 1 452 +39488 1 443 +2057800 1 442 +649756 1 439 +315405 1 438 +33040 1 432 +2545799 1 431 +28118 1 430 +2109690 1 429 +556499 1 426 +327575 1 419 +28901 1 415 +33945 1 415 +216816 1 410 +1598 1 404 +1310 1 402 +1744 1 401 +33038 1 399 +1825069 1 394 +303 1 393 +1903686 1 379 +2021971 1 378 +1348 1 378 +2702 1 372 +158847 1 370 +53346 1 368 +1349 1 368 +437897 1 362 +1851395 1 361 +2748 1 361 +1912795 1 360 +1346 1 360 +84112 1 359 +1260 1 353 +197 1 352 +1520 1 348 +210 1 345 +1960083 1 344 +74426 1 342 +1396 1 338 +712368 1 338 +1871021 1 338 +1703 1 337 +1354 1 335 +2093742 1 335 +1421 1 335 +64104 1 333 +1599 1 333 +1750 1 333 +1796646 1 331 +1855823 1 329 +246787 1 324 +2696063 1 318 +1465 1 313 +1282 1 312 +2718 1 312 +519472 1 311 +712411 1 311 +2751 1 310 +1501 1 308 +1316593 1 307 +393921 1 307 +1912897 1 301 +2571750 1 300 +1814128 1 299 +1329 1 298 +747 1 297 +149016 1 295 +2562451 1 295 +253 1 293 +1679721 1 293 +1705617 1 293 +739 1 292 +2081703 1 292 +1255 1 292 +54571 1 290 +89152 1 289 +2086584 1 287 +329854 1 285 +1796616 1 285 +656366 1 284 +820 1 283 +861 1 281 +1286 1 281 +1717 1 280 +1870984 1 278 +1404 1 278 +1423 1 275 +34085 1 273 +1406 1 272 +1534 1 272 +1571 1 272 +2420310 1 271 +1326 1 270 +47678 1 270 +76860 1 269 +1648 1 269 +36874 1 267 +46867 1 267 +2756 1 266 +33959 1 265 +1911586 1 265 +51669 1 264 +316 1 263 +157692 1 262 +1834207 1 260 +2051 1 259 +1736 1 258 +1871034 1 258 +1117645 1 256 +33010 1 256 +1282737 1 255 +2005703 1 254 +1316596 1 254 +1229621 1 254 +162 1 253 +1482 1 252 +53345 1 251 +294 1 251 +119206 1 251 +2564099 1 249 +731 1 248 +169679 1 247 +76853 1 247 +36745 1 246 +580165 1 246 +1397 1 245 +1590 1 244 +51665 1 244 +2364787 1 244 +1561 1 242 +1542 1 242 +287 1 241 +1270 1 241 +1076 1 241 +273384 1 241 +79329 1 240 +40324 1 240 +708126 1 237 +33970 1 237 +1681 1 236 +1505 1 236 +1630135 1 235 +1366 1 235 +2320868 1 234 +1785995 1 233 +47715 1 233 +1796613 1 233 +2582419 1 231 +2496265 1 231 +1685 1 230 +1363 1 230 +54005 1 230 +28038 1 228 +646413 1 226 +1393 1 225 +2572087 1 224 +1405 1 224 +2496867 1 223 +1712675 1 223 +237258 1 222 +1288 1 222 +200 1 221 +1564681 1 220 +1408 1 220 +1398 1 218 +1613 1 216 +738 1 216 +2161821 1 216 +1548 1 216 +43770 1 215 +417368 1 215 +1359 1 213 +1748 1 213 +2585118 1 212 +43674 1 212 +1245 1 211 +56 1 210 +1376 1 209 +1160721 1 209 +2697030 1 209 +2495645 1 208 +2589797 1 208 +47671 1 206 +239935 1 206 +996 1 205 +1852374 1 205 +2093856 1 204 +670 1 203 +2377 1 203 +584 1 202 +1513 1 201 +484770 1 201 +1642647 1 201 diff --git a/data/hmp_templates/throat/SRS014689-sim.yml b/data/hmp_templates/throat/SRS014689-sim.yml new file mode 100644 index 0000000..80be485 --- /dev/null +++ b/data/hmp_templates/throat/SRS014689-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS014689-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS014689-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 3 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS065335-sim.tsv b/data/hmp_templates/throat/SRS065335-sim.tsv new file mode 100644 index 0000000..4d4b195 --- /dev/null +++ b/data/hmp_templates/throat/SRS065335-sim.tsv @@ -0,0 +1,150 @@ +TaxonomyInput nb_genomes Reads +199 1 192015 +28132 1 166398 +28449 1 152986 +729 1 137771 +2663009 1 127771 +1177574 1 112863 +39777 1 83177 +157687 1 80753 +28131 1 59142 +39778 1 47565 +484 1 40427 +712633 1 36973 +860 1 26455 +712538 1 24642 +1304 1 21928 +713030 1 18449 +1019 1 18293 +1017 1 18033 +29466 1 16119 +43675 1 14898 +327575 1 12871 +2545799 1 12824 +249188 1 12660 +487 1 11500 +84135 1 11238 +851 1 8693 +1660 1 7735 +712357 1 7702 +617123 1 7434 +2610896 1 7114 +1280 1 7077 +28135 1 6870 +28037 1 6513 +28129 1 6457 +589873 1 5635 +726 1 4761 +1303 1 4547 +486 1 4404 +157691 1 4311 +495 1 4221 +727 1 4180 +76123 1 3889 +488 1 3441 +2708117 1 3432 +1313 1 3297 +837 1 3195 +483 1 3090 +95486 1 2776 +204 1 2525 +712368 1 2501 +113107 1 2491 +1583098 1 2461 +1884263 1 2216 +109328 1 2214 +712710 1 2208 +712310 1 2124 +1852377 1 2048 +732 1 2020 +2598453 1 1982 +248315 1 1834 +1316596 1 1763 +735 1 1697 +1785996 1 1678 +1335 1 1636 +485 1 1603 +1433513 1 1204 +1379 1 1190 +489 1 1174 +114527 1 1168 +157688 1 1084 +817 1 1068 +1539298 1 1058 +1302 1 1040 +120683 1 973 +78535 1 903 +1839799 1 888 +739 1 812 +713059 1 739 +1305 1 724 +554406 1 708 +2382163 1 701 +539 1 664 +28113 1 664 +29391 1 652 +2133944 1 645 +1491 1 594 +1705617 1 572 +52773 1 568 +28119 1 567 +1328 1 556 +1316593 1 556 +36874 1 554 +818 1 546 +357276 1 527 +821 1 514 +1308 1 506 +492670 1 502 +859 1 460 +853 1 440 +45634 1 440 +1244531 1 436 +747 1 421 +1156433 1 416 +1496 1 404 +33033 1 394 +393921 1 374 +731 1 372 +1156431 1 369 +1307 1 355 +712991 1 347 +2606626 1 342 +2572088 1 339 +45243 1 334 +1343 1 323 +172042 1 323 +738 1 314 +75985 1 311 +34085 1 305 +2576606 1 303 +189834 1 290 +28118 1 287 +157692 1 285 +1340 1 278 +28116 1 272 +1338 1 271 +1428 1 264 +2585118 1 256 +1585976 1 256 +196 1 253 +85404 1 247 +84030 1 239 +730 1 236 +824 1 230 +1492 1 222 +246787 1 222 +562 1 222 +2572087 1 217 +329854 1 216 +714 1 215 +715 1 214 +43768 1 214 +2666100 1 214 +820 1 212 +1502 1 211 +504 1 211 +1311 1 210 +28188 1 209 +1796646 1 203 +47678 1 203 diff --git a/data/hmp_templates/throat/SRS065335-sim.yml b/data/hmp_templates/throat/SRS065335-sim.yml new file mode 100644 index 0000000..10c2645 --- /dev/null +++ b/data/hmp_templates/throat/SRS065335-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS065335-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS065335-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 8 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS143032-sim.tsv b/data/hmp_templates/throat/SRS143032-sim.tsv new file mode 100644 index 0000000..28f1d0e --- /dev/null +++ b/data/hmp_templates/throat/SRS143032-sim.tsv @@ -0,0 +1,40 @@ +TaxonomyInput nb_genomes Reads +589873 1 63425 +573 1 40147 +145261 1 32069 +28132 1 9578 +1177574 1 7402 +95486 1 7006 +244366 1 6931 +39778 1 4146 +39777 1 3070 +821 1 3021 +1304 1 2976 +28116 1 2742 +729 1 2033 +562 1 1546 +43675 1 1453 +10847 1 1415 +29466 1 986 +199 1 903 +28131 1 835 +2610896 1 633 +28037 1 614 +735 1 600 +712633 1 565 +550 1 471 +371601 1 469 +357276 1 351 +1660 1 312 +542 1 275 +1303 1 267 +114527 1 263 +853 1 259 +488 1 237 +1313 1 234 +546 1 226 +726 1 220 +28135 1 220 +28129 1 219 +1526658 1 217 +1433513 1 205 diff --git a/data/hmp_templates/throat/SRS143032-sim.yml b/data/hmp_templates/throat/SRS143032-sim.yml new file mode 100644 index 0000000..c9d8ebf --- /dev/null +++ b/data/hmp_templates/throat/SRS143032-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS143032-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS143032-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 16 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS144378-sim.tsv b/data/hmp_templates/throat/SRS144378-sim.tsv new file mode 100644 index 0000000..f7cdff6 --- /dev/null +++ b/data/hmp_templates/throat/SRS144378-sim.tsv @@ -0,0 +1,86 @@ +TaxonomyInput nb_genomes Reads +589873 1 166850 +145261 1 74136 +1379 1 60158 +487 1 57190 +727 1 33120 +1328 1 19977 +28037 1 19459 +43675 1 14584 +95486 1 13951 +244366 1 13047 +1304 1 11645 +1313 1 9921 +39778 1 9651 +851 1 8656 +28131 1 8078 +1660 1 6504 +39777 1 6144 +732 1 5670 +2610896 1 5008 +28132 1 4365 +504 1 4351 +486 1 3869 +562 1 3689 +726 1 3638 +729 1 3582 +1177574 1 3316 +10847 1 3292 +1433513 1 2832 +29391 1 2604 +712633 1 2342 +29466 1 2342 +84135 1 2314 +1303 1 2309 +485 1 2156 +1338 1 2053 +33033 1 1994 +2598453 1 1899 +489 1 1767 +573 1 1508 +199 1 1421 +28449 1 1380 +157687 1 1340 +1335 1 1189 +2663009 1 1185 +483 1 1055 +550 1 1003 +539 1 932 +735 1 932 +113107 1 873 +484 1 726 +1747 1 687 +120683 1 677 +1839799 1 645 +488 1 627 +1343 1 618 +542 1 577 +114527 1 546 +1539298 1 536 +1316593 1 526 +1583098 1 524 +712310 1 486 +1705617 1 461 +1305 1 453 +52773 1 446 +495 1 440 +204 1 400 +1302 1 398 +76123 1 391 +1852377 1 360 +1308 1 348 +78535 1 337 +617123 1 324 +712538 1 276 +712357 1 251 +2382163 1 248 +2047 1 244 +837 1 243 +197575 1 237 +1613 1 232 +45634 1 222 +713030 1 210 +28135 1 208 +10665 1 207 +1307 1 201 +28129 1 200 diff --git a/data/hmp_templates/throat/SRS144378-sim.yml b/data/hmp_templates/throat/SRS144378-sim.yml new file mode 100644 index 0000000..b1b71cd --- /dev/null +++ b/data/hmp_templates/throat/SRS144378-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS144378-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS144378-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 18 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS097885-sim.tsv b/data/hmp_templates/vagina/SRS097885-sim.tsv new file mode 100644 index 0000000..a6367f8 --- /dev/null +++ b/data/hmp_templates/vagina/SRS097885-sim.tsv @@ -0,0 +1,27 @@ +TaxonomyInput nb_genomes Reads +47770 1 645231 +145261 1 33481 +244366 1 9698 +2047 1 6687 +589873 1 3662 +544580 1 3180 +1655 1 2838 +43768 1 2697 +1596 1 2601 +29466 1 1838 +1303 1 1461 +1587 1 1402 +1633 1 1281 +1305 1 1144 +1656 1 775 +1604 1 632 +2107999 1 417 +52242 1 413 +1302 1 400 +729 1 399 +172042 1 380 +573 1 342 +1579 1 291 +33959 1 240 +10847 1 213 +1747 1 210 diff --git a/data/hmp_templates/vagina/SRS097885-sim.yml b/data/hmp_templates/vagina/SRS097885-sim.yml new file mode 100644 index 0000000..e71fff1 --- /dev/null +++ b/data/hmp_templates/vagina/SRS097885-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS097885-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS097885-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 9 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS098585-sim.tsv b/data/hmp_templates/vagina/SRS098585-sim.tsv new file mode 100644 index 0000000..f585d16 --- /dev/null +++ b/data/hmp_templates/vagina/SRS098585-sim.tsv @@ -0,0 +1,108 @@ +TaxonomyInput nb_genomes Reads +47770 1 435567 +145261 1 277486 +244366 1 128739 +821 1 42560 +589873 1 33385 +853 1 19867 +28116 1 14696 +729 1 13129 +2528203 1 10992 +28037 1 9421 +823 1 7932 +1587 1 7815 +2109691 1 7022 +292800 1 3466 +1313 1 3445 +28118 1 3125 +1520 1 3042 +357276 1 3036 +818 1 3019 +726 1 3018 +371601 1 2922 +820 1 2843 +43768 1 2639 +2585119 1 2503 +817 1 2344 +1633 1 2284 +1433513 1 2242 +1303 1 2202 +28132 1 2155 +1177574 1 1775 +727 1 1616 +39778 1 1571 +1660 1 1506 +1379 1 1370 +29466 1 1266 +28131 1 1210 +2610895 1 1178 +39777 1 1120 +1304 1 1115 +2718 1 1076 +712310 1 1052 +2585118 1 1026 +199 1 1014 +2093856 1 997 +1604 1 976 +1796613 1 929 +43675 1 905 +2093857 1 853 +28135 1 853 +2598453 1 811 +2364787 1 760 +2109687 1 752 +2109688 1 749 +1550024 1 741 +157687 1 736 +2479767 1 687 +544580 1 667 +210 1 659 +95486 1 659 +74426 1 651 +2663009 1 643 +47678 1 602 +1305 1 598 +824 1 587 +1655 1 579 +1750 1 559 +301301 1 555 +2094242 1 552 +573 1 515 +68570 1 512 +851 1 505 +550 1 503 +28129 1 486 +246787 1 480 +2494234 1 470 +329854 1 459 +2610896 1 438 +2025876 1 402 +1309 1 401 +2133944 1 384 +712710 1 367 +39488 1 360 +1302 1 343 +1747 1 330 +712122 1 327 +113107 1 323 +52242 1 304 +1613 1 298 +712633 1 295 +28113 1 294 +33959 1 271 +1496 1 268 +2584944 1 266 +488 1 254 +76123 1 246 +249188 1 243 +114527 1 236 +1598 1 233 +837 1 227 +28119 1 227 +1852377 1 226 +1656 1 218 +33038 1 216 +2662363 1 215 +10847 1 208 +28449 1 202 +2584943 1 200 diff --git a/data/hmp_templates/vagina/SRS098585-sim.yml b/data/hmp_templates/vagina/SRS098585-sim.yml new file mode 100644 index 0000000..f9643cc --- /dev/null +++ b/data/hmp_templates/vagina/SRS098585-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS098585-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS098585-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 10 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS142957-sim.tsv b/data/hmp_templates/vagina/SRS142957-sim.tsv new file mode 100644 index 0000000..976cbc2 --- /dev/null +++ b/data/hmp_templates/vagina/SRS142957-sim.tsv @@ -0,0 +1,32 @@ +TaxonomyInput nb_genomes Reads +147802 1 853967 +109790 1 334437 +589873 1 14075 +145261 1 10395 +2057741 1 8965 +95486 1 8844 +47770 1 4935 +329 1 4050 +10847 1 1984 +1598 1 1962 +1197460 1 1335 +69666 1 810 +2702 1 702 +1679497 1 688 +2010972 1 648 +244366 1 638 +562 1 613 +492670 1 548 +366602 1 455 +87541 1 402 +1747 1 383 +155892 1 381 +88688 1 368 +1280 1 324 +69665 1 305 +69395 1 267 +1587 1 252 +2201350 1 245 +588932 1 241 +244734 1 231 +1076 1 203 diff --git a/data/hmp_templates/vagina/SRS142957-sim.yml b/data/hmp_templates/vagina/SRS142957-sim.yml new file mode 100644 index 0000000..59aeca9 --- /dev/null +++ b/data/hmp_templates/vagina/SRS142957-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS142957-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS142957-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 15 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS143776-sim.tsv b/data/hmp_templates/vagina/SRS143776-sim.tsv new file mode 100644 index 0000000..6c83565 --- /dev/null +++ b/data/hmp_templates/vagina/SRS143776-sim.tsv @@ -0,0 +1,42 @@ +TaxonomyInput nb_genomes Reads +47770 1 3095389 +145261 1 39245 +244366 1 30370 +589873 1 21670 +95486 1 18753 +863372 1 9461 +1587 1 6102 +40324 1 3943 +1604 1 3427 +1774 1 3306 +2082188 1 2922 +75588 1 2897 +1561223 1 2520 +2282124 1 1929 +52242 1 1768 +85698 1 1767 +550 1 1280 +1579 1 1214 +2107999 1 1184 +93064 1 1133 +33959 1 1118 +267818 1 802 +2690380 1 573 +1633 1 480 +1596 1 467 +1520670 1 410 +83683 1 381 +294 1 374 +47883 1 361 +109790 1 327 +217203 1 298 +32002 1 282 +964 1 265 +80842 1 254 +303 1 252 +1515612 1 243 +10847 1 242 +317 1 237 +82367 1 236 +1747 1 224 +68895 1 206 diff --git a/data/hmp_templates/vagina/SRS143776-sim.yml b/data/hmp_templates/vagina/SRS143776-sim.yml new file mode 100644 index 0000000..1acec0e --- /dev/null +++ b/data/hmp_templates/vagina/SRS143776-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS143776-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS143776-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 17 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS146847-sim.tsv b/data/hmp_templates/vagina/SRS146847-sim.tsv new file mode 100644 index 0000000..fe38785 --- /dev/null +++ b/data/hmp_templates/vagina/SRS146847-sim.tsv @@ -0,0 +1,40 @@ +TaxonomyInput nb_genomes Reads +147802 1 570559 +145261 1 48960 +589873 1 32189 +47770 1 17457 +244366 1 11516 +109790 1 11294 +95486 1 11123 +28037 1 8301 +1313 1 5946 +1774 1 3727 +134821 1 3127 +10847 1 2975 +1561223 1 2866 +562 1 2699 +1303 1 2660 +1311 1 2492 +1633 1 1719 +573 1 1153 +1596 1 1091 +1433513 1 1014 +2598453 1 905 +87541 1 858 +1870984 1 685 +550 1 635 +1328 1 585 +1520670 1 534 +1747 1 503 +1587 1 417 +1613 1 396 +542 1 395 +1314 1 358 +1379 1 323 +1282 1 307 +1260 1 281 +113107 1 245 +1598 1 245 +36809 1 222 +1526658 1 218 +236753 1 204 diff --git a/data/hmp_templates/vagina/SRS146847-sim.yml b/data/hmp_templates/vagina/SRS146847-sim.yml new file mode 100644 index 0000000..e6edb7b --- /dev/null +++ b/data/hmp_templates/vagina/SRS146847-sim.yml @@ -0,0 +1,20 @@ +input_table_path: SRS146847-sim.tsv +sd_read_num: 0 +replicates: 1 +community_name: SRS146847-sim +seq_tech: illumina +read_status: paired +illumina_sequencing_system: HS20 +illumina_read_len: 100 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 20 +set_seed: 19 +NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 +NCBI_email: farid.chaabane@unil.ch +complete_assemblies: True +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: 'None' \ No newline at end of file From 7a1c580420e82bc52890642bf7b98d6b8703f9f5 Mon Sep 17 00:00:00 2001 From: rdchaabafa Date: Wed, 23 Feb 2022 14:20:22 +0100 Subject: [PATCH 07/11] renamed headers --- README.md | 6 +++--- data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv | 2 +- data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv | 2 +- data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv | 2 +- data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv | 2 +- data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv | 2 +- data/hmp_templates/gut/SRS1041031-sim.tsv | 2 +- data/hmp_templates/gut/SRS1041129-sim.tsv | 2 +- data/hmp_templates/gut/SRS104912-sim.tsv | 2 +- data/hmp_templates/gut/SRS105082-sim.tsv | 2 +- data/hmp_templates/gut/SRS147088-sim.tsv | 2 +- data/hmp_templates/throat/SRS014475-sim.tsv | 2 +- data/hmp_templates/throat/SRS014689-sim.tsv | 2 +- data/hmp_templates/throat/SRS065335-sim.tsv | 2 +- data/hmp_templates/throat/SRS143032-sim.tsv | 2 +- data/hmp_templates/throat/SRS144378-sim.tsv | 2 +- data/hmp_templates/vagina/SRS097885-sim.tsv | 2 +- data/hmp_templates/vagina/SRS098585-sim.tsv | 2 +- data/hmp_templates/vagina/SRS142957-sim.tsv | 2 +- data/hmp_templates/vagina/SRS143776-sim.tsv | 2 +- data/hmp_templates/vagina/SRS146847-sim.tsv | 2 +- input_table.tsv | 2 +- mess/scripts/read_counts_table.py | 10 +++++----- 23 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 42e5f92..8cb8a3a 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ relative abundances. #### Read percentage Below is an example of input table where the user can set, for each entry, read percentages of the total metagenomic reads -TaxonomyInput | nb_genomes | PercentReads +Taxonomy | NbGenomes | PercentReads --- | --- | --- 1813735 | 1 | 0.3 114185 | 1 | 0.4 @@ -34,7 +34,7 @@ In the input table shown above, if no PercentReads is present, each entry will h #### Coverage values The user has also the option to set coverage values instead of %reads of the total metagenomic reads for each entry. -TaxonomyInput | nb_genomes | Coverage +Taxonomy | NbGenomes | Coverage --- | --- | --- 1813735 | 1 | 20 114185 | 1 | 30 @@ -45,7 +45,7 @@ In this case, all 3 assemblies found for ATCC_13985 will have the same coverage Alternatively, the user can specify relative proportions between assemblies. Given the total number of reads to be present in the metagenome, scripts will calculate coverage and read numbers respecting the relative proportions. -TaxonomyInput | nb_genomes | RelativeProp +Taxonomy | NbGenomes | RelativeProp --- | --- | --- 1813735 | 1 | 0.3 114185 | 1 | 0.4 diff --git a/data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv index a930789..cfeae8a 100644 --- a/data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv +++ b/data/hmp_templates/buccal_mucosa/SRS013506-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 28037 1 1314603 1313 1 505181 735 1 426416 diff --git a/data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv index a0d82db..5becdc3 100644 --- a/data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv +++ b/data/hmp_templates/buccal_mucosa/SRS019352-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 729 1 219672 1303 1 145134 28037 1 107476 diff --git a/data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv index 8fdccf0..d14327a 100644 --- a/data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv +++ b/data/hmp_templates/buccal_mucosa/SRS050029-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 729 1 4944831 43675 1 382534 28037 1 364409 diff --git a/data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv index 2c84ba3..50cf78a 100644 --- a/data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv +++ b/data/hmp_templates/buccal_mucosa/SRS054569-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 28037 1 2577789 1313 1 1010632 729 1 634149 diff --git a/data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv b/data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv index a6239be..c6b5f1b 100644 --- a/data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv +++ b/data/hmp_templates/buccal_mucosa/SRS056042-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 28037 1 83452 729 1 82441 1303 1 40384 diff --git a/data/hmp_templates/gut/SRS1041031-sim.tsv b/data/hmp_templates/gut/SRS1041031-sim.tsv index 2c3be07..0a1ccc1 100644 --- a/data/hmp_templates/gut/SRS1041031-sim.tsv +++ b/data/hmp_templates/gut/SRS1041031-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 47678 1 1160133 246787 1 972403 820 1 838086 diff --git a/data/hmp_templates/gut/SRS1041129-sim.tsv b/data/hmp_templates/gut/SRS1041129-sim.tsv index eca3355..5f3b862 100644 --- a/data/hmp_templates/gut/SRS1041129-sim.tsv +++ b/data/hmp_templates/gut/SRS1041129-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 821 1 1575095 820 1 741725 817 1 607591 diff --git a/data/hmp_templates/gut/SRS104912-sim.tsv b/data/hmp_templates/gut/SRS104912-sim.tsv index acaf0c5..b98e035 100644 --- a/data/hmp_templates/gut/SRS104912-sim.tsv +++ b/data/hmp_templates/gut/SRS104912-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 821 1 4835490 853 1 456702 820 1 412536 diff --git a/data/hmp_templates/gut/SRS105082-sim.tsv b/data/hmp_templates/gut/SRS105082-sim.tsv index 64a60c8..2711803 100644 --- a/data/hmp_templates/gut/SRS105082-sim.tsv +++ b/data/hmp_templates/gut/SRS105082-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 821 1 4203054 853 1 633163 818 1 478099 diff --git a/data/hmp_templates/gut/SRS147088-sim.tsv b/data/hmp_templates/gut/SRS147088-sim.tsv index 80df8ed..4af04b4 100644 --- a/data/hmp_templates/gut/SRS147088-sim.tsv +++ b/data/hmp_templates/gut/SRS147088-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 1405 1 577345 28116 1 501076 28132 1 358855 diff --git a/data/hmp_templates/throat/SRS014475-sim.tsv b/data/hmp_templates/throat/SRS014475-sim.tsv index 2ee983e..fbdd672 100644 --- a/data/hmp_templates/throat/SRS014475-sim.tsv +++ b/data/hmp_templates/throat/SRS014475-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 487 1 183496 727 1 88104 729 1 15020 diff --git a/data/hmp_templates/throat/SRS014689-sim.tsv b/data/hmp_templates/throat/SRS014689-sim.tsv index 81e300b..e79a001 100644 --- a/data/hmp_templates/throat/SRS014689-sim.tsv +++ b/data/hmp_templates/throat/SRS014689-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 39778 1 888462 28132 1 720463 39777 1 516461 diff --git a/data/hmp_templates/throat/SRS065335-sim.tsv b/data/hmp_templates/throat/SRS065335-sim.tsv index 4d4b195..2af92bf 100644 --- a/data/hmp_templates/throat/SRS065335-sim.tsv +++ b/data/hmp_templates/throat/SRS065335-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 199 1 192015 28132 1 166398 28449 1 152986 diff --git a/data/hmp_templates/throat/SRS143032-sim.tsv b/data/hmp_templates/throat/SRS143032-sim.tsv index 28f1d0e..9dc92c2 100644 --- a/data/hmp_templates/throat/SRS143032-sim.tsv +++ b/data/hmp_templates/throat/SRS143032-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 589873 1 63425 573 1 40147 145261 1 32069 diff --git a/data/hmp_templates/throat/SRS144378-sim.tsv b/data/hmp_templates/throat/SRS144378-sim.tsv index f7cdff6..24caf13 100644 --- a/data/hmp_templates/throat/SRS144378-sim.tsv +++ b/data/hmp_templates/throat/SRS144378-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 589873 1 166850 145261 1 74136 1379 1 60158 diff --git a/data/hmp_templates/vagina/SRS097885-sim.tsv b/data/hmp_templates/vagina/SRS097885-sim.tsv index a6367f8..e2f7dd6 100644 --- a/data/hmp_templates/vagina/SRS097885-sim.tsv +++ b/data/hmp_templates/vagina/SRS097885-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 47770 1 645231 145261 1 33481 244366 1 9698 diff --git a/data/hmp_templates/vagina/SRS098585-sim.tsv b/data/hmp_templates/vagina/SRS098585-sim.tsv index f585d16..f90bdd6 100644 --- a/data/hmp_templates/vagina/SRS098585-sim.tsv +++ b/data/hmp_templates/vagina/SRS098585-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 47770 1 435567 145261 1 277486 244366 1 128739 diff --git a/data/hmp_templates/vagina/SRS142957-sim.tsv b/data/hmp_templates/vagina/SRS142957-sim.tsv index 976cbc2..de3e4ff 100644 --- a/data/hmp_templates/vagina/SRS142957-sim.tsv +++ b/data/hmp_templates/vagina/SRS142957-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 147802 1 853967 109790 1 334437 589873 1 14075 diff --git a/data/hmp_templates/vagina/SRS143776-sim.tsv b/data/hmp_templates/vagina/SRS143776-sim.tsv index 6c83565..8860641 100644 --- a/data/hmp_templates/vagina/SRS143776-sim.tsv +++ b/data/hmp_templates/vagina/SRS143776-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 47770 1 3095389 145261 1 39245 244366 1 30370 diff --git a/data/hmp_templates/vagina/SRS146847-sim.tsv b/data/hmp_templates/vagina/SRS146847-sim.tsv index fe38785..9f57c20 100644 --- a/data/hmp_templates/vagina/SRS146847-sim.tsv +++ b/data/hmp_templates/vagina/SRS146847-sim.tsv @@ -1,4 +1,4 @@ -TaxonomyInput nb_genomes Reads +Taxonomy NbGenomes Reads 147802 1 570559 145261 1 48960 589873 1 32189 diff --git a/input_table.tsv b/input_table.tsv index cb2e2ea..3044928 100644 --- a/input_table.tsv +++ b/input_table.tsv @@ -1,3 +1,3 @@ -TaxonomyInput nb_genomes +Taxonomy NbGenomes 1813735 1 114185 1 diff --git a/mess/scripts/read_counts_table.py b/mess/scripts/read_counts_table.py index ebde6ff..832c1ca 100644 --- a/mess/scripts/read_counts_table.py +++ b/mess/scripts/read_counts_table.py @@ -107,12 +107,12 @@ def calculate_reads_and_coverage(table, total, sd, read_len, pairing, rep, input common_col = intb.columns.intersection(astb.columns)[0] assemblies_with_val = astb.merge(intb, how='left', on=f'{common_col}') if inputval == 'Coverage' or inputval == 'Reads': - assemblies_with_val[f'{inputval}'] = np.int64(assemblies_with_val[f'{inputval}']/assemblies_with_val['nb_genomes']) - assemblies_with_val.drop(columns='nb_genomes', inplace=True) + assemblies_with_val[f'{inputval}'] = np.int64(assemblies_with_val[f'{inputval}']/assemblies_with_val['NbGenomes']) + assemblies_with_val.drop(columns='NbGenomes', inplace=True) elif inputval == 'ReadPercent' or inputval == 'RelativeProp': assemblies_with_val[f'{inputval}'] = np.float64(assemblies_with_val[f'{inputval}'] / - assemblies_with_val['nb_genomes']) - assemblies_with_val.drop(columns='nb_genomes', inplace=True) + assemblies_with_val['NbGenomes']) + assemblies_with_val.drop(columns='NbGenomes', inplace=True) elif inputval == 'even': assemblies_with_val = get_even_dist(astb) elif inputval == 'lognormal': @@ -123,6 +123,6 @@ def calculate_reads_and_coverage(table, total, sd, read_len, pairing, rep, input if inputval == 'Coverage' or inputval == 'Reads': cov_read_tb = cov_read_tb.drop(inputval, axis=1) mergedtb = assemblies_with_val.merge(cov_read_tb, how='left', on='AssemblyNames') -rc_tb = mergedtb.drop(['AssemblyInput', 'GbUid', 'FtpPath_GenBank', 'FtpPath_RefSeq', 'AsmReleaseDate_GenBank', +rc_tb = mergedtb.drop(['GbUid', 'FtpPath_GenBank', 'FtpPath_RefSeq', 'AsmReleaseDate_GenBank', 'ContigN50', 'ScaffoldN50', 'Assembly_coverage', 'Contig_count'], axis=1) rc_tb.to_csv(snakemake.output["rc_table"], sep='\t', index=False) From 76154a38c202f5d7e4ced9a2b845189f2dc433e4 Mon Sep 17 00:00:00 2001 From: rdchaabafa Date: Wed, 23 Feb 2022 14:48:21 +0100 Subject: [PATCH 08/11] updated readme, and corrected params in yml --- README.md | 75 +++++++++++-------- config.yml | 2 +- .../buccal_mucosa/SRS013506-sim.yml | 6 +- .../buccal_mucosa/SRS019352-sim.yml | 6 +- .../buccal_mucosa/SRS050029-sim.yml | 6 +- .../buccal_mucosa/SRS054569-sim.yml | 6 +- .../buccal_mucosa/SRS056042-sim.yml | 6 +- data/hmp_templates/gut/SRS1041031-sim.yml | 6 +- data/hmp_templates/gut/SRS1041129-sim.yml | 6 +- data/hmp_templates/gut/SRS104912-sim.yml | 6 +- data/hmp_templates/gut/SRS105082-sim.yml | 6 +- data/hmp_templates/gut/SRS147088-sim.yml | 6 +- data/hmp_templates/throat/SRS014475-sim.yml | 6 +- data/hmp_templates/throat/SRS014689-sim.yml | 6 +- data/hmp_templates/throat/SRS065335-sim.yml | 6 +- data/hmp_templates/throat/SRS143032-sim.yml | 6 +- data/hmp_templates/throat/SRS144378-sim.yml | 6 +- data/hmp_templates/vagina/SRS097885-sim.yml | 6 +- data/hmp_templates/vagina/SRS098585-sim.yml | 6 +- data/hmp_templates/vagina/SRS142957-sim.yml | 6 +- data/hmp_templates/vagina/SRS143776-sim.yml | 6 +- data/hmp_templates/vagina/SRS146847-sim.yml | 6 +- 22 files changed, 106 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index 8cb8a3a..0f96634 100644 --- a/README.md +++ b/README.md @@ -4,22 +4,28 @@ [![container](https://quay.io/repository/biocontainers/mess/status)](https://quay.io/repository/biocontainers/mess) The Metagenomic Sequence Simulator (MeSS) is a snakemake workflow used for simulating metagenomic mock communities. -## Installation -```bash -git clone https://github.com/metagenlab/MeSS.git -conda env create -f Mess/messenv.yml -``` -### Or + +# Installation [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/mess/README.html) + +In order to quickly get going with MeSS I recommend using the conda package manager and specifically mamba (a fast alternative to conda): ```bash -conda install -c conda-forge mamba +conda install -n base -c conda-forge mamba mamba create -c bioconda -n mess mess ``` -## Required files -### Input table examples + +# Quick start +To run mess, you simply have to provide a config.yml file with a list of parameters (explained in more details below) +```bash +mess run -f config.yml -c 10 +``` +Examples of config.yml files are provided in data/hmp_templates (parameters are explained below). + +# Required files +## Input table examples MeSS takes the same input as Assembly_finder, with an additional column for either coverage values, read percentages or relative abundances. -#### Read percentage +### Read percentage Below is an example of input table where the user can set, for each entry, read percentages of the total metagenomic reads Taxonomy | NbGenomes | PercentReads @@ -31,7 +37,7 @@ ATCC_13985 | 3 | 0.3 If the percent read column is not present, MeSS will generate an even distribution within superkingdoms. In the input table shown above, if no PercentReads is present, each entry will have a read percentage of 20% (as all entries belong to the same superkingdom: bacteria) -#### Coverage values +### Coverage values The user has also the option to set coverage values instead of %reads of the total metagenomic reads for each entry. Taxonomy | NbGenomes | Coverage @@ -41,7 +47,7 @@ Taxonomy | NbGenomes | Coverage ATCC_13985 | 3 | 20 In this case, all 3 assemblies found for ATCC_13985 will have the same coverage value of 20 -#### Relative proportions +### Relative proportions Alternatively, the user can specify relative proportions between assemblies. Given the total number of reads to be present in the metagenome, scripts will calculate coverage and read numbers respecting the relative proportions. @@ -52,7 +58,20 @@ Taxonomy | NbGenomes | RelativeProp ATCC_13985 | 3 | 0.3 For ATCC_13985, the 3 genomes will have a RelativeProp value of 0.1. -### Config file example + +### Read counts + +Finally, the user can define the raw reads to simulate per genome as shown below: + + Taxonomy | NbGenomes | Reads +--- | --- | --- +1813735 | 1 | 10000 +114185 | 1 | 10000 +ATCC_13985 | 3 | 30000 + +For ATCC_13985, 10000 reads will be simulated for each genome + +# Config file example ```yaml #MeSS parameters input_table_path: input_table.tsv @@ -93,31 +112,31 @@ representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' +Rank_to_filter_by: False ``` -#### Mess parameters +## Mess parameters The path to the input table can be set by the input_table_path parameter in the config file as shown above. MeSS offers the possibility to generate multiple mock communities using the same set of assembly files in the same directory. For this, the user has to set up one configuration file per mock community and change the community_name accordingly. -#### Replicates parameters +### Replicates parameters The user has the option to te create a set of replicates for one community. Each replicate read number can be drawn from a normal distribution with a standard deviation set in the sd_read_num parameter. -#### Random seeds +### Random seeds The MeSS workflow uses random seeds for read generation and read shuffling. To ensure reproducible results, one can give the seed parameter a fixed number. -#### Sequencing run params +### Sequencing run params MeSS offers the possibility to select art_illumina or pbsim2 to simulate illumina and long reads respecitvely. In addition, read pairing and the total amount of reads can be set using the read_status and total_reads parameters. -#### Illumina (art params) +### Illumina (art params) MeSS uses [art_illumina](https://academic.oup.com/bioinformatics/article/28/4/593/213322) to generate illumina reads, and the user can change parameters like read and fragment length under the art_illumina params section as shown the yaml file above. -#### Long reads (pbsim2 params) +### Long reads (pbsim2 params) For long read simulation, [pbsim2](https://github.com/yukiteruono/pbsim2) was integrated in the pipeline. pbsim2 randomly samples reads from a reference sequence following a gamma distribution, and errors are introduced following FIC-HMM models for different chemistries. @@ -128,12 +147,12 @@ As for a Nanopore sequencing run using a R9.4 flowcell, the user can set the val For more details check [pbsim2's documentation](https://github.com/yukiteruono/pbsim2/blob/master/README.md) -#### Assembly download +### Assembly download MeSS uses [Assembly_finder](https://github.com/metagenlab/assembly_finder) to download genomes, and requires the user to have an NCBI account. For more details on Assembly_finder parameters check its documentation. -## Running MeSS -### Snakemake command +# Running MeSS +## Snakemake command Here is an example command to run MeSS on the previously described config and input table. ```bash snakemake --snakefile path/to/MeSS/Snakefile --configfile config.yml \ @@ -147,13 +166,9 @@ Thus, for big genomes it is recommended to lower this parameter. **parallel_cat** controlls the number of genomes to be concatenated in parallel. For big genomes and computers with low memory, lowering this parameter lowers memory usage. -### Using the wrapper -```bash -mess run -f config.yml -p /path/to/conda/envs/ -c 10 -``` -Runs the Mess workflow using 10 cores. -## MeSS outputs -### Directory structure + +# MeSS outputs +## Directory structure After running MeSS for two replicates of the same metagenome with single end reads, your working directory should look like this: ``` diff --git a/config.yml b/config.yml index adabfb8..524e667 100644 --- a/config.yml +++ b/config.yml @@ -51,7 +51,7 @@ Genbank_assemblies: True Refseq_assemblies: True ##Parameters for the filtering function -Rank_to_filter_by: 'None' +Rank_to_filter_by: False #None: Assemblies are ranked by their assembly status (complete or not) #and Refseq category (reference, representative ...) #If you want to filter by species, set this parameter to 'species'. The filtering function will list all unique species diff --git a/data/hmp_templates/buccal_mucosa/SRS013506-sim.yml b/data/hmp_templates/buccal_mucosa/SRS013506-sim.yml index 9950320..8b09848 100644 --- a/data/hmp_templates/buccal_mucosa/SRS013506-sim.yml +++ b/data/hmp_templates/buccal_mucosa/SRS013506-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 1 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS019352-sim.yml b/data/hmp_templates/buccal_mucosa/SRS019352-sim.yml index 384ae3a..2bad610 100644 --- a/data/hmp_templates/buccal_mucosa/SRS019352-sim.yml +++ b/data/hmp_templates/buccal_mucosa/SRS019352-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 4 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS050029-sim.yml b/data/hmp_templates/buccal_mucosa/SRS050029-sim.yml index 437e63d..9af601e 100644 --- a/data/hmp_templates/buccal_mucosa/SRS050029-sim.yml +++ b/data/hmp_templates/buccal_mucosa/SRS050029-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 5 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS054569-sim.yml b/data/hmp_templates/buccal_mucosa/SRS054569-sim.yml index 0a85edb..45f0a96 100644 --- a/data/hmp_templates/buccal_mucosa/SRS054569-sim.yml +++ b/data/hmp_templates/buccal_mucosa/SRS054569-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 6 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/buccal_mucosa/SRS056042-sim.yml b/data/hmp_templates/buccal_mucosa/SRS056042-sim.yml index 09e3eda..ef4896e 100644 --- a/data/hmp_templates/buccal_mucosa/SRS056042-sim.yml +++ b/data/hmp_templates/buccal_mucosa/SRS056042-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 7 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS1041031-sim.yml b/data/hmp_templates/gut/SRS1041031-sim.yml index 07c8db3..c929070 100644 --- a/data/hmp_templates/gut/SRS1041031-sim.yml +++ b/data/hmp_templates/gut/SRS1041031-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 11 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS1041129-sim.yml b/data/hmp_templates/gut/SRS1041129-sim.yml index 62ab56f..37bb44b 100644 --- a/data/hmp_templates/gut/SRS1041129-sim.yml +++ b/data/hmp_templates/gut/SRS1041129-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 12 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS104912-sim.yml b/data/hmp_templates/gut/SRS104912-sim.yml index ce15c9b..81919cb 100644 --- a/data/hmp_templates/gut/SRS104912-sim.yml +++ b/data/hmp_templates/gut/SRS104912-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 13 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/gut/SRS105082-sim.yml b/data/hmp_templates/gut/SRS105082-sim.yml index 390737c..7c4df6a 100644 --- a/data/hmp_templates/gut/SRS105082-sim.yml +++ b/data/hmp_templates/gut/SRS105082-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 14 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: False reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' +Rank_to_filter_by: False diff --git a/data/hmp_templates/gut/SRS147088-sim.yml b/data/hmp_templates/gut/SRS147088-sim.yml index 62c023e..ae158fe 100644 --- a/data/hmp_templates/gut/SRS147088-sim.yml +++ b/data/hmp_templates/gut/SRS147088-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 20 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS014475-sim.yml b/data/hmp_templates/throat/SRS014475-sim.yml index e2432ca..03cc88c 100644 --- a/data/hmp_templates/throat/SRS014475-sim.yml +++ b/data/hmp_templates/throat/SRS014475-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 2 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS014689-sim.yml b/data/hmp_templates/throat/SRS014689-sim.yml index 80be485..3fc4c9a 100644 --- a/data/hmp_templates/throat/SRS014689-sim.yml +++ b/data/hmp_templates/throat/SRS014689-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 3 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS065335-sim.yml b/data/hmp_templates/throat/SRS065335-sim.yml index 10c2645..b9b0463 100644 --- a/data/hmp_templates/throat/SRS065335-sim.yml +++ b/data/hmp_templates/throat/SRS065335-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 8 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS143032-sim.yml b/data/hmp_templates/throat/SRS143032-sim.yml index c9d8ebf..31a7131 100644 --- a/data/hmp_templates/throat/SRS143032-sim.yml +++ b/data/hmp_templates/throat/SRS143032-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 16 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/throat/SRS144378-sim.yml b/data/hmp_templates/throat/SRS144378-sim.yml index b1b71cd..51c73a9 100644 --- a/data/hmp_templates/throat/SRS144378-sim.yml +++ b/data/hmp_templates/throat/SRS144378-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 18 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS097885-sim.yml b/data/hmp_templates/vagina/SRS097885-sim.yml index e71fff1..084288f 100644 --- a/data/hmp_templates/vagina/SRS097885-sim.yml +++ b/data/hmp_templates/vagina/SRS097885-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 9 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS098585-sim.yml b/data/hmp_templates/vagina/SRS098585-sim.yml index f9643cc..81899ee 100644 --- a/data/hmp_templates/vagina/SRS098585-sim.yml +++ b/data/hmp_templates/vagina/SRS098585-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 10 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS142957-sim.yml b/data/hmp_templates/vagina/SRS142957-sim.yml index 59aeca9..8e69656 100644 --- a/data/hmp_templates/vagina/SRS142957-sim.yml +++ b/data/hmp_templates/vagina/SRS142957-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 15 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS143776-sim.yml b/data/hmp_templates/vagina/SRS143776-sim.yml index 1acec0e..c054e16 100644 --- a/data/hmp_templates/vagina/SRS143776-sim.yml +++ b/data/hmp_templates/vagina/SRS143776-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 17 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file diff --git a/data/hmp_templates/vagina/SRS146847-sim.yml b/data/hmp_templates/vagina/SRS146847-sim.yml index e6edb7b..759938c 100644 --- a/data/hmp_templates/vagina/SRS146847-sim.yml +++ b/data/hmp_templates/vagina/SRS146847-sim.yml @@ -9,12 +9,12 @@ illumina_read_len: 100 illumina_mean_frag_len: 200 illumina_sd_frag_len: 20 set_seed: 19 -NCBI_key: 8dcddb56e866bec9d21c8f9e70aa4e1e8309 -NCBI_email: farid.chaabane@unil.ch +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email complete_assemblies: True reference_assemblies: False representative_assemblies: False exclude_from_metagenomes: True Genbank_assemblies: True Refseq_assemblies: True -Rank_to_filter_by: 'None' \ No newline at end of file +Rank_to_filter_by: False \ No newline at end of file From 5fc236f9f43b1c42b1f5b1ef4d6c59413c2d6ea2 Mon Sep 17 00:00:00 2001 From: rdchaabafa Date: Wed, 23 Feb 2022 14:55:40 +0100 Subject: [PATCH 09/11] updated readme --- README.md | 87 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 0f96634..ec73e08 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,55 @@ mamba create -c bioconda -n mess mess ``` # Quick start -To run mess, you simply have to provide a config.yml file with a list of parameters (explained in more details below) +To run mess, you simply have to provide a config.yml file with a list of parameters: ```bash mess run -f config.yml -c 10 ``` Examples of config.yml files are provided in data/hmp_templates (parameters are explained below). +# Config file example +```yaml +#MeSS parameters +input_table_path: input_table.tsv +community_name: metagenome-sim +#Replicates parameters +replicates: 1 +sd_read_num: 0 + +#Random seeds +seed: 1 + +#Sequencing run params +seq_tech: ont #[illumina, ont, pacbio] +read_status: single +total_reads: 100000 + +#Illumina (art params) +illumina_sequencing_system: HSXt #HiSeqX TruSeq (read length:150bp) +illumina_read_len: 150 +illumina_mean_frag_len: 200 +illumina_sd_frag_len: 10 + +#Long reads (pbsim2 params) +chemistry: R94 +longreads_min_len: 100 +longreads_max_len: 1000000 +longreads_sd_len: 7000 +longreads_mean_len: 9000 +longreads_mean_acc: 85 +difference_ratio: "23:31:46" + +#Assembly download +NCBI_key: your_ncbi_key +NCBI_email: your_ncbi_email +complete_assemblies: False +reference_assemblies: False +representative_assemblies: False +exclude_from_metagenomes: True +Genbank_assemblies: True +Refseq_assemblies: True +Rank_to_filter_by: False +``` # Required files ## Input table examples MeSS takes the same input as Assembly_finder, with an additional column for either coverage values, read percentages or @@ -71,49 +114,7 @@ ATCC_13985 | 3 | 30000 For ATCC_13985, 10000 reads will be simulated for each genome -# Config file example -```yaml -#MeSS parameters -input_table_path: input_table.tsv -community_name: metagenome-sim -#Replicates parameters -replicates: 1 -sd_read_num: 0 - -#Random seeds -seed: 1 - -#Sequencing run params -seq_tech: ont #[illumina, ont, pacbio] -read_status: single -total_reads: 100000 - -#Illumina (art params) -illumina_sequencing_system: HSXt #HiSeqX TruSeq (read length:150bp) -illumina_read_len: 150 -illumina_mean_frag_len: 200 -illumina_sd_frag_len: 10 -#Long reads (pbsim2 params) -chemistry: R94 -longreads_min_len: 100 -longreads_max_len: 1000000 -longreads_sd_len: 7000 -longreads_mean_len: 9000 -longreads_mean_acc: 85 -difference_ratio: "23:31:46" - -#Assembly download -NCBI_key: your_ncbi_key -NCBI_email: your_ncbi_email -complete_assemblies: False -reference_assemblies: False -representative_assemblies: False -exclude_from_metagenomes: True -Genbank_assemblies: True -Refseq_assemblies: True -Rank_to_filter_by: False -``` ## Mess parameters The path to the input table can be set by the input_table_path parameter in the config file as shown above. From 04584f0ea0a9605a80f3d93d8cb5826fb24c0e33 Mon Sep 17 00:00:00 2001 From: rdchaabafa Date: Wed, 23 Feb 2022 14:58:41 +0100 Subject: [PATCH 10/11] added link to assembly_finder doc --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ec73e08..c6003f4 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ For more details check [pbsim2's documentation](https://github.com/yukiteruono/p ### Assembly download MeSS uses [Assembly_finder](https://github.com/metagenlab/assembly_finder) to download genomes, and requires the user -to have an NCBI account. For more details on Assembly_finder parameters check its documentation. +to have an NCBI account. For more details on Assembly_finder parameters check its [documentation](https://github.com/metagenlab/assembly_finder/blob/master/README.md). # Running MeSS ## Snakemake command From 2d8bbb1f0cc3d5b9074631993c92cbe22a252961 Mon Sep 17 00:00:00 2001 From: Trestan Pillonel Date: Wed, 23 Feb 2022 15:39:05 +0100 Subject: [PATCH 11/11] updated assembly download path, fixed missing AssemblyInput column --- mess/scripts/Snakefile | 6 +++--- mess/scripts/read_counts_table.py | 2 +- mess/scripts/simulate_reads.rules | 15 +++------------ 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/mess/scripts/Snakefile b/mess/scripts/Snakefile index 2d16c8e..2fa622e 100644 --- a/mess/scripts/Snakefile +++ b/mess/scripts/Snakefile @@ -36,9 +36,8 @@ def assembly_and_bam_list(wildcards): assemblies = expand(f'assembly_gz/{community_name}/{{i}}_genomic.fna', i=assemblynames) except KeyError: - checkpoint_output = checkpoints.download_assemblies.get(**wildcards).output[0] - directory = '/'.join((checkpoint_output.split('/')[0:2])) - assemblynames = glob_wildcards(os.path.join(directory,'{i}_genomic.fna.gz')).i + checkpoint_directory = f"assembly_gz/{community_name}/" + assemblynames = glob_wildcards(os.path.join(checkpoint_directory,'{i}_genomic.fna.gz')).i assemblies = expand(f'assembly_gz/{community_name}/{{i}}_genomic.fna.gz', i=assemblynames) @@ -48,6 +47,7 @@ def assembly_and_bam_list(wildcards): all = bam + assemblies else: all = assemblies + print("ALL", all) return all rule all_sim : diff --git a/mess/scripts/read_counts_table.py b/mess/scripts/read_counts_table.py index ebde6ff..2ab739d 100644 --- a/mess/scripts/read_counts_table.py +++ b/mess/scripts/read_counts_table.py @@ -123,6 +123,6 @@ def calculate_reads_and_coverage(table, total, sd, read_len, pairing, rep, input if inputval == 'Coverage' or inputval == 'Reads': cov_read_tb = cov_read_tb.drop(inputval, axis=1) mergedtb = assemblies_with_val.merge(cov_read_tb, how='left', on='AssemblyNames') -rc_tb = mergedtb.drop(['AssemblyInput', 'GbUid', 'FtpPath_GenBank', 'FtpPath_RefSeq', 'AsmReleaseDate_GenBank', +rc_tb = mergedtb.drop(['GbUid', 'FtpPath_GenBank', 'FtpPath_RefSeq', 'AsmReleaseDate_GenBank', 'ContigN50', 'ScaffoldN50', 'Assembly_coverage', 'Contig_count'], axis=1) rc_tb.to_csv(snakemake.output["rc_table"], sep='\t', index=False) diff --git a/mess/scripts/simulate_reads.rules b/mess/scripts/simulate_reads.rules index 445cd6c..4291088 100644 --- a/mess/scripts/simulate_reads.rules +++ b/mess/scripts/simulate_reads.rules @@ -82,7 +82,6 @@ rule decompress_assemblies: rule merge_contigs: - input: f'assembly_gz/{community_name}/{{assemblyname}}_genomic.fna' output: temp('assembly_gz/{community,[-_0-9a-zA-Z]+}-{assemblyname,[0-9a-zA-Z._-]+}.fa') @@ -103,7 +102,6 @@ if config['seq_tech']=='illumina': if config['read_status']=='paired': rule generate_illumina_paired_reads: - input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' @@ -133,7 +131,6 @@ if config['seq_tech']=='illumina': if config['read_status']=='single': rule generate_illumina_single_reads: - input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' @@ -166,7 +163,6 @@ if config['seq_tech']=='illumina': rule convert_sam_to_bam_files: - input: get_sam(config['read_status']) output: 'bam/{community,[-_0-9a-zA-Z]+}-{rep,[0-9]+}-{assemblyname,[0-9a-zA-Z._-]+}.bam' @@ -189,7 +185,6 @@ elif config['seq_tech']=='ont' or config['seq_tech']=='pacbio': return cov rule generate_long_reads: - input: fa='assembly_gz/{community}-{assemblyname}.fa', tab='readcounts-{community}-{rep}.tsv' output: temp('simreads/{community,[-_0-9a-zA-Z]+}-{rep,[0-9]+}-{assemblyname,[0-9a-zA-Z._-]+}_0001.fastq'), @@ -221,7 +216,6 @@ elif config['seq_tech']=='ont' or config['seq_tech']=='pacbio': --length-sd {params.sd_read_len} {input.fa} &> {log} """ rule convert_maf_to_bam_files: - input: 'simreads/{community}-{rep}-{assemblyname}_0001.maf' output: sam=temp('bam/{community,[-_0-9a-zA-Z]+}-{rep,[0-9]+}-{assemblyname,[0-9a-zA-Z._-]+}.sam'), @@ -241,9 +235,9 @@ def concat_fastq(wildcards): files = glob.glob(os.path.join(directory,'*_genomic.fna*')) assemblynames = [file.split('/')[-1].split('_genomic.fna')[0] for file in files] except KeyError: - checkpoint_output = checkpoints.download_assemblies.get(**wildcards).output[0] - directory = '/'.join((checkpoint_output.split('/')[0:2])) - assemblynames = glob_wildcards(os.path.join(directory,'{i}_genomic.fna.gz')).i + checkpoint_directory = f"assembly_gz/{community_name}/" + assemblynames = glob_wildcards(os.path.join(checkpoint_directory,'{i}_genomic.fna.gz')).i + print("assemblynames",assemblynames) if config['seq_tech'] == 'illumina': return expand('simreads/{community}-{{rep}}-{assemblyname}_{{rd}}.fq', community=community_name, assemblyname=assemblynames) @@ -267,7 +261,6 @@ rule concat_fastqs: rule shuffle_fastqs: - input: 'simreads/{community}-{rep}_{rd}.unshuffled.fq' output: 'simreads/{community}-{rep}_{rd}.fq.gz' @@ -287,7 +280,6 @@ rule shuffle_fastqs: rule get_taxonomic_profile: - input: reads='simreads/{community}-{rep}_{rd}.fq.gz', table='readcounts-{community}-{rep}.tsv' @@ -302,7 +294,6 @@ rule get_taxonomic_profile: rule create_krona_chart: - input: 'krona/{community}-{rep}_{rd}.txt' output: 'krona/{community}-{rep}_{rd}.html'