Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logic to allow samples with no reference hits to be analysed #141

Merged
merged 4 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Initial release of Joon-Klaps/viralgenie, created with the [nf-core](https://nf-
- Add read & contig decomplexification using prinseq++ ([#133](https://github.com/Joon-Klaps/viralgenie/pull/133))
- Add option to filter contig clusters based on cumulative read coverage ([#138](https://github.com/Joon-Klaps/viralgenie/pull/138))
- Adding mash-screen output to result table ([#140](https://github.com/Joon-Klaps/viralgenie/pull/140))
- Add logic to allow samples with no reference hits to be analysed ([#141](https://github.com/Joon-Klaps/viralgenie/pull/141))

### `Fixed`

Expand Down
11 changes: 11 additions & 0 deletions bin/blast_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,15 @@ def main(argv=None):
"""Coordinate argument parsing and program execution."""
args = parse_args(argv)
logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")

if args.blast is None and args.contigs.is_file():
        logger.warning(f"No blast input was provided, just copying input file.")
with open(args.contigs, "r") as contigs_file:
contig_content = contigs_file.read()
with open(f"{args.prefix}_withref.fa", "w") as f:
f.write(contig_content)
return 0

if not args.blast.is_file():
logger.error(f"The given input file {args.blast} was not found!")
sys.exit(2)
Expand All @@ -193,6 +202,8 @@ def main(argv=None):

write_hits(df_filter, args.contigs, args.references, args.prefix)

return 0


if __name__ == "__main__":
sys.exit(main())
18 changes: 14 additions & 4 deletions bin/custom_multiqc_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,14 +213,24 @@ def concat_table_files(table_files, **kwargs):
Returns:
pandas.DataFrame: The concatenated dataframe.
"""
df = pd.concat(
[
try:
valid_dfs = [
read_file_to_dataframe(file, **kwargs)
for file in table_files
if check_file_exists(file)
]
)
return df

if not valid_dfs:
logging.warning(f"Warning concatenating files: {table_files}")
logging.warning("No valid files found to concatenate.")
return pd.DataFrame()

df = pd.concat(valid_dfs)
return df

except ValueError as e:
logging.warning(f"Warning concatenating files: {table_files}")
return pd.DataFrame()


def read_in_quast(table_files):
Expand Down
1 change: 1 addition & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,7 @@ process {
}

withName: RENAME_FASTA_HEADER_SINGLETON {
ext.prefix = { "${meta.id}_singleton" } // DON'T CHANGE
publishDir = [
enabled: false
]
Expand Down
2 changes: 1 addition & 1 deletion conf/tests/test_fail_mapped.config
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ params {
mapping_constrains = "${projectDir}/assets/samplesheets/mapping_constrains_fail.tsv"

min_mapped_reads = 100
intermediate_mapping_stats = true
intermediate_mapping_stats = true
skip_checkv = true
}

Expand Down
10 changes: 6 additions & 4 deletions modules/local/blast_filter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ process BLAST_FILTER {
'biocontainers/mulled-v2-949aaaddebd054dc6bded102520daff6f0f93ce6:aa2a3707bfa0550fee316844baba7752eaab7802-0' }"

input:
tuple val(meta), path(blast), path(contigs)
tuple val(meta), path(blast)
tuple val(meta), path(contigs)
tuple val(meta2), path(db)

output:
tuple val(meta), path("*.hits.txt") , emit: hits
tuple val(meta), path("*.hits.txt") , emit: hits, optional: true
tuple val(meta), path("*.fa") , emit: sequence
tuple val(meta), path("*.filter.tsv"), emit: filter
tuple val(meta), path("*.filter.tsv"), emit: filter, optional: true
path "versions.yml" , emit: versions

when:
Expand All @@ -23,10 +24,11 @@ process BLAST_FILTER {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def blast_command = blast ? "-i ${blast}" : ""
"""
blast_filter.py \\
$args \\
-i ${blast} \\
${blast_command} \\
-c ${contigs} \\
-r ${db} \\
-p ${prefix}
Expand Down
12 changes: 9 additions & 3 deletions subworkflows/local/fasta_blast_refsel.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,17 @@ workflow FASTA_BLAST_REFSEL {
// Filter out false positve hits that based on query length, alignment length, identity, e-score & bit-score
ch_blast_txt
.hits
.join(fasta, by:[0], remainder:false)
.set{ hits_contigs }
.join(fasta, by:[0], remainder:true)
.multiMap{
meta, txt, fasta ->
hits : [meta, txt ? txt : []]
contigs : [meta, fasta]
}
.set{input_blast_filter}

BLAST_FILTER (
hits_contigs,
input_blast_filter.hits,
input_blast_filter.contigs,
blast_db_fasta
)
ch_versions = ch_versions.mix(BLAST_FILTER.out.versions.first())
Expand Down
Loading