Skip to content

Commit

Permalink
Merge pull request #141 from Joon-Klaps/no-ref-hits
Browse files Browse the repository at this point in the history
Add logic to allow samples with no reference hits to be analysed
  • Loading branch information
Joon-Klaps committed Aug 8, 2024
2 parents 7e9367e + d5c6265 commit 680be05
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Initial release of Joon-Klaps/viralgenie, created with the [nf-core](https://nf-
- Add read & contig decomplexification using prinseq++ ([#133](https://github.com/Joon-Klaps/viralgenie/pull/133))
- Add option to filter contig clusters based on cumulative read coverage ([#138](https://github.com/Joon-Klaps/viralgenie/pull/138))
- Adding mash-screen output to result table ([#140](https://github.com/Joon-Klaps/viralgenie/pull/140))
- Add logic to allow samples with no reference hits to be analysed ([#141](https://github.com/Joon-Klaps/viralgenie/pull/141))

### `Fixed`

Expand Down
11 changes: 11 additions & 0 deletions bin/blast_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,15 @@ def main(argv=None):
"""Coordinate argument parsing and program execution."""
args = parse_args(argv)
logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")

if args.blast is None and args.contigs.is_file():
logger.warning(f"No blast input was provide, just copying input file.")
with open(args.contigs, "r") as contigs_file:
contig_content = contigs_file.read()
with open(f"{args.prefix}_withref.fa", "w") as f:
f.write(contig_content)
return 0

if not args.blast.is_file():
logger.error(f"The given input file {args.blast} was not found!")
sys.exit(2)
Expand All @@ -193,6 +202,8 @@ def main(argv=None):

write_hits(df_filter, args.contigs, args.references, args.prefix)

return 0


if __name__ == "__main__":
sys.exit(main())
18 changes: 14 additions & 4 deletions bin/custom_multiqc_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,14 +213,24 @@ def concat_table_files(table_files, **kwargs):
Returns:
pandas.DataFrame: The concatenated dataframe.
"""
df = pd.concat(
[
try:
valid_dfs = [
read_file_to_dataframe(file, **kwargs)
for file in table_files
if check_file_exists(file)
]
)
return df

if not valid_dfs:
logging.warning(f"Warning concatenating files: {table_files}")
logging.warning("No valid files found to concatenate.")
return pd.DataFrame()

df = pd.concat(valid_dfs)
return df

except ValueError as e:
logging.warning(f"Warning concatenating files: {table_files}")
return pd.DataFrame()


def read_in_quast(table_files):
Expand Down
1 change: 1 addition & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,7 @@ process {
}

withName: RENAME_FASTA_HEADER_SINGLETON {
ext.prefix = { "${meta.id}_singleton" } // DON'T CHANGE
publishDir = [
enabled: false
]
Expand Down
2 changes: 1 addition & 1 deletion conf/tests/test_fail_mapped.config
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ params {
mapping_constrains = "${projectDir}/assets/samplesheets/mapping_constrains_fail.tsv"

min_mapped_reads = 100
intermediate_mapping_stats = true
intermediate_mapping_stats = true
skip_checkv = true
}

Expand Down
10 changes: 6 additions & 4 deletions modules/local/blast_filter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ process BLAST_FILTER {
'biocontainers/mulled-v2-949aaaddebd054dc6bded102520daff6f0f93ce6:aa2a3707bfa0550fee316844baba7752eaab7802-0' }"

input:
tuple val(meta), path(blast), path(contigs)
tuple val(meta), path(blast)
tuple val(meta), path(contigs)
tuple val(meta2), path(db)

output:
tuple val(meta), path("*.hits.txt") , emit: hits
tuple val(meta), path("*.hits.txt") , emit: hits, optional: true
tuple val(meta), path("*.fa") , emit: sequence
tuple val(meta), path("*.filter.tsv"), emit: filter
tuple val(meta), path("*.filter.tsv"), emit: filter, optional: true
path "versions.yml" , emit: versions

when:
Expand All @@ -23,10 +24,11 @@ process BLAST_FILTER {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def blast_command = blast ? "-i ${blast}" : ""
"""
blast_filter.py \\
$args \\
-i ${blast} \\
${blast_command} \\
-c ${contigs} \\
-r ${db} \\
-p ${prefix}
Expand Down
12 changes: 9 additions & 3 deletions subworkflows/local/fasta_blast_refsel.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,17 @@ workflow FASTA_BLAST_REFSEL {
// Filter out false positive hits based on query length, alignment length, identity, e-score & bit-score
ch_blast_txt
.hits
.join(fasta, by:[0], remainder:false)
.set{ hits_contigs }
.join(fasta, by:[0], remainder:true)
.multiMap{
meta, txt, fasta ->
hits : [meta, txt ? txt : []]
contigs : [meta, fasta]
}
.set{input_blast_filter}

BLAST_FILTER (
hits_contigs,
input_blast_filter.hits,
input_blast_filter.contigs,
blast_db_fasta
)
ch_versions = ch_versions.mix(BLAST_FILTER.out.versions.first())
Expand Down

0 comments on commit 680be05

Please sign in to comment.