Watchdog update, jbrowse mode
weber8thomas committed Jan 2, 2024
1 parent 544673b commit 7d5db5e
Showing 6 changed files with 491 additions and 222 deletions.
537 changes: 318 additions & 219 deletions watchdog_pipeline/watchdog_pipeline.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions workflow/rules/aggregate_fct.smk
@@ -254,6 +254,7 @@ def selected_input_bam(wildcards):
)



def selected_input_bai(wildcards):
"""
Function based on checkpoint filter_bad_cells_from_mosaic_count
19 changes: 19 additions & 0 deletions workflow/rules/common.smk
Expand Up @@ -441,6 +441,11 @@ if config["scNOVA"] is True:
labels_path = "{folder}/{sample}/cell_selection/labels.tsv".format(
folder=config["data_location"], sample=sample
)

assert os.path.isfile(
labels_path
), "Ashleys labels were not computed yet, use first ashleys mode to perform cell selection"

# print(labels_path)
if os.path.exists(labels_path):
# Read df
@@ -843,6 +848,20 @@ def get_all_plots(wildcards):
sample=wildcards.sample,
),
)
l_outputs.extend(
expand(
"{folder}/{sample}/plots/UCSC/{sample}.bedUCSC.gz",
folder=config["data_location"],
sample=wildcards.sample,
),
)
l_outputs.extend(
expand(
"{folder}/{sample}/plots/JBROWSE/{sample}.ok",
folder=config["data_location"],
sample=wildcards.sample,
),
)

# Stats section

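As a rough illustration of what the two new get_all_plots targets resolve to, with placeholder values for the data location and sample name (not taken from this commit):

from snakemake.io import expand

# Placeholder values for illustration only
folder = "/data/runs"
sample = "sampleA"

expand("{folder}/{sample}/plots/UCSC/{sample}.bedUCSC.gz", folder=folder, sample=sample)
# -> ['/data/runs/sampleA/plots/UCSC/sampleA.bedUCSC.gz']
expand("{folder}/{sample}/plots/JBROWSE/{sample}.ok", folder=folder, sample=sample)
# -> ['/data/runs/sampleA/plots/JBROWSE/sampleA.ok']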
23 changes: 22 additions & 1 deletion workflow/rules/plots.smk
@@ -334,6 +334,27 @@ rule scTRIP_multiplot_aggr:
mem_mb=get_mem_mb,


rule jbrowse_genome_browser_file:
input:
counts="{folder}/{sample}/counts/{sample}.txt.gz",
stringent_calls=(
"{folder}/{sample}/mosaiclassifier/sv_calls/stringent_filterTRUE.tsv"
),
output:
"{folder}/{sample}/plots/JBROWSE/{sample}.ok",
log:
"{folder}/log/JBROWSE/{sample}.log",
conda:
"/g/korbel2/weber/miniconda3/envs/genome_browsing"
# "../envs/genome_browsing.yaml"
container:
None
resources:
mem_mb=get_mem_mb,
shell:
"python workflow/scripts/genome_browsing/generate_jbrowse_tracks.py {input.counts} {input.stringent_calls} {output} > {log}"


rule ucsc_genome_browser_file:
input:
counts="{folder}/{sample}/counts/{sample}.txt.gz",
@@ -346,7 +367,7 @@ rule ucsc_genome_browser_file:
output:
"{folder}/{sample}/plots/UCSC/{sample}.bedUCSC.gz",
log:
"{folder}/log/ucsc_genome_browser_file/{sample}.bedUCSC.gz",
"{folder}/log/ucsc_genome_browser_file/{sample}.bedUCSC.log",
conda:
"../envs/mc_base.yaml"
resources:
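The new jbrowse_genome_browser_file rule hands the counts table and the stringent SV calls to the script added below and touches a per-sample .ok flag. For reference, the same invocation could be reproduced outside Snakemake along these lines (the data location and sample name are illustrative placeholders, and the log redirection is omitted):

python workflow/scripts/genome_browsing/generate_jbrowse_tracks.py \
    /data/runs/sampleA/counts/sampleA.txt.gz \
    /data/runs/sampleA/mosaiclassifier/sv_calls/stringent_filterTRUE.tsv \
    /data/runs/sampleA/plots/JBROWSE/sampleA.ok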
113 changes: 113 additions & 0 deletions workflow/scripts/genome_browsing/generate_jbrowse_tracks.py
@@ -0,0 +1,113 @@
import pyBigWig
import pandas as pd
import sys, os
import subprocess
from pathlib import Path

# Assuming sv_cell_df already contains the 'sv_call_name' column


# Function to map sv_call_name to color
def map_color(sv_call_name):
colors = {
"none": "#F8F8F8",
"del_h1": "#77AADD",
"del_h2": "#4477AA",
"del_hom": "#114477",
"dup_h1": "#CC99BB",
"dup_h2": "#AA4488",
"dup_hom": "#771155",
"inv_h1": "#DDDD77",
"inv_h2": "#AAAA44",
"inv_hom": "#777711",
"idup_h1": "#DDAA77",
"idup_h2": "#AA7744",
"complex": "#774411",
}
return colors.get(sv_call_name, "#000000") # Default to black if not found


# Load read counts data
counts_file_init = pd.read_csv(sys.argv[1], sep="\t", compression="gzip")
chrom_size_df = counts_file_init.groupby("chrom")["end"].max().reset_index()

# Load SV data
sv_df = pd.read_csv(sys.argv[2], sep="\t")

output_file = sys.argv[3]
output_dir = "/".join(output_file.split("/")[:-1])

os.makedirs(output_dir, exist_ok=True)


# Process each cell
for cell in sorted(counts_file_init.cell.unique()):
print(cell)
# Filter read counts for the current cell; copy to avoid pandas SettingWithCopyWarning
counts_file = counts_file_init[counts_file_init["cell"] == cell].copy()
print(counts_file)
counts_file["w"] = counts_file["w"].astype(float)
# Negate Crick counts so Watson and Crick tracks diverge around zero in the browser
counts_file["c"] = counts_file["c"].astype(float) * -1

# Create BigWig for Watson counts
bw = pyBigWig.open(f"{output_dir}/{cell}-W.bigWig", "w")
bw.addHeader(list(chrom_size_df.itertuples(index=False, name=None)))
bw.addEntries(
counts_file.chrom.values.tolist(),
counts_file.start.values.tolist(),
ends=counts_file.end.values.tolist(),
values=counts_file.w.values.tolist(),
)
bw.close()

# Create BigWig for Crick counts
bw = pyBigWig.open(f"{output_dir}/{cell}-C.bigWig", "w")
bw.addHeader(list(chrom_size_df.itertuples(index=False, name=None)))
bw.addEntries(
counts_file.chrom.values.tolist(),
counts_file.start.values.tolist(),
ends=counts_file.end.values.tolist(),
values=counts_file.c.values.tolist(),
)
bw.close()

# Process SV data for the current cell; copy to avoid pandas SettingWithCopyWarning
sv_cell_df = sv_df[sv_df["cell"] == cell].copy()
# sv_cell_df = sv_cell_df[["chrom", "start", "end", "sv_call_name"]]

# Add a color column
sv_cell_df["color"] = sv_cell_df["sv_call_name"].apply(map_color)

# Select the relevant columns for the BED file
sv_cell_df = sv_cell_df[
[
"chrom",
"start",
"end",
"sv_call_name",
"color",
"sv_call_haplotype",
"llr_to_ref",
"af",
]
]

# Write to BED file
sv_filename = f"{output_dir}/{cell}-SV.bed"
sv_cell_df.to_csv(sv_filename, sep="\t", index=False, header=False)

# Compress the file using bgzip
compressed_filename = sv_filename + ".gz"
with open(compressed_filename, "wb") as compressed_fh:
    subprocess.run(["bgzip", "-c", sv_filename], stdout=compressed_fh, check=True)

# Index the compressed file using tabix
subprocess.run(["tabix", "-p", "bed", compressed_filename], check=True)

# Create BigWig for SV - this step might need adjustment based on SV data format
# bw = pyBigWig.open(sv_filename, "w")
# bw.addHeader(list(chrom_size_df.itertuples(index=False, name=None)))
# Additional logic for adding SV data to BigWig may be required here
# bw.close()

# Touch the flag file that Snakemake expects as the rule output
Path(output_file).touch()
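The commented-out block near the end only sketches a possible SV-to-BigWig step. If a numeric SV track were ever wanted alongside the BED track, one way to shape it with pyBigWig is below; the helper name and the numeric encoding of call classes are assumptions for illustration, not part of this commit:

import pyBigWig


def write_sv_bigwig(sv_cell_df, chrom_sizes, path):
    """Hypothetical helper: encode SV calls as a signed-value BigWig track."""
    # Arbitrary illustrative encoding: deletions negative, duplications positive
    codes = {
        "del_h1": -1.0, "del_h2": -1.0, "del_hom": -2.0,
        "dup_h1": 1.0, "dup_h2": 1.0, "dup_hom": 2.0,
    }
    # pyBigWig expects sorted, non-overlapping intervals per chromosome
    df = sv_cell_df.sort_values(["chrom", "start"])
    bw = pyBigWig.open(path, "w")
    bw.addHeader(chrom_sizes)  # list of (chrom, length) tuples, as in the main script
    bw.addEntries(
        df["chrom"].tolist(),
        df["start"].tolist(),
        ends=df["end"].tolist(),
        values=[codes.get(name, 0.0) for name in df["sv_call_name"]],
    )
    bw.close()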
20 changes: 18 additions & 2 deletions workflow/scripts/strandphaser_scripts/prepare_strandphaser.py
@@ -2,7 +2,15 @@
print("[General]", file=f)
print("numCPU = 1", file=f)
print("chromosomes = '" + snakemake.wildcards.chrom + "'", file=f)
print("pairedEndReads = '" + [e.strip() for e in open(snakemake.input.single_paired_end_detect, "r").readlines()][0] + "'", file=f)
print(
"pairedEndReads = '"
+ [
e.strip()
for e in open(snakemake.input.single_paired_end_detect, "r").readlines()
][0]
+ "'",
file=f,
)
print("min.mapq = 10", file=f)
print("", file=f)
print("[StrandPhaseR]", file=f)
@@ -16,5 +24,13 @@
print("compareSingleCells = FALSE", file=f)
print("callBreaks = FALSE", file=f)
print("exportVCF = '", snakemake.wildcards.sample, "'", sep="", file=f)
print("bsGenome = '", snakemake.config["references_data"][snakemake.config["reference"]]["R_reference"], "'", sep="", file=f)
print(
"bsGenome = '",
snakemake.config["references_data"][snakemake.config["reference"]][
"R_reference"
],
"'",
sep="",
file=f,
)
# print("bsGenome = '", snakemake.config["R_reference"], "'", sep="", file=f)
