Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.1.2: Docker fix #43

Merged
merged 15 commits into from
Jun 28, 2023
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ workflow/test.txt

.tests/data/RPE-BM510/all_bak/
.tests/data/RPE-BM510/fastq/
.tests/data/RPE-BM510/multiqc/
.tests/data/RPE-BM510/predictions/
.tests/data/RPE-BM510/cell_selection/
.tests/data/RPE-BM510/all_BM/
Expand Down Expand Up @@ -208,3 +209,5 @@ workflow/data/scNOVA_zenodo_filelist.txt
workflow/data/mapping_counts_allchrs_hg38.txt
workflow/data/arbigent/scTRIP_segmentation.bed
!workflow/data/GC/*.txt.gz
.tests/data_CHR17/RPE-BM510/multiqc/
.tests/data_CHR17/RPE-BM510/bam_ashleys/
2 changes: 1 addition & 1 deletion .tests/config/simple_config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 2.1.1
version: 2.1.2
ashleys_pipeline_version: 2.1.1
#######################################
# MOSAICATCHER CONFIGURATION FILE. #
Expand Down
103 changes: 60 additions & 43 deletions afac/ucsc_vizu.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,58 +25,75 @@ def create_bed_row(row, category, color):
return f"{chrom}\t{start}\t{end}\t{category}\t{score}\t{strand}\t{start}\t{end}\t{color}\n"


def process_file(input_file, df_sv, output):
# Extract cell name
cell_name = os.path.basename(input_file).replace(".txt.percell.gz", "")
# def process_file(input_file, df_sv, output):

# Read the input gzipped file
df = pd.read_csv(input_file, sep="\t")

# Create separate DataFrames for 'c' and 'w' columns
df_c = df[["chrom", "start", "end", "c"]]
df_c["c"] = df_c["c"] * -1
df_w = df[["chrom", "start", "end", "w"]]

# Filter df_sv
df_sv_cell = df_sv.loc[df_sv["cell"] == cell_name]

with gzip.open(output, "at") as output_file:
output_file.write(
f"track type=bedGraph name={cell_name}_W maxHeightPixels=40 description=BedGraph_{cell_name}_w.sort.mdup.bam_allChr visibility=full color=244,163,97\n"
)
df_w.to_csv(output_file, compression="gzip", sep="\t", header=False, index=False, mode="a")

output_file.write(
f"track type=bedGraph name={cell_name}_C maxHeightPixels=40 description=BedGraph_{cell_name}_c.sort.mdup.bam_allChr visibility=full color=102,139,138\n"
)
df_c.to_csv(output_file, compression="gzip", sep="\t", header=False, index=False, mode="a")

output_file.write(f'track name="{cell_name}_SV" description="SV_call_name for cell {cell_name}" visibility=squish itemRgb="On"\n')
for _, row in df_sv_cell.iterrows():
bed_row = create_bed_row(row, row["sv_call_name"], row["color"])
output_file.write(bed_row)


def main(input_counts, input_sv_file_stringent, input_sv_file_lenient, output):
    """Append UCSC genome-browser custom tracks for each cell to a gzipped file.

    For every cell present in the mosaic count table, three tracks are written
    to *output*: a bedGraph of Watson ("w") counts, a bedGraph of negated
    Crick ("c") counts (negated so they plot below the axis), and a BED track
    of the stringent SV calls for that cell.

    Args:
        input_counts: path to a TSV count table with (at least) the columns
            chrom, start, end, c, w, cell.
        input_sv_file_stringent: TSV of stringent SV calls; must contain the
            columns cell, sv_call_name (plus whatever create_bed_row reads).
        input_sv_file_lenient: TSV of lenient SV calls. Currently read and
            filtered per cell, but the lenient track emission is disabled
            (kept for a future lenient track).
        output: path of the output file; opened with gzip in text-append mode,
            so repeated runs keep appending tracks.
    """

    def _load_sv_calls(path):
        # Read an SV call table, attach display colors (module-level `colors`
        # maps sv_call_name -> RGB string) and sort by cell for stable output.
        df = pd.read_csv(path, sep="\t")
        df["color"] = df["sv_call_name"].map(colors)
        return df.sort_values(by=["cell"])

    df_sv_stringent = _load_sv_calls(input_sv_file_stringent)
    df_sv_lenient = _load_sv_calls(input_sv_file_lenient)

    df_mosaic = pd.read_csv(input_counts, sep="\t")
    cell_list = df_mosaic.cell.unique().tolist()

    for cell_name in sorted(cell_list):
        df = df_mosaic.loc[df_mosaic["cell"] == cell_name]

        # Copy before negating: assigning into a slice of df_mosaic would
        # raise SettingWithCopyWarning and may silently not modify df_c.
        df_c = df[["chrom", "start", "end", "c"]].copy()
        df_c["c"] = df_c["c"] * -1
        df_w = df[["chrom", "start", "end", "w"]]

        # Per-cell SV calls. The lenient selection is computed but not written
        # out yet (lenient track output is intentionally disabled).
        df_sv_cell_stringent = df_sv_stringent.loc[df_sv_stringent["cell"] == cell_name]
        df_sv_cell_lenient = df_sv_lenient.loc[df_sv_lenient["cell"] == cell_name]  # noqa: F841

        with gzip.open(output, "at") as output_file:
            output_file.write(
                f"track type=bedGraph name={cell_name}_W maxHeightPixels=40 description=BedGraph_{cell_name}_w.sort.mdup.bam_allChr visibility=full color=244,163,97\n"
            )
            # output_file is already a gzip *text* handle: write plain text and
            # do NOT pass compression="gzip" here — pandas would gzip the
            # payload a second time, corrupting the track file.
            df_w.to_csv(output_file, sep="\t", header=False, index=False)

            output_file.write(
                f"track type=bedGraph name={cell_name}_C maxHeightPixels=40 description=BedGraph_{cell_name}_c.sort.mdup.bam_allChr visibility=full color=102,139,138\n"
            )
            df_c.to_csv(output_file, sep="\t", header=False, index=False)

            output_file.write(f'track name="{cell_name}_SV_stringent" description="Stringent - SV_call_name for cell {cell_name}" visibility=squish itemRgb="On"\n')
            for _, row in df_sv_cell_stringent.iterrows():
                output_file.write(create_bed_row(row, row["sv_call_name"], row["color"]))

if __name__ == "__main__":
    # Script entry point: four positional arguments are required
    # (counts table, stringent SV calls, lenient SV calls, output path).
    if len(sys.argv) != 5:
        print("Usage: python script.py <input_counts> <input_sv_stringent_file> <input_sv_lenient_file> <output_file>")
        sys.exit(1)

    counts_path, sv_stringent_path, sv_lenient_path, out_path = sys.argv[1:5]
    main(counts_path, sv_stringent_path, sv_lenient_path, out_path)
2 changes: 1 addition & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 2.1.1
version: 2.1.2
ashleys_pipeline_version: 2.1.1
#######################################
# MOSAICATCHER CONFIGURATION FILE #
Expand Down
Binary file modified docs/images/figure_pipeline.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/plots/alfred_devi.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/plots/alfred_dist.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
28 changes: 17 additions & 11 deletions github-actions-runner/Dockerfile-2.1.1.dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
FROM condaforge/mambaforge:latest
LABEL io.github.snakemake.containerized="true"
LABEL io.github.snakemake.conda_env_hash="f26d158ef605d3d70371155d9090a3c58ef6bd9e8f8e6b73731d17192f7e70ab"
LABEL io.github.snakemake.conda_env_hash="55c177ec267b6cafb7c46af6bd81eceaffe243d4e28a2a2434e5abddc1e8cff0"

# Step 1: Retrieve conda environments

# Conda environment:
# source: ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml
# source: https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_base.yaml
# prefix: /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a
# name: ashleys_base
# channels:
Expand All @@ -25,10 +25,10 @@ LABEL io.github.snakemake.conda_env_hash="f26d158ef605d3d70371155d9090a3c58ef6bd
# # MULTIQC
# - multiqc
RUN mkdir -p /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a
COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a/environment.yaml
ADD https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_base.yaml /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a/environment.yaml

# Conda environment:
# source: ../ashleys-qc-pipeline/workflow/envs/ashleys_rtools.yaml
# source: https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_rtools.yaml
# prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71
# name: rtools
# channels:
Expand Down Expand Up @@ -81,7 +81,7 @@ COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml /conda-envs/d7ae7fcf
# # SOLVE R lib issue
# - r-stringi=1.7.12
RUN mkdir -p /conda-envs/9b847fc31baae8e01dfb7ce438a56b71
COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml
ADD https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml

# Conda environment:
# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml
Expand Down Expand Up @@ -154,7 +154,7 @@ COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e7802

# Conda environment:
# source: workflow/envs/rtools.yaml
# prefix: /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4
# prefix: /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff
# name: rtools
# channels:
# - bioconda
Expand All @@ -167,8 +167,9 @@ COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e7802
# # ###############
# - bioconductor-biocparallel
# - bioconductor-bsgenome
# # - bioconductor-bsgenome.hsapiens.ucsc.hg19
# # - bioconductor-bsgenome.hsapiens.ucsc.hg38
# - bioconductor-bsgenome.hsapiens.ucsc.hg19
# - bioconductor-bsgenome.hsapiens.ucsc.hg38
# - bioconductor-bsgenome.mmusculus.ucsc.mm10
# - bioconductor-complexheatmap
# # - bioconductor-fastseg
# - bioconductor-genomicalignments
Expand Down Expand Up @@ -206,8 +207,8 @@ COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e7802
# - r-tidyr
# - r-ggbeeswarm
# - r-pheatmap
RUN mkdir -p /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4
COPY workflow/envs/rtools.yaml /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/environment.yaml
RUN mkdir -p /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff
COPY workflow/envs/rtools.yaml /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff/environment.yaml

# Step 2: Generate conda environments

Expand All @@ -217,5 +218,10 @@ RUN mamba env create --prefix /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a --fil
mamba env create --prefix /conda-envs/08d4368302a4bdf7eda6b536495efe7d --file /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml && \
mamba env create --prefix /conda-envs/c80307395eddf442c2fb6870f40d822b --file /conda-envs/c80307395eddf442c2fb6870f40d822b/environment.yaml && \
mamba env create --prefix /conda-envs/f251d84cdc9f25d0e14b48e780261d66 --file /conda-envs/f251d84cdc9f25d0e14b48e780261d66/environment.yaml && \
mamba env create --prefix /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4 --file /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/environment.yaml && \
mamba env create --prefix /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff --file /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff/environment.yaml && \
mamba clean --all -y


# # Custom Bsgenome R install
# COPY github-actions-runner/bioconductor_install.R /conda-envs/
# RUN chmod -R 0777 /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/lib/R/library && /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/bin/Rscript /conda-envs/bioconductor_install.R
Loading