Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.1.2: Docker fix #43

Merged
merged 15 commits into from
Jun 28, 2023
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ workflow/test.txt

.tests/data/RPE-BM510/all_bak/
.tests/data/RPE-BM510/fastq/
.tests/data/RPE-BM510/multiqc/
.tests/data/RPE-BM510/predictions/
.tests/data/RPE-BM510/cell_selection/
.tests/data/RPE-BM510/all_BM/
Expand Down Expand Up @@ -208,3 +209,5 @@ workflow/data/scNOVA_zenodo_filelist.txt
workflow/data/mapping_counts_allchrs_hg38.txt
workflow/data/arbigent/scTRIP_segmentation.bed
!workflow/data/GC/*.txt.gz
.tests/data_CHR17/RPE-BM510/multiqc/
.tests/data_CHR17/RPE-BM510/bam_ashleys/
2 changes: 1 addition & 1 deletion .tests/config/simple_config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 2.1.1
version: 2.1.2
ashleys_pipeline_version: 2.1.1
#######################################
# MOSAICATCHER CONFIGURATION FILE. #
Expand Down
103 changes: 60 additions & 43 deletions afac/ucsc_vizu.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,58 +25,75 @@ def create_bed_row(row, category, color):
return f"{chrom}\t{start}\t{end}\t{category}\t{score}\t{strand}\t{start}\t{end}\t{color}\n"


def process_file(input_file, df_sv, output):
# Extract cell name
cell_name = os.path.basename(input_file).replace(".txt.percell.gz", "")
# def process_file(input_file, df_sv, output):

# Read the input gzipped file
df = pd.read_csv(input_file, sep="\t")

# Create separate DataFrames for 'c' and 'w' columns
df_c = df[["chrom", "start", "end", "c"]]
df_c["c"] = df_c["c"] * -1
df_w = df[["chrom", "start", "end", "w"]]

# Filter df_sv
df_sv_cell = df_sv.loc[df_sv["cell"] == cell_name]

with gzip.open(output, "at") as output_file:
output_file.write(
f"track type=bedGraph name={cell_name}_W maxHeightPixels=40 description=BedGraph_{cell_name}_w.sort.mdup.bam_allChr visibility=full color=244,163,97\n"
)
df_w.to_csv(output_file, compression="gzip", sep="\t", header=False, index=False, mode="a")

output_file.write(
f"track type=bedGraph name={cell_name}_C maxHeightPixels=40 description=BedGraph_{cell_name}_c.sort.mdup.bam_allChr visibility=full color=102,139,138\n"
)
df_c.to_csv(output_file, compression="gzip", sep="\t", header=False, index=False, mode="a")

output_file.write(f'track name="{cell_name}_SV" description="SV_call_name for cell {cell_name}" visibility=squish itemRgb="On"\n')
for _, row in df_sv_cell.iterrows():
bed_row = create_bed_row(row, row["sv_call_name"], row["color"])
output_file.write(bed_row)


def main(input_counts, input_sv_file_stringent, input_sv_file_lenient, output):
    """Append UCSC genome-browser custom tracks for each cell to a gzipped file.

    For every cell present in the mosaic count table, three tracks are written
    to *output*: a bedGraph of Watson ("w") counts, a bedGraph of negated
    Crick ("c") counts (negated so they plot below the axis), and a BED track
    of the stringent SV calls for that cell.

    Args:
        input_counts: path to a TSV count table with (at least) the columns
            chrom, start, end, c, w, cell.
        input_sv_file_stringent: TSV of stringent SV calls; must contain the
            columns cell, sv_call_name (plus whatever create_bed_row reads).
        input_sv_file_lenient: TSV of lenient SV calls. Currently read and
            filtered per cell, but the lenient track emission is disabled
            (kept for a future lenient track).
        output: path of the output file; opened with gzip in text-append mode,
            so repeated runs keep appending tracks.
    """

    def _load_sv_calls(path):
        # Read an SV call table, attach display colors (module-level `colors`
        # maps sv_call_name -> RGB string) and sort by cell for stable output.
        df = pd.read_csv(path, sep="\t")
        df["color"] = df["sv_call_name"].map(colors)
        return df.sort_values(by=["cell"])

    df_sv_stringent = _load_sv_calls(input_sv_file_stringent)
    df_sv_lenient = _load_sv_calls(input_sv_file_lenient)

    df_mosaic = pd.read_csv(input_counts, sep="\t")
    cell_list = df_mosaic.cell.unique().tolist()

    for cell_name in sorted(cell_list):
        df = df_mosaic.loc[df_mosaic["cell"] == cell_name]

        # Copy before negating: assigning into a slice of df_mosaic would
        # raise SettingWithCopyWarning and may silently not modify df_c.
        df_c = df[["chrom", "start", "end", "c"]].copy()
        df_c["c"] = df_c["c"] * -1
        df_w = df[["chrom", "start", "end", "w"]]

        # Per-cell SV calls. The lenient selection is computed but not written
        # out yet (lenient track output is intentionally disabled).
        df_sv_cell_stringent = df_sv_stringent.loc[df_sv_stringent["cell"] == cell_name]
        df_sv_cell_lenient = df_sv_lenient.loc[df_sv_lenient["cell"] == cell_name]  # noqa: F841

        with gzip.open(output, "at") as output_file:
            output_file.write(
                f"track type=bedGraph name={cell_name}_W maxHeightPixels=40 description=BedGraph_{cell_name}_w.sort.mdup.bam_allChr visibility=full color=244,163,97\n"
            )
            # output_file is already a gzip *text* handle: write plain text and
            # do NOT pass compression="gzip" here — pandas would gzip the
            # payload a second time, corrupting the track file.
            df_w.to_csv(output_file, sep="\t", header=False, index=False)

            output_file.write(
                f"track type=bedGraph name={cell_name}_C maxHeightPixels=40 description=BedGraph_{cell_name}_c.sort.mdup.bam_allChr visibility=full color=102,139,138\n"
            )
            df_c.to_csv(output_file, sep="\t", header=False, index=False)

            output_file.write(f'track name="{cell_name}_SV_stringent" description="Stringent - SV_call_name for cell {cell_name}" visibility=squish itemRgb="On"\n')
            for _, row in df_sv_cell_stringent.iterrows():
                output_file.write(create_bed_row(row, row["sv_call_name"], row["color"]))

if __name__ == "__main__":
    # Script entry point: four positional arguments are required
    # (counts table, stringent SV calls, lenient SV calls, output path).
    if len(sys.argv) != 5:
        print("Usage: python script.py <input_counts> <input_sv_stringent_file> <input_sv_lenient_file> <output_file>")
        sys.exit(1)

    counts_path, sv_stringent_path, sv_lenient_path, out_path = sys.argv[1:5]
    main(counts_path, sv_stringent_path, sv_lenient_path, out_path)
2 changes: 1 addition & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 2.1.1
version: 2.1.2
ashleys_pipeline_version: 2.1.1
#######################################
# MOSAICATCHER CONFIGURATION FILE #
Expand Down
Binary file modified docs/images/figure_pipeline.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/plots/alfred_devi.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/plots/alfred_dist.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
28 changes: 17 additions & 11 deletions github-actions-runner/Dockerfile-2.1.1.dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
FROM condaforge/mambaforge:latest
LABEL io.github.snakemake.containerized="true"
LABEL io.github.snakemake.conda_env_hash="f26d158ef605d3d70371155d9090a3c58ef6bd9e8f8e6b73731d17192f7e70ab"
LABEL io.github.snakemake.conda_env_hash="55c177ec267b6cafb7c46af6bd81eceaffe243d4e28a2a2434e5abddc1e8cff0"

# Step 1: Retrieve conda environments

# Conda environment:
# source: ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml
# source: https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_base.yaml
# prefix: /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a
# name: ashleys_base
# channels:
Expand All @@ -25,10 +25,10 @@ LABEL io.github.snakemake.conda_env_hash="f26d158ef605d3d70371155d9090a3c58ef6bd
# # MULTIQC
# - multiqc
RUN mkdir -p /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a
COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a/environment.yaml
ADD https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_base.yaml /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a/environment.yaml

# Conda environment:
# source: ../ashleys-qc-pipeline/workflow/envs/ashleys_rtools.yaml
# source: https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_rtools.yaml
# prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71
# name: rtools
# channels:
Expand Down Expand Up @@ -81,7 +81,7 @@ COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml /conda-envs/d7ae7fcf
# # SOLVE R lib issue
# - r-stringi=1.7.12
RUN mkdir -p /conda-envs/9b847fc31baae8e01dfb7ce438a56b71
COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml
ADD https://github.com/friendsofstrandseq/ashleys-qc-pipeline/raw/2.1.1/workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml

# Conda environment:
# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml
Expand Down Expand Up @@ -154,7 +154,7 @@ COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e7802

# Conda environment:
# source: workflow/envs/rtools.yaml
# prefix: /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4
# prefix: /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff
# name: rtools
# channels:
# - bioconda
Expand All @@ -167,8 +167,9 @@ COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e7802
# # ###############
# - bioconductor-biocparallel
# - bioconductor-bsgenome
# # - bioconductor-bsgenome.hsapiens.ucsc.hg19
# # - bioconductor-bsgenome.hsapiens.ucsc.hg38
# - bioconductor-bsgenome.hsapiens.ucsc.hg19
# - bioconductor-bsgenome.hsapiens.ucsc.hg38
# - bioconductor-bsgenome.mmusculus.ucsc.mm10
# - bioconductor-complexheatmap
# # - bioconductor-fastseg
# - bioconductor-genomicalignments
Expand Down Expand Up @@ -206,8 +207,8 @@ COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e7802
# - r-tidyr
# - r-ggbeeswarm
# - r-pheatmap
RUN mkdir -p /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4
COPY workflow/envs/rtools.yaml /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/environment.yaml
RUN mkdir -p /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff
COPY workflow/envs/rtools.yaml /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff/environment.yaml

# Step 2: Generate conda environments

Expand All @@ -217,5 +218,10 @@ RUN mamba env create --prefix /conda-envs/d7ae7fcf4adb54129dbf1b1e84ef888a --fil
mamba env create --prefix /conda-envs/08d4368302a4bdf7eda6b536495efe7d --file /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml && \
mamba env create --prefix /conda-envs/c80307395eddf442c2fb6870f40d822b --file /conda-envs/c80307395eddf442c2fb6870f40d822b/environment.yaml && \
mamba env create --prefix /conda-envs/f251d84cdc9f25d0e14b48e780261d66 --file /conda-envs/f251d84cdc9f25d0e14b48e780261d66/environment.yaml && \
mamba env create --prefix /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4 --file /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/environment.yaml && \
mamba env create --prefix /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff --file /conda-envs/5eb5026d8b42b407b8711e037d9cc4ff/environment.yaml && \
mamba clean --all -y


# # Custom Bsgenome R install
# COPY github-actions-runner/bioconductor_install.R /conda-envs/
# RUN chmod -R 0777 /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/lib/R/library && /conda-envs/91d5ffe2d429bcebd6bab78e9ca3a1d4/bin/Rscript /conda-envs/bioconductor_install.R
Loading