Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) Snakemake workflow update #206

Merged
merged 16 commits into from
Feb 27, 2024
Merged
17 changes: 13 additions & 4 deletions workflows/download.smk
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
import os

from shared.functions import get_git_directory


# Workflow-specific settings
configfile: "example_configs/download_config.yaml"
# All available datasets are listed in this second config file
configfile: "path_configs/datasets.yaml"


print("Run Download Workflow")

# Resolve the GitHub repository directory from the loaded config
GIT_DIR = get_git_directory(config)

# Remove the "datasets" entry from config and keep it in its own variable
datasets = config.pop("datasets")


# Get all the dataset folders
# Input function for `rule all`: builds one path per dataset name by joining
# config["results_dir"] and the dataset name with "/", and returns the list.
# The `wildcards` argument is not used in the body.
def get_all_input(wildcards):
all_folder = []
# NOTE(review): this listing shows both sides of a diff with indentation
# stripped. The two consecutive loop headers below are presumably the removed
# (`datasets`) and added (`config["use_datasets"]`) versions of a single loop,
# not a nested loop -- confirm against the merged file.
for dataset in datasets:
for dataset in config["use_datasets"]:
all_folder.append(config["results_dir"] + "/" + dataset)
return all_folder


############## starting snakemake pipelines ##################


# Define all outputs wanted from this Snakemake workflow; the list of inputs
# is produced by the get_all_input function.
rule all:
input:
get_all_input,
Expand All @@ -29,8 +38,8 @@ rule download:
output:
dir=directory(config["results_dir"] + "/{dataset}"),
conda:
lambda wildcards: GIT_DIR + "/" + datasets[wildcards.dataset]["env"]
lambda wildcards: GIT_DIR + datasets[wildcards.dataset]["env"]
params:
script=lambda wildcards: GIT_DIR + "/" + datasets[wildcards.dataset]["script"],
script=lambda wildcards: GIT_DIR + datasets[wildcards.dataset]["script"],
shell:
"{params.script} -o {output.dir}"
10 changes: 8 additions & 2 deletions workflows/example_configs/download_config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
git_dir: /home/ubuntu/workspace/SpaceHack2023
results_dir: /home/ubuntu/tmp_data
# Github repo path, modify based on your env
git_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow/SpaceHack2023
# General data folder. All dataset folders will be stored here
results_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow

# Datasets to be downloaded. Add/delete entries based on your needs
use_datasets:
- "libd_dlpfc"
89 changes: 28 additions & 61 deletions workflows/example_configs/methods_config.yaml
Original file line number Diff line number Diff line change
@@ -1,62 +1,29 @@
data_dir: "/home/ubuntu/tmp_data/libd_dlpfc"
git_dir: /home/ubuntu/workspace/SpaceHack2023
seed: 42
technology: "Visium"
# Github repo path, modify based on your env
git_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow/SpaceHack2023
# Dataset path, modify based on your env
data_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow/libd_dlpfc

seed: 2023

# Methods to run for the pipeline; modify based on your needs
use_methods:
- "STAGATE"
config_files:
spaGCN:
config_1: "config/config_1.json"
config_2: "config/config_2.json"
config_3: "config/config_3.json"
config_4: "config/config_4.json"
config_5: "config/config_5.json"
config_6: "config/config_6.json"
config_7: "config/config_7.json"
config_8: "config/config_8.json"
config_9: "config/config_9.json"
config_10: "config/config_10.json"
config_11: "config/config_11.json"
config_12: "config/config_12.json"
config_13: "config/config_13.json"
config_14: "config/config_14.json"
config_15: "config/config_15.json"
config_16: "config/config_16.json"
GraphST:
config_1: "config/config_1.json"
config_2: "config/config_2.json"
config_3: "config/config_3.json"
config_4: "config/config_4.json"
config_5: "config/config_5.json"
config_6: "config/config_6.json"
BANKSY:
config_1: "config/config_1.json"
meringue:
config_1: "config/config_1.json"
SCAN_IT:
config_1: "config.json"
scanpy:
config_1: "configs/config_1.json"
SpaceFlow:
config_1: "config/config.json"
SOTIP:
config_1: "config/config.json"
STAGATE:
config_1: "config/config_1.json"
config_2: "config/config_2.json"
config_3: "config/config_3.json"
config_4: "config/config_4.json"
config_5: "config/config_5.json"
config_6: "config/config_6.json"
config_7: "config/config_7.json"
config_8: "config/config_8.json"
config_9: "config/config_9.json"
config_10: "config/config_10.json"
config_11: "config/config_11.json"
config_12: "config/config_12.json"
config_13: "config/config_13.json"
config_14: "config/config_14.json"
config_15: "config/config_15.json"
config_16: "config/config_16.json"
config_17: "config/config_17.json"
config_18: "config/config_18.json"
# - "bass"
- "BayesSpace"
# - "DRSC"
# - "GraphST"
# - "SEDR"
# - "SOTIP"
# - "SpiceMix" # GPU access
# - "maple"
# - "precast"
# - "SC_MEB"
# - "spaGCN"
# - "stardust"
# - "DeepST"
# - "STAGATE" # res not n_clust
# - "scanpy"
# - "SpaceFlow"
# - "seurat" # config 5 TODO
# - "BANKSY"
# - "SCAN-IT" # buggy:data shape TODO
# - "meringue" # buggy:no library found??? TODO
24 changes: 22 additions & 2 deletions workflows/example_configs/metrics_config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,22 @@
data_dir: "/home/ubuntu/tmp_data/libd_dlpfc"
git_dir: /home/ubuntu/workspace/SpaceHack2023
# Github repo path, modify based on your env
git_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow/SpaceHack2023
# Dataset path, modify based on your env
data_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow/libd_dlpfc

use_metrics:
- "ARI"
# - "Completeness"
# - "Entropy"
# - "FMI"
# - "Homogeneity"
# - "MCC"
# - "NMI"
# - "domain-specific-f1"
# - "jaccard"
# - "V_measure" #Config, GT
# - "LISI" #Config, embed, GT
# - "cluster-specific-silhouette" #embed, no GT
# - "Calinski-Harabasz"
# - "Davies-Bouldin"
# - "CHAOS" #physical coord only
# - "PAS" # buggy TODO
7 changes: 5 additions & 2 deletions workflows/example_configs/preprocessing_config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
data_dir: "/home/ubuntu/tmp_data/libd_dlpfc"
git_dir: /home/ubuntu/workspace/SpaceHack2023
# Github repo path, modify based on your env
git_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow/SpaceHack2023
# Dataset path, modify based on your env
data_dir: /home/jovyan/scratch/SpaceHack2/userfolders/jsun/workflow/libd_dlpfc

n_pcs: "20"
Loading