-
Notifications
You must be signed in to change notification settings - Fork 0
/
work.sh
180 lines (147 loc) · 4.83 KB
/
work.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# csv file_md5sum
Rscript calculate_file_md5sum.R \
-f /share/projects/Analytics/IO/zouhua/Script/dataset/phenotype \
-m NULL \
-p csv \
-o result
# #Rscript calculate_file_md5sum.R \
# -f dataset/phenotype \
# -m result/Gmetadata_from_Doctor_csv_md5sum.csv \
# -p csv \
# -o result
# metaphlan
python merge_metaphlan.py \
-f dataset/metaphlan/metaphlan_test_filepath.tsv \
-p merge_metaphlan \
-t species \
-o result
# Humann2_card
python merge_humann.py \
-f dataset/humann/Humann2_card_test_filepath.tsv \
-p merge_humann_card \
-d humann2_card \
-o result
# Humann2_keggMean
python merge_humann.py \
-f dataset/humann/Humann2_keggMean_test_filepath.tsv \
-p merge_humann_keggMean \
-d humann2_keggMean \
-o result
# Humann2_keggMedian
python merge_humann.py \
-f dataset/humann/Humann2_keggMedian_test_filepath.tsv \
-p merge_humann_keggMedian \
-d humann2_keggMedian \
-o result
# humann2 & metacyc
python merge_humann.py \
-f dataset/humann/Humann2_pathabundance_test_filepath.tsv \
-p merge_humann \
-d humann2 \
-o result
# Humann2_vfdb
python merge_humann.py \
-f dataset/humann/Humann2_vfdb_test_filepath.tsv \
-p merge_humann_vfdb \
-d humann2_vfdb \
-o result
# humann3 & metacyc
python merge_humann.py \
-f dataset/humann/Humann3_pathabundance_test_filepath.tsv \
-p merge_humann3 \
-d humann3 \
-o result
# phyloseq for BJ GI
Rscript generate_phyloseq_for_BJ.R \
-m dataset/phenotype/bjch_sample_sequence_metadata_20220923.csv \
-p dataset/metaphlan/metaphlan2_BJ_RoundB_merge.csv \
-r RoundB \
-t metaphlan \
-n metaphlan2 \
-o result
# Swimming plot for BJ GI cancer
Rscript BJ_GI_Swimming_plot.R \
-p dataset/phenotype/bjch_sample_sequence_metadata_20220923.csv \
-c colon_cancer \
-n Swimming_Plot -w 10 -g 20 -o result
# Reads QC from pipeline
python obtain_metaphlan_ReadsQC.py \
-f dataset/pipeline_result/BJ_RoundG \
-p BJ_RoundG_ReadsQC \
-o result
python obtain_metaphlan_ReadsQC.py \
-f dataset/pipeline_result/BJ_RoundB \
-p BJ_RoundB_ReadsQC \
-o result
# phyloseq for BJ GI for three types' dataset
Rscript generate_phyloseq_for_BJ_v2.R \
-m dataset/phenotype/bjch_sample_sequence_metadata_20221008.csv \
-p dataset/profile/metaphlan2_BJ_RoundB-G_merge.csv \
-r all \
-t metaphlan \
-n metaphlan2 \
-o result
# Summarizing accounts of patients and samples in BJ GI cancer
Rscript Summarize_Patients_Samples.R \
-a dataset/phyloseq/metaphlan2_BJ_all_ps.RDS \
-p dataset/phyloseq/metaphlan2_BJ_all_ps_baseline_R6.RDS \
-o result
# Integrating phyloseq object per taxa levels into list object
Rscript integrate_taxa_phyloseq_for_BJ.R \
-p ./dataset/phyloseq/metaphlan2_phylum_BJ_all_ps_total.RDS \
-c ./dataset/phyloseq/metaphlan2_class_BJ_all_ps_total.RDS \
-o ./dataset/phyloseq/metaphlan2_order_BJ_all_ps_total.RDS \
-f ./dataset/phyloseq/metaphlan2_family_BJ_all_ps_total.RDS \
-g ./dataset/phyloseq/metaphlan2_genus_BJ_all_ps_total.RDS \
-s ./dataset/phyloseq/metaphlan2_species_BJ_all_ps_total.RDS \
-n total \
-d ./result/
# Summarizing accounts of patients and samples in BJ GI cancer (version 2)
Rscript Summarize_Patients_Samples_v2.R \
-a dataset/phyloseq/metaphlan2_BJ_all_metadata_total.csv \
-b dataset/phyloseq/metaphlan2_phylum_BJ_all_ps_filter.RDS \
-c dataset/phyloseq/metaphlan2_phylum_BJ_all_ps_baseR6.RDS \
-d dataset/phyloseq/16s_BJ_all_metadata_total.csv \
-e dataset/phyloseq/16s_BJ_all_metadata_filter.csv \
-f dataset/phyloseq/16s_BJ_all_metadata_baseR6.csv \
-n Summarize_Patients_Samples \
-o result
# checking whether the sra files were successfully downloaded
perl get_update_sra_NCBI.pl \
-f SRR_Acc_List.txt \
-d /share/projects/Analytics/IO/zouhua/pipeline/GSE164150/SRA/sra \
-o SRR_Acc_List_redownload.txt
prefetch --option-file SRR_Acc_List_redownload.txt
# converting sra into fastq
find /share/projects/Analytics/IO/zouhua/pipeline/GSE164150/SRA/sra/ -name "*.sra" | \
perl -e 'print"SampleID\tMode\tPath\n"; while(<>){chomp; $name=(split("\/", $_))[-1]; $name=~s/.sra//g; $type = "PAIRED"; print "$name\t$type\t$_\n";}' \
> samples.path.tsv
perl sra2fq.pl \
-f samples.path.tsv \
-d fastq \
-p sra_into_fastq
sh sra_into_fastq.sh
# scp file between local and remote
perl get_each_scp.pl \
-f filepath.tsv \
-s xbiome_102 \
-d /share/projects/Analytics/IO/zouhua/database/ \
-o scp_file
# rsync files between source and destination directory
python rsync.py \
-s /share/projects/Analytics/IO/zouhua/pipeline/GSE164150/SRA/sra/ \
-d /share/projects/Analytics/IO/zouhua/database \
-p sra \
-l sra_log.tsv
# batch run for scripts
python run_batch_shell.py \
-m sra_into_fastq.sh \
-o sra_into_fastq_batch.sh
# APAlyzer Expression
Rscript APAlyzer_Expression.R \
-b bam_file.tsv \
-r RData \
-g mm9_REF.RData \
-c chr19 \
-e 3UTR \
-o result