forked from patrickbryant1/SpeedPPI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_ppi_all_vs_all.sh
78 lines (71 loc) · 2.17 KB
/
create_ppi_all_vs_all.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#Create a PPI network
#
#Usage: bash create_ppi_all_vs_all.sh FASTA_SEQS HHBLITS PDOCKQ_T OUTDIR
#Run from the repository root: ./src/ and ./data/ are referenced relative to
#the current working directory.
#ARGS
#INPUT
if [ "$#" -lt 4 ] || [ -z "$4" ]; then
  #With an empty OUTDIR the paths built below collapse to /fasta/, /msas/, ...
  #so refuse to continue instead of writing at the filesystem root.
  echo "Usage: $0 FASTA_SEQS HHBLITS PDOCKQ_T OUTDIR" >&2
  exit 1
fi
FASTA_SEQS=$1 #All fasta seqs
HHBLITS=$2 #Path to HHblits
PDOCKQ_T=$3 #pDockQ threshold handed to the prediction and network steps
OUTDIR=$4 #Directory that receives every output of the pipeline
#DEFAULT
UNICLUST=./data/uniclust30_2018_08/uniclust30_2018_08 #Assume path according to setup
#The pipeline starts here
#1. Create individual fastas
#The step is skipped when $FASTADIR/id_seqs.csv is already present
#(presumably written by preprocess_fasta.py - confirm against that script).
FASTADIR=$OUTDIR/fasta/
if [ -f "$FASTADIR/id_seqs.csv" ]; then
  echo Fastas exist...
  echo "Remove the directory $FASTADIR if you want to write new fastas."
else
  mkdir -p "$FASTADIR"
  echo "Writing fastas of each sequence to $FASTADIR"
  #Every later step reads from $FASTADIR, so a failure here is fatal.
  if ! python3 ./src/preprocess_fasta.py --fasta_file "$FASTA_SEQS" \
    --outdir "$FASTADIR"; then
    echo "preprocess_fasta.py failed for $FASTA_SEQS" >&2
    exit 1
  fi
fi
#2. Run HHblits for all fastas to create MSAs
#One MSA is written per fasta as $MSADIR/<ID>.a3m, where <ID> is the fasta file
#name up to its first '.'. An existing .a3m is kept, so the step can be rerun
#and only the missing MSAs are created.
MSADIR=$OUTDIR/msas/
if [ -d "$MSADIR" ]; then
  echo MSAs exists...
  echo Checking if all are present
else
  mkdir -p "$MSADIR"
fi
for FASTA in "$FASTADIR"/*.fasta
do
  #An unmatched glob is left as the literal pattern; never pass that to HHblits.
  [ -e "$FASTA" ] || continue
  ID=$(basename "$FASTA")
  ID=${ID%%.*} #Keep the part before the first '.'
  echo "$ID"
  if [ -f "$MSADIR/$ID.a3m" ]; then
    echo "$MSADIR/$ID.a3m exists"
  else
    echo "Creating MSA for $ID"
    "$HHBLITS" -i "$FASTA" -d "$UNICLUST" -E 0.001 -all -oa3m "$MSADIR/$ID.a3m"
  fi
done
#3. Predict the structure using a modified version of AlphaFold2 (FoldDock)
#run_alphafold_all_vs_all.py is invoked once per target row, each run writing
#to its own $OUTDIR/pred<row>/ directory.
PR_CSV=$FASTADIR/id_seqs.csv
if [ ! -f "$PR_CSV" ]; then
  echo "Missing $PR_CSV - cannot determine the number of predictions" >&2
  exit 1
fi
#Number of lines minus one (presumably the CSV has a header line - confirm).
#Feeding wc through stdin keeps the file name out of its output, and the
#arithmetic expansion tolerates the leading padding some wc versions print.
NUM_PREDS=$(( $(wc -l < "$PR_CSV") - 1 ))
DATADIR=./data/
RECYCLES=10
NUM_CPUS=1
for (( c=1; c<=NUM_PREDS; c++ ))
do
  PRED_DIR=$OUTDIR'/pred'$c'/'
  #-p: the directory is already there when the pipeline is rerun.
  mkdir -p "$PRED_DIR"
  echo "Running pred $c out of $NUM_PREDS"
  python3 ./src/run_alphafold_all_vs_all.py --protein_csv "$PR_CSV" \
    --target_row "$c" \
    --msa_dir "$MSADIR" \
    --data_dir "$DATADIR" \
    --max_recycles "$RECYCLES" \
    --pdockq_t "$PDOCKQ_T" \
    --num_cpus "$NUM_CPUS" \
    --output_dir "$PRED_DIR"
done
#4. Merge all predictions to construct a PPI network.
#When the pDockQ > 0.5, the PPV is >0.9 (https://www.nature.com/articles/s41467-022-28865-w, https://www.nature.com/articles/s41594-022-00910-8)
#The default threshold to construct edges (links) is 0.5
python3 ./src/build_ppi.py --pred_dir "$OUTDIR/" \
  --pdockq_t "$PDOCKQ_T" --outdir "$OUTDIR/"
#5. Move all high-confidence predictions to a dir
#Every .pdb found in a $OUTDIR/pred*/ directory is moved (presumably only
#models passing the pDockQ threshold are written as .pdb - confirm).
HC_DIR=$OUTDIR'/high_confidence_preds/'
#-p: the directory is already there when the pipeline is rerun.
mkdir -p "$HC_DIR"
NUM_MOVED=0
#Move file by file: a single mv over the whole glob can exceed the
#argument-list limit and fails outright when nothing matches.
for PDB in "$OUTDIR"/pred*/*.pdb
do
  [ -e "$PDB" ] || continue
  if mv -- "$PDB" "$HC_DIR"; then
    NUM_MOVED=$((NUM_MOVED + 1))
  fi
done
if [ "$NUM_MOVED" -gt 0 ]; then
  echo "Moved all high confidence predictions to $HC_DIR"
else
  echo "No .pdb files found in $OUTDIR/pred*/ - nothing was moved" >&2
fi