-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jennifer Polinski
committed
Jan 11, 2024
1 parent
c276fd4
commit d7d8a36
Showing
11 changed files
with
563 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/bin/bash
# Collect the sites under episodic selection reported by hyphy MEME.
#
# For every file in the current directory matching *output.txt, append to
# hyphy_meme_Results.txt:
#   1. the orthogroup ID (the file name up to the first underscore),
#   2. every line that follows the first "### For partition" line,
#   3. an empty line.

results="hyphy_meme_Results.txt"

# The file header is written once, outside of the loop. Using > here also
# truncates a results file left over from an earlier run.
{
    echo "Hyphy MEME sites under episodic selection"
    echo "K. Castellano - 2024 January 8"
    echo "------------------------------"
} > "${results}"

for file in *output.txt
do
    # When nothing matches, the glob is left as a literal string; skip it
    # instead of writing a bogus entry and a sed error.
    [ -e "${file}" ] || continue

    prefix="${file%%_*}"    # orthogroup ID

    {
        echo "${prefix}"
        # Delete everything from line 1 through the first line containing
        # "### For partition"; the remaining lines hold the sites under selection.
        sed -e '1,/### For partition/d' "${file}"
        # Empty line after each orthogroup makes the results easier to read.
        echo ""
    } >> "${results}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/bin/bash
# Run AGAT's agat_sp_keep_longest_isoform.pl on the Lvar_3.0 genomic GFF.
# Everything the tool prints (stdout and stderr) is shown on the terminal and
# appended to the log file.

module load AGAT/v1.2.0

inputGff="GCF_018143015.1_Lvar_3.0_genomic.gff"
outputGff="Lvariegatus_AGATfilt.gff"
logFile="Lvariegatus_AGAT.log"

agat_sp_keep_longest_isoform.pl -gff "${inputGff}" -out "${outputGff}" 2>&1 | tee -a "${logFile}"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash
# Run OrthoFinder for the natural selection analysis.
# Kate Castellano

orthofinder="/data/app/OrthoFinder/orthofinder.py"

# Value for -f: path to the fasta files for each species.
# (Left unquoted so that the leading ~ is expanded by the shell.)
fastaDir=~/positive-selection/protein-files-urchinsOnly/

python3 "${orthofinder}" -M msa -f "${fastaDir}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#!/usr/bin/perl | ||
#from Kevin Kocot | ||
#run by Kate Castellano | ||
|
||
use strict; | ||
use warnings; | ||
|
||
# Entry point: copy every file in INDIR whose sequences come from exactly
# MINIMUM_SEQS distinct species into OUTDIR.
#
# Command-line arguments (all required; usage() is called when one is missing
# or false, so a value of 0 is rejected as well):
#   INDIR         directory holding the sequence files to screen
#   OUTDIR        directory that receives the files that pass
#   MINIMUM_SEQS  number of distinct species a file must contain
#
# The species of a sequence is the text between '>' and the first underscore
# on its header line.
MAIN: {
    my $indir  = $ARGV[0] || usage();
    my $outdir = $ARGV[1] || usage();
    my $min    = $ARGV[2] || usage();

    check_outdir($outdir);

    opendir my $dh, $indir or die "cannot open $indir:$!";
    my @files = readdir $dh;
    closedir $dh;

    foreach my $f (@files) {
        my $path = "$indir/$f";

        # readdir also returns '.', '..' and sub-directories; only plain
        # files can hold sequences.
        next unless -f $path;

        open my $in, '<', $path or die "cannot open $path:$!";
        my $seqs    = '';
        my %species = ();
        while (my $line = <$in>) {
            $seqs .= $line;
            next unless ($line =~ m/^>([^_]+)/);
            $species{$1}++;
        }
        close $in;

        # Number of distinct species seen in this file.
        my $count = scalar keys %species;

        # NOTE(review): the usage text calls the third argument a minimum,
        # but the comparison is an exact match. Kept unchanged; confirm intent.
        write_seqs($outdir, $f, $seqs) if ($count == $min);
    }
}
|
||
# Make sure the output directory can be used.
#
# If $outdir already exists, one warning is emitted for every entry it
# contains (other than '.' and '..'); otherwise the directory is created.
# Dies when an existing directory cannot be read or a new one cannot be
# created.
sub check_outdir {
    my $outdir = shift;

    if (-d $outdir) {
        opendir my $dh, $outdir or die "cannot read $outdir:$!";
        my @existing = grep { !/^\.\.?$/ } readdir $dh;
        closedir $dh;

        foreach my $e (@existing) {
            warn "warning: $outdir exists and includes $e\n";
        }
    }
    else {
        # Report the real operation and the system error.
        mkdir $outdir or die "cannot create $outdir:$!";
    }
    return;
}
|
||
# Write $seqs to the file $file inside directory $dir, replacing any existing
# file of that name.
#
# Arguments:
#   $dir   directory to write into
#   $file  name of the file to create
#   $seqs  text to write
# Dies when the file cannot be opened or when closing it fails.
sub write_seqs {
    my ($dir, $file, $seqs) = @_;
    my $path = "$dir/$file";

    # Three-argument open: the path is never interpreted as an open mode.
    open my $out, '>', $path or die "cannot open >$path:$!";
    print {$out} $seqs;
    # Buffered write errors only surface at close.
    close $out or die "cannot close $path:$!";
    return;
}
|
||
# Abort with a one-line description of the expected command-line arguments.
# The trailing newline keeps Perl from appending file and line information.
sub usage {
    my $message = "usage: $0 INDIR OUTDIR MINIMUM_SEQS\n";
    die $message;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/bash
# Get CDS sequences for each orthogroup
# Kate Castellano
#
# For every *_headers.txt file in the current directory, extract the sequences
# listed in it from ${cdsFile} with seqtk and write them to <prefix>_CDS.fa,
# where <prefix> is the first two underscore-separated fields of the header
# file name.

cdsFile="/data/prj/urchin/red-urchin-genome/positive-selection/cds-files-urchinsOnly/AllSpecies.faa"

for file in *_headers.txt
do
    # When nothing matches, the glob is left as a literal string; skip it so
    # seqtk is not run on a file that does not exist.
    [ -e "${file}" ] || continue

    prefix=$(echo "${file}" | cut -d "_" -f 1,2)

    seqtk subseq "${cdsFile}" "${file}" > "${prefix}_CDS.fa"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
# This script will create PAML format alignments (-output paml) for each orthogroup.
# The list.txt file contains the prefix for each file: <orthogroup#>_1
#
# For each prefix, pal2nal is given <prefix>.fa and <prefix>_CDS.fa and its
# output is written to <prefix>_pal2nal.fa. Messages printed on stderr are
# shown on the terminal and appended to pal2nal_log.txt.

for i in $(cat list.txt)
do
    # Redirection order matters: "2>&1" first points stderr at the pipe, then
    # ">" sends stdout to the alignment file. Written the other way round
    # ("> file 2>&1"), stderr ends up inside the alignment and tee gets nothing.
    /data/app/pal2nal.v14/pal2nal.pl "${i}.fa" "${i}_CDS.fa" -output paml -nomismatch -nogap -codontable 1 \
        2>&1 > "${i}_pal2nal.fa" | tee -a pal2nal_log.txt
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#!/bin/bash
# Edit pal2nal alignment files for hyphy.
#
# Every *.fa file in the current directory is rewritten as <prefix>_edit.fa
# (<prefix> = file name up to the first dot) and the original is removed.

# Copy pal2nal files to this location first:
#cp /data/prj/urchin/red-urchin-genome/positive-selection/protein-files-urchinsOnly/OrthoFinder/Results_Sep15/MultipleSequenceAlignments_AllSpecies/pal2nal/*.fa .

for file in *.fa
do
    # When nothing matches, the glob is left as a literal string; skip it.
    [ -e "${file}" ] || continue

    # Files produced by an earlier run of this script must not be edited a
    # second time (that would strip a header line and add another ">").
    case "${file}" in
        *_edit.fa) continue ;;
    esac

    prefix="${file%%.*}"

    # 1d             remove the first line, which contains the number of
    #                aligned nucleotides
    # s/^[BMLS]/>&/  add > in front of each header (a line starting with B, M,
    #                L or S) to turn the file into a fasta file. The pattern
    #                is anchored to the start of the line so that the same
    #                letters further inside a header or sequence are left alone.
    # The original is removed only after the edited copy was written.
    sed -e '1d' -e 's/^[BMLS]/>&/' "${file}" > "${prefix}_edit.fa" &&
        rm "${file}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash
# Run hyphy absrel on every *.fa alignment in the current directory.
#
# The tree for an alignment is looked up in ${treePath} as <prefix>_tree.txt,
# where <prefix> is the alignment file name up to the first underscore.

module load hyphy/v2.5.58
#ln -s /data/resources/app_modules/hyphy-2.5.58/res #only need to do this once; make symbolic link to batch files

treePath="/data/prj/urchin/red-urchin-genome/positive-selection/protein-files-urchinsOnly/OrthoFinder/Results_Sep15/Gene_Trees_edit/"

for file in *.fa
do
    # When nothing matches, the glob is left as a literal string; skip it so
    # hyphy is not started on a file that does not exist.
    [ -e "${file}" ] || continue

    prefix="${file%%_*}"

    # Will output a .json file and the specified .txt file. The .txt file is
    # opened in append mode, so output of a repeated run is added to it.
    hyphy absrel --pvalue 0.05 --alignment "${file}" --tree "${treePath}${prefix}_tree.txt" --branches All >> "${prefix}_absrel_output.txt"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash
# Get results for branches under selection according to hyphy absrel.
#
# For every file in the current directory matching *output.txt, append to
# hyphy_absrel_Results.txt:
#   1. the orthogroup ID (the file name up to the first underscore),
#   2. every line that follows the first
#      "### Adaptive branch site random effects likelihood test" line,
#   3. an empty line.

results="hyphy_absrel_Results.txt"

# The file header is written once, outside of the loop. Using > here also
# truncates a results file left over from an earlier run.
{
    echo "Adaptive branch site random effects likelihood test Results - Urchins Only - Hyphy abSREL all branches"
    echo "K. Castellano - 2024 January 4"
    echo "------------------------------------------------------------------------------------------------------"
} > "${results}"

for file in *output.txt
do
    # When nothing matches, the glob is left as a literal string; skip it
    # instead of writing a bogus entry and a sed error.
    [ -e "${file}" ] || continue

    prefix="${file%%_*}"    # orthogroup ID

    {
        echo "${prefix}"
        # Delete everything from line 1 through the first line containing the
        # marker; the remaining lines hold the branches under selection.
        sed -e '1,/### Adaptive branch site random effects likelihood test/d' "${file}"
        # Empty line after each orthogroup makes the results easier to read.
        echo ""
    } >> "${results}"
done

# Remove the empty line between the number of branches under selection and the
# lines with the p-value of those branches; this makes it easier to see what
# data goes with which orthogroup. The dot is escaped so that only a literal
# "tested." selects a line.
sed -i '/tested\./{N;s/\n$//}' "${results}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/bin/bash
# Run hyphy MEME on orthogroups that showed signatures of positive selection
# using hyphy absrel, to identify sites under selection.
# K Castellano January 2024
#
# meme_list.txt supplies the orthogroup names, separated by whitespace.

module load hyphy/v2.5.58

treePath="/data/prj/urchin/red-urchin-genome/positive-selection/protein-files-urchinsOnly/OrthoFinder/Results_Sep15/Gene_Trees_edit/"

for orthogroup in $(cat meme_list.txt)
do
    alignment=${orthogroup}_1_pal2nal_edit.fa
    tree=${treePath}${orthogroup}_tree.txt

    # Output is appended to the per-orthogroup text file.
    hyphy meme --alignment ${alignment} --tree ${tree} >> ${orthogroup}_meme_output.txt
done
Oops, something went wrong.