Skip to content

Commit

Permalink
added positive selection
Browse files Browse the repository at this point in the history
  • Loading branch information
Jennifer Polinski committed Jan 11, 2024
1 parent c276fd4 commit d7d8a36
Show file tree
Hide file tree
Showing 11 changed files with 563 additions and 0 deletions.
20 changes: 20 additions & 0 deletions 7_positive-selection/10_meme_getResults.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#get results for sites under episodic selection according to hyphy MEME
#
#For every MEME output file in the current directory this appends the
#orthogroup ID (file name up to the first "_") followed by every line after
#the first "### For partition" line to hyphy_meme_Results.txt.
#The results file is overwritten on every run.

resultsFile="hyphy_meme_Results.txt"

#outside of loop add file header
{
    echo "Hyphy MEME sites under episodic selection"
    echo "K. Castellano - 2024 January 8"
    echo "------------------------------"
} > "${resultsFile}"

#only pick up MEME outputs: the broader "*output.txt" pattern would also match
#any *_absrel_output.txt files sitting in the same directory
for file in *_meme_output.txt
do
    #when nothing matches, bash leaves the pattern itself in ${file}; skip it
    [ -f "${file}" ] || continue

    prefix=$(echo "${file}" | cut -d "_" -f 1)
    echo "${prefix}" >> "${resultsFile}" #print orthogroup ID
    #this command below will print all lines after the string "### For partition" which contain the sites under selection
    sed -e '1,/### For partition/d' "${file}" >> "${resultsFile}"
    echo "" >> "${resultsFile}" #add empty line after each orthogroup in the output file to help make it clearer to read

done
7 changes: 7 additions & 0 deletions 7_positive-selection/1_AGAT_filt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#Run AGAT's agat_sp_keep_longest_isoform.pl on the Lvar_3.0 genomic GFF.
#Both stdout and stderr of the tool are shown on screen and appended to the log.

module load AGAT/v1.2.0

inputGff="GCF_018143015.1_Lvar_3.0_genomic.gff"
outputGff="Lvariegatus_AGATfilt.gff"
logFile="Lvariegatus_AGAT.log"

agat_sp_keep_longest_isoform.pl -gff "${inputGff}" -out "${outputGff}" 2>&1 \
    | tee -a "${logFile}"

6 changes: 6 additions & 0 deletions 7_positive-selection/2_OrthoFinder_MSA.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#Run Orthofinder for Natural Selection Analysis
#Kate Castellano

#location of the OrthoFinder entry script
orthofinder=/data/app/OrthoFinder/orthofinder.py

#path to fasta files for each species (handed to -f below)
#left unquoted in the assignment so that ~ is still expanded
fastaDir=~/positive-selection/protein-files-urchinsOnly/

python3 "${orthofinder}" -M msa -f "${fastaDir}"
58 changes: 58 additions & 0 deletions 7_positive-selection/3_get_fasta_w_min_number.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/perl
#from Kevin Kocot
#run by Kate Castellano

use strict;
use warnings;

# Entry point.
#
# Usage: <script> INDIR OUTDIR MINIMUM_SEQS
#
# Reads every plain file in INDIR, counts the distinct species prefixes seen
# in its FASTA headers (the text between '>' and the first '_'), and copies
# the file unchanged into OUTDIR when that count equals the third argument.
# Dies if INDIR or any file in it cannot be opened; prints the usage line and
# dies if any of the three arguments is missing (or false, e.g. 0).
MAIN: {
    my $indir  = $ARGV[0] || usage();
    my $outdir = $ARGV[1] || usage();
    my $min    = $ARGV[2] || usage();

    check_outdir($outdir);

    opendir my $dh, $indir or die "cannot open $indir:$!";
    my @files = readdir $dh;
    closedir $dh;

    foreach my $f (@files) {
        my $path = "$indir/$f";

        # readdir also returns '.', '..' and any sub-directories; only plain
        # files can hold sequences, so do not try to open the rest.
        next unless -f $path;

        # Three-arg open so that odd characters in a file name are never
        # interpreted as an open mode.
        open my $in, '<', $path or die "cannot open $path:$!";
        my $count   = 0;     # number of distinct species prefixes in this file
        my $seqs    = '';    # verbatim copy of the file contents
        my %species = ();    # species prefix => number of headers seen
        while (my $line = <$in>) {
            $seqs .= $line;
            next unless ($line =~ m/^>([^_]+)/);
            my $sp = $1;
            $count++ unless ($species{$sp});
            $species{$sp}++;
        }
        close $in;

        # NOTE(review): the usage text calls the third argument MINIMUM_SEQS,
        # but the test is an exact match on the number of distinct species.
        # Kept as-is; confirm whether '>=' was intended.
        write_seqs($outdir, $f, $seqs) if ($count == $min);
    }
}

# Make sure the output directory is usable.
#
# If $outdir already exists as a directory, emit one warning per entry found
# in it (ignoring '.' and '..') so the caller knows files may be overwritten
# or mixed with older results. Otherwise create it.
# Dies if an existing directory cannot be read or a new one cannot be created.
sub check_outdir {
    my $outdir = shift;
    if (-d $outdir) {
        opendir my $dh, $outdir or die "cannot read $outdir:$!";
        my @existing = grep { !/^\.\.?$/ } readdir $dh;
        closedir $dh;
        foreach my $e (@existing) {
            warn "warning: $outdir exists and includes $e\n";
        }
    }
    else {
        # Report the real operation and the OS error; the old message said
        # "cannot open" and gave no reason.
        mkdir $outdir or die "cannot create $outdir:$!";
    }
    return;
}

# Write $seqs verbatim to the file $file inside directory $dir, replacing any
# existing file of that name.
# Dies if the file cannot be opened for writing or cannot be closed cleanly.
sub write_seqs {
    my $dir  = shift;
    my $file = shift;
    my $seqs = shift;

    # Three-arg open with a lexical handle: the file name is taken literally
    # (two-arg open strips trailing whitespace and honours mode characters).
    open my $out, '>', "$dir/$file" or die "cannot open >$dir/$file:$!";
    print {$out} $seqs;

    # Buffered write errors (e.g. a full disk) only surface at close.
    close $out or die "cannot close $dir/$file:$!";
    return;
}

# Abort the program with a one-line synopsis of the expected command-line
# arguments. Never returns.
sub usage {
    my $synopsis = sprintf "usage: %s INDIR OUTDIR MINIMUM_SEQS\n", $0;
    die $synopsis;
}
15 changes: 15 additions & 0 deletions 7_positive-selection/4_getCDS.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#Get CDS sequences for each orthogroup
#Kate Castellano
#
#For every *_headers.txt file in the current directory, pull the sequences
#named in it out of the combined CDS file with seqtk and write them to
#<prefix>_CDS.fa, where <prefix> is the first two "_"-separated fields of the
#header file name.

cdsFile="/data/prj/urchin/red-urchin-genome/positive-selection/cds-files-urchinsOnly/AllSpecies.faa"

for file in *_headers.txt
do
    #when nothing matches, bash leaves the pattern itself in ${file}; skip it
    #instead of creating a bogus "*_CDS.fa"
    [ -f "${file}" ] || continue

    prefix=$(echo "${file}" | cut -d "_" -f 1,2)

    seqtk subseq "${cdsFile}" "${file}" > "${prefix}_CDS.fa"

done
11 changes: 11 additions & 0 deletions 7_positive-selection/5_pal2nal.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#this script will create PAML format (-output paml) codon alignments for each orthogroup
#the list.txt file contains the prefix for each file <orthogroup#>_1
#
#for each prefix: <prefix>.fa + <prefix>_CDS.fa -> <prefix>_pal2nal.fa
#messages from pal2nal are shown on screen and appended to pal2nal_log.txt

for i in $(cat list.txt)
do

    #redirection order matters: "2>&1" first points stderr at the pipe read by
    #tee, then ">" sends only stdout (the alignment) to the file.
    #Written the other way round ("> file 2>&1"), the messages end up inside
    #the alignment file and the log stays empty.
    /data/app/pal2nal.v14/pal2nal.pl "${i}.fa" "${i}_CDS.fa" -output paml -nomismatch -nogap -codontable 1 2>&1 > "${i}_pal2nal.fa" | tee -a pal2nal_log.txt

done

24 changes: 24 additions & 0 deletions 7_positive-selection/6_editFiles.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
#edit pal2nal alignment files for hyphy
#
#For every .fa file in the current directory: drop the first line, put ">" in
#front of each line that starts with B, M, L or S, write the result to
#<prefix>_edit.fa and delete the input file.

#copy pal2nal files to this location
#cp /data/prj/urchin/red-urchin-genome/positive-selection/protein-files-urchinsOnly/OrthoFinder/Results_Sep15/MultipleSequenceAlignments_AllSpecies/pal2nal/*.fa .


for file in *.fa
do
    #when nothing matches, bash leaves the pattern itself in ${file}; skip it
    [ -f "${file}" ] || continue

    #already-edited files are left alone so the script can be re-run safely
    case "${file}" in
        *_edit.fa) continue ;;
    esac

    prefix=$(echo "${file}" | cut -d "." -f 1)

    #1d: remove first line which contains the number of aligned nucleotides
    #s/^[BMLS]/>&/: add > in front of each header to turn it into a fasta file.
    #The match is anchored to the start of the line; an unanchored s/B/>B/
    #inserts ">" at the first B/M/L/S anywhere, e.g. "Spur_LOC1" became
    #">Spur_>LOC1".
    #NOTE(review): a sequence line that begins with the ambiguity codes B, M or
    #S would still be marked as a header - confirm the alignments contain none.
    #remove original pal2nal file we copied over to this location, but only
    #once the edited copy was written successfully
    sed -e '1d' -e 's/^[BMLS]/>&/' "${file}" > "${prefix}_edit.fa" && rm "${file}"

done
18 changes: 18 additions & 0 deletions 7_positive-selection/7_hyphy_absrel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
#hyphy absrel
#
#Runs hyphy absrel on every .fa alignment in the current directory, testing
#all branches at p-value 0.05. The tree for each alignment is
#<treePath><prefix>_tree.txt, where <prefix> is the alignment file name up to
#the first "_".


module load hyphy/v2.5.58
#ln -s /data/resources/app_modules/hyphy-2.5.58/res #only need to do this once; make symbolic link to batch files

treePath="/data/prj/urchin/red-urchin-genome/positive-selection/protein-files-urchinsOnly/OrthoFinder/Results_Sep15/Gene_Trees_edit/"

for file in *.fa
do
    #when nothing matches, bash leaves the pattern itself in ${file}; skip it
    #instead of handing "*.fa" to hyphy
    [ -f "${file}" ] || continue

    prefix=$(echo "${file}" | cut -d "_" -f 1)
    #">>" appends, so re-running adds to an existing <prefix>_absrel_output.txt
    hyphy absrel --pvalue 0.05 --alignment "${file}" --tree "${treePath}${prefix}_tree.txt" --branches All >> "${prefix}_absrel_output.txt"
    #will output a .json file and the specified .txt file

done
22 changes: 22 additions & 0 deletions 7_positive-selection/8_absrel_getResults.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#get results for branches under selection according to hyphy absrel
#
#For every absrel output file in the current directory this appends the
#orthogroup ID (file name up to the first "_") followed by every line after
#the first "### Adaptive branch site random effects likelihood test" line to
#hyphy_absrel_Results.txt. The results file is overwritten on every run.

resultsFile="hyphy_absrel_Results.txt"

#outside of loop add file header
{
    echo "Adaptive branch site random effects likelihood test Results - Urchins Only - Hyphy abSREL all branches"
    echo "K. Castellano - 2024 January 4"
    echo "------------------------------------------------------------------------------------------------------"
} > "${resultsFile}"

#only pick up absrel outputs: the broader "*output.txt" pattern would also
#match any *_meme_output.txt files sitting in the same directory
for file in *_absrel_output.txt
do
    #when nothing matches, bash leaves the pattern itself in ${file}; skip it
    [ -f "${file}" ] || continue

    prefix=$(echo "${file}" | cut -d "_" -f 1)
    echo "${prefix}" >> "${resultsFile}" #print orthogroup ID
    #this command below will print all lines after the string "### Adaptive branch site random effects likelihood test" which contain the branches under selection
    sed -e '1,/### Adaptive branch site random effects likelihood test/d' "${file}" >> "${resultsFile}"
    echo "" >> "${resultsFile}" #add empty line after each orthogroup in the output file to help make it clearer to read

done

#remove the empty line between the number of branches under selection and the lines with the p-value of the branches under selection; just makes it easier to read and see what data goes with which orthogroup
#the dot is escaped so only a literal "tested." matches
sed -i '/tested\./{N;s/\n$//}' "${resultsFile}"
14 changes: 14 additions & 0 deletions 7_positive-selection/9_meme_hyphy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#run hyphy MEME on orthogroups that showed signatures of positive selection using hyphy absrel to identify sites under selection
#K Castellano January 2024
#
#meme_list.txt holds one orthogroup ID per entry; for each ID the alignment
#<ID>_1_pal2nal_edit.fa and the tree <treePath><ID>_tree.txt are used and the
#hyphy output is appended to <ID>_meme_output.txt

module load hyphy/v2.5.58

treePath="/data/prj/urchin/red-urchin-genome/positive-selection/protein-files-urchinsOnly/OrthoFinder/Results_Sep15/Gene_Trees_edit/"
listFile="meme_list.txt"

#stop with an error instead of finishing "successfully" without running anything
if [ ! -r "${listFile}" ]; then
    echo "cannot read ${listFile}" >&2
    exit 1
fi

for i in $(cat "${listFile}")
do

    hyphy meme --alignment "${i}_1_pal2nal_edit.fa" --tree "${treePath}${i}_tree.txt" >> "${i}_meme_output.txt"

done
Loading

0 comments on commit d7d8a36

Please sign in to comment.