Skip to content

Commit

Permalink
thys
Browse files Browse the repository at this point in the history
  • Loading branch information
Thys3Potgieter committed Dec 27, 2024
2 parents 683314a + 01d5892 commit c15a7c7
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 11 deletions.
File renamed without changes.
4 changes: 2 additions & 2 deletions bin/bash/Progenio
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ if [ ! -d $outdir/jbrowse ] ; then
# not needed #mq_strains2ref_peptides.py $config $outdir || ( rm -rf $outdir/jbrowse ; exit 1 )
mq_strains2ref.py $config $outdir || ( rm -rf $outdir/jbrowse ; exit 1 )
mq_reference_features.py $config $outdir
mq_peptide_features.py $config $outdir
mq_jbrowse_upload_script.py $config $outdir || ( rm -rf $outdir/jbrowse_ ; exit 1 )
fi

mq_peptide_features.py $config $outdir
mq_jbrowse_upload_script.py $config $outdir || ( rm -rf $outdir/jbrowse_ ; exit 1 )


#exit 0
Expand Down
2 changes: 1 addition & 1 deletion bin/python/mq_peptide_to_protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def peptide_list_blast(peptides, targets, features):

#pg = pg.head(10)

combined = pg
combined = pg.copy()


for row in combined.iterrows():
Expand Down
26 changes: 18 additions & 8 deletions bin/python/mq_reference_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ def load_gff3(file):


for ref in config['reference']:
proteome_id = config['reference'][ref]['proteome_id']
with open(output + '/blast/orfs2proteins/{}_mapping.json'.format(proteome_id), 'r') as f:
orf2protein = json.load(f)

assembly_id = config['reference'][ref]['assembly_id']
for strain in config['strains']:

Expand Down Expand Up @@ -142,23 +146,28 @@ def load_gff3(file):
rows = ['##gff-version 3']
feature_rows = ['##gff-version 3']
seen = []

for orf, group in peptide_annotations.groupby('ORF_id'):
#orf = group['ORF_id'].values[0]



#for orf, group in peptide_annotations.groupby('ORF_id'):
for orf_id in list(set(ref_orfs['ORF_id'].tolist())):

group = peptide_annotations[peptide_annotations['ORF_id'].apply(lambda x:x.split('|')[1] == orf_id)]
assert len(group['ORF_id'].unique()) <= 1
#var_start = group['PeptidePosition'].min()

#var_peps = peptide_annotations[peptide_annotations['ORF_id'] == orf]
#var_pe


blastp_list = group['BLASTP'].values
assert len(set(blastp_list)) == 1
blastp = blastp_list[0]
try:
blastp = orf2protein[orf_id][0]
except:
blastp = 'None'
#orf_id = orf.split('|')[1]

orf_id = orf.split('|')[1]
#print(group)
orf_filt = ref_orfs[ref_orfs['ORF_id'] == orf_id]

if len(orf_filt) == 0:
continue
feat_id = orf_filt['ID'].values[0]
Expand Down Expand Up @@ -194,6 +203,7 @@ def load_gff3(file):
orf_peptides = []
# iterate through the proteoform peptides
for row in group.iterrows():
orf = row[1]['ORF_id']
var_id = row[1]['VarId']

if isinstance(var_id, str):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit c15a7c7

Please sign in to comment.