"""Download protein sequences from NCBI and save them as FASTA.

Usage:
    python retrieve_protein_with_gi_number.py <list.of.sequences.requested> <output.fa>

<list.of.sequences.requested> is a text file with one sequence identifier
per line; <output.fa> is the FASTA file that will be (over)written.
"""
import sys

from Bio import SeqIO
from Bio import Entrez

Entrez.email = "carden24@mail.ubc.ca"


def _read_requested_ids(path):
    """Return the non-empty, whitespace-stripped lines of the file at *path*."""
    with open(path, 'r') as filelist:
        # Blank lines (including a trailing one) would otherwise be sent to
        # Entrez as empty identifiers, so they are dropped here.
        return [line.strip() for line in filelist if line.strip()]


def _write_fasta_records(requestedsequences, fileout):
    """Fetch *requestedsequences* from the protein database into *fileout*.

    Each record is written as a two-line FASTA entry: ">id" then the sequence.
    """
    handle = Entrez.efetch(db="protein", id=requestedsequences,
                           rettype="fasta", retmode="text")
    try:
        for record in SeqIO.parse(handle, "fasta"):
            fileout.write(">%s\n%s\n" % (record.id, record.seq))
    finally:
        # Release the network handle even if parsing or writing fails.
        handle.close()


def main(argv):
    """Run the script with *argv* laid out like ``sys.argv``."""
    if len(argv) < 3:
        sys.exit("usage: python retrieve_protein_with_gi_number.py "
                 "<list.of.sequences.requested> <output.fa>")

    requestedsequences = _read_requested_ids(argv[1])

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%d Sequences requested" % len(requestedsequences))
    print(requestedsequences)

    with open(argv[2], 'w') as fileout:
        # With nothing requested there is nothing to fetch; the output file
        # is still created, just left empty.
        if requestedsequences:
            _write_fasta_records(requestedsequences, fileout)


if __name__ == "__main__":
    main(sys.argv)