import re

__author__ = 'nath'
from numpy import sum
from Sequence import Sequence


class fastaContainer:
    """Forward-only reader over the records of a FASTA file.

    The container keeps the file open and always holds the header of the
    *next* record in ``curName``.  ``nextSeq`` collects the lines up to the
    following header (or the end of the file) and wraps them, together with
    that header, in a ``Sequence``.  An instance is its own iterator, so
    ``for s in fc`` walks the remaining records.

    Attributes set by the constructor:
        file     path the container was opened with
        f        the open text-mode file handle
        curName  header (leading ``>`` removed) of the next record
        end      True once the end of the file has been reached
    """

    def __init__(self, file):
        """Open ``file`` and read the header of its first record.

        Any exception raised by ``open`` is propagated unchanged.
        """
        self.file = file
        # Defined before opening so that close()/__del__ stay safe even when
        # open() raises.
        self.f = None
        self._openFile()

    def _openFile(self):
        """(Re)open ``self.file`` and load the first header into ``curName``."""
        self.f = open(self.file, 'r')
        firstLine = self.f.readline()
        # An empty file holds no record at all: flag the end right away so
        # that iterating yields nothing instead of one empty sequence.
        self.end = not firstLine
        self.curName = firstLine.rstrip("\n").lstrip(">")

    def close(self):
        """Close the underlying file handle; safe to call more than once."""
        handle = getattr(self, "f", None)
        if handle is not None:
            handle.close()

    def __del__(self):
        # The handle may be missing when __init__ failed early; close()
        # tolerates that instead of raising AttributeError here.
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        if self.end:
            raise StopIteration
        return self.nextSeq()

    def nextSeq(self):
        """return the next sequence in the fasta file

        The record is named after ``curName``; its sequence is the
        concatenation of every line read before the next header line (one
        starting with ``>``) or the end of the file.  Reaching the end of
        the file sets ``end`` to True; the record being built is still
        returned.
        """
        name = self.curName
        chunks = []
        while True:
            line = self.f.readline()
            if not line:
                self.end = True
                break
            if line.startswith('>'):
                # Header of the following record: remember it for next call.
                self.curName = line.rstrip("\n").lstrip(">")
                break
            chunks.append(line.rstrip("\n"))
        return Sequence("".join(chunks), name)

    def resetFile(self):
        """reset the position of the file handler to the top of the file

        The file is closed and reopened, which also resets ``end`` and
        ``curName``.
        """
        self.close()
        self._openFile()

    def findAll(self, listIDs, mode="NM"):
        """return a list of sequences where the Ids are provided in the list
            mode can be set to "NM" (refseq IDs) or "MIMAT" (mirBase IDs)

        The file is rescanned from the top.  Every record whose parsed ID is
        in ``listIDs`` is collected, in file order, and the scan stops as
        soon as each requested ID has been seen at least once.  With any
        other ``mode`` the ID compared is the empty string.  IDs must be
        hashable.
        """
        self.resetFile()
        res = []
        wanted = set(listIDs)
        pending = set(wanted)
        if not pending:
            return res

        # Iterate through __next__ rather than testing ``end`` right after
        # nextSeq(): ``end`` becomes True while the last record is being
        # read, and that record must still be examined.
        for s in self:
            if mode == "NM":
                seqId = s.parseNM()
            elif mode == "MIMAT":
                seqId = s.parseMIMAT()
            else:
                seqId = ""
            if seqId in wanted:
                res.append(s)
                pending.discard(seqId)
                if not pending:
                    break

        return res

    def findSeq(self, ID, mode="NM"):
        """Return the first sequence whose parsed ID equals ``ID``, or None.

        ``mode`` has the same meaning as in ``findAll``, which performs the
        scan (and rewinds the file itself).
        """
        res = self.findAll([ID], mode)
        if not res:
            return None
        return res[0]

def revCompl(seq):
    """Return the upper-cased reverse complement of ``seq``.

    The sequence is reversed and upper-cased, then each base is swapped:
    A -> U, U -> A, T -> A, C -> G, G -> C.  Any other character is left as
    it is (upper-cased).
    """
    complement = str.maketrans(
        {"A": "U", "U": "A", "T": "A", "C": "G", "G": "C"}
    )
    return seq[::-1].upper().translate(complement).upper()





if __name__ == '__main__':
    # Manual smoke test: read a few records, look one up by RefSeq ID and
    # print its reverse complement.
    import sys

    # The first command-line argument, when given, replaces the default
    # (developer-local) FASTA file.
    fastaPath = "/Users/nath//IRIC/paul/human.rna-NOTPREDICTED-mRNA.fasta"
    if len(sys.argv) > 1:
        fastaPath = sys.argv[1]

    fc = fastaContainer(fastaPath)
    print(fc.nextSeq())
    print(fc.nextSeq())
    fc.resetFile()
    s = fc.nextSeq()
    print (s)
    print (s.parseNM())
    #print (fc.findAll(["NM_000314","NM_989898","NM_001145712"]))
    print("1")
    s  = fc.findSeq("NM_000314")
    print (s)
    tag = "*5(1-1031)c(1032-2243)3(2244-5572)"
    #tag = "*c(1-486)"

    print (tag)

    # Disabled experiment 1: pull the six region bounds out of ``tag`` with
    # a single pattern.
    #
    # tab=[]
    # m = re.search(r"5\((\d+)-(\d+)\)c\((\d+)-(\d+)\)3\((\d+)-(\d+)\)",tag)
    #
    # for i in range(1,7):
    #     tab.append(int(m.group(i)))
    #
    # print (tab)
    # tab = []

    # Disabled experiment 2: parse each region separately, then slice the
    # sequence.
    # NOTE(review): the bounds are stored pairwise (tab[0], tab[1]),
    # (tab[2], tab[3]), ... but the slice below reads tab[i] and tab[i+1].
    #
    # exemple = s.seq
    # m = re.search(r"5\((\d+)-(\d+)\)",tag)
    # if m:
    #     tab.append(int(m.group(1)))
    #     tab.append(int(m.group(2)))
    # m = re.search(r"c\((\d+)-(\d+)\)",tag)
    # if m:
    #     tab.append(int(m.group(1)))
    #     tab.append(int(m.group(2)))
    # m = re.search(r"3\((\d+)-(\d+)\)",tag)
    # if m:
    #     tab.append(int(m.group(1)))
    #     tab.append(int(m.group(2)))
    # print (tab)
    # seqs = []
    # for i in range (0,int(len(tab)/2)):
    #     seqs.append(exemple[tab[i]:tab[i+1]])

    # findSeq returns None when the ID is absent; stop with a clear message
    # instead of failing on ``None.seq`` below.
    if s is None:
        raise SystemExit("NM_000314 not found in " + fastaPath)

    print (s.seq)
    print (s.seq.upper())
    print (revCompl(s.seq))

    print ()
    print ('A'+revCompl('A'))