# Review notes for this patch to db/Dockerfile (free-form preamble placed before the diff header).
#
# What the hunks below change:
#   * Glycine soja PI483463 (mRNA and protein collections): the mkblastdb input switches from
#     *.mrna.fna.gz / *.protein.faa.gz to the *.mrna_primary.fna.gz / *.protein_primary.faa.gz files.
#     The comment added with the change gives the reason: the full files hit "the parse_seqids
#     limit of 50", which some genes with >9 isoforms just barely exceed. The same comment names the
#     preferred long-term fix: removing the redundancy in the sequence ids instead of dropping isoforms.
#   * Phaseolus acutifolius Frijol_Bayo (mRNA and protein collections): the mkblastdb step is
#     commented out rather than switched, because (per the added comment) the primary-only
#     workaround is not sufficient for this dataset. No BLAST database is built for it after this patch.
#   * The entire WORKDIR "/db/CDS_Sequence_Collection" section, i.e. every "... CDSs" mkblastdb step
#     in that hunk, is removed.
#
# NOTE(review): "limit of 50" presumably refers to a 50-character cap on sequence identifiers when
#   ids are parsed -- confirm against the BLAST+ makeblastdb documentation.
# NOTE(review): mkblastdb is a helper defined outside this patch; from usage its arguments look like
#   <fasta path> <nucl|prot> <numeric id> <title> -- confirm the meaning of the numeric argument.
# NOTE(review): the hunks appear here with their line breaks collapsed into spaces; restore one diff
#   line per "+", "-" or context entry before attempting to apply the patch.
diff --git a/db/Dockerfile b/db/Dockerfile index ddab798f6..933892410 100644 --- a/db/Dockerfile +++ b/db/Dockerfile @@ -55,7 +55,8 @@ RUN mkblastdb Glycine/D3-tomentella/annotations/G1403.gnm1.ann1.XNZQ/glyd3.G1403 RUN mkblastdb Glycine/dolichocarpa/annotations/G1134.gnm1.ann1.4BJM/glydo.G1134.gnm1.ann1.4BJM.mrna.fna.gz nucl 82538 'Glycine dolichocarpa G1134 v1.1 mRNAs' RUN mkblastdb Glycine/falcata/annotations/G1718.gnm1.ann1.2KSV/glyfa.G1718.gnm1.ann1.2KSV.mrna.fna.gz nucl 45690 'Glycine falcata G1718 v1.1 mRNAs' RUN mkblastdb Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/glyma.Wm82.gnm4.ann1.T8TQ.mrna.fna.gz nucl 3847 'Glycine max Wm82 v4.1 mRNAs' -RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.mrna.fna.gz nucl 3848 'Glycine soja PI483463 v1.1 mRNAs' +#this is a hack to avoid hitting the parse_seqids limit of 50 which some genes with >9 isoforms just barely exceed; probably the better solution would be to deredundify our yuck and their yuck in the ids +RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.mrna_primary.fna.gz nucl 3848 'Glycine soja PI483463 v1.1 mRNAs' RUN mkblastdb Glycine/stenophita/annotations/G1974.gnm1.ann1.F257/glyst.G1974.gnm1.ann1.F257.mrna.fna.gz nucl 96944 'Glycine stenophita G1974 v1.1 mRNAs' RUN mkblastdb Glycine/syndetika/annotations/G1300.gnm1.ann1.RRK6/glysy.G1300.gnm1.ann1.RRK6.mrna.fna.gz nucl 713886 'Glycine syndetika G1300 v1.1 mRNAs' RUN mkblastdb Lotus/japonicus/annotations/MG20.gnm3.ann1.WF9B/lotja.MG20.gnm3.ann1.WF9B.mrna.fna.gz nucl 34305 'Lotus japonicus MG20 v3.1 mRNAs' @@ -63,7 +64,8 @@ RUN mkblastdb Lupinus/albus/annotations/Amiga.gnm1.ann1.3GKS/lupal.Amiga.gnm1.an RUN mkblastdb Lupinus/angustifolius/annotations/Tanjil.gnm1.ann1.nnV9/lupan.Tanjil.gnm1.ann1.nnV9.mrna.fna.gz nucl 3871 'Lupinus angustifolius Tanjil v1.1 mRNAs' RUN mkblastdb Medicago/truncatula/annotations/A17.gnm5.ann1_6.L2RX/medtr.A17.gnm5.ann1_6.L2RX.mrna.fna.gz nucl 
3880 'Medicago truncatula A17 v5.1.6 mRNAs' RUN mkblastdb Medicago/sativa/annotations/XinJiangDaYe.gnm1.ann1.RKB9/medsa.XinJiangDaYe.gnm1.ann1.RKB9.mrna.fna.gz nucl 3879 'Medicago sativa XinJiangDaYe v1.1 mRNAs' -RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.mrna.fna.gz nucl 33129 'Phaseolus acutifolius Frijol Bayo v1.1 mRNAs' +#primary hack does not save this one +#RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.mrna_primary.fna.gz nucl 33129 'Phaseolus acutifolius Frijol Bayo v1.1 mRNAs' RUN mkblastdb Phaseolus/lunatus/annotations/G27455.gnm1.ann1.JD7C/phalu.G27455.gnm1.ann1.JD7C.mrna.fna.gz nucl 3884 'Phaseolus lunatus G27455 v1.1 mRNAs' RUN mkblastdb Phaseolus/vulgaris/annotations/G19833.gnm2.ann1.PB8d/phavu.G19833.gnm2.ann1.PB8d.mrna.fna.gz nucl 3885 'Phaseolus vulgaris G19833 v2.1 mRNAs' RUN mkblastdb Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz nucl 3888 'Pisum sativum Cameor v1.1 mRNAs' @@ -72,38 +74,6 @@ RUN mkblastdb Vigna/angularis/annotations/Gyeongwon.gnm3.ann1.3Nz5/vigan.Gyeongw RUN mkblastdb Vigna/radiata/annotations/VC1973A.gnm6.ann1.M1Qs/vigra.VC1973A.gnm6.ann1.M1Qs.mrna.fna.gz nucl 157791 'Vigna radiata VC1973A v6.1 mRNAs' RUN mkblastdb Vigna/unguiculata/annotations/IT97K-499-35.gnm1.ann2.FD7K/vigun.IT97K-499-35.gnm1.ann2.FD7K.mrna.fna.gz nucl 3917 'Vigna unguiculata IT97K-499-35 v1.2 mRNAs' -WORKDIR "/db/CDS_Sequence_Collection" -RUN mkblastdb Aeschynomene/evenia/annotations/CIAT22838.gnm1.ann1.ZM3R/aesev.CIAT22838.gnm1.ann1.ZM3R.cds.fna.gz nucl 561484 'Aeschynomene evenia CIAT22838 v1.1 CDSs' -RUN mkblastdb Arachis/duranensis/annotations/V14167.gnm1.ann1.cxSM/aradu.V14167.gnm1.ann1.cxSM.cds.fna.gz nucl 130453 'Arachis duranensis v1.1 CDSs' -RUN mkblastdb Arachis/hypogaea/annotations/Tifrunner.gnm2.ann1.4K0L/arahy.Tifrunner.gnm2.ann1.4K0L.cds.fna.gz nucl 3818 'Arachis hypogaea 
Tifrunner v2.1 CDSs' -RUN mkblastdb Arachis/ipaensis/annotations/K30076.gnm1.ann1.J37m/araip.K30076.gnm1.ann1.J37m.cds.fna.gz nucl 130454 'Arachis ipaensis K30076 v1.1 CDSs' -RUN mkblastdb Cajanus/cajan/annotations/ICPL87119.gnm1.ann1.Y27M/cajca.ICPL87119.gnm1.ann1.Y27M.cds.fna.gz nucl 3821 'Cajanus cajan ICPL87119 v1.1 CDSs' -RUN mkblastdb Cercis/canadensis/annotations/ISC453364.gnm1.ann1.HZJM/cerca.ISC453364.gnm1.ann1.B05Z.cds.fna.gz nucl 49801 'Cercis canadensis ISC453364 v1.1 CDSs' -RUN mkblastdb Chamaecrista/fasciculata/annotations/MN87.gnm1.ann1.LWFM/chafa.MN87.gnm1.ann1.LWFM.cds.fna.gz nucl 53854 'Chamaecrista fasciculata MN87 v1.1 pCDSs' -RUN mkblastdb Cicer/arietinum/annotations/CDCFrontier.gnm1.ann1.nRhs/cicar.CDCFrontier.gnm1.ann1.nRhs.cds.fna.gz nucl 3827 'Cicer arietinum CDCFrontier v1.1 CDSs' -RUN mkblastdb Faidherbia/albida/annotations/WAFC.gnm1.ann1.RTP9/faial.WAFC.gnm1.ann1.RTP9.cds.fna.gz nucl 138055 'Faidherbia albida WAFC v1.1 CDSs' -RUN mkblastdb Glycine/cyrtoloba/annotations/G1267.gnm1.ann1.HRFD/glycy.G1267.gnm1.ann1.HRFD.cds.fna.gz nucl 45689 'Glycine cyrtoloba G1267 v1.1 CDSs' -RUN mkblastdb Glycine/D3-tomentella/annotations/G1403.gnm1.ann1.XNZQ/glyd3.G1403.gnm1.ann1.XNZQ.cds.fna.gz nucl 2908013 'Glycine D3-tomentella G1403 v1.1 CDSs' -RUN mkblastdb Glycine/dolichocarpa/annotations/G1134.gnm1.ann1.4BJM/glydo.G1134.gnm1.ann1.4BJM.cds.fna.gz nucl 82538 'Glycine dolichocarpa G1134 v1.1 CDSs' -RUN mkblastdb Glycine/falcata/annotations/G1718.gnm1.ann1.2KSV/glyfa.G1718.gnm1.ann1.2KSV.cds.fna.gz nucl 45690 'Glycine falcata G1718 v1.1 CDSs' -RUN mkblastdb Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/glyma.Wm82.gnm4.ann1.T8TQ.cds.fna.gz nucl 3847 'Glycine max Wm82 v4.1 CDSs' -RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.cds.fna.gz nucl 3848 'Glycine soja PI483463 v1.1 CDSs' -RUN mkblastdb Glycine/stenophita/annotations/G1974.gnm1.ann1.F257/glyst.G1974.gnm1.ann1.F257.cds.fna.gz nucl 96944 'Glycine 
stenophita G1974 v1.1 CDSs' -RUN mkblastdb Glycine/syndetika/annotations/G1300.gnm1.ann1.RRK6/glysy.G1300.gnm1.ann1.RRK6.cds.fna.gz nucl 713886 'Glycine syndetika G1300 v1.1 CDSs' -RUN mkblastdb Lotus/japonicus/annotations/MG20.gnm3.ann1.WF9B/lotja.MG20.gnm3.ann1.WF9B.cds.fna.gz nucl 34305 'Lotus japonicus MG20 v3.1 CDSs' -RUN mkblastdb Lupinus/albus/annotations/Amiga.gnm1.ann1.3GKS/lupal.Amiga.gnm1.ann1.3GKS.cds.fna.gz nucl 3870 'Lupinus albus Amiga v1.1 CDSs' -RUN mkblastdb Lupinus/angustifolius/annotations/Tanjil.gnm1.ann1.nnV9/lupan.Tanjil.gnm1.ann1.nnV9.cds.fna.gz nucl 3871 'Lupinus angustifolius Tanjil v1.1 CDSs' -RUN mkblastdb Medicago/truncatula/annotations/A17.gnm5.ann1_6.L2RX/medtr.A17.gnm5.ann1_6.L2RX.cds.fna.gz nucl 3880 'Medicago truncatula A17 v5.1.6 CDSs' -RUN mkblastdb Medicago/sativa/annotations/XinJiangDaYe.gnm1.ann1.RKB9/medsa.XinJiangDaYe.gnm1.ann1.RKB9.cds.fna.gz nucl 3879 'Medicago sativa XinJiangDaYe v1.1 CDSs' -RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.cds.fna.gz nucl 33129 'Phaseolus acutifolius Frijol Bayo v1.1 CDSs' -RUN mkblastdb Phaseolus/lunatus/annotations/G27455.gnm1.ann1.JD7C/phalu.G27455.gnm1.ann1.JD7C.cds.fna.gz nucl 3884 'Phaseolus lunatus G27455 v1.1 CDSs' -RUN mkblastdb Phaseolus/vulgaris/annotations/G19833.gnm2.ann1.PB8d/phavu.G19833.gnm2.ann1.PB8d.cds.fna.gz nucl 3885 'Phaseolus vulgaris G19833 v2.1 CDSs' -RUN mkblastdb Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz nucl 3888 'Pisum sativum Cameor v1.1 CDSs' -RUN mkblastdb Trifolium/pratense/annotations/MilvusB.gnm2.ann1.DFgp/tripr.MilvusB.gnm2.ann1.DFgp.cds.fna.gz nucl 57577 'Trifolium pratense Milvus B v2.1 CDSs' -RUN mkblastdb Vigna/angularis/annotations/Gyeongwon.gnm3.ann1.3Nz5/vigan.Gyeongwon.gnm3.ann1.3Nz5.cds.fna.gz nucl 3914 'Vigna angularis Gyeongwon v3.1 CDSs' -RUN mkblastdb Vigna/radiata/annotations/VC1973A.gnm6.ann1.M1Qs/vigra.VC1973A.gnm6.ann1.M1Qs.cds.fna.gz 
nucl 157791 'Vigna radiata VC1973A v6.1 CDSs' -RUN mkblastdb Vigna/unguiculata/annotations/IT97K-499-35.gnm1.ann2.FD7K/vigun.IT97K-499-35.gnm1.ann2.FD7K.cds.fna.gz nucl 3917 'Vigna unguiculata IT97K-499-35 v1.2 CDSs' - WORKDIR "/db/Protein_Sequence_Collection" RUN mkblastdb Aeschynomene/evenia/annotations/CIAT22838.gnm1.ann1.ZM3R/aesev.CIAT22838.gnm1.ann1.ZM3R.protein.faa.gz prot 561484 'Aeschynomene evenia CIAT22838 v1.1 proteins' RUN mkblastdb Arachis/duranensis/annotations/V14167.gnm1.ann1.cxSM/aradu.V14167.gnm1.ann1.cxSM.protein.faa.gz prot 130453 'Arachis duranensis v1.1 proteins' @@ -119,7 +89,8 @@ RUN mkblastdb Glycine/D3-tomentella/annotations/G1403.gnm1.ann1.XNZQ/glyd3.G1403 RUN mkblastdb Glycine/dolichocarpa/annotations/G1134.gnm1.ann1.4BJM/glydo.G1134.gnm1.ann1.4BJM.protein.faa.gz prot 82538 'Glycine dolichocarpa G1134 v1.1 proteins' RUN mkblastdb Glycine/falcata/annotations/G1718.gnm1.ann1.2KSV/glyfa.G1718.gnm1.ann1.2KSV.protein.faa.gz prot 45690 'Glycine falcata G1718 v1.1 proteins' RUN mkblastdb Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/glyma.Wm82.gnm4.ann1.T8TQ.protein.faa.gz prot 3847 'Glycine max Wm82 v4.1 proteins' -RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.protein.faa.gz prot 3848 'Glycine soja PI483463 v1.1 proteins' +#this is a hack to avoid hitting the parse_seqids limit of 50 which some genes with >9 isoforms just barely exceed; probably the better solution would be to deredundify our yuck and their yuck in the ids +RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.protein_primary.faa.gz prot 3848 'Glycine soja PI483463 v1.1 proteins' RUN mkblastdb Glycine/stenophita/annotations/G1974.gnm1.ann1.F257/glyst.G1974.gnm1.ann1.F257.protein.faa.gz prot 96944 'Glycine stenophita G1974 v1.1 proteins' RUN mkblastdb Glycine/syndetika/annotations/G1300.gnm1.ann1.RRK6/glysy.G1300.gnm1.ann1.RRK6.protein.faa.gz prot 713886 'Glycine syndetika G1300 v1.1 proteins' 
RUN mkblastdb Lotus/japonicus/annotations/MG20.gnm3.ann1.WF9B/lotja.MG20.gnm3.ann1.WF9B.protein.faa.gz prot 34305 'Lotus japonicus MG20 v3.1 proteins' @@ -127,7 +98,8 @@ RUN mkblastdb Lupinus/albus/annotations/Amiga.gnm1.ann1.3GKS/lupal.Amiga.gnm1.an RUN mkblastdb Lupinus/angustifolius/annotations/Tanjil.gnm1.ann1.nnV9/lupan.Tanjil.gnm1.ann1.nnV9.protein.faa.gz prot 3871 'Lupinus angustifolius Tanjil v1.1 proteins' RUN mkblastdb Medicago/truncatula/annotations/A17.gnm5.ann1_6.L2RX/medtr.A17.gnm5.ann1_6.L2RX.protein.faa.gz prot 3880 'Medicago truncatula A17 v5.1.6 proteins' RUN mkblastdb Medicago/sativa/annotations/XinJiangDaYe.gnm1.ann1.RKB9/medsa.XinJiangDaYe.gnm1.ann1.RKB9.protein.faa.gz prot 3879 'Medicago sativa XinJiangDaYe v1.1 proteins' -RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.protein.faa.gz prot 33129 'Phaseolus acutifolius Frijol Bayo v1.1 proteins' +#primary hack does not save this one +#RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.protein_primary.faa.gz prot 33129 'Phaseolus acutifolius Frijol Bayo v1.1 proteins' RUN mkblastdb Phaseolus/lunatus/annotations/G27455.gnm1.ann1.JD7C/phalu.G27455.gnm1.ann1.JD7C.protein.faa.gz prot 3884 'Phaseolus lunatus G27455 v1.1 proteins' RUN mkblastdb Phaseolus/vulgaris/annotations/G19833.gnm2.ann1.PB8d/phavu.G19833.gnm2.ann1.PB8d.protein.faa.gz prot 3885 'Phaseolus vulgaris G19833 v2.1 proteins' RUN mkblastdb Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz prot 3888 'Pisum sativum Cameor v1.1 proteins'