Skip to content

Commit

Permalink
hack around some parse_seqids issues; will need to revisit this. Also…
Browse files Browse the repository at this point in the history
…, only include mRNAs, not CDSs, for now, because the build otherwise exceeds the maximum number of levels (layers) that a docker build will allow (not documented, seemingly by design: docker/docs#8230). I think the use of an individual command per library for the sake of caching is probably the culprit here, but I will need to consult @nathanweeks before doing anything rash and ignorant here.
  • Loading branch information
adf-ncgr committed Apr 4, 2022
1 parent 1413f40 commit f25ada3
Showing 1 changed file with 8 additions and 36 deletions.
44 changes: 8 additions & 36 deletions db/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,17 @@ RUN mkblastdb Glycine/D3-tomentella/annotations/G1403.gnm1.ann1.XNZQ/glyd3.G1403
RUN mkblastdb Glycine/dolichocarpa/annotations/G1134.gnm1.ann1.4BJM/glydo.G1134.gnm1.ann1.4BJM.mrna.fna.gz nucl 82538 'Glycine dolichocarpa G1134 v1.1 mRNAs'
RUN mkblastdb Glycine/falcata/annotations/G1718.gnm1.ann1.2KSV/glyfa.G1718.gnm1.ann1.2KSV.mrna.fna.gz nucl 45690 'Glycine falcata G1718 v1.1 mRNAs'
RUN mkblastdb Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/glyma.Wm82.gnm4.ann1.T8TQ.mrna.fna.gz nucl 3847 'Glycine max Wm82 v4.1 mRNAs'
RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.mrna.fna.gz nucl 3848 'Glycine soja PI483463 v1.1 mRNAs'
# Workaround: build from the primary-transcript file (mrna_primary) to stay under the
# parse_seqids ID-length limit of 50, which the IDs of some genes with more than 9
# isoforms just barely exceed. A better long-term fix would probably be to remove the
# redundancy in the IDs (both the part we add and the part that comes from upstream).
RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.mrna_primary.fna.gz nucl 3848 'Glycine soja PI483463 v1.1 mRNAs'
RUN mkblastdb Glycine/stenophita/annotations/G1974.gnm1.ann1.F257/glyst.G1974.gnm1.ann1.F257.mrna.fna.gz nucl 96944 'Glycine stenophita G1974 v1.1 mRNAs'
RUN mkblastdb Glycine/syndetika/annotations/G1300.gnm1.ann1.RRK6/glysy.G1300.gnm1.ann1.RRK6.mrna.fna.gz nucl 713886 'Glycine syndetika G1300 v1.1 mRNAs'
RUN mkblastdb Lotus/japonicus/annotations/MG20.gnm3.ann1.WF9B/lotja.MG20.gnm3.ann1.WF9B.mrna.fna.gz nucl 34305 'Lotus japonicus MG20 v3.1 mRNAs'
RUN mkblastdb Lupinus/albus/annotations/Amiga.gnm1.ann1.3GKS/lupal.Amiga.gnm1.ann1.3GKS.mrna.fna.gz nucl 3870 'Lupinus albus Amiga v1.1 mRNAs'
RUN mkblastdb Lupinus/angustifolius/annotations/Tanjil.gnm1.ann1.nnV9/lupan.Tanjil.gnm1.ann1.nnV9.mrna.fna.gz nucl 3871 'Lupinus angustifolius Tanjil v1.1 mRNAs'
RUN mkblastdb Medicago/truncatula/annotations/A17.gnm5.ann1_6.L2RX/medtr.A17.gnm5.ann1_6.L2RX.mrna.fna.gz nucl 3880 'Medicago truncatula A17 v5.1.6 mRNAs'
RUN mkblastdb Medicago/sativa/annotations/XinJiangDaYe.gnm1.ann1.RKB9/medsa.XinJiangDaYe.gnm1.ann1.RKB9.mrna.fna.gz nucl 3879 'Medicago sativa XinJiangDaYe v1.1 mRNAs'
RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.mrna.fna.gz nucl 33129 'Phaseolus acutifolius Frijol Bayo v1.1 mRNAs'
# The primary-transcript workaround does not fix this dataset, so the mrna_primary
# command below is left disabled.
#RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.mrna_primary.fna.gz nucl 33129 'Phaseolus acutifolius Frijol Bayo v1.1 mRNAs'
RUN mkblastdb Phaseolus/lunatus/annotations/G27455.gnm1.ann1.JD7C/phalu.G27455.gnm1.ann1.JD7C.mrna.fna.gz nucl 3884 'Phaseolus lunatus G27455 v1.1 mRNAs'
RUN mkblastdb Phaseolus/vulgaris/annotations/G19833.gnm2.ann1.PB8d/phavu.G19833.gnm2.ann1.PB8d.mrna.fna.gz nucl 3885 'Phaseolus vulgaris G19833 v2.1 mRNAs'
RUN mkblastdb Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz nucl 3888 'Pisum sativum Cameor v1.1 mRNAs'
Expand All @@ -72,38 +74,6 @@ RUN mkblastdb Vigna/angularis/annotations/Gyeongwon.gnm3.ann1.3Nz5/vigan.Gyeongw
# Argument order used by every mkblastdb call here:
#   <relative path to gzipped FASTA> <nucl|prot> <numeric ID> '<database title>'
# NOTE(review): the numeric ID looks like an NCBI taxonomy ID — confirm against the mkblastdb helper.
RUN mkblastdb Vigna/radiata/annotations/VC1973A.gnm6.ann1.M1Qs/vigra.VC1973A.gnm6.ann1.M1Qs.mrna.fna.gz nucl 157791 'Vigna radiata VC1973A v6.1 mRNAs'
RUN mkblastdb Vigna/unguiculata/annotations/IT97K-499-35.gnm1.ann2.FD7K/vigun.IT97K-499-35.gnm1.ann2.FD7K.mrna.fna.gz nucl 3917 'Vigna unguiculata IT97K-499-35 v1.2 mRNAs'

# CDS BLAST databases, built relative to /db/CDS_Sequence_Collection.
#
# All CDS databases are created by a single RUN instruction, so the whole
# collection costs one image layer instead of one layer per library. With one
# RUN per library the build exceeded the maximum number of layers Docker
# allows. The trade-off is coarser caching: changing any entry rebuilds the
# entire collection, and the chain stops at the first failing mkblastdb call.
#
# Arguments per call:
#   <relative path to gzipped FASTA> <nucl|prot> <numeric ID> '<database title>'
# NOTE(review): the numeric ID looks like an NCBI taxonomy ID — confirm
# against the mkblastdb helper.
# NOTE(review): the Cercis canadensis entry uses key HZJM in the directory
# name but B05Z in the file name — confirm that this path exists.
WORKDIR "/db/CDS_Sequence_Collection"
RUN mkblastdb Aeschynomene/evenia/annotations/CIAT22838.gnm1.ann1.ZM3R/aesev.CIAT22838.gnm1.ann1.ZM3R.cds.fna.gz nucl 561484 'Aeschynomene evenia CIAT22838 v1.1 CDSs' \
 && mkblastdb Arachis/duranensis/annotations/V14167.gnm1.ann1.cxSM/aradu.V14167.gnm1.ann1.cxSM.cds.fna.gz nucl 130453 'Arachis duranensis v1.1 CDSs' \
 && mkblastdb Arachis/hypogaea/annotations/Tifrunner.gnm2.ann1.4K0L/arahy.Tifrunner.gnm2.ann1.4K0L.cds.fna.gz nucl 3818 'Arachis hypogaea Tifrunner v2.1 CDSs' \
 && mkblastdb Arachis/ipaensis/annotations/K30076.gnm1.ann1.J37m/araip.K30076.gnm1.ann1.J37m.cds.fna.gz nucl 130454 'Arachis ipaensis K30076 v1.1 CDSs' \
 && mkblastdb Cajanus/cajan/annotations/ICPL87119.gnm1.ann1.Y27M/cajca.ICPL87119.gnm1.ann1.Y27M.cds.fna.gz nucl 3821 'Cajanus cajan ICPL87119 v1.1 CDSs' \
 && mkblastdb Cercis/canadensis/annotations/ISC453364.gnm1.ann1.HZJM/cerca.ISC453364.gnm1.ann1.B05Z.cds.fna.gz nucl 49801 'Cercis canadensis ISC453364 v1.1 CDSs' \
 && mkblastdb Chamaecrista/fasciculata/annotations/MN87.gnm1.ann1.LWFM/chafa.MN87.gnm1.ann1.LWFM.cds.fna.gz nucl 53854 'Chamaecrista fasciculata MN87 v1.1 CDSs' \
 && mkblastdb Cicer/arietinum/annotations/CDCFrontier.gnm1.ann1.nRhs/cicar.CDCFrontier.gnm1.ann1.nRhs.cds.fna.gz nucl 3827 'Cicer arietinum CDCFrontier v1.1 CDSs' \
 && mkblastdb Faidherbia/albida/annotations/WAFC.gnm1.ann1.RTP9/faial.WAFC.gnm1.ann1.RTP9.cds.fna.gz nucl 138055 'Faidherbia albida WAFC v1.1 CDSs' \
 && mkblastdb Glycine/cyrtoloba/annotations/G1267.gnm1.ann1.HRFD/glycy.G1267.gnm1.ann1.HRFD.cds.fna.gz nucl 45689 'Glycine cyrtoloba G1267 v1.1 CDSs' \
 && mkblastdb Glycine/D3-tomentella/annotations/G1403.gnm1.ann1.XNZQ/glyd3.G1403.gnm1.ann1.XNZQ.cds.fna.gz nucl 2908013 'Glycine D3-tomentella G1403 v1.1 CDSs' \
 && mkblastdb Glycine/dolichocarpa/annotations/G1134.gnm1.ann1.4BJM/glydo.G1134.gnm1.ann1.4BJM.cds.fna.gz nucl 82538 'Glycine dolichocarpa G1134 v1.1 CDSs' \
 && mkblastdb Glycine/falcata/annotations/G1718.gnm1.ann1.2KSV/glyfa.G1718.gnm1.ann1.2KSV.cds.fna.gz nucl 45690 'Glycine falcata G1718 v1.1 CDSs' \
 && mkblastdb Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/glyma.Wm82.gnm4.ann1.T8TQ.cds.fna.gz nucl 3847 'Glycine max Wm82 v4.1 CDSs' \
 && mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.cds.fna.gz nucl 3848 'Glycine soja PI483463 v1.1 CDSs' \
 && mkblastdb Glycine/stenophita/annotations/G1974.gnm1.ann1.F257/glyst.G1974.gnm1.ann1.F257.cds.fna.gz nucl 96944 'Glycine stenophita G1974 v1.1 CDSs' \
 && mkblastdb Glycine/syndetika/annotations/G1300.gnm1.ann1.RRK6/glysy.G1300.gnm1.ann1.RRK6.cds.fna.gz nucl 713886 'Glycine syndetika G1300 v1.1 CDSs' \
 && mkblastdb Lotus/japonicus/annotations/MG20.gnm3.ann1.WF9B/lotja.MG20.gnm3.ann1.WF9B.cds.fna.gz nucl 34305 'Lotus japonicus MG20 v3.1 CDSs' \
 && mkblastdb Lupinus/albus/annotations/Amiga.gnm1.ann1.3GKS/lupal.Amiga.gnm1.ann1.3GKS.cds.fna.gz nucl 3870 'Lupinus albus Amiga v1.1 CDSs' \
 && mkblastdb Lupinus/angustifolius/annotations/Tanjil.gnm1.ann1.nnV9/lupan.Tanjil.gnm1.ann1.nnV9.cds.fna.gz nucl 3871 'Lupinus angustifolius Tanjil v1.1 CDSs' \
 && mkblastdb Medicago/truncatula/annotations/A17.gnm5.ann1_6.L2RX/medtr.A17.gnm5.ann1_6.L2RX.cds.fna.gz nucl 3880 'Medicago truncatula A17 v5.1.6 CDSs' \
 && mkblastdb Medicago/sativa/annotations/XinJiangDaYe.gnm1.ann1.RKB9/medsa.XinJiangDaYe.gnm1.ann1.RKB9.cds.fna.gz nucl 3879 'Medicago sativa XinJiangDaYe v1.1 CDSs' \
 && mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.cds.fna.gz nucl 33129 'Phaseolus acutifolius Frijol Bayo v1.1 CDSs' \
 && mkblastdb Phaseolus/lunatus/annotations/G27455.gnm1.ann1.JD7C/phalu.G27455.gnm1.ann1.JD7C.cds.fna.gz nucl 3884 'Phaseolus lunatus G27455 v1.1 CDSs' \
 && mkblastdb Phaseolus/vulgaris/annotations/G19833.gnm2.ann1.PB8d/phavu.G19833.gnm2.ann1.PB8d.cds.fna.gz nucl 3885 'Phaseolus vulgaris G19833 v2.1 CDSs' \
 && mkblastdb Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz nucl 3888 'Pisum sativum Cameor v1.1 CDSs' \
 && mkblastdb Trifolium/pratense/annotations/MilvusB.gnm2.ann1.DFgp/tripr.MilvusB.gnm2.ann1.DFgp.cds.fna.gz nucl 57577 'Trifolium pratense Milvus B v2.1 CDSs' \
 && mkblastdb Vigna/angularis/annotations/Gyeongwon.gnm3.ann1.3Nz5/vigan.Gyeongwon.gnm3.ann1.3Nz5.cds.fna.gz nucl 3914 'Vigna angularis Gyeongwon v3.1 CDSs' \
 && mkblastdb Vigna/radiata/annotations/VC1973A.gnm6.ann1.M1Qs/vigra.VC1973A.gnm6.ann1.M1Qs.cds.fna.gz nucl 157791 'Vigna radiata VC1973A v6.1 CDSs' \
 && mkblastdb Vigna/unguiculata/annotations/IT97K-499-35.gnm1.ann2.FD7K/vigun.IT97K-499-35.gnm1.ann2.FD7K.cds.fna.gz nucl 3917 'Vigna unguiculata IT97K-499-35 v1.2 CDSs'

# Protein databases ('prot'), built with paths relative to /db/Protein_Sequence_Collection.
# Arguments per call:
#   <relative path to gzipped FASTA> <nucl|prot> <numeric ID> '<database title>'
# NOTE(review): the numeric ID looks like an NCBI taxonomy ID — confirm against the mkblastdb helper.
WORKDIR "/db/Protein_Sequence_Collection"
RUN mkblastdb Aeschynomene/evenia/annotations/CIAT22838.gnm1.ann1.ZM3R/aesev.CIAT22838.gnm1.ann1.ZM3R.protein.faa.gz prot 561484 'Aeschynomene evenia CIAT22838 v1.1 proteins'
RUN mkblastdb Arachis/duranensis/annotations/V14167.gnm1.ann1.cxSM/aradu.V14167.gnm1.ann1.cxSM.protein.faa.gz prot 130453 'Arachis duranensis v1.1 proteins'
Expand All @@ -119,15 +89,17 @@ RUN mkblastdb Glycine/D3-tomentella/annotations/G1403.gnm1.ann1.XNZQ/glyd3.G1403
RUN mkblastdb Glycine/dolichocarpa/annotations/G1134.gnm1.ann1.4BJM/glydo.G1134.gnm1.ann1.4BJM.protein.faa.gz prot 82538 'Glycine dolichocarpa G1134 v1.1 proteins'
RUN mkblastdb Glycine/falcata/annotations/G1718.gnm1.ann1.2KSV/glyfa.G1718.gnm1.ann1.2KSV.protein.faa.gz prot 45690 'Glycine falcata G1718 v1.1 proteins'
RUN mkblastdb Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/glyma.Wm82.gnm4.ann1.T8TQ.protein.faa.gz prot 3847 'Glycine max Wm82 v4.1 proteins'
RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.protein.faa.gz prot 3848 'Glycine soja PI483463 v1.1 proteins'
# Workaround: build from the primary-transcript file (protein_primary) to stay under the
# parse_seqids ID-length limit of 50, which the IDs of some genes with more than 9
# isoforms just barely exceed. A better long-term fix would probably be to remove the
# redundancy in the IDs (both the part we add and the part that comes from upstream).
RUN mkblastdb Glycine/soja/annotations/PI483463.gnm1.ann1.3Q3Q/glyso.PI483463.gnm1.ann1.3Q3Q.protein_primary.faa.gz prot 3848 'Glycine soja PI483463 v1.1 proteins'
RUN mkblastdb Glycine/stenophita/annotations/G1974.gnm1.ann1.F257/glyst.G1974.gnm1.ann1.F257.protein.faa.gz prot 96944 'Glycine stenophita G1974 v1.1 proteins'
RUN mkblastdb Glycine/syndetika/annotations/G1300.gnm1.ann1.RRK6/glysy.G1300.gnm1.ann1.RRK6.protein.faa.gz prot 713886 'Glycine syndetika G1300 v1.1 proteins'
RUN mkblastdb Lotus/japonicus/annotations/MG20.gnm3.ann1.WF9B/lotja.MG20.gnm3.ann1.WF9B.protein.faa.gz prot 34305 'Lotus japonicus MG20 v3.1 proteins'
RUN mkblastdb Lupinus/albus/annotations/Amiga.gnm1.ann1.3GKS/lupal.Amiga.gnm1.ann1.3GKS.protein.faa.gz prot 3870 'Lupinus albus Amiga v1.1 proteins'
RUN mkblastdb Lupinus/angustifolius/annotations/Tanjil.gnm1.ann1.nnV9/lupan.Tanjil.gnm1.ann1.nnV9.protein.faa.gz prot 3871 'Lupinus angustifolius Tanjil v1.1 proteins'
RUN mkblastdb Medicago/truncatula/annotations/A17.gnm5.ann1_6.L2RX/medtr.A17.gnm5.ann1_6.L2RX.protein.faa.gz prot 3880 'Medicago truncatula A17 v5.1.6 proteins'
RUN mkblastdb Medicago/sativa/annotations/XinJiangDaYe.gnm1.ann1.RKB9/medsa.XinJiangDaYe.gnm1.ann1.RKB9.protein.faa.gz prot 3879 'Medicago sativa XinJiangDaYe v1.1 proteins'
RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.protein.faa.gz prot 33129 'Phaseolus acutifolius Frijol Bayo v1.1 proteins'
# The primary-transcript workaround does not fix this dataset, so the protein_primary
# command below is left disabled.
#RUN mkblastdb Phaseolus/acutifolius/annotations/Frijol_Bayo.gnm1.ann1.ML22/phaac.Frijol_Bayo.gnm1.ann1.ML22.protein_primary.faa.gz prot 33129 'Phaseolus acutifolius Frijol Bayo v1.1 proteins'
RUN mkblastdb Phaseolus/lunatus/annotations/G27455.gnm1.ann1.JD7C/phalu.G27455.gnm1.ann1.JD7C.protein.faa.gz prot 3884 'Phaseolus lunatus G27455 v1.1 proteins'
RUN mkblastdb Phaseolus/vulgaris/annotations/G19833.gnm2.ann1.PB8d/phavu.G19833.gnm2.ann1.PB8d.protein.faa.gz prot 3885 'Phaseolus vulgaris G19833 v2.1 proteins'
RUN mkblastdb Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz prot 3888 'Pisum sativum Cameor v1.1 proteins'
Expand Down

0 comments on commit f25ada3

Please sign in to comment.