diff --git a/endToEndTests/test/queries/aaMutDistribution_all.json b/endToEndTests/test/queries/aaMutDistribution_all.json index ee191f8c..32c47f13 100644 --- a/endToEndTests/test/queries/aaMutDistribution_all.json +++ b/endToEndTests/test/queries/aaMutDistribution_all.json @@ -3,120 +3,232 @@ "query": { "action": { "type": "AminoAcidMutations", - "minProportion": 0.4 + "minProportion": 0.4, + "orderByFields": ["position", "mutationFrom"] }, "filterExpression": { "type": "True" } }, "expectedQueryResult": [ - { - "count": 46, - "mutation": "R203K", - "mutationFrom": "R", - "mutationTo": "K", - "position": 203, - "proportion": 0.46, - "sequenceName": "N" - }, - { - "count": 46, - "mutation": "G204R", - "mutationFrom": "G", - "mutationTo": "R", - "position": 204, - "proportion": 0.46, - "sequenceName": "N" - }, - { - "count": 64, - "mutation": "T3255I", - "mutationFrom": "T", - "mutationTo": "I", - "position": 3255, - "proportion": 0.6464646464646465, - "sequenceName": "ORF1a" - }, - { - "count": 44, - "mutation": "S3675-", - "mutationFrom": "S", - "mutationTo": "-", - "position": 3675, - "proportion": 0.4489795918367347, - "sequenceName": "ORF1a" - }, - { - "count": 44, - "mutation": "G3676-", - "mutationFrom": "G", - "mutationTo": "-", - "position": 3676, - "proportion": 0.4444444444444444, - "sequenceName": "ORF1a" - }, - { - "count": 98, - "mutation": "P314L", - "mutationFrom": "P", - "mutationTo": "L", - "position": 314, - "proportion": 0.98, - "sequenceName": "ORF1b" - }, - { - "count": 37, - "mutation": "G142D", - "mutationFrom": "G", - "mutationTo": "D", - "position": 142, - "proportion": 0.4065934065934066, - "sequenceName": "S" - }, - { - "count": 38, - "mutation": "L452R", - "mutationFrom": "L", - "mutationTo": "R", - "position": 452, - "proportion": 0.4318181818181818, - "sequenceName": "S" - }, - { - "count": 69, - "mutation": "T478K", - "mutationFrom": "T", - "mutationTo": "K", - "position": 478, - "proportion": 0.7340425531914894, - "sequenceName": "S" - }, - { - "count": 41, - "mutation": "N501Y", - "mutationFrom": "N", - "mutationTo": "Y", - "position": 501, - "proportion": 0.44086021505376344, - "sequenceName": "S" - }, - { - "count": 98, - "mutation": "D614G", - "mutationFrom": "D", - "mutationTo": "G", - "position": 614, - "proportion": 0.98989898989899, - "sequenceName": "S" - }, - { - "count": 42, - "mutation": "P681H", - "mutationFrom": "P", - "mutationTo": "H", - "position": 681, - "proportion": 0.42, - "sequenceName": "S" - } + {"count":97,"mutation":"A2F","mutationFrom":"A","mutationTo":"F","position":2,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"D3V","mutationFrom":"D","mutationTo":"V","position":3,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"S4F","mutationFrom":"S","mutationTo":"F","position":4,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"N5L","mutationFrom":"N","mutationTo":"L","position":5,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":96,"mutation":"G6V","mutationFrom":"G","mutationTo":"V","position":6,"proportion":0.9795918367346939,"sequenceName":"M"}, + {"count":97,"mutation":"T7L","mutationFrom":"T","mutationTo":"L","position":7,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"I8L","mutationFrom":"I","mutationTo":"L","position":8,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"T9P","mutationFrom":"T","mutationTo":"P","position":9,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"V10L","mutationFrom":"V","mutationTo":"L","position":10,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"E11V","mutationFrom":"E","mutationTo":"V","position":11,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":96,"mutation":"E12S","mutationFrom":"E","mutationTo":"S","position":12,"proportion":0.9795918367346939,"sequenceName":"M"}, + {"count":96,"mutation":"L13S","mutationFrom":"L","mutationTo":"S","position":13,"proportion":0.9795918367346939,"sequenceName":"M"}, + {"count":97,"mutation":"K14Q","mutationFrom":"K","mutationTo":"Q","position":14,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"K15C","mutationFrom":"K","mutationTo":"C","position":15,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"L16V","mutationFrom":"L","mutationTo":"V","position":16,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"L17N","mutationFrom":"L","mutationTo":"N","position":17,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":93,"mutation":"E18L","mutationFrom":"E","mutationTo":"L","position":18,"proportion":0.9489795918367347,"sequenceName":"M"}, + {"count":44,"mutation":"Q19T","mutationFrom":"Q","mutationTo":"T","position":19,"proportion":0.4536082474226804,"sequenceName":"M"}, + {"count":95,"mutation":"W20T","mutationFrom":"W","mutationTo":"T","position":20,"proportion":0.9693877551020408,"sequenceName":"M"}, + {"count":96,"mutation":"N21R","mutationFrom":"N","mutationTo":"R","position":21,"proportion":0.9795918367346939,"sequenceName":"M"}, + {"count":97,"mutation":"L22T","mutationFrom":"L","mutationTo":"T","position":22,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"V23Q","mutationFrom":"V","mutationTo":"Q","position":23,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":81,"mutation":"I24L","mutationFrom":"I","mutationTo":"L","position":24,"proportion":0.826530612244898,"sequenceName":"M"}, + {"count":81,"mutation":"G25P","mutationFrom":"G","mutationTo":"P","position":25,"proportion":0.826530612244898,"sequenceName":"M"}, + {"count":79,"mutation":"F26P","mutationFrom":"F","mutationTo":"P","position":26,"proportion":0.8061224489795918,"sequenceName":"M"}, + {"count":81,"mutation":"L27A","mutationFrom":"L","mutationTo":"A","position":27,"proportion":0.826530612244898,"sequenceName":"M"}, + {"count":97,"mutation":"F28Y","mutationFrom":"F","mutationTo":"Y","position":28,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"L29T","mutationFrom":"L","mutationTo":"T","position":29,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":96,"mutation":"T30N","mutationFrom":"T","mutationTo":"N","position":30,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"W31S","mutationFrom":"W","mutationTo":"S","position":31,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"I32F","mutationFrom":"I","mutationTo":"F","position":32,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"C33T","mutationFrom":"C","mutationTo":"T","position":33,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"L34R","mutationFrom":"L","mutationTo":"R","position":34,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":97,"mutation":"L35G","mutationFrom":"L","mutationTo":"G","position":35,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":98,"mutation":"Q36V","mutationFrom":"Q","mutationTo":"V","position":36,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":98,"mutation":"F37Y","mutationFrom":"F","mutationTo":"Y","position":37,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":98,"mutation":"A38Y","mutationFrom":"A","mutationTo":"Y","position":38,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":98,"mutation":"Y39P","mutationFrom":"Y","mutationTo":"P","position":39,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":98,"mutation":"A40D","mutationFrom":"A","mutationTo":"D","position":40,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":99,"mutation":"N41K","mutationFrom":"N","mutationTo":"K","position":41,"proportion":0.99,"sequenceName":"M"}, + {"count":99,"mutation":"R42V","mutationFrom":"R","mutationTo":"V","position":42,"proportion":0.99,"sequenceName":"M"}, + {"count":99,"mutation":"N43F","mutationFrom":"N","mutationTo":"F","position":43,"proportion":0.99,"sequenceName":"M"}, + {"count":99,"mutation":"F45S","mutationFrom":"F","mutationTo":"S","position":45,"proportion":0.99,"sequenceName":"M"}, + {"count":98,"mutation":"L46S","mutationFrom":"L","mutationTo":"S","position":46,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":98,"mutation":"Y47V","mutationFrom":"Y","mutationTo":"V","position":47,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":98,"mutation":"I48L","mutationFrom":"I","mutationTo":"L","position":48,"proportion":0.98989898989899,"sequenceName":"M"}, + {"count":99,"mutation":"I49H","mutationFrom":"I","mutationTo":"H","position":49,"proportion":0.99,"sequenceName":"M"}, + {"count":99,"mutation":"K50S","mutationFrom":"K","mutationTo":"S","position":50,"proportion":0.99,"sequenceName":"M"}, + {"count":99,"mutation":"L51T","mutationFrom":"L","mutationTo":"T","position":51,"proportion":0.99,"sequenceName":"M"}, + {"count":95,"mutation":"I52Q","mutationFrom":"I","mutationTo":"Q","position":52,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":94,"mutation":"F53D","mutationFrom":"F","mutationTo":"D","position":53,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":94,"mutation":"W55F","mutationFrom":"W","mutationTo":"F","position":55,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":93,"mutation":"L57P","mutationFrom":"L","mutationTo":"P","position":57,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"W58F","mutationFrom":"W","mutationTo":"F","position":58,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":94,"mutation":"P59F","mutationFrom":"P","mutationTo":"F","position":59,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":93,"mutation":"V60S","mutationFrom":"V","mutationTo":"S","position":60,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"T61N","mutationFrom":"T","mutationTo":"N","position":61,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"L62V","mutationFrom":"L","mutationTo":"V","position":62,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"A63T","mutationFrom":"A","mutationTo":"T","position":63,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"C64W","mutationFrom":"C","mutationTo":"W","position":64,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"V66H","mutationFrom":"V","mutationTo":"H","position":66,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":76,"mutation":"L67A","mutationFrom":"L","mutationTo":"A","position":67,"proportion":0.8260869565217391,"sequenceName":"M"}, + {"count":92,"mutation":"A68I","mutationFrom":"A","mutationTo":"I","position":68,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":68,"mutation":"A69H","mutationFrom":"A","mutationTo":"H","position":69,"proportion":0.7311827956989247,"sequenceName":"M"}, + {"count":92,"mutation":"Y71S","mutationFrom":"Y","mutationTo":"S","position":71,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"R72G","mutationFrom":"R","mutationTo":"G","position":72,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"I73T","mutationFrom":"I","mutationTo":"T","position":73,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"W75G","mutationFrom":"W","mutationTo":"G","position":75,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"I76T","mutationFrom":"I","mutationTo":"T","position":76,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"T77K","mutationFrom":"T","mutationTo":"K","position":77,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"G78R","mutationFrom":"G","mutationTo":"R","position":78,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"G79F","mutationFrom":"G","mutationTo":"F","position":79,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"I80D","mutationFrom":"I","mutationTo":"D","position":80,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":91,"mutation":"A81N","mutationFrom":"A","mutationTo":"N","position":81,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":90,"mutation":"I82P","mutationFrom":"I","mutationTo":"P","position":82,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":91,"mutation":"A83V","mutationFrom":"A","mutationTo":"V","position":83,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":92,"mutation":"M84L","mutationFrom":"M","mutationTo":"L","position":84,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":91,"mutation":"A85P","mutationFrom":"A","mutationTo":"P","position":85,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":90,"mutation":"C86F","mutationFrom":"C","mutationTo":"F","position":86,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":90,"mutation":"L87N","mutationFrom":"L","mutationTo":"N","position":87,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":90,"mutation":"V88D","mutationFrom":"V","mutationTo":"D","position":88,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":90,"mutation":"L90V","mutationFrom":"L","mutationTo":"V","position":90,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":91,"mutation":"M91Y","mutationFrom":"M","mutationTo":"Y","position":91,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":91,"mutation":"W92F","mutationFrom":"W","mutationTo":"F","position":92,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":91,"mutation":"L93A","mutationFrom":"L","mutationTo":"A","position":93,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":65,"mutation":"Y95T","mutationFrom":"Y","mutationTo":"T","position":95,"proportion":0.7222222222222222,"sequenceName":"M"}, + {"count":91,"mutation":"F96E","mutationFrom":"F","mutationTo":"E","position":96,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":91,"mutation":"I97K","mutationFrom":"I","mutationTo":"K","position":97,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":92,"mutation":"A98S","mutationFrom":"A","mutationTo":"S","position":98,"proportion":0.9787234042553191,"sequenceName":"M"}, + {"count":93,"mutation":"S99N","mutationFrom":"S","mutationTo":"N","position":99,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"F100I","mutationFrom":"F","mutationTo":"I","position":100,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":92,"mutation":"R101I","mutationFrom":"R","mutationTo":"I","position":101,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":91,"mutation":"L102R","mutationFrom":"L","mutationTo":"R","position":102,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":90,"mutation":"F103G","mutationFrom":"F","mutationTo":"G","position":103,"proportion":0.9782608695652174,"sequenceName":"M"}, + {"count":91,"mutation":"A104W","mutationFrom":"A","mutationTo":"W","position":104,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":92,"mutation":"R105I","mutationFrom":"R","mutationTo":"I","position":105,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"T106F","mutationFrom":"T","mutationTo":"F","position":106,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"R107G","mutationFrom":"R","mutationTo":"G","position":107,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"S108T","mutationFrom":"S","mutationTo":"T","position":108,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":91,"mutation":"M109T","mutationFrom":"M","mutationTo":"T","position":109,"proportion":0.978494623655914,"sequenceName":"M"}, + {"count":91,"mutation":"W110L","mutationFrom":"W","mutationTo":"L","position":110,"proportion":0.978494623655914,"sequenceName":"M"}, + {"count":91,"mutation":"S111D","mutationFrom":"S","mutationTo":"D","position":111,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":92,"mutation":"F112S","mutationFrom":"F","mutationTo":"S","position":112,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"N113K","mutationFrom":"N","mutationTo":"K","position":113,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"P114T","mutationFrom":"P","mutationTo":"T","position":114,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"E115Q","mutationFrom":"E","mutationTo":"Q","position":115,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"T116S","mutationFrom":"T","mutationTo":"S","position":116,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"N117L","mutationFrom":"N","mutationTo":"L","position":117,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"I118L","mutationFrom":"I","mutationTo":"L","position":118,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"L119I","mutationFrom":"L","mutationTo":"I","position":119,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"L120V","mutationFrom":"L","mutationTo":"V","position":120,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":91,"mutation":"V122N","mutationFrom":"V","mutationTo":"N","position":122,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":90,"mutation":"P123A","mutationFrom":"P","mutationTo":"A","position":123,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":91,"mutation":"L124T","mutationFrom":"L","mutationTo":"T","position":124,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":90,"mutation":"H125N","mutationFrom":"H","mutationTo":"N","position":125,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":89,"mutation":"G126V","mutationFrom":"G","mutationTo":"V","position":126,"proportion":0.9888888888888889,"sequenceName":"M"}, + {"count":89,"mutation":"T127V","mutationFrom":"T","mutationTo":"V","position":127,"proportion":0.9888888888888889,"sequenceName":"M"}, + {"count":89,"mutation":"L129K","mutationFrom":"L","mutationTo":"K","position":129,"proportion":0.9888888888888889,"sequenceName":"M"}, + {"count":89,"mutation":"T130V","mutationFrom":"T","mutationTo":"V","position":130,"proportion":0.9888888888888889,"sequenceName":"M"}, + {"count":90,"mutation":"R131C","mutationFrom":"R","mutationTo":"C","position":131,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":90,"mutation":"P132E","mutationFrom":"P","mutationTo":"E","position":132,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":90,"mutation":"L133F","mutationFrom":"L","mutationTo":"F","position":133,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":90,"mutation":"L134Q","mutationFrom":"L","mutationTo":"Q","position":134,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":90,"mutation":"E135F","mutationFrom":"E","mutationTo":"F","position":135,"proportion":0.989010989010989,"sequenceName":"M"}, + {"count":91,"mutation":"S136C","mutationFrom":"S","mutationTo":"C","position":136,"proportion":0.9891304347826086,"sequenceName":"M"}, + {"count":92,"mutation":"E137N","mutationFrom":"E","mutationTo":"N","position":137,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":91,"mutation":"L138D","mutationFrom":"L","mutationTo":"D","position":138,"proportion":0.978494623655914,"sequenceName":"M"}, + {"count":92,"mutation":"V139P","mutationFrom":"V","mutationTo":"P","position":139,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"I140F","mutationFrom":"I","mutationTo":"F","position":140,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":92,"mutation":"G141L","mutationFrom":"G","mutationTo":"L","position":141,"proportion":0.989247311827957,"sequenceName":"M"}, + {"count":37,"mutation":"A142D","mutationFrom":"A","mutationTo":"D","position":142,"proportion":0.4065934065934066,"sequenceName":"M"}, + {"count":38,"mutation":"A142G","mutationFrom":"A","mutationTo":"G","position":142,"proportion":0.4175824175824176,"sequenceName":"M"}, + {"count":37,"mutation":"G142D","mutationFrom":"G","mutationTo":"D","position":142,"proportion":0.4065934065934066,"sequenceName":"S"}, + {"count":75,"mutation":"I144Y","mutationFrom":"I","mutationTo":"Y","position":144,"proportion":0.7653061224489796,"sequenceName":"M"}, + {"count":81,"mutation":"L145Y","mutationFrom":"L","mutationTo":"Y","position":145,"proportion":0.826530612244898,"sequenceName":"M"}, + {"count":97,"mutation":"R146H","mutationFrom":"R","mutationTo":"H","position":146,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"G147K","mutationFrom":"G","mutationTo":"K","position":147,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"H148N","mutationFrom":"H","mutationTo":"N","position":148,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"L149N","mutationFrom":"L","mutationTo":"N","position":149,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":97,"mutation":"R150K","mutationFrom":"R","mutationTo":"K","position":150,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":96,"mutation":"I151S","mutationFrom":"I","mutationTo":"S","position":151,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"A152W","mutationFrom":"A","mutationTo":"W","position":152,"proportion":0.9795918367346939,"sequenceName":"M"}, + {"count":95,"mutation":"G153M","mutationFrom":"G","mutationTo":"M","position":153,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"H154E","mutationFrom":"H","mutationTo":"E","position":154,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":94,"mutation":"H155S","mutationFrom":"H","mutationTo":"S","position":155,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":59,"mutation":"L156E","mutationFrom":"L","mutationTo":"E","position":156,"proportion":0.6276595744680851,"sequenceName":"M"}, + {"count":59,"mutation":"G157F","mutationFrom":"G","mutationTo":"F","position":157,"proportion":0.6276595744680851,"sequenceName":"M"}, + {"count":95,"mutation":"C159V","mutationFrom":"C","mutationTo":"V","position":159,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"D160Y","mutationFrom":"D","mutationTo":"Y","position":160,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"I161S","mutationFrom":"I","mutationTo":"S","position":161,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"K162S","mutationFrom":"K","mutationTo":"S","position":162,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"D163A","mutationFrom":"D","mutationTo":"A","position":163,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"L164N","mutationFrom":"L","mutationTo":"N","position":164,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"P165N","mutationFrom":"P","mutationTo":"N","position":165,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":94,"mutation":"K166C","mutationFrom":"K","mutationTo":"C","position":166,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":94,"mutation":"E167T","mutationFrom":"E","mutationTo":"T","position":167,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":94,"mutation":"I168F","mutationFrom":"I","mutationTo":"F","position":168,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":94,"mutation":"T169E","mutationFrom":"T","mutationTo":"E","position":169,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":94,"mutation":"V170Y","mutationFrom":"V","mutationTo":"Y","position":170,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":94,"mutation":"A171V","mutationFrom":"A","mutationTo":"V","position":171,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":93,"mutation":"T172S","mutationFrom":"T","mutationTo":"S","position":172,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":92,"mutation":"S173Q","mutationFrom":"S","mutationTo":"Q","position":173,"proportion":0.9787234042553191,"sequenceName":"M"}, + {"count":93,"mutation":"R174P","mutationFrom":"R","mutationTo":"P","position":174,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":93,"mutation":"T175F","mutationFrom":"T","mutationTo":"F","position":175,"proportion":0.9893617021276596,"sequenceName":"M"}, + {"count":94,"mutation":"S177M","mutationFrom":"S","mutationTo":"M","position":177,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":94,"mutation":"Y178D","mutationFrom":"Y","mutationTo":"D","position":178,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":93,"mutation":"Y179L","mutationFrom":"Y","mutationTo":"L","position":179,"proportion":0.9789473684210527,"sequenceName":"M"}, + {"count":94,"mutation":"K180E","mutationFrom":"K","mutationTo":"E","position":180,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":92,"mutation":"L181G","mutationFrom":"L","mutationTo":"G","position":181,"proportion":0.9583333333333334,"sequenceName":"M"}, + {"count":95,"mutation":"G182K","mutationFrom":"G","mutationTo":"K","position":182,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"A183Q","mutationFrom":"A","mutationTo":"Q","position":183,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"S184G","mutationFrom":"S","mutationTo":"G","position":184,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"Q185N","mutationFrom":"Q","mutationTo":"N","position":185,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"R186F","mutationFrom":"R","mutationTo":"F","position":186,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"V187K","mutationFrom":"V","mutationTo":"K","position":187,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"A188N","mutationFrom":"A","mutationTo":"N","position":188,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":94,"mutation":"G189L","mutationFrom":"G","mutationTo":"L","position":189,"proportion":0.9791666666666666,"sequenceName":"M"}, + {"count":93,"mutation":"D190R","mutationFrom":"D","mutationTo":"R","position":190,"proportion":0.96875,"sequenceName":"M"}, + {"count":95,"mutation":"S191E","mutationFrom":"S","mutationTo":"E","position":191,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"G192F","mutationFrom":"G","mutationTo":"F","position":192,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"F193V","mutationFrom":"F","mutationTo":"V","position":193,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"A194F","mutationFrom":"A","mutationTo":"F","position":194,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"A195K","mutationFrom":"A","mutationTo":"K","position":195,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"Y196N","mutationFrom":"Y","mutationTo":"N","position":196,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"S197I","mutationFrom":"S","mutationTo":"I","position":197,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":96,"mutation":"R198D","mutationFrom":"R","mutationTo":"D","position":198,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"Y199G","mutationFrom":"Y","mutationTo":"G","position":199,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"R200Y","mutationFrom":"R","mutationTo":"Y","position":200,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"I201F","mutationFrom":"I","mutationTo":"F","position":201,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"G202K","mutationFrom":"G","mutationTo":"K","position":202,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"N203I","mutationFrom":"N","mutationTo":"I","position":203,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":46,"mutation":"R203K","mutationFrom":"R","mutationTo":"K","position":203,"proportion":0.46,"sequenceName":"N"}, + {"count":46,"mutation":"G204R","mutationFrom":"G","mutationTo":"R","position":204,"proportion":0.46,"sequenceName":"N"}, + {"count":94,"mutation":"K205S","mutationFrom":"K","mutationTo":"S","position":205,"proportion":0.9894736842105263,"sequenceName":"M"}, + {"count":95,"mutation":"L206K","mutationFrom":"L","mutationTo":"K","position":206,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"N207H","mutationFrom":"N","mutationTo":"H","position":207,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"D209P","mutationFrom":"D","mutationTo":"P","position":209,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":95,"mutation":"H210I","mutationFrom":"H","mutationTo":"I","position":210,"proportion":0.9895833333333334,"sequenceName":"M"}, + {"count":81,"mutation":"S211N","mutationFrom":"S","mutationTo":"N","position":211,"proportion":0.84375,"sequenceName":"M"}, + {"count":81,"mutation":"S212L","mutationFrom":"S","mutationTo":"L","position":212,"proportion":0.84375,"sequenceName":"M"}, + {"count":78,"mutation":"S213V","mutationFrom":"S","mutationTo":"V","position":213,"proportion":0.8125,"sequenceName":"M"}, + {"count":96,"mutation":"S214R","mutationFrom":"S","mutationTo":"R","position":214,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"N216L","mutationFrom":"N","mutationTo":"L","position":216,"proportion":0.9795918367346939,"sequenceName":"M"}, + {"count":97,"mutation":"I217P","mutationFrom":"I","mutationTo":"P","position":217,"proportion":0.9897959183673469,"sequenceName":"M"}, + {"count":96,"mutation":"A218Q","mutationFrom":"A","mutationTo":"Q","position":218,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"L219G","mutationFrom":"L","mutationTo":"G","position":219,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":96,"mutation":"L220F","mutationFrom":"L","mutationTo":"F","position":220,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":95,"mutation":"V221S","mutationFrom":"V","mutationTo":"S","position":221,"proportion":0.979381443298969,"sequenceName":"M"}, + {"count":89,"mutation":"Q222A","mutationFrom":"Q","mutationTo":"A","position":222,"proportion":0.9175257731958762,"sequenceName":"M"}, + {"count":96,"mutation":"*223L","mutationFrom":"*","mutationTo":"L","position":223,"proportion":0.9896907216494846,"sequenceName":"M"}, + {"count":98,"mutation":"P314L","mutationFrom":"P","mutationTo":"L","position":314,"proportion":0.98,"sequenceName":"ORF1b"}, + {"count":38,"mutation":"L452R","mutationFrom":"L","mutationTo":"R","position":452,"proportion":0.4318181818181818,"sequenceName":"S"}, + {"count":69,"mutation":"T478K","mutationFrom":"T","mutationTo":"K","position":478,"proportion":0.7340425531914894,"sequenceName":"S"}, + {"count":41,"mutation":"N501Y","mutationFrom":"N","mutationTo":"Y","position":501,"proportion":0.44086021505376344,"sequenceName":"S"}, + {"count":98,"mutation":"D614G","mutationFrom":"D","mutationTo":"G","position":614,"proportion":0.98989898989899,"sequenceName":"S"}, + {"count":42,"mutation":"P681H","mutationFrom":"P","mutationTo":"H","position":681,"proportion":0.42,"sequenceName":"S"}, + {"count":64,"mutation":"T3255I","mutationFrom":"T","mutationTo":"I","position":3255,"proportion":0.6464646464646465,"sequenceName":"ORF1a"}, + {"count":44,"mutation":"S3675-","mutationFrom":"S","mutationTo":"-","position":3675,"proportion":0.4489795918367347,"sequenceName":"ORF1a"}, + {"count":44,"mutation":"G3676-","mutationFrom":"G","mutationTo":"-","position":3676,"proportion":0.4444444444444444,"sequenceName":"ORF1a"} ] } diff --git a/endToEndTests/test/queries/fasta_allTestSequences.json b/endToEndTests/test/queries/fasta_allTestSequences.json index 9f9de1ae..dc58b6bb 100644 --- a/endToEndTests/test/queries/fasta_allTestSequences.json +++ b/endToEndTests/test/queries/fasta_allTestSequences.json @@ -3,115 +3,113 @@ "query": { "action": { "type": "Fasta", - "sequenceName": "testSecondSequence" + "sequenceName": "testSecondSequence", + "orderByFields": [ "gisaid_epi_isl" ] }, "filterExpression": { "type": "True" } }, "expectedQueryResult": [ - { "gisaid_epi_isl": "EPI_ISL_3086369", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3247294", "testSecondSequence": null }, - { - "gisaid_epi_isl": "EPI_ISL_3259931", - "testSecondSequence": "JRZFHVKQIQGIVPUNJZCDKLOPDFTWZWXEXKZIHLGFWZNIGUAAPJBXPQCJBFUYHHIOPNDMTMHAFPHMZRCNUGIBRZCNKAJZMWXMBMPQRTZQUHTIFSOBXAQWMESDRWVJQWRE" - }, - { "gisaid_epi_isl": "EPI_ISL_3267832", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3465556", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3465732", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1119584", "testSecondSequence": "ACGN" }, - { "gisaid_epi_isl": "EPI_ISL_1750868", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2359636", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_737604", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_931279", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003849", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1131102", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1260480", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1273715", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_581968", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_721941", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_737860", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1001920", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003036", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003373", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003425", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003519", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_737715", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_830864", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_899725", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_899762", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_931031", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003010", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1036103", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1130868", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1750503", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1840634", "testSecondSequence": "ACGN" }, - { "gisaid_epi_isl": "EPI_ISL_2213804", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2360326", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3578231", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1003629", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1080536", "testSecondSequence": "ATGT" }, - { "gisaid_epi_isl": "EPI_ISL_1119315", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1129663", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1195052", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1273458", "testSecondSequence": "ANGT" }, - { "gisaid_epi_isl": "EPI_ISL_1360935", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1361468", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1407962", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1408062", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1408408", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1597890", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1597932", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1599113", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1747752", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1747885", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1748215", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1748243", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1748395", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1749892", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1749899", "testSecondSequence": "AAGN" }, - { "gisaid_epi_isl": "EPI_ISL_1749960", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1760534", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2016901", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2017036", "testSecondSequence": "ANGT" }, - { "gisaid_epi_isl": "EPI_ISL_2019235", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2019350", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2180023", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2180995", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2181005", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2213934", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2213984", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2214128", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2270139", "testSecondSequence": null }, - { "gisaid_epi_isl": "EPI_ISL_2307766", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2307888", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2308054", "testSecondSequence": null }, - { "gisaid_epi_isl": "EPI_ISL_2374969", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375097", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375165", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375247", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2375490", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2379651", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2405276", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2408472", "testSecondSequence": "AAGT" }, - { "gisaid_epi_isl": "EPI_ISL_2544226", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2544332", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2544452", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2574088", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3016465", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1001493", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1002156", "testSecondSequence": "ACGN" }, - { "gisaid_epi_isl": "EPI_ISL_1004495", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1005148", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1408805", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2086867", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3128796", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_3128811", "testSecondSequence": "ACGTACGT" }, - { "gisaid_epi_isl": "EPI_ISL_768148", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1002052", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_2367431", "testSecondSequence": "NCGT" }, - { "gisaid_epi_isl": "EPI_ISL_3128737", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_466942", "testSecondSequence": "ACGT" }, - { "gisaid_epi_isl": "EPI_ISL_1682849", "testSecondSequence": "ACGT" } + {"gisaid_epi_isl":"EPI_ISL_1001493","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1001920","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1002052","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1002156","testSecondSequence":"ACGN"}, + {"gisaid_epi_isl":"EPI_ISL_1003010","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1003036","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1003373","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1003425","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1003519","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1003629","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1003849","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1004495","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1005148","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1036103","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1080536","testSecondSequence":"ATGT"}, + {"gisaid_epi_isl":"EPI_ISL_1119315","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1119584","testSecondSequence":"ACGN"}, + {"gisaid_epi_isl":"EPI_ISL_1129663","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1130868","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1131102","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1195052","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1260480","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1273458","testSecondSequence":"ANGT"}, + {"gisaid_epi_isl":"EPI_ISL_1273715","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1360935","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1361468","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1407962","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1408062","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1408408","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1408805","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1597890","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1597932","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1599113","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1682849","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1747752","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1747885","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1748215","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1748243","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1748395","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1749892","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1749899","testSecondSequence":"AAGN"}, + {"gisaid_epi_isl":"EPI_ISL_1749960","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1750503","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1750868","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1760534","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_1840634","testSecondSequence":"ACGN"}, + {"gisaid_epi_isl":"EPI_ISL_2016901","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2017036","testSecondSequence":"ANGT"}, + {"gisaid_epi_isl":"EPI_ISL_2019235","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2019350","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2086867","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2180023","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2180995","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2181005","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2213804","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2213934","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2213984","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2214128","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2270139","testSecondSequence":null}, + {"gisaid_epi_isl":"EPI_ISL_2307766","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2307888","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2308054","testSecondSequence":null}, + {"gisaid_epi_isl":"EPI_ISL_2359636","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2360326","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2367431","testSecondSequence":"NCGT"}, + {"gisaid_epi_isl":"EPI_ISL_2374969","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2375097","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2375165","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2375247","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2375490","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2379651","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2405276","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2408472","testSecondSequence":"AAGT"}, + {"gisaid_epi_isl":"EPI_ISL_2544226","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2544332","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2544452","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_2574088","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3016465","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3086369","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3128737","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3128796","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3128811","testSecondSequence":"ACGTACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3247294","testSecondSequence":null}, + {"gisaid_epi_isl":"EPI_ISL_3259931","testSecondSequence":"JRZFHVKQIQGIVPUNJZCDKLOPDFTWZWXEXKZIHLGFWZNIGUAAPJBXPQCJBFUYHHIOPNDMTMHAFPHMZRCNUGIBRZCNKAJZMWXMBMPQRTZQUHTIFSOBXAQWMESDRWVJQWRE"}, + {"gisaid_epi_isl":"EPI_ISL_3267832","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3465556","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3465732","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_3578231","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_466942","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_581968","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_721941","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_737604","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_737715","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_737860","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_768148","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_830864","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_899725","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_899762","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_931031","testSecondSequence":"ACGT"}, + {"gisaid_epi_isl":"EPI_ISL_931279","testSecondSequence":"ACGT"} ] } diff --git a/endToEndTests/test/queries/recombinantLineageWithAlias.json b/endToEndTests/test/queries/recombinantLineageWithAlias.json index 6d11e8bb..dc406e64 100644 --- a/endToEndTests/test/queries/recombinantLineageWithAlias.json +++ b/endToEndTests/test/queries/recombinantLineageWithAlias.json @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 1 + "count": 10 } ] } diff --git a/include/silo/common/bidirectional_map.h b/include/silo/common/bidirectional_map.h index dd5e0539..88453acc 100644 --- a/include/silo/common/bidirectional_map.h +++ b/include/silo/common/bidirectional_map.h @@ -33,6 +33,10 @@ class BidirectionalMap { : id_to_value(), value_to_id() {} + BidirectionalMap(std::vector&& id_to_value, std::unordered_map&& value_to_id) + : id_to_value(id_to_value), + value_to_id(value_to_id) {} + BidirectionalMap(BidirectionalMap&& map) = default; BidirectionalMap& operator=(BidirectionalMap&& map) = default; diff --git a/include/silo/common/lineage_tree.h b/include/silo/common/lineage_tree.h index 9d5e7ed6..96381d2e 100644 --- a/include/silo/common/lineage_tree.h +++ b/include/silo/common/lineage_tree.h @@ -1,10 +1,14 @@ #pragma once +#include +#include #include #include #include "silo/common/bidirectional_map.h" +#include "silo/common/lineage_name.h" +#include "silo/common/types.h" #include "silo/preprocessing/lineage_definition_file.h" namespace silo::common { @@ -12,7 +16,7 @@ namespace silo::common { // The tree is allowed to be disconnected class LineageTree { friend class boost::serialization::access; - std::vector> parent_relation; + std::vector> parent_relation; template [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { @@ -30,7 +34,8 @@ class LineageTree { static LineageTree fromEdgeList( size_t n_vertices, - const std::vector>& edge_list + const std::vector>& edge_list, + const BidirectionalMap& lookup ); std::optional getParent(Idx value); @@ -43,12 +48,14 @@ class LineageTreeAndIdMap { // clang-format off archive & lineage_tree; archive & lineage_id_lookup_map; + archive & alias_mapping; // clang-format on } public: LineageTree lineage_tree; BidirectionalMap lineage_id_lookup_map; + std::unordered_map alias_mapping; LineageTreeAndIdMap() = default; LineageTreeAndIdMap(LineageTreeAndIdMap&& other) = default; @@ -65,10 +72,14 @@ class LineageTreeAndIdMap { private: LineageTreeAndIdMap( LineageTree&& lineage_tree, - BidirectionalMap&& lineage_id_lookup_map + BidirectionalMap&& lineage_id_lookup_map, + std::unordered_map&& alias_mapping ); }; -bool containsCycle(int n, const std::vector>& edges); +std::optional> containsCycle( + size_t number_of_vertices, + const std::vector>& edges +); } // namespace silo::common diff --git a/include/silo/preprocessing/lineage_definition_file.h b/include/silo/preprocessing/lineage_definition_file.h index debbdbaf..c3bb259a 100644 --- a/include/silo/preprocessing/lineage_definition_file.h +++ b/include/silo/preprocessing/lineage_definition_file.h @@ -5,18 +5,15 @@ #include -namespace silo::preprocessing { - -class LineageName { - public: - std::string string; +#include "silo/common/lineage_name.h" - bool operator==(const LineageName& other) const; -}; +namespace silo::preprocessing { +using silo::common::LineageName; class LineageDefinition { public: LineageName lineage_name; + std::vector aliases; std::vector parents; }; diff --git a/include/silo/storage/column/indexed_string_column.h b/include/silo/storage/column/indexed_string_column.h index 2098902f..98bf699f 100644 --- a/include/silo/storage/column/indexed_string_column.h +++ b/include/silo/storage/column/indexed_string_column.h @@ -30,17 +30,19 @@ class IndexedStringColumnPartition { // clang-format off archive & value_ids; archive & indexed_values; - archive & lineage_index; // clang-format on } std::vector value_ids; std::unordered_map indexed_values; - common::BidirectionalMap& lookup; - std::optional lineage_index; + common::BidirectionalMap* lookup; + std::optional* lineage_index; public: - explicit IndexedStringColumnPartition(common::BidirectionalMap& lookup); + explicit IndexedStringColumnPartition( + common::BidirectionalMap* lookup, + std::optional* lineage_index + ); [[nodiscard]] std::optional filter(silo::Idx value_id) const; @@ -54,7 +56,7 @@ class IndexedStringColumnPartition { [[nodiscard]] const std::vector& getValues() const; - [[nodiscard]] inline std::string lookupValue(Idx id) const { return lookup.getValue(id); } + [[nodiscard]] inline std::string lookupValue(Idx id) const { return lookup->getValue(id); } [[nodiscard]] std::optional getValueId(const std::string& value) const; @@ -67,19 +69,21 @@ class IndexedStringColumn { template [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off - archive & *lookup; + archive & lookup; + archive & lineage_index; // clang-format on } - std::unique_ptr> lookup; + common::BidirectionalMap lookup; + std::optional lineage_index; std::deque partitions; public: IndexedStringColumn(); - IndexedStringColumnPartition& createPartition(); + IndexedStringColumn(const common::LineageTreeAndIdMap& lineage_tree); - void generateLineageIndex(const common::LineageTreeAndIdMap& lineage_tree); + IndexedStringColumnPartition& createPartition(); }; } // namespace silo::storage::column diff --git a/include/silo/storage/lineage_index.h b/include/silo/storage/lineage_index.h index 2b875b71..6981d30e 100644 --- a/include/silo/storage/lineage_index.h +++ b/include/silo/storage/lineage_index.h @@ -1,7 +1,10 @@ #pragma once +#include #include +#include +#include #include #include "silo/common/bidirectional_map.h" @@ -14,23 +17,30 @@ class LineageIndex { friend class boost::serialization::access; common::LineageTree lineage_tree; - std::unordered_map index; + std::unordered_map alias_mapping; + std::unordered_map index_including_sublineages; + std::unordered_map index_excluding_sublineages; template [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off - archive & index; + archive & lineage_tree; + archive & alias_mapping; + archive & index_including_sublineages; + archive & index_excluding_sublineages; // clang-format on } public: - LineageIndex(common::LineageTree lineage_tree); + LineageIndex(common::LineageTree lineage_tree, std::unordered_map alias_mapping); LineageIndex() = default; - void insert(size_t row_id, Idx value); + void insert(size_t row_id, Idx value_id); std::optional filterIncludingSublineages(Idx value_id) const; + + std::optional filterExcludingSublineages(Idx value_id) const; }; } // namespace silo::storage diff --git a/src/silo/common/lineage_tree.cpp b/src/silo/common/lineage_tree.cpp index 552ffb4f..524d8e1d 100644 --- a/src/silo/common/lineage_tree.cpp +++ b/src/silo/common/lineage_tree.cpp @@ -1,6 +1,8 @@ #include "silo/common/lineage_tree.h" #include +#include +#include #include "silo/common/panic.h" #include "silo/preprocessing/preprocessing_exception.h" @@ -11,129 +13,207 @@ using silo::preprocessing::LineageName; namespace { -class UnionFind { - public: - std::vector parent; - std::vector rank; - - explicit UnionFind(size_t n) { - parent.resize(n); - rank.resize(n, 0); - for (uint32_t i = 0; i < n; ++i) { - parent[i] = i; +std::string edgesToString( + const std::vector& ids, + const BidirectionalMap& lookup +) { + std::ostringstream oss; + for (size_t i = 0; i < ids.size(); ++i) { + oss << lookup.getValue(ids[i]); + if (i != ids.size() - 1) { + oss << " -> "; } } + return oss.str(); +} - int find(uint32_t u) { - if (u != parent[u]) { - parent[u] = find(parent[u]); - } - return parent[u]; - } +class Graph { + size_t number_of_vertices; + std::vector> adjacency_list; - bool unionSets(int u, int v) { - int rootU = find(u); - int rootV = find(v); + bool isCyclicUtil( + Idx current_vertex, + std::vector& visited, + std::vector& recursion_stack, + std::vector& stack_trace + ) const; - if (rootU == rootV) - return false; + public: + std::optional> getCycle() const; - if (rank[rootU] > rank[rootV]) { - parent[rootV] = rootU; - } else if (rank[rootU] < rank[rootV]) { - parent[rootU] = rootV; - } else { - parent[rootV] = rootU; - rank[rootU]++; - } - return true; - } + Graph(size_t number_of_vertices); + void addEdge(Idx u, Idx v); }; -} // namespace +Graph::Graph(size_t number_of_vertices) + : number_of_vertices(number_of_vertices), + adjacency_list(number_of_vertices) {} -bool containsCycle(int n, const std::vector>& edges) { - UnionFind union_find(n); - for (const auto& [vertex1, vertex2] : edges) { - if (!union_find.unionSets(vertex1, vertex2)) { +void Graph::addEdge(Idx u, Idx v) { + ASSERT_LT(u, number_of_vertices); + ASSERT_LT(v, number_of_vertices); + adjacency_list.at(u).emplace_back(v); +} + +bool Graph::isCyclicUtil( + Idx current_vertex, + std::vector& visited, + std::vector& recursion_stack, + std::vector& stack_trace +) const { + // Mark the current node as visited and add to recursion stack + visited[current_vertex] = true; + recursion_stack[current_vertex] = true; + stack_trace.emplace_back(current_vertex); + + // Recur for all vertices adjacent to this vertex + for (int neighbor : adjacency_list[current_vertex]) { + if (!visited[neighbor] && isCyclicUtil(neighbor, visited, recursion_stack, stack_trace)) { + return true; + } else if (recursion_stack[neighbor]) { + stack_trace.emplace_back(neighbor); return true; } } + + recursion_stack[current_vertex] = false; + stack_trace.pop_back(); return false; } +std::optional> Graph::getCycle() const { + std::vector visited(number_of_vertices, false); + std::vector recursion_stack(number_of_vertices, false); + std::vector stack_trace; + + for (int i = 0; i < number_of_vertices; i++) { + if (!visited[i]) { + if (isCyclicUtil(i, visited, recursion_stack, stack_trace)) { + // We found a witness lasso of the form 1 -> 2 -> 3 -> 4 -> 5 -> 3 + // We need to remove leading vertices up until the cycle + Idx cycle_node = stack_trace.back(); + auto cycle_node_first_occurrence = + std::find(stack_trace.begin(), stack_trace.end(), cycle_node); + ASSERT(cycle_node_first_occurrence < stack_trace.end()); + stack_trace.erase(stack_trace.begin(), cycle_node_first_occurrence); + return stack_trace; + } + ASSERT(stack_trace.empty()); + } + } + return std::nullopt; +} + +} // namespace + +std::optional> containsCycle( + size_t number_of_vertices, + const std::vector>& edges +) { + Graph graph(number_of_vertices); + for (const auto& [from, to] : edges) { + graph.addEdge(from, to); + } + return graph.getCycle(); +} + std::optional LineageTree::getParent(silo::Idx value) { - return parent_relation.at(value); + // TODO(#589) Recombinant lineage not yet supported -> do not follow their edges + if (parent_relation.at(value).size() != 1) { + return std::nullopt; + } + return parent_relation.at(value).at(0); } LineageTree LineageTree::fromEdgeList( size_t n_vertices, - const std::vector>& edge_list + const std::vector>& edge_list, + const BidirectionalMap& lookup ) { - if (containsCycle(n_vertices, edge_list)) { - throw preprocessing::PreprocessingException("The given LineageTree contains a cycle."); + if (auto cycle = containsCycle(n_vertices, edge_list)) { + throw preprocessing::PreprocessingException(fmt::format( + "The given LineageTree contains the cycle: {}", edgesToString(cycle.value(), lookup) + )); } LineageTree result; result.parent_relation.resize(n_vertices); - for (const auto& [vertex_id, parent] : edge_list) { - if (result.parent_relation.at(vertex_id).has_value()) { - PANIC( - "Implementation error. Recombinant trees not supported, but encountered a recombinant " - "entry in edge list." - ); - } - result.parent_relation.at(vertex_id) = parent; + for (const auto& [parent_id, vertex_id] : edge_list) { + result.parent_relation.at(vertex_id).emplace_back(parent_id); } return result; } LineageTreeAndIdMap::LineageTreeAndIdMap(const LineageTreeAndIdMap& other) : lineage_tree(other.lineage_tree), - lineage_id_lookup_map(other.lineage_id_lookup_map.copy()) {} + lineage_id_lookup_map(other.lineage_id_lookup_map.copy()), + alias_mapping(other.alias_mapping) {} LineageTreeAndIdMap& LineageTreeAndIdMap::operator=(const LineageTreeAndIdMap& other) { lineage_tree = other.lineage_tree; lineage_id_lookup_map = other.lineage_id_lookup_map.copy(); + alias_mapping = other.alias_mapping; return *this; } LineageTreeAndIdMap::LineageTreeAndIdMap( LineageTree&& lineage_tree, - BidirectionalMap&& lineage_id_lookup_map + BidirectionalMap&& lineage_id_lookup_map, + std::unordered_map&& alias_mapping ) : lineage_tree(std::move(lineage_tree)), - lineage_id_lookup_map(std::move(lineage_id_lookup_map)) {} + lineage_id_lookup_map(std::move(lineage_id_lookup_map)), + alias_mapping(std::move(alias_mapping)) {} LineageTreeAndIdMap LineageTreeAndIdMap::fromLineageDefinitionFile( const preprocessing::LineageDefinitionFile& file ) { BidirectionalMap lookup; + std::unordered_map alias_mapping; for (const auto& lineage : file.lineages) { + if (lookup.getId(lineage.lineage_name.string).has_value()) { + throw silo::preprocessing::PreprocessingException(fmt::format( + "The lineage definitions contain the duplicate lineage '{}'", lineage.lineage_name + )); + } lookup.getOrCreateId(lineage.lineage_name.string); } + for (const auto& lineage : file.lineages) { + auto lineage_id = lookup.getId(lineage.lineage_name.string).value(); + for (const auto& alias : lineage.aliases) { + if (lookup.getId(alias.string).has_value()) { + throw silo::preprocessing::PreprocessingException(fmt::format( + "The alias '{}' for lineage '{}' is already defined as a lineage or another alias.", + alias, + lineage.lineage_name + )); + } + auto alias_id = lookup.getOrCreateId(alias.string); + alias_mapping[alias_id] = lineage_id; + } + } std::vector> edge_list; for (const auto& lineage : file.lineages) { const Idx my_id = lookup.getId(lineage.lineage_name.string).value(); + ASSERT(!alias_mapping.contains(my_id)); - if (lineage.parents.size() > 1) { - // TODO(#589) Recombinant lineage not yet supported - continue; - } - if (lineage.parents.size() == 1) { - const LineageName parent_lineage = lineage.parents.at(0); + for (const auto& parent_lineage : lineage.parents) { auto parent_id = lookup.getId(parent_lineage.string); if (!parent_id.has_value()) { throw preprocessing::PreprocessingException(fmt::format( "The lineage '{}' which is specified as the parent of vertex '{}' does not have a " "definition itself.", - parent_lineage.string, - lineage.lineage_name.string + parent_lineage, + lineage.lineage_name )); } - edge_list.emplace_back(my_id, parent_id.value()); + if (alias_mapping.contains(parent_id.value())) { + parent_id = alias_mapping.at(parent_id.value()); + } + edge_list.emplace_back(parent_id.value(), my_id); } } - return {LineageTree::fromEdgeList(file.lineages.size(), edge_list), std::move(lookup)}; + auto lineage_tree = LineageTree::fromEdgeList(file.lineages.size(), edge_list, lookup); + return {std::move(lineage_tree), std::move(lookup), std::move(alias_mapping)}; } LineageTreeAndIdMap LineageTreeAndIdMap::fromLineageDefinitionFilePath( diff --git a/src/silo/common/lineage_tree.test.cpp b/src/silo/common/lineage_tree.test.cpp index 6deacb2d..a01baaf3 100644 --- a/src/silo/common/lineage_tree.test.cpp +++ b/src/silo/common/lineage_tree.test.cpp @@ -94,7 +94,7 @@ TEST(LineageTreeAndIdMap, correctTreeRelations) { ASSERT_EQ(lineage_tree.lineage_tree.getParent(base), std::nullopt); } -TEST(LineageTreeAndIdMap, correctCycleInFile) { +TEST(LineageTreeAndIdMap, correctCycleErrorInFile) { auto throwing_lambda = []() { LineageTreeAndIdMap::fromLineageDefinitionFile(LineageDefinitionFile::fromYAML(R"( BASE: @@ -111,7 +111,164 @@ TEST(LineageTreeAndIdMap, correctCycleInFile) { try { throwing_lambda(); } catch (const silo::preprocessing::PreprocessingException& e) { - ASSERT_EQ(std::string(e.what()), "The given LineageTree contains a cycle."); + ASSERT_EQ( + std::string(e.what()), + "The given LineageTree contains the cycle: BASE -> CHILD -> BASE" + ); + throw; + } + }, + silo::preprocessing::PreprocessingException + ); +} + +TEST(LineageTreeAndIdMap, correctSelfCycleErrorInFile) { + auto throwing_lambda = []() { + LineageTreeAndIdMap::fromLineageDefinitionFile(LineageDefinitionFile::fromYAML(R"( +BASE: + parents: + - BASE +CHILD: + parents: + - BASE +)")); + }; + + EXPECT_THROW( + { + try { + throwing_lambda(); + } catch (const silo::preprocessing::PreprocessingException& e) { + ASSERT_EQ( + std::string(e.what()), "The given LineageTree contains the cycle: BASE -> BASE" + ); + throw; + } + }, + silo::preprocessing::PreprocessingException + ); +} + +TEST(LineageTreeAndIdMap, correctLassoCycleErrorInFile) { + auto throwing_lambda = []() { + LineageTreeAndIdMap::fromLineageDefinitionFile(LineageDefinitionFile::fromYAML(R"( +BASE: {} +CHILD1: + parents: + - BASE + - CHILD3 +CHILD2: + parents: + - CHILD1 +CHILD3: + parents: + - CHILD2 +)")); + }; + + EXPECT_THROW( + { + try { + throwing_lambda(); + } catch (const silo::preprocessing::PreprocessingException& e) { + ASSERT_EQ( + std::string(e.what()), + "The given LineageTree contains the cycle: CHILD1 -> CHILD2 -> CHILD3 -> CHILD1" + ); + throw; + } + }, + silo::preprocessing::PreprocessingException + ); +} + +TEST(LineageDefinitionFile, errorOnDuplicateKey) { + auto throwing_lambda = []() { + LineageTreeAndIdMap::fromLineageDefinitionFile(LineageDefinitionFile::fromYAML(R"( +some_duplicate_lineage: + parents: + - some_other_key +some_other_key: +some_duplicate_lineage: + parents: + - some_other_key)")); + }; + + EXPECT_THROW( + { + try { + throwing_lambda(); + } catch (const silo::preprocessing::PreprocessingException& e) { + ASSERT_EQ( + std::string(e.what()), + "The lineage definitions contain the duplicate lineage 'some_duplicate_lineage'" + ); + throw; + } + }, + silo::preprocessing::PreprocessingException + ); +} + +TEST(LineageDefinitionFile, errorOnDuplicateAlias) { + auto throwing_lambda = []() { + LineageTreeAndIdMap::fromLineageDefinitionFile(LineageDefinitionFile::fromYAML(R"( +lineage1: + aliases: + - duplicate_alias + parents: + - some_other_key +lineage2: +lineage3: + aliases: + - duplicate_alias + parents: + - some_other_key)")); + }; + + EXPECT_THROW( + { + try { + throwing_lambda(); + } catch (const silo::preprocessing::PreprocessingException& e) { + ASSERT_EQ( + std::string(e.what()), + "The alias 'duplicate_alias' for lineage 'lineage3' is already defined as a lineage " + "or another alias." + ); + throw; + } + }, + silo::preprocessing::PreprocessingException + ); +} + +TEST(LineageDefinitionFile, errorOnLineageAsAlias) { + auto throwing_lambda = []() { + LineageTreeAndIdMap::fromLineageDefinitionFile(LineageDefinitionFile::fromYAML(R"( +lineage1: + aliases: + - some_alias + parents: + - some_other_key +lineage2_also_used_as_alias: +lineage3: + aliases: + - lineage2_also_used_as_alias + parents: + - some_other_key)")); + }; + + EXPECT_THROW( + { + try { + throwing_lambda(); + } catch (const silo::preprocessing::PreprocessingException& e) { + ASSERT_EQ( + std::string(e.what()), + "The alias 'lineage2_also_used_as_alias' for lineage 'lineage3' is already defined " + "as a lineage or another alias." + ); throw; } }, @@ -142,15 +299,18 @@ TEST(containsCycle, findsCycles) { ASSERT_TRUE(silo::common::containsCycle(4, {{0, 1}, {1, 0}, {2, 3}, {3, 2}})); // 8. Tree structure with additional edge forming a cycle - ASSERT_TRUE(silo::common::containsCycle(6, {{0, 1}, {0, 2}, {1, 3}, {1, 4}, {3, 5}, {4, 5}})); + ASSERT_TRUE(silo::common::containsCycle(6, {{0, 1}, {0, 2}, {1, 3}, {4, 1}, {3, 5}, {5, 4}})); // 9. Fully connected graph of 4 nodes (cycle present) - ASSERT_TRUE(silo::common::containsCycle(4, {{0, 1}, {0, 2}, {0, 3}, {1, 2}, {1, 3}, {2, 3}})); + ASSERT_TRUE(silo::common::containsCycle(4, {{0, 1}, {2, 0}, {0, 3}, {1, 2}, {3, 1}, {2, 3}})); // 10. Graph with multiple isolated cycles ASSERT_TRUE( silo::common::containsCycle(8, {{0, 1}, {1, 2}, {2, 0}, {3, 4}, {4, 5}, {5, 3}, {6, 7}}) ); + + // 11. Single node with a self-loop (directed cycle) + ASSERT_TRUE(silo::common::containsCycle(1, {{0, 0}})); } TEST(containsCycle, correctTrees) { @@ -181,6 +341,14 @@ TEST(containsCycle, correctTrees) { // 9. Two disconnected nodes ASSERT_FALSE(silo::common::containsCycle(2, {})); - // 10. Chain of 5 nodes (no cycle) + // 10. Chain of 5 nodes ASSERT_FALSE(silo::common::containsCycle(5, {{0, 1}, {1, 2}, {2, 3}, {3, 4}})); } + +TEST(containsCycle, correctDirectedAcyclicGraphs) { + // 1. Undirected lasso, but no directed cycle + ASSERT_FALSE(silo::common::containsCycle(6, {{0, 1}, {0, 2}, {1, 3}, {1, 4}, {3, 5}, {4, 5}})); + + // 2. Chain of 5 nodes and first to last shortcut + ASSERT_FALSE(silo::common::containsCycle(5, {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {0, 4}})); +} \ No newline at end of file diff --git a/src/silo/database.cpp b/src/silo/database.cpp index 82df190a..b20c0fbc 100644 --- a/src/silo/database.cpp +++ b/src/silo/database.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -567,16 +568,11 @@ Database Database::loadDatabaseState(const std::filesystem::path& save_directory } } - tbb::parallel_for( - tbb::blocked_range(0, database.partitions.size()), - [&](const auto& local) { - for (size_t partition_index = local.begin(); partition_index != local.end(); - ++partition_index) { - ::boost::archive::binary_iarchive input_archive(file_vec[partition_index]); - database.partitions[partition_index].serializeData(input_archive, 0); - } - } - ); + for (size_t partition_index = 0; partition_index < database.partitions.size(); + ++partition_index) { + ::boost::archive::binary_iarchive input_archive(file_vec[partition_index]); + database.partitions[partition_index].serializeData(input_archive, 0); + } SPDLOG_INFO("Finished loading partition data"); database.setDataVersion(loadDataVersion(save_directory / "data_version.silo")); @@ -602,15 +598,17 @@ void Database::initializeColumn(const config::DatabaseMetadata& metadata) { } return; case config::ColumnType::INDEXED_STRING: { - auto column = storage::column::IndexedStringColumn(); - columns.indexed_string_columns.emplace(name, std::move(column)); + if (metadata.lineage_index) { + auto column = storage::column::IndexedStringColumn(lineage_tree); + columns.indexed_string_columns.emplace(name, std::move(column)); + } else { + auto column = storage::column::IndexedStringColumn(); + columns.indexed_string_columns.emplace(name, std::move(column)); + } for (auto& partition : partitions) { partition.columns.metadata.push_back({name, column_type}); partition.insertColumn(name, columns.indexed_string_columns.at(name).createPartition()); } - if (metadata.lineage_index) { - column.generateLineageIndex(lineage_tree); - } } return; case config::ColumnType::DATE: { diff --git a/src/silo/preprocessing/lineage_definition_file.cpp b/src/silo/preprocessing/lineage_definition_file.cpp index a74c4f0f..0d302014 100644 --- a/src/silo/preprocessing/lineage_definition_file.cpp +++ b/src/silo/preprocessing/lineage_definition_file.cpp @@ -1,27 +1,16 @@ #include "silo/preprocessing/lineage_definition_file.h" #include -#include #include #include "silo/common/panic.h" #include "silo/preprocessing/preprocessing_exception.h" -namespace std { -using silo::preprocessing::LineageName; -template <> -struct hash { - std::size_t operator()(const LineageName& ln) const { - return std::hash()(ln.string); - } -}; -} // namespace std - namespace YAML { +using silo::common::LineageName; using silo::preprocessing::LineageDefinition; using silo::preprocessing::LineageDefinitionFile; -using silo::preprocessing::LineageName; template <> struct convert { @@ -42,50 +31,62 @@ struct convert { } }; +namespace { + +LineageDefinition entryToLineageDefinition(const YAML::detail::iterator_value& entry) { + auto lineage_name = entry.first.as(); + if (entry.second.IsNull()) { + return {lineage_name, std::vector{}, std::vector{}}; + } + if (!entry.second.IsMap()) { + throw silo::preprocessing::PreprocessingException(fmt::format( + "The lineage '{}' is not defined as a valid YAML Map in its definition: {}", + lineage_name, + YAML::Dump(entry.second) + )); + } + if (std::ranges::any_of(entry.second, [](const auto& element) { + const std::string field_name = element.first.template as(); + return field_name != "parents" && field_name != "aliases"; + })) { + throw silo::preprocessing::PreprocessingException(fmt::format( + "The definition of lineage '{}' may only contain the fields 'parents' and 'aliases', it " + "also contains invalid fields:\n{}", + lineage_name, + YAML::Dump(entry.second) + )); + } + std::vector parents; + if (entry.second["parents"]) { + if (!entry.second["parents"].IsSequence()) { + throw silo::preprocessing::PreprocessingException(fmt::format( + "The parents of lineage '{}' are not defined as a YAML Sequence", lineage_name + )); + } + parents = entry.second["parents"].as>(); + } + std::vector aliases; + if (entry.second["aliases"]) { + if (!entry.second["aliases"].IsSequence()) { + throw silo::preprocessing::PreprocessingException(fmt::format( + "The aliases of lineage '{}' are not defined as a YAML Sequence", lineage_name + )); + } + aliases = entry.second["aliases"].as>(); + } + return {lineage_name, aliases, parents}; +} +} // namespace + template <> struct convert { - static bool decode(const Node& node, LineageDefinitionFile& lineage_definition) { + static bool decode(const Node& node, LineageDefinitionFile& lineage_definition_file) { std::vector lineage_definitions; - std::unordered_set unique_lineage_definitions; for (const auto& entry : node) { - auto lineage_name = entry.first.as(); - if (unique_lineage_definitions.contains(lineage_name)) { - throw silo::preprocessing::PreprocessingException(fmt::format( - "The lineage definitions contain the duplicate lineage '{}'", lineage_name.string - )); - } - if (!entry.second.IsMap()) { - throw silo::preprocessing::PreprocessingException(fmt::format( - "The lineage '{}' is not defined as a valid YAML Map in its definition: {}", - lineage_name.string, - YAML::Dump(entry.second) - )); - } - if (!entry.second["parents"]) { - throw silo::preprocessing::PreprocessingException(fmt::format( - "The lineage '{}' does not contain the field 'parents'", lineage_name.string - )); - } - if (!entry.second["parents"].IsSequence()) { - throw silo::preprocessing::PreprocessingException(fmt::format( - "The parents of lineage '{}' are not defined as a YAML Sequence", lineage_name.string - )); - } - if (std::ranges::any_of(entry.second, [](const auto& element) { - return element.first.template as() != "parents"; - })) { - throw silo::preprocessing::PreprocessingException(fmt::format( - "The definition of lineage '{}' contains the invalid fields (only 'parents' is " - "allowed): {}", - lineage_name.string, - YAML::Dump(entry.second) - )); - } - unique_lineage_definitions.emplace(lineage_name); - auto parents = entry.second["parents"].as>(); - lineage_definitions.emplace_back(lineage_name, parents); + const auto lineage_definition = entryToLineageDefinition(entry); + lineage_definitions.emplace_back(std::move(lineage_definition)); } - lineage_definition = LineageDefinitionFile{lineage_definitions}; + lineage_definition_file = LineageDefinitionFile{lineage_definitions}; return true; } }; @@ -93,10 +94,6 @@ struct convert { namespace silo::preprocessing { -bool LineageName::operator==(const LineageName& other) const { - return string == other.string; -} - LineageDefinitionFile LineageDefinitionFile::fromYAMLFile(const std::filesystem::path& yaml_path) { const std::ifstream file(yaml_path, std::ios::in | std::ios::binary); if (!file) { diff --git a/src/silo/preprocessing/lineage_definition_file.test.cpp b/src/silo/preprocessing/lineage_definition_file.test.cpp index 13747330..73c93a49 100644 --- a/src/silo/preprocessing/lineage_definition_file.test.cpp +++ b/src/silo/preprocessing/lineage_definition_file.test.cpp @@ -34,33 +34,6 @@ SOME_lineage: " ); } -TEST(LineageDefinitionFile, errorOnDuplicateKey) { - auto throwing_lambda = []() { - LineageDefinitionFile::fromYAML(R"( -some_duplicate_lineage: - parents: - - anything -some_duplicate_lineage: - parents: - - also_anything)"); - }; - - EXPECT_THROW( - { - try { - throwing_lambda(); - } catch (const silo::preprocessing::PreprocessingException& e) { - ASSERT_EQ( - std::string(e.what()), - "The lineage definitions contain the duplicate lineage 'some_duplicate_lineage'" - ); - throw; - } - }, - silo::preprocessing::PreprocessingException - ); -} - TEST(LineageDefinitionFile, errorOnMisspelledParents) { auto throwing_lambda = []() { LineageDefinitionFile::fromYAML(R"( @@ -79,7 +52,9 @@ TEST(LineageDefinitionFile, errorOnMisspelledParents) { } catch (const silo::preprocessing::PreprocessingException& e) { ASSERT_EQ( std::string(e.what()), - "The lineage 'some_lineage' does not contain the field 'parents'" + R"(The definition of lineage 'some_lineage' may only contain the fields 'parents' and 'aliases', it also contains invalid fields: +parent: + - anything)" ); throw; } @@ -88,67 +63,31 @@ TEST(LineageDefinitionFile, errorOnMisspelledParents) { ); } -TEST(LineageDefinitionFile, errorOnEmptyMap) { - auto throwing_lambda = []() { - LineageDefinitionFile::fromYAML(R"( +TEST(LineageDefinitionFile, noErrorOnEmptyMap) { + EXPECT_NO_THROW(LineageDefinitionFile::fromYAML(R"( some_lineage: {} some_other_lineage: parents: - - also_anything)"); - }; - - EXPECT_THROW( - { - try { - throwing_lambda(); - } catch (const silo::preprocessing::PreprocessingException& e) { - ASSERT_EQ( - std::string(e.what()), - "The lineage 'some_lineage' does not contain the field 'parents'" - ); - throw; - } - }, - silo::preprocessing::PreprocessingException - ); + - some_lineage)")); } -TEST(LineageDefinitionFile, errorOnExtraFields) { - auto throwing_lambda = []() { - LineageDefinitionFile::fromYAML(R"( +TEST(LineageDefinitionFile, noErrorOnNull) { + EXPECT_NO_THROW(LineageDefinitionFile::fromYAML(R"( some_lineage: - parents: [] - some_extra_field: "some_value" some_other_lineage: parents: - - also_anything)"); - }; - - EXPECT_THROW( - { - try { - throwing_lambda(); - } catch (const silo::preprocessing::PreprocessingException& e) { - ASSERT_EQ( - std::string(e.what()), - "The definition of lineage 'some_lineage' contains the invalid fields (only " - "'parents' is allowed): parents: []\nsome_extra_field: some_value" - ); - throw; - } - }, - silo::preprocessing::PreprocessingException - ); + - some_lineage)")); } -TEST(LineageDefinitionFile, errorOnNullLineageMap) { +TEST(LineageDefinitionFile, errorOnExtraFields) { auto throwing_lambda = []() { LineageDefinitionFile::fromYAML(R"( some_lineage: + parents: [] + some_extra_field: "some_value" some_other_lineage: parents: - - some_lineage -)"); + - also_anything)"); }; EXPECT_THROW( @@ -158,7 +97,9 @@ TEST(LineageDefinitionFile, errorOnNullLineageMap) { } catch (const silo::preprocessing::PreprocessingException& e) { ASSERT_EQ( std::string(e.what()), - "The lineage 'some_lineage' is not defined as a valid YAML Map in its definition: ~" + "The definition of lineage 'some_lineage' may only contain the fields 'parents' and " + "'aliases', it also contains invalid fields:\nparents: []\nsome_extra_field: " + "some_value" ); throw; } diff --git a/src/silo/query_engine/filter_expressions/lineage_filter.cpp b/src/silo/query_engine/filter_expressions/lineage_filter.cpp index 1b8b3e43..2e87bee6 100644 --- a/src/silo/query_engine/filter_expressions/lineage_filter.cpp +++ b/src/silo/query_engine/filter_expressions/lineage_filter.cpp @@ -55,7 +55,7 @@ std::optional LineageFilter::getBitmapForValue( if (include_sublineages) { return lineage_column.getLineageIndex()->filterIncludingSublineages(value_id); } - return lineage_column.filter(value_id); + return lineage_column.getLineageIndex()->filterExcludingSublineages(value_id); } std::unique_ptr LineageFilter::compile( diff --git a/src/silo/storage/column/indexed_string_column.cpp b/src/silo/storage/column/indexed_string_column.cpp index 7c87aedf..d41662c3 100644 --- a/src/silo/storage/column/indexed_string_column.cpp +++ b/src/silo/storage/column/indexed_string_column.cpp @@ -10,9 +10,11 @@ namespace silo::storage::column { IndexedStringColumnPartition::IndexedStringColumnPartition( - common::BidirectionalMap& lookup + common::BidirectionalMap* lookup, + std::optional* lineage_index ) - : lookup(lookup) {} + : lookup(lookup), + lineage_index(lineage_index) {} std::optional IndexedStringColumnPartition::filter(Idx value_id) const { if (indexed_values.contains(value_id)) { @@ -24,7 +26,7 @@ std::optional IndexedStringColumnPartition::filter(Idx std::optional IndexedStringColumnPartition::filter( const std::optional& value ) const { - const auto& value_id = lookup.getId(value.value_or("")); + const auto& value_id = lookup->getId(value.value_or("")); if (!value_id.has_value()) { return std::nullopt; } @@ -34,24 +36,24 @@ std::optional IndexedStringColumnPartition::filter( void IndexedStringColumnPartition::insert(const std::string& value) { const size_t row_id = value_ids.size(); - if (lineage_index.has_value()) { - const auto value_id = lookup.getId(value); + if (lineage_index->has_value()) { + const auto value_id = lookup->getId(value); if (!value_id.has_value()) { throw silo::preprocessing::PreprocessingException( fmt::format("The value '{}' is not a valid lineage value.", value) ); } - lineage_index->insert(row_id, value_id.value()); + lineage_index->value().insert(row_id, value_id.value()); } - const Idx value_id = lookup.getOrCreateId(value); + const Idx value_id = lookup->getOrCreateId(value); indexed_values[value_id].add(row_id); value_ids.push_back(value_id); } void IndexedStringColumnPartition::insertNull() { - const Idx value_id = lookup.getOrCreateId(""); + const Idx value_id = lookup->getOrCreateId(""); indexed_values[value_id].add(value_ids.size()); value_ids.push_back(value_id); @@ -66,27 +68,24 @@ const std::vector& IndexedStringColumnPartition::getValues() const { } std::optional IndexedStringColumnPartition::getValueId(const std::string& value) const { - return lookup.getId(value); + return lookup->getId(value); } const std::optional& IndexedStringColumnPartition::getLineageIndex() const { - return lineage_index; + return *lineage_index; } IndexedStringColumn::IndexedStringColumn() { - lookup = std::make_unique>(); + lookup = common::BidirectionalMap(); } -IndexedStringColumnPartition& IndexedStringColumn::createPartition() { - return partitions.emplace_back(*lookup); +IndexedStringColumn::IndexedStringColumn(const common::LineageTreeAndIdMap& lineage_tree) { + lookup = lineage_tree.lineage_id_lookup_map.copy(); + lineage_index = LineageIndex(lineage_tree.lineage_tree, lineage_tree.alias_mapping); } -void IndexedStringColumn::generateLineageIndex(const common::LineageTreeAndIdMap& lineage_tree) { - *lookup = lineage_tree.lineage_id_lookup_map.copy(); - for (auto& partition : partitions) { - std::reference_wrapper(partition.lookup) = *lookup; - partition.lineage_index = LineageIndex(lineage_tree.lineage_tree); - } +IndexedStringColumnPartition& IndexedStringColumn::createPartition() { + return partitions.emplace_back(&lookup, &lineage_index); } } // namespace silo::storage::column diff --git a/src/silo/storage/column/indexed_string_column.test.cpp b/src/silo/storage/column/indexed_string_column.test.cpp index 999f28e1..41e955ac 100644 --- a/src/silo/storage/column/indexed_string_column.test.cpp +++ b/src/silo/storage/column/indexed_string_column.test.cpp @@ -8,7 +8,7 @@ using silo::storage::column::IndexedStringColumnPartition; TEST(IndexedStringColumn, shouldReturnTheCorrectFilteredValues) { silo::common::BidirectionalMap lookup; - IndexedStringColumnPartition under_test(lookup); + IndexedStringColumnPartition under_test(&lookup); under_test.insert("value 1"); under_test.insert("value 2"); @@ -28,7 +28,7 @@ TEST(IndexedStringColumn, shouldReturnTheCorrectFilteredValues) { TEST(IndexedStringColumnPartition, insertValuesToPartition) { silo::common::BidirectionalMap lookup; - IndexedStringColumnPartition under_test(lookup); + IndexedStringColumnPartition under_test(&lookup); under_test.insert("value 1"); under_test.insert("value 2"); diff --git a/src/silo/storage/lineage_index.cpp b/src/silo/storage/lineage_index.cpp index 8ee28bc8..06109daa 100644 --- a/src/silo/storage/lineage_index.cpp +++ b/src/silo/storage/lineage_index.cpp @@ -1,22 +1,48 @@ #include "silo/storage/lineage_index.h" +#include + namespace silo::storage { -LineageIndex::LineageIndex(common::LineageTree lineage_tree) - : lineage_tree(std::move(lineage_tree)) {} +LineageIndex::LineageIndex( + common::LineageTree lineage_tree, + std::unordered_map alias_mapping +) + : lineage_tree(std::move(lineage_tree)), + alias_mapping(std::move(alias_mapping)) {} -void LineageIndex::insert(size_t row_id, Idx value) { - index[value].add(row_id); - std::optional current = value; +void LineageIndex::insert(size_t row_id, Idx value_id) { + if (alias_mapping.contains(value_id)) { + value_id = alias_mapping.at(value_id); + } + index_excluding_sublineages[value_id].add(row_id); + index_including_sublineages[value_id].add(row_id); + std::optional current = value_id; while ((current = lineage_tree.getParent(current.value()))) { - index[current.value()].add(row_id); + index_including_sublineages[current.value()].add(row_id); } } std::optional LineageIndex::filterIncludingSublineages(Idx value_id ) const { - if (index.contains(value_id)) { - return &index.at(value_id); + if (alias_mapping.contains(value_id)) { + value_id = alias_mapping.at(value_id); + } + if (index_including_sublineages.contains(value_id)) { + SPDLOG_ERROR("Present in index: {}", value_id); + return &index_including_sublineages.at(value_id); + } + SPDLOG_ERROR("Not present in index: {}", value_id); + return std::nullopt; +} + +std::optional LineageIndex::filterExcludingSublineages(Idx value_id +) const { + if (alias_mapping.contains(value_id)) { + value_id = alias_mapping.at(value_id); + } + if (index_excluding_sublineages.contains(value_id)) { + return &index_excluding_sublineages.at(value_id); } return std::nullopt; } diff --git a/src/silo/test/lineage_filter.test.cpp b/src/silo/test/lineage_filter.test.cpp index 06eb6ec9..0c335bd9 100644 --- a/src/silo/test/lineage_filter.test.cpp +++ b/src/silo/test/lineage_filter.test.cpp @@ -132,4 +132,4 @@ QUERY_TEST( LINEAGE_FILTER_NULL_SCENARIO, LINEAGE_FILTER_NULL_INCLUDING_SUBLINEAGES_SCENARIO ) -); +)