Skip to content

Commit

Permalink
GA4GHTT-276: v4.4 test file update and minor fixes (#255)
Browse files Browse the repository at this point in the history
* minor fixes and updates

* test files for v4.4 changes
  • Loading branch information
vasudeva8 authored Sep 12, 2024
1 parent 1ccb5a1 commit a4eea2e
Show file tree
Hide file tree
Showing 32 changed files with 571 additions and 5 deletions.
5 changes: 2 additions & 3 deletions src/vcf/record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ namespace ebi
return;
}
if (util::contains(format, CICN) && !util::contains(format, CN)) {
- throw new FormatBodyError{line, "Format field CICN must be used only with CN field."};
+ throw new FormatBodyError{line, "Format field CICN must be used only with CN field"};
}
}

Expand Down Expand Up @@ -1196,7 +1196,7 @@ namespace ebi
if (svlen_val != values[i]) {
//must be same as earlier value
std::stringstream message;
- message << "INFO " << SVLEN << " must be same for all CNV, DEL, DUP alleles.";
+ message << "INFO " << SVLEN << " must be same for all CNV, DEL, DUP alleles";
throw new InfoBodyError{line, message.str()};
}
}
Expand Down Expand Up @@ -1238,7 +1238,6 @@ namespace ebi
//infer phasing based on other alleles phasing
allele->insert(0, anyphased ? "|" : "/");
}
- //alleles.insert(alleles.begin(), values.begin(), values.end());
alleles.swap(values);
}
}
Expand Down
5 changes: 3 additions & 2 deletions src/vcf/validate_optional_policy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,8 @@ namespace ebi
void ValidateOptionalPolicy::check_body_entry_info_rb_ruc(ParsingState & state, Record const & record) const
{
std::vector<std::string> valRB, valRUC, valLen;
- int rb = 0, ruc = 0 , rul = 0;
+ int rb = 0, rul = 0;
+ float ruc = 0;
const float limit = 0.05; //5% variation

if (record.source->version < Version::v44) {
Expand Down Expand Up @@ -318,7 +319,7 @@ namespace ebi
continue;
}
rb = std::stoi(valRB[i]);
- ruc = std::stoi(valRUC[i]);
+ ruc = std::stod(valRUC[i]);
rul = itRUL != record.info.end()? std::stoi(valLen[i]) : valLen[i].size();
//RB ~= RUL * RUC
if ( (abs(rb - rul * ruc) / (float)rb) > limit) {
Expand Down
25 changes: 25 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN-RUS-RUL
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=3 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TG,TA GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=2 GT 0|0
1 100 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=1;RUS=T,A GT 0|0
1 110 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=1;RUL=2;RUS=T,A GT 0|0
1 120 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=1;RUL=2;RUS=. GT 0|0
39 changes: 39 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN - RUC, RB, CIRUC, CIRB
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##INFO=<ID=CIRUC,Number=.,Type=Float,Description="Confidence interval around RUC">
##INFO=<ID=CIRB,Number=.,Type=Integer,Description="Confidence interval around RB">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2,2 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=2,10;RUC=3 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=2,10;RB=3 GT 0|0
1 100 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=1 GT 0|0
1 110 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=1,. GT 0|0
1 120 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=.,-1 GT 0|0
1 130 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=1,-1 GT 0|0
1 140 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=-1,-1 GT 0|0
1 150 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=-8,8,9 GT 0|0
1 160 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;RB=.,20;CIRB=-8,8,-9,0 GT 0|0
1 170 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;CIRB=-8,8 GT 0|0
1 200 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=1 GT 0|0
1 210 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=1,. GT 0|0
1 220 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=.,-1 GT 0|0
1 230 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=1,-1 GT 0|0
1 240 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=-1,-1 GT 0|0
1 250 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3;CIRUC=-8,8,9 GT 0|0
1 260 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;RUC=.,2;CIRUC=-8,8,-9,0 GT 0|0
1 270 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;CIRUC=-8,8,-9,0 GT 0|0
23 changes: 23 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_3.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN - RUC, RB; warning: RB ~= RUC * RUL; error to make this fail: RUC not integer with RUB
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##INFO=<ID=CIRUC,Number=.,Type=Float,Description="Confidence interval around RUC">
##INFO=<ID=CIRB,Number=.,Type=Integer,Description="Confidence interval around RB">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2.3;RB=5 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUL=2;RUC=2.3;RB=5 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUL=2;RUC=2.3;RB=5;RUB=10 GT 0|0
24 changes: 24 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_4.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN - RUB, RUC
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##INFO=<ID=CIRUC,Number=.,Type=Float,Description="Confidence interval around RUC">
##INFO=<ID=CIRB,Number=.,Type=Integer,Description="Confidence interval around RB">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2.3;RUB=2 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUS=TT,AA;RUC=.,2;RUB=2,2 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2;RUB=2 GT 0|0
1 100 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUB=2 GT 0|0
13 changes: 13 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=INS>
##FILTER=<ID=F1,Description="Filter1">
##FILTER=<ID=F2>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <DAL:TRE> 100 PASS SVLEN=1;SVCLAIM=D GT 0|0
13 changes: 13 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - SVLEN not present
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=INS>
##FILTER=<ID=F1,Description="Filter1">
##FILTER=<ID=F2>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <DEL> 100 PASS SVCLAIM=D GT 0|0
17 changes: 17 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_3.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - SVCLAIM not present for DEL/DUP
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=INS>
##FILTER=<ID=F1,Description="Filter1">
##FILTER=<ID=F2>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <INV> 100 PASS SVLEN=10 GT 0|0
1 700 rs180734498 C <DEL> 100 PASS SVLEN=10 GT 0|0
1 700 rs180734498 C <DUP> 100 PASS SVLEN=10 GT 0|0
21 changes: 21 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_4.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - CNV:TR must have RUS/RUL
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT GT 0|0
1 800 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3 GT 0|0
1 900 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D GT 0|0
22 changes: 22 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_5.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - CNV:TR - RB ~= RUL * RUC
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT;RUC=30;RB=100 GT 0|0
1 800 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3;RUC=30;RB=100 GT 0|0
1 900 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT;RUL=2 GT 0|0
1 1000 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT;RUL=3 GT 0|0
10 changes: 10 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_6.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - BND
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=BND:Test,Description="Breakend SV acceptable in v4.3">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <BND:Test> 100 PASS CN=100;SVLEN=1 GT 0|0

21 changes: 21 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_format_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid format fields, with CICN, format CN must be present
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT GT:CN 0|0:2
1 800 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3 GT:CN:CICN 0|0:2:-1,1
1 900 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3 GT:CICN 0|0:-1,2
17 changes: 17 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_format_2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid format fields, SVLEN must be same for DEL/DUP/CNV when format CN is present
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##ALT=<ID=CNV,Description="CNV">
##ALT=<ID=DEL,Description="DEL">
##ALT=<ID=DUP,Description="DUP">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <DEL>,T,<DUP> 100 PASS SVLEN=1,.,2;SVCLAIM=D,.,D GT:CN 0|0:2
1 800 rs180734498 C <CNV>,T,<DEL> 100 PASS SVLEN=2,.,1;SVCLAIM=D,.,DJ GT:CN:CICN 0|0:2:-1,1
1 900 rs180734498 C <CNV>,T,<DUP> 100 PASS SVLEN=1,.,21;SVCLAIM=D,.,J GT:CN:CICN 0|0:2:-1,2
Loading

0 comments on commit a4eea2e

Please sign in to comment.