Skip to content

Commit

Permalink
improve safety
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Apr 7, 2024
1 parent cc945fc commit c2534ae
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 67 deletions.
4 changes: 2 additions & 2 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ HTSLIB = /usr/local/lib
# detect OS architecture and add flags
CXX = g++
CXXFLAGS = -std=c++11 -Wall -fsanitize=address
# CXXFLAGS = -std=c++11 -Wall
# CXXFLAGS = -std=c++11 -Wall -g
LDFLAGS = -L$(HTSLIB) -Wl,-rpath,$(HTSLIB)
LIBS = -lhts -lz -lm -lbz2 -llzma -lcurl -lpthread
LIBS = -lhts
INC = -I. -I.. -I${HTSINC}

OBJS = test-main.o bcf-variant.o bcf-reader.o bcf-writer.o example.o
Expand Down
57 changes: 27 additions & 30 deletions test/bcf-reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,26 +64,6 @@ TEST_CASE("parse vcf with missing genotypes diploid - vector<int>", "[bcf-reader
REQUIRE(n == 1);
}

TEST_CASE("parse vcf with multialleles - vector<int>", "[bcf-reader]")
{
string vcffile{"test-multialleles.vcf.gz"};
BcfWriter bw(vcffile, "VCF4.2");
bw.header.addContig("chr20");
bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)");
bw.header.addFORMAT("GT", "1", "String", "Genotype");
for(auto & s : {"id01", "id02", "id03"}) bw.header.addSample(s); // add 3 samples
bw.writeLine("chr20\t2006060\trs146931526\tG\tA,T\t100\tPASS\tAF=0.02\tGT\t1|2\t1|1\t0|2");
bw.close();
BcfReader br("test-multialleles.vcf.gz");
BcfRecord var(br.header);
vector<int> gt;
while(br.getNextVariant(var))
{
var.getGenotypes(gt);
for(auto g : gt) cout << g << endl;
}
}

TEST_CASE("parse PL in vcf - vector<int>", "[bcf-reader]")
{
string vcffile{"test-GL.vcf.gz"};
Expand All @@ -98,11 +78,9 @@ TEST_CASE("parse PL in vcf - vector<int>", "[bcf-reader]")
BcfReader br(vcffile);
BcfRecord var(br.header);
vector<int> pl;
while(br.getNextVariant(var))
{
var.getFORMAT("PL",pl);
for(auto g : pl) cout << g << endl;
}
REQUIRE(br.getNextVariant(var)==true);
var.getFORMAT("PL",pl);
for(auto g : pl) cout << g << endl;
}

TEST_CASE("parse GL in vcf - vector<float>", "[bcf-reader]")
Expand All @@ -119,9 +97,28 @@ TEST_CASE("parse GL in vcf - vector<float>", "[bcf-reader]")
BcfReader br(vcffile);
BcfRecord var(br.header);
vector<float> gl;
while(br.getNextVariant(var))
{
var.getFORMAT("GL",gl);
for(auto g : gl) cout << g << endl;
}
REQUIRE(br.getNextVariant(var)==true);
var.getFORMAT("GL",gl);
for(auto g : gl) cout << g << endl;
}

TEST_CASE("parse vcf with multialleles - vector<int>", "[bcf-reader]")
{
string vcffile{"test-multialleles.vcf.gz"};
BcfWriter bw(vcffile, "VCF4.2");
bw.header.addContig("chr20");
bw.header.addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)");
bw.header.addFORMAT("GT", "1", "String", "Genotype");
for(auto & s : {"id01", "id02", "id03"}) bw.header.addSample(s); // add 3 samples
string l1 = "chr20\t2006060\trs146931526\tG\tA,T\t100\tPASS\tAF=0.02\tGT\t1|2\t1|1\t0|2";
bw.writeLine(l1);
bw.close();
BcfReader br("test-multialleles.vcf.gz");
BcfRecord var(br.header);
vector<int> gt;
REQUIRE(br.getNextVariant(var)==true);
auto l2 = var.asString();
REQUIRE(l2 == l1 + "\n");
var.getGenotypes(gt);
for(auto g : gt) cout << g << endl;
}
18 changes: 8 additions & 10 deletions test/bcf-variant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ TEST_CASE("set genotypes without missing ", "[bcf-variant]")
BcfRecord v(br.header);
vector<int> gt;
vector<int> gi{1, 1};
if(br.getNextVariant(v))
while(br.getNextVariant(v))
{
v.getGenotypes(gt);
for(auto g : gt) cerr << g << endl;
v.setGenotypes(gi);
v.getGenotypes(gt);
for(auto g : gt) cerr << g << endl;
for(size_t i = 0; i < gt.size(); i++) REQUIRE(gt[i] == gi[i]);
}
}

Expand All @@ -25,18 +24,17 @@ TEST_CASE("set genotypes with missing ", "[bcf-variant]")
BcfReader br("test-vcf-read.vcf.gz");
BcfRecord v(br.header);
vector<int> gt;
vector<int> gi{1, -9};
vector<int> g2{-2147483648, -9};
br.getNextVariant(v);
if(br.getNextVariant(v))
vector<int> gi{1, -9}; // -9 as missing
vector<int> g2{-2147483648, -9}; // also, bcf_int32_missing (R_NA) as missing
// br.getNextVariant(v);
while(br.getNextVariant(v))
{
v.getGenotypes(gt);
for(auto g : gt) cerr << g << endl;
v.setGenotypes(gi);
v.getGenotypes(gt);
for(auto g : gt) cerr << g << endl;
for(size_t i = 0; i < gt.size(); i++) REQUIRE(gt[i] == gi[i]);
v.setGenotypes(g2);
v.getGenotypes(gt);
for(auto g : gt) cerr << g << endl;
for(size_t i = 0; i < gt.size(); i++) REQUIRE(gt[i] == -9);
}
}
69 changes: 44 additions & 25 deletions vcfpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ struct variant_close
}
};


// deleter for bcf header
struct bcf_hdr_close
{
Expand All @@ -166,7 +165,7 @@ struct bcf_hdr_close
if(x) bcf_hdr_destroy(x);
}
};

/**
* @class BcfHeader
* @brief Object represents header of the VCF, offering methods to access and modify the tags
Expand Down Expand Up @@ -394,9 +393,9 @@ class BcfRecord
friend class BcfWriter;

private:
BcfHeader* header;
BcfHeader * header;
std::shared_ptr<bcf1_t> line = std::shared_ptr<bcf1_t>(bcf_init(), variant_close()); // variant
bcf_hdr_t * hdr_d; // a dup header by bcf_hdr_dup(header->hdr)
bcf_hdr_t * hdr_d = NULL; // a dup header by bcf_hdr_dup(header->hdr)
bcf_fmt_t * fmt = NULL;
bcf_info_t * info = NULL;
int32_t * gts = NULL;
Expand All @@ -420,7 +419,11 @@ class BcfRecord
init(h);
}

~BcfRecord() {}
~BcfRecord()
{
if(gts) free(gts);
if(hdr_d) bcf_hdr_destroy(hdr_d);
}

/// initilize a BcfRecord object by pointing to another BcfHeader object
void init(BcfHeader & h)
Expand Down Expand Up @@ -601,6 +604,7 @@ class BcfRecord
}
else
{
free(dst);
throw std::runtime_error("failed to parse the " + tag + " format of this variant.\n");
}
}
Expand Down Expand Up @@ -634,6 +638,7 @@ class BcfRecord
}
else
{
free(dst);
throw std::runtime_error("couldn't parse the " + tag + " format of this variant.\n");
}
}
Expand Down Expand Up @@ -661,10 +666,15 @@ class BcfRecord
ret = bcf_get_info_float(header->hdr, line.get(), tag.c_str(), &dst, &ndst);
}
if(ret >= 0)
{
v = std::vector<S>(dst, dst + ret); // user have to check if there is missing in the return v;
free(dst);
}
else
{
free(dst);
throw std::runtime_error("couldn't parse the " + tag + " format of this variant.\n");
free(dst);
}
return true;
}

Expand Down Expand Up @@ -798,22 +808,26 @@ class BcfRecord
// bcf_gt_type
int i, j, k;
nploidy = v.size() / nsamples;
gts = (int32_t *)malloc(v.size() * sizeof(int32_t));
int32_t * gt = (int32_t *)malloc(v.size() * sizeof(int32_t));
for(i = 0; i < nsamples; i++)
{
for(j = 0; j < nploidy; j++)
{
k = i * nploidy + j;
if(v[k] == -9 || v[k] == bcf_int32_missing)
gts[k] = bcf_gt_missing;
gt[k] = bcf_gt_missing;
else if(gtPhase[i])
gts[k] = bcf_gt_phased(v[k]);
gt[k] = bcf_gt_phased(v[k]);
else
gts[k] = bcf_gt_unphased(v[k]);
gt[k] = bcf_gt_unphased(v[k]);
}
}
if(bcf_update_genotypes(header->hdr, line.get(), gts, v.size()) < 0)
if(bcf_update_genotypes(header->hdr, line.get(), gt, v.size()) < 0)
{
free(gt);
throw std::runtime_error("couldn't set genotypes correctly.\n");
}
free(gt);
return true;
}

Expand Down Expand Up @@ -1276,7 +1290,7 @@ class BcfReader
std::shared_ptr<htsFile> fp; // hts file
hts_idx_t * hidx = NULL; // hts index file
tbx_t * tidx = NULL; // .tbi .csi index file for vcf files
hts_itr_t * itr = NULL; // hts records iterator
hts_itr_t * itr = NULL; // tabix iterator
kstring_t s = {0, 0, NULL}; // kstring
std::string fname;
bool isBcf; // if the input file is bcf or vcf;
Expand Down Expand Up @@ -1330,10 +1344,19 @@ class BcfReader
if(!samples.empty()) setSamples(samples);
}

/// return a BcfHeader object
const BcfHeader & getHeader() const
~BcfReader()
{
return header;
close();
}

/// close the BcfReader object.
void close()
{
if(fp) fp.reset();
if(itr) hts_itr_destroy(itr);
if(hidx) hts_idx_destroy(hidx);
if(tidx) tbx_destroy(tidx);
if(s.s) free(s.s);
}

/// open a VCF/BCF/STDIN file for streaming in
Expand All @@ -1355,22 +1378,18 @@ class BcfReader
return false;
}

/// close the BcfReader object.
void close()
{
free(s.s);
if(fp) fp.reset();
if(itr) hts_itr_destroy(itr);
}

~BcfReader() {}

/** @brief set the number of threads to use */
inline int setThreads(int n)
{
return hts_set_threads(fp.get(), n);
}

/// return a BcfHeader object
const BcfHeader & getHeader() const
{
return header;
}

/** @brief get the number of records of given region */
uint64_t getVariantsCount(BcfRecord & r, const std::string & region)
{
Expand Down

0 comments on commit c2534ae

Please sign in to comment.