Skip to content

Commit

Permalink
can modify single sample
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Aug 8, 2024
1 parent f331d32 commit a705f94
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 13 deletions.
42 changes: 31 additions & 11 deletions src/vcf-reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ class vcfreader {
if (!samples.empty()) br.setSamples(samples);
if (!region.empty()) br.setRegion(region);
var.initHeader(br.header);
samples_in = samples;
}

// Nothing to release by hand here; the members clean up after themselves.
~vcfreader() = default;
Expand Down Expand Up @@ -275,15 +276,28 @@ class vcfreader {
// WRITE
// Open `vcffile` on the writer `bw` and mark this object as writable, which is
// the flag write(), close(), addINFO() and addFORMAT() check before acting.
// @param vcffile  path handed to bw.open().
// NOTE(review): this text comes from a rendered commit diff. The
// `bw.initalHeader(br.header)` call (presumably seeding the output header from
// the reader's header) may be the pre-change side of the hunk — confirm
// against the committed file.
inline void output(const std::string& vcffile) {
bw.open(vcffile);
bw.initalHeader(br.header);
writable = true;
}
// Prepare the writer's header for modification: copy the header via
// bw.copyHeader(fin), narrow it to the samples requested at construction
// (if any), point the current record at that header, and raise both the
// `modifiable` and `writable` flags.
inline void modify() {
    bw.copyHeader(fin);
    const bool has_sample_subset = !samples_in.empty();
    if (has_sample_subset) {
        bw.header.setSamples(samples_in);
    }
    var.resetHeader(bw.header);
    modifiable = true;
    writable = true;
}
// Write the current record `var` through the writer `bw`. When the object is
// not writable, nothing is written and a hint is printed to Rcpp::Rcout.
// NOTE(review): this span is taken from a rendered commit diff. The one-line
// `if (writable) bw.writeRecord(var);` looks like the pre-change side of the
// hunk shown next to its replacement; read literally, a writable object would
// write the record twice. Confirm against the committed file.
inline void write() {
if (writable) bw.writeRecord(var);
if (writable) {
bw.writeRecord(var);
} else {
Rcpp::Rcout << "please call the `output()` function first to creat an output VCF\n";
}
}
// Close the writer `bw`. When the object is not writable, nothing is closed
// and a hint is printed to Rcpp::Rcout.
// NOTE(review): this span is taken from a rendered commit diff. The one-line
// `if (writable) bw.close();` looks like the pre-change side of the hunk shown
// next to its replacement; read literally, a writable object would call
// bw.close() twice. Confirm against the committed file.
inline void close() {
if (writable) bw.close();
if (writable) {
bw.close();
} else {
Rcpp::Rcout << "please call the `output()` function first to creat an output VCF\n";
}
}

// Forward `s` as a C string to var.setCHR() to update the current record.
inline void setCHR(std::string s) {
    const char* chrom = s.c_str();
    var.setCHR(chrom);
}
Expand Down Expand Up @@ -324,21 +338,27 @@ class vcfreader {
// Drop the FORMAT tag named `s` from the current record via var.removeFORMAT().
inline void rmFormatTag(std::string s) {
    var.removeFORMAT(s);
}
// Add an INFO line to the output header `bw.header`.
// @param id, number, type, desc  passed through unchanged to bw.header.addINFO().
// When the object is not writable, a hint is printed to Rcpp::Rcout and the
// header is left alone; modify() is invoked first while `modifiable` is false.
// NOTE(review): this span is taken from a rendered commit diff. The leading
// `if (writable) ... else ...` statement looks like the pre-change side of the
// hunk shown next to its replacement; read literally, a writable object would
// call bw.header.addINFO() twice and a non-writable one would print two hints.
// Confirm against the committed file.
inline void addINFO(const std::string& id, const std::string& number, const std::string& type,
const std::string& desc) {
if (writable)
bw.header.addINFO(id, number, type, desc);
else
Rcpp::Rcout << "please call the `output(filename)` function first\n";
if (!writable) {
Rcpp::Rcout << "please call the `output()` function first to creat an output VCF\n";
return;
}
// Switch to the modifiable header once, before the first header edit.
if (!modifiable) { modify(); }
bw.header.addINFO(id, number, type, desc);
}
// Add a FORMAT line to the output header `bw.header`.
// @param id, number, type, desc  passed through unchanged to bw.header.addFORMAT().
// When the object is not writable, a hint is printed to Rcpp::Rcout and the
// header is left alone; modify() is invoked first while `modifiable` is false.
// NOTE(review): this span is taken from a rendered commit diff. The leading
// `if (writable) ... else ...` statement looks like the pre-change side of the
// hunk shown next to its replacement; read literally, a writable object would
// call bw.header.addFORMAT() twice and a non-writable one would print two
// hints. Confirm against the committed file.
inline void addFORMAT(const std::string& id, const std::string& number, const std::string& type,
const std::string& desc) {
if (writable)
bw.header.addFORMAT(id, number, type, desc);
else
Rcpp::Rcout << "please call the `output(filename)` function first\n";
if (!writable) {
Rcpp::Rcout << "please call the `output()` function first to creat an output VCF\n";
return;
}
// Switch to the modifiable header once, before the first header edit.
if (!modifiable) { modify(); }
bw.header.addFORMAT(id, number, type, desc);
}

private:
// Raised by modify(); addINFO()/addFORMAT() call modify() while it is false.
bool modifiable = false;
// Raised by output() and modify(); gates write(), close(), addINFO() and addFORMAT().
bool writable = false;
// Copy of the constructor's `samples` argument; when non-empty, modify()
// passes it to bw.header.setSamples().
std::string samples_in = "";
// Argument given to bw.copyHeader() in modify(); presumably the input VCF
// path — confirm against the constructor.
const std::string fin;
// Reader; its header is used to initialise `var` in the constructor.
vcfpp::BcfReader br;
// The current record: written by write() and updated by helpers such as
// setCHR() and rmFormatTag().
vcfpp::BcfRecord var;
Expand Down
56 changes: 54 additions & 2 deletions tests/testthat/test-modify-vcf.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
library(testthat)

test_that("modify the genotypes", {

## skip_on_os(c("windows"), arch = NULL)
outvcf <- paste0(tempfile(), ".vcf.gz")
bw <- vcfwriter$new(outvcf, "VCF4.3")
Expand Down Expand Up @@ -29,9 +30,11 @@ test_that("modify the genotypes", {
br$variant() ## get a variant record
g3 <- br$genotypes(F)
expect_identical(g0, g3)

})

test_that("modify item in FORMAT", {
test_that("modify item in FORMAT for all samples", {

## skip_on_os(c("windows"), arch = NULL)
## creat a VCF with GP in FORMAT
outvcf <- paste0(tempfile(), ".vcf.gz")
Expand All @@ -44,9 +47,10 @@ test_that("modify item in FORMAT", {
s1 <- "chr20\t2006060\trs146931526\tG\tC\t100\tPASS\tAF=0.000998403\tGP\t0.966,0.034,0\t0.003,0.872,0.125"
bw$writeline(s1)
bw$close()

## tests
br <- vcfreader$new(outvcf)
expect_true(br$variant()) ## get a variant record
br$variant() ## get a variant record
br$string()
gp <- br$formatFloat("GP")
gp <- array(gp, c(3, br$nsamples()))
Expand All @@ -71,4 +75,52 @@ test_that("modify item in FORMAT", {
expect_false(br$setFormatStr("STR","HHH,JJJ")) ## length(s) %% nsamples != 0
expect_true(br$setFormatStr("STR","HHHJJJ")) ## length(s) %% nsamples == 0
## print(br$string())

})


## Checks that FORMAT/INFO header lines can be added and a new FORMAT value
## (DS) can be set, written and read back when the reader is restricted to a
## single sample of a two-sample VCF.
test_that("modify item in FORMAT for specific sample", {

## skip_on_os(c("windows"), arch = NULL)
## create a two-sample VCF (NA12878, NA12879) with GP in FORMAT
outvcf <- paste0(tempfile(), ".vcf.gz")
bw <- vcfwriter$new(outvcf, "VCF4.3")
bw$addContig("chr20")
bw$addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)");
bw$addFORMAT("GP", "3", "Float", "Posterior genotype probability of 0/0, 0/1, and 1/1");
bw$addSample("NA12878")
bw$addSample("NA12879")
s1 <- "chr20\t2006060\trs146931526\tG\tC\t100\tPASS\tAF=0.000998403\tGP\t0.966,0.034,0\t0.003,0.872,0.125"
bw$writeline(s1)
bw$close()

## tests: re-open the file keeping only sample NA12878
br <- vcfreader$new(outvcf, region = "", samples = "NA12878")
br$variant() ## get a variant record
br$string()
br$samples()
gp <- br$formatFloat("GP")
## one column of three GP values per selected sample
gp <- array(gp, c(3, br$nsamples()))
## dosage computed from GP: P(0/1) + 2 * P(1/1)
ds <- gp[2,] + gp[3,] * 2
## now open another file for output
newvcf <- paste0(tempfile(), ".vcf.gz")
br$output(newvcf)
## add INFO, DS in header first
br$addINFO("INFO", "1", "Float", "INFO score of imputation")
br$addFORMAT("DS", "1", "Float", "Diploid dosage")
br$addFORMAT("AC", "1", "Integer", "Allele counts")
br$addFORMAT("STR", "1", "String", "Test String type")
## print(br$header())
## set DS in FORMAT now
br$setFormatFloat("DS", ds[1])

## test if DS is present on the in-memory record
expect_identical(br$formatFloat("DS"), ds[1])
br$string()

## write the record out, then read DS back from the new file
br$write()
br$close()
vcf <- vcftable(newvcf, format = "DS")
expect_true(vcf$DS==ds[1])

})
16 changes: 16 additions & 0 deletions tests/testthat/test-vcf-reader.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,19 @@ test_that("vcfreader: remove tag from FORMAT", {
expect_error(br$formatInt("AD"))
})


## Checks that genotypes can be overwritten and written out when the reader
## is restricted to a single sample (HG00096).
test_that("can set genotypes for single sample", {

  br <- vcfreader$new(svfile, "", "HG00096")
  br$variant()
  ## Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  ## reassigned, whereas FALSE is a reserved constant.
  br$genotypes(FALSE)
  ## one sample, two alleles, both set to 1
  br$setGenotypes(c(1L, 1L))
  outfile <- paste0(tempfile(), ".vcf.gz")
  br$output(outfile)
  br$write()
  br$close()

  ## the record read back from the new file is expected to report gt == 2
  vcf <- vcftable(outfile)
  expect_true(vcf$gt==2)

})

0 comments on commit a705f94

Please sign in to comment.