From 751f023d889f42fd931a913d686c1c1d47eaa4fc Mon Sep 17 00:00:00 2001
From: James Bonfield
Date: Tue, 7 Mar 2023 15:35:15 +0000
Subject: [PATCH] Fix a couple of small VCF auto-indexing bugs.

1. sam_idx_save wasn't validating that the file is BGZF.  It's invalid
   usage to try calling this function on uncompressed data, but we
   should double-check.

   Note this is triggered by a bcftools bug where
   -o foo.vcf.gz##idx##foo.vcf.gz.csi writes VCF rather than VCF.gz as
   the "filename" doesn't end in .gz.

2. Add the hts_idx_amend_last calls to vcf_write as we did previously
   for SAM/BAM.

   This isn't technically a requirement, as all it's doing is changing
   virtual offsets to an alternate form that gives the same file offset
   (see comments above hts_idx_amend_last), but doing so means the
   auto-built indices match those produced by a standalone index
   command.

   This fix isn't complete as it hasn't been worked on for BCF yet.
   However it comes under the "nicety" category and isn't really fixing
   a bug so we can try to figure out how to tidy up BCF later (plus
   VCF.gz is basically the universal format).
---
 sam.c | 2 +-
 vcf.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/sam.c b/sam.c
index e2e539b2d..24027a351 100644
--- a/sam.c
+++ b/sam.c
@@ -1067,7 +1067,7 @@ int sam_idx_save(htsFile *fp) {
             errno = -ret;
             return -1;
         }
-        if (bgzf_flush(fp->fp.bgzf) < 0)
+        if (!fp->is_bgzf || bgzf_flush(fp->fp.bgzf) < 0)
             return -1;
         hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf));
 
diff --git a/vcf.c b/vcf.c
index 59d433c19..8ab8b2815 100644
--- a/vcf.c
+++ b/vcf.c
@@ -3570,6 +3570,8 @@ int vcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
     if ( fp->format.compression!=no_compression ) {
         if (bgzf_flush_try(fp->fp.bgzf, fp->line.l) < 0)
             return -1;
+        if (fp->idx)
+            hts_idx_amend_last(fp->idx, bgzf_tell(fp->fp.bgzf));
         ret = bgzf_write(fp->fp.bgzf, fp->line.s, fp->line.l);
     } else {
         ret = hwrite(fp->fp.hfile, fp->line.s, fp->line.l);