diff --git a/cram/cram_encode.c b/cram/cram_encode.c index 5d22db54de..3cad124ce1 100644 --- a/cram/cram_encode.c +++ b/cram/cram_encode.c @@ -1761,7 +1761,6 @@ static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) { c->ref_start = ref_start+1; c->ref_end = ref_end+1; c->ref_free = 1; - return 0; err: @@ -1997,6 +1996,9 @@ int cram_encode_container(cram_fd *fd, cram_container *c) { fd->no_ref_counter -= (fd->no_ref_counter > 0); pthread_mutex_unlock(&fd->ref_lock); } + + if (c->ref_end > fd->refs->ref_id[c->ref_id]->LN_length) + c->ref_end = fd->refs->ref_id[c->ref_id]->LN_length; } // Iterate through records creating the cram blocks for some diff --git a/cram/cram_io.c b/cram/cram_io.c index 7f7ffca49e..65d906fb54 100644 --- a/cram/cram_io.c +++ b/cram/cram_io.c @@ -2734,7 +2734,7 @@ int refs2id(refs_t *r, sam_hdr_t *hdr) { * Returns 0 on success * -1 on failure */ -static int refs_from_header(cram_fd *fd) { + int refs_from_header(cram_fd *fd) { if (!fd) return -1; @@ -2787,10 +2787,12 @@ static int refs_from_header(cram_fd *fd) { /* Initialise likely filename if known */ if ((ty = sam_hrecs_find_type_id(h->hrecs, "SQ", "SN", h->hrecs->ref[i].name))) { - if ((tag = sam_hrecs_find_key(ty, "M5", NULL))) { + if ((tag = sam_hrecs_find_key(ty, "M5", NULL))) r->ref_id[j]->fn = string_dup(r->pool, tag->str+3); - //fprintf(stderr, "Tagging @SQ %s / %s\n", r->ref_id[h]->name, r->ref_id[h]->fn); - } + + if ((tag = sam_hrecs_find_key(ty, "LN", NULL))) + // LN tag used when constructing consensus reference + r->ref_id[j]->LN_length = strtoll(tag->str+3, NULL, 0); } k = kh_put(refs, r->h_meta, r->ref_id[j]->name, &n); diff --git a/cram/cram_structs.h b/cram/cram_structs.h index 9540b56180..49c2e81d55 100644 --- a/cram/cram_structs.h +++ b/cram/cram_structs.h @@ -671,7 +671,8 @@ struct cram_slice { typedef struct ref_entry { char *name; char *fn; - int64_t length; + int64_t length; // if 0 this indicates we haven't loaded it yet + int64_t LN_length; // @SQ LN length, used to trim consensus ref int64_t offset; int bases_per_line; int line_length;