Skip to content

Commit e881104

Browse files
committed
r1273: reading pass1 junctions works
but we need extra logic when using them. Tomorrow.
1 parent fb81e15 commit e881104

File tree

2 files changed

+38
-27
lines changed

2 files changed

+38
-27
lines changed

index.c

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,33 @@ KRADIX_SORT_INIT(jj, mm_idx_jjump1_t, sort_key_jj, 4)
835835
#define sort_key_jj2(a) ((a).off2)
836836
KRADIX_SORT_INIT(jj2, mm_idx_jjump1_t, sort_key_jj2, 4)
837837

838+
/**
 * Sort the junction jumps stored in jj2 and collapse duplicates in place.
 *
 * The array is ordered by a[].off and, within each run of equal a[].off, by
 * a[].off2. Consecutive entries that share both a[].off and a[].off2 are then
 * merged into a single entry: a copy of the first entry of the run whose .cnt
 * is the sum of the run's .cnt values and whose .flag is the bitwise OR of the
 * run's .flag values. All other fields come from that first entry.
 *
 * On return jj2->n is the number of merged entries and jj2->a has been shrunk
 * to fit when the reallocation succeeds.
 *
 * @param jj2  jump list to sort and merge; a NULL pointer or an empty list
 *             (n == 0) is left untouched.
 */
static void sort_jjump(mm_idx_jjump_t *jj2)
{
	int32_t j0, j, k;
	mm_idx_jjump1_t *shrunk;
	if (jj2 == 0 || jj2->n == 0) return;
	// primary order: sort the whole array by a[].off
	radix_sort_jj(jj2->a, jj2->a + jj2->n);
	// secondary order: sort each run of equal a[].off by a[].off2 so that
	// identical (off, off2) pairs become adjacent and can be merged below
	for (j0 = 0, j = 1; j <= jj2->n; ++j) {
		if (j == jj2->n || jj2->a[j0].off != jj2->a[j].off) {
			radix_sort_jj2(jj2->a + j0, jj2->a + j);
			j0 = j;
		}
	}
	// the actual merge: [j0, j) is a maximal run with identical (off, off2);
	// k is the write position and never exceeds j0, so compaction is safe in place
	for (j0 = 0, j = 1, k = 0; j <= jj2->n; ++j) {
		if (j == jj2->n || jj2->a[j0].off != jj2->a[j].off || jj2->a[j0].off2 != jj2->a[j].off2) {
			int32_t t, cnt = 0;
			uint16_t flag = 0;
			for (t = j0; t < j; ++t) {
				cnt += jj2->a[t].cnt;
				flag |= jj2->a[t].flag;
			}
			jj2->a[k] = jj2->a[j0];
			jj2->a[k].cnt = cnt;
			jj2->a[k++].flag = flag;
			j0 = j;
		}
	}
	jj2->n = k;
	// Shrink to fit. A non-empty input always yields k >= 1 merged entries. If
	// the reallocation fails, the original buffer is still valid and already
	// holds those k entries, so keep it instead of overwriting it with NULL.
	shrunk = REALLOC(mm_idx_jjump1_t, jj2->a, k);
	if (shrunk != 0) jj2->a = shrunk;
}
864+
838865
static mm_idx_jjump_t *mm_idx_bed2jjump(const mm_idx_t *mi, const mm_idx_intv_t *I, uint16_t flag)
839866
{
840867
int32_t i;
@@ -850,7 +877,7 @@ static mm_idx_jjump_t *mm_idx_bed2jjump(const mm_idx_t *mi, const mm_idx_intv_t
850877
jj->a[k].off = intv->a[j].st, jj->a[k].off2 = intv->a[j].en, jj->a[k].cnt = intv->a[j].cnt, jj->a[k].strand = intv->a[j].strand, jj->a[k++].flag = flag;
851878
jj->a[k].off = intv->a[j].en, jj->a[k].off2 = intv->a[j].st, jj->a[k].cnt = intv->a[j].cnt, jj->a[k].strand = intv->a[j].strand, jj->a[k++].flag = flag;
852879
}
853-
radix_sort_jj(jj->a, jj->a + jj->n);
880+
sort_jjump(jj);
854881
}
855882
return J;
856883
}
@@ -861,41 +888,21 @@ static mm_idx_jjump_t *mm_idx_jjump_merge(const mm_idx_t *mi, const mm_idx_jjump
861888
mm_idx_jjump_t *J2;
862889
J2 = CALLOC(mm_idx_jjump_t, mi->n_seq);
863890
for (i = 0; i < mi->n_seq; ++i) {
864-
int32_t j, j0, k;
891+
int32_t j, k;
865892
const mm_idx_jjump_t *jj0 = &J0[i], *jj1 = &J1[i];
866893
mm_idx_jjump_t *jj2 = &J2[i];
867-
// merge jj0 and jj1 into jj2; faster with sorted merge but the performance difference should be negligible
868894
jj2->n = jj0->n + jj1->n;
869895
jj2->a = CALLOC(mm_idx_jjump1_t, jj2->n);
870896
for (j = k = 0; j < jj0->n; ++j) jj2->a[k++] = jj0->a[j];
871-
for (j = k = 0; j < jj1->n; ++j) jj2->a[k++] = jj1->a[j];
872-
radix_sort_jj(jj2->a, jj2->a + jj2->n); // sort by a[].off
873-
// sort by a[].off and then by a[].off2 such that they can be merged later
874-
for (j0 = 0, j = 1; j <= jj2->n; ++j) {
875-
if (j == jj2->n || jj2->a[j0].off != jj2->a[j].off) {
876-
radix_sort_jj2(jj2->a + j0, jj2->a + j);
877-
j0 = j;
878-
}
879-
}
880-
// the actual merge
881-
for (j0 = 0, j = 1, k = 0; j <= jj2->n; ++j) {
882-
if (j == jj2->n || jj2->a[j0].off != jj2->a[j].off || jj2->a[j0].off2 != jj2->a[j].off2) {
883-
int32_t t, cnt = 0;
884-
uint16_t flag = 0;
885-
for (t = j0; t < j; ++t) cnt += jj2->a[t].cnt, flag |= jj2->a[t].flag;
886-
jj2->a[k] = jj2->a[j0];
887-
jj2->a[k].cnt = cnt;
888-
jj2->a[k++].flag = flag;
889-
j0 = j;
890-
}
891-
}
897+
for (j = 0; j < jj1->n; ++j) jj2->a[k++] = jj1->a[j];
898+
sort_jjump(jj2);
892899
}
893900
return J2;
894901
}
895902

896903
int mm_idx_jjump_read(mm_idx_t *mi, const char *fn, int flag, int min_sc)
897904
{
898-
int32_t i;
905+
int32_t i, n = 0;
899906
mm_idx_intv_t *I;
900907
mm_idx_jjump_t *J;
901908
if (mi->h == 0) mm_idx_index_name(mi);
@@ -905,13 +912,17 @@ int mm_idx_jjump_read(mm_idx_t *mi, const char *fn, int flag, int min_sc)
905912
free(I);
906913
if (mi->J) {
907914
mm_idx_jjump_t *J2;
908-
J2 = mm_idx_jjump_merge(mi, mi->J, J2);
915+
J2 = mm_idx_jjump_merge(mi, mi->J, J);
909916
for (i = 0; i < mi->n_seq; ++i) {
910917
free(mi->J[i].a); free(J[i].a);
911918
}
912919
free(mi->J); free(J);
913920
mi->J = J2;
914921
} else mi->J = J;
922+
for (i = 0; i < mi->n_seq; ++i)
923+
n += mi->J[i].n;
924+
if (mm_verbose >= 3)
925+
fprintf(stderr, "[%s] there are %d splice positions in the index\n", __func__, n);
915926
return 0;
916927
}
917928

minimap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <stdio.h>
66
#include <sys/types.h>
77

8-
#define MM_VERSION "2.28-r1272-dirty"
8+
#define MM_VERSION "2.28-r1273-dirty"
99

1010
#define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
1111
#define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name

0 commit comments

Comments
 (0)