@@ -835,6 +835,33 @@ KRADIX_SORT_INIT(jj, mm_idx_jjump1_t, sort_key_jj, 4)
835
835
// Key extractor handed to the radix-sort generator: yields a record's off2 field.
// There must be no whitespace between the macro name and '(' -- otherwise the
// preprocessor defines an object-like macro and every use site breaks.
#define sort_key_jj2(a) ((a).off2)
836
836
// Instantiate a radix sort over mm_idx_jjump1_t records keyed by sort_key_jj2 (the off2 field).
// NOTE(review): presumably this generates radix_sort_jj2(), which sort_jjump() calls, and the
// trailing 4 is the key width in bytes -- confirm against the KRADIX_SORT_INIT definition.
KRADIX_SORT_INIT (jj2 , mm_idx_jjump1_t , sort_key_jj2 , 4 )
837
837
838
+ static void sort_jjump (mm_idx_jjump_t * jj2 )
839
+ {
840
+ int32_t j0 , j , k ;
841
+ if (jj2 == 0 || jj2 -> n == 0 ) return ;
842
+ radix_sort_jj (jj2 -> a , jj2 -> a + jj2 -> n );
843
+ for (j0 = 0 , j = 1 ; j <= jj2 -> n ; ++ j ) {
844
+ if (j == jj2 -> n || jj2 -> a [j0 ].off != jj2 -> a [j ].off ) {
845
+ radix_sort_jj2 (jj2 -> a + j0 , jj2 -> a + j );
846
+ j0 = j ;
847
+ }
848
+ }
849
+ // the actual merge
850
+ for (j0 = 0 , j = 1 , k = 0 ; j <= jj2 -> n ; ++ j ) {
851
+ if (j == jj2 -> n || jj2 -> a [j0 ].off != jj2 -> a [j ].off || jj2 -> a [j0 ].off2 != jj2 -> a [j ].off2 ) {
852
+ int32_t t , cnt = 0 ;
853
+ uint16_t flag = 0 ;
854
+ for (t = j0 ; t < j ; ++ t ) cnt += jj2 -> a [t ].cnt , flag |= jj2 -> a [t ].flag ;
855
+ jj2 -> a [k ] = jj2 -> a [j0 ];
856
+ jj2 -> a [k ].cnt = cnt ;
857
+ jj2 -> a [k ++ ].flag = flag ;
858
+ j0 = j ;
859
+ }
860
+ }
861
+ jj2 -> n = k ;
862
+ jj2 -> a = REALLOC (mm_idx_jjump1_t , jj2 -> a , k );
863
+ }
864
+
838
865
static mm_idx_jjump_t * mm_idx_bed2jjump (const mm_idx_t * mi , const mm_idx_intv_t * I , uint16_t flag )
839
866
{
840
867
int32_t i ;
@@ -850,7 +877,7 @@ static mm_idx_jjump_t *mm_idx_bed2jjump(const mm_idx_t *mi, const mm_idx_intv_t
850
877
jj -> a [k ].off = intv -> a [j ].st , jj -> a [k ].off2 = intv -> a [j ].en , jj -> a [k ].cnt = intv -> a [j ].cnt , jj -> a [k ].strand = intv -> a [j ].strand , jj -> a [k ++ ].flag = flag ;
851
878
jj -> a [k ].off = intv -> a [j ].en , jj -> a [k ].off2 = intv -> a [j ].st , jj -> a [k ].cnt = intv -> a [j ].cnt , jj -> a [k ].strand = intv -> a [j ].strand , jj -> a [k ++ ].flag = flag ;
852
879
}
853
- radix_sort_jj (jj -> a , jj -> a + jj -> n );
880
+ sort_jjump (jj );
854
881
}
855
882
return J ;
856
883
}
@@ -861,41 +888,21 @@ static mm_idx_jjump_t *mm_idx_jjump_merge(const mm_idx_t *mi, const mm_idx_jjump
861
888
mm_idx_jjump_t * J2 ;
862
889
J2 = CALLOC (mm_idx_jjump_t , mi -> n_seq );
863
890
for (i = 0 ; i < mi -> n_seq ; ++ i ) {
864
- int32_t j , j0 , k ;
891
+ int32_t j , k ;
865
892
const mm_idx_jjump_t * jj0 = & J0 [i ], * jj1 = & J1 [i ];
866
893
mm_idx_jjump_t * jj2 = & J2 [i ];
867
- // merge jj0 and jj1 into jj2; faster with sorted merge but the performance difference should be negligible
868
894
jj2 -> n = jj0 -> n + jj1 -> n ;
869
895
jj2 -> a = CALLOC (mm_idx_jjump1_t , jj2 -> n );
870
896
for (j = k = 0 ; j < jj0 -> n ; ++ j ) jj2 -> a [k ++ ] = jj0 -> a [j ];
871
- for (j = k = 0 ; j < jj1 -> n ; ++ j ) jj2 -> a [k ++ ] = jj1 -> a [j ];
872
- radix_sort_jj (jj2 -> a , jj2 -> a + jj2 -> n ); // sort by a[].off
873
- // sort by a[].off and then by a[].off2 such that they can be merged later
874
- for (j0 = 0 , j = 1 ; j <= jj2 -> n ; ++ j ) {
875
- if (j == jj2 -> n || jj2 -> a [j0 ].off != jj2 -> a [j ].off ) {
876
- radix_sort_jj2 (jj2 -> a + j0 , jj2 -> a + j );
877
- j0 = j ;
878
- }
879
- }
880
- // the actual merge
881
- for (j0 = 0 , j = 1 , k = 0 ; j <= jj2 -> n ; ++ j ) {
882
- if (j == jj2 -> n || jj2 -> a [j0 ].off != jj2 -> a [j ].off || jj2 -> a [j0 ].off2 != jj2 -> a [j ].off2 ) {
883
- int32_t t , cnt = 0 ;
884
- uint16_t flag = 0 ;
885
- for (t = j0 ; t < j ; ++ t ) cnt += jj2 -> a [t ].cnt , flag |= jj2 -> a [t ].flag ;
886
- jj2 -> a [k ] = jj2 -> a [j0 ];
887
- jj2 -> a [k ].cnt = cnt ;
888
- jj2 -> a [k ++ ].flag = flag ;
889
- j0 = j ;
890
- }
891
- }
897
+ for (j = 0 ; j < jj1 -> n ; ++ j ) jj2 -> a [k ++ ] = jj1 -> a [j ];
898
+ sort_jjump (jj2 );
892
899
}
893
900
return J2 ;
894
901
}
895
902
896
903
int mm_idx_jjump_read (mm_idx_t * mi , const char * fn , int flag , int min_sc )
897
904
{
898
- int32_t i ;
905
+ int32_t i , n = 0 ;
899
906
mm_idx_intv_t * I ;
900
907
mm_idx_jjump_t * J ;
901
908
if (mi -> h == 0 ) mm_idx_index_name (mi );
@@ -905,13 +912,17 @@ int mm_idx_jjump_read(mm_idx_t *mi, const char *fn, int flag, int min_sc)
905
912
free (I );
906
913
if (mi -> J ) {
907
914
mm_idx_jjump_t * J2 ;
908
- J2 = mm_idx_jjump_merge (mi , mi -> J , J2 );
915
+ J2 = mm_idx_jjump_merge (mi , mi -> J , J );
909
916
for (i = 0 ; i < mi -> n_seq ; ++ i ) {
910
917
free (mi -> J [i ].a ); free (J [i ].a );
911
918
}
912
919
free (mi -> J ); free (J );
913
920
mi -> J = J2 ;
914
921
} else mi -> J = J ;
922
+ for (i = 0 ; i < mi -> n_seq ; ++ i )
923
+ n += mi -> J [i ].n ;
924
+ if (mm_verbose >= 3 )
925
+ fprintf (stderr , "[%s] there are %d splice positions in the index\n" , __func__ , n );
915
926
return 0 ;
916
927
}
917
928
0 commit comments