Skip to content

Commit

Permalink
Make a dynamic dispatcher function for nibble2base
Browse files Browse the repository at this point in the history
  • Loading branch information
rhpvorderman committed Apr 2, 2024
1 parent 0840b8d commit 06e6545
Showing 1 changed file with 45 additions and 18 deletions.
63 changes: 45 additions & 18 deletions sam_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@ DEALINGS IN THE SOFTWARE. */

#include <errno.h>
#include <stdint.h>
#include "htslib/hts_defs.h"
#include "htslib/sam.h"
#ifdef __SSSE3__
#include "tmmintrin.h"
#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -70,7 +69,7 @@ static inline int possibly_expand_bam_data(bam1_t *b, size_t bytes) {
* for (i = 0; i < len; i++)
* seq[i] = seq_nt16_str[bam_seqi(nib, i)];
*/
static inline void nibble2base(uint8_t *nib, char *seq, int len) {
static inline void nibble2base_default(uint8_t *nib, char *seq, int len) {
static const char code2base[512] =
"===A=C=M=G=R=S=V=T=W=Y=H=K=D=B=N"
"A=AAACAMAGARASAVATAWAYAHAKADABAN"
Expand All @@ -89,12 +88,33 @@ static inline void nibble2base(uint8_t *nib, char *seq, int len) {
"B=BABCBMBGBRBSBVBTBWBYBHBKBDBBBN"
"N=NANCNMNGNRNSNVNTNWNYNHNKNDNBNN";

int i, len2 = len/2;
seq[0] = 0;

for (i = 0; i < len2; i++)
// Note size_t cast helps gcc optimiser.
memcpy(&seq[i*2], &code2base[(size_t)nib[i]*2], 2);

if ((i *= 2) < len)
seq[i] = seq_nt16_str[bam_seqi(nib, i)];
}

#if HTS_GCC_AT_LEAST(4,8)
/*
* Convert a nibble encoded BAM sequence to a string of bases.
*
* Using SSSE3 instructions, 16 codepoints that hold 2 bases each can be
* unpacked into 32 indexes from 0-15. Using the pshufb instruction these can
* be converted to the IUPAC characters.
* It falls back on the nibble2base_default function for the remainder.
*/
#include "tmmintrin.h"
__attribute__((target("ssse3")))
static inline void nibble2base_ssse3(uint8_t *nib, char *seq, int len) {
seq[0] = 0;
const char *seq_end_ptr = seq + len;
char *seq_cursor = seq;
const uint8_t *nibble_cursor = nib;
const char *seq_end_ptr_twoatatime = seq + (len & (~1ULL));
#ifdef __SSSE3__
uint8_t *nibble_cursor = nib;
const char *seq_vec_end_ptr = seq_end_ptr - (2 * sizeof(__m128i));
__m128i first_upper_shuffle = _mm_setr_epi8(
0, 0xff, 1, 0xff, 2, 0xff, 3, 0xff, 4, 0xff, 5, 0xff, 6, 0xff, 7, 0xff);
Expand Down Expand Up @@ -141,21 +161,28 @@ static inline void nibble2base(uint8_t *nib, char *seq, int len) {
nibble_cursor += sizeof(__m128i);
seq_cursor += 2 * sizeof(__m128i);
}
#endif
while (seq_cursor < seq_end_ptr_twoatatime) {
// Note size_t cast helps gcc optimiser.
memcpy(seq_cursor, code2base + ((size_t)*nibble_cursor * 2), 2);
seq_cursor += 2;
nibble_cursor += 1;
nibble2base_default(nibble_cursor, seq_cursor, seq_end_ptr - seq_cursor);
}
static void (*nibble2base)(uint8_t *nib, char *seq, int len);

static void nibble2base_dispatch(uint8_t *nib, char *seq, int len) {
if (__builtin_cpu_supports("ssse3")) {
nibble2base = nibble2base_ssse3;
}
if (seq_cursor != seq_end_ptr) {
/* There is a single encoded nuc left */
uint8_t nibble_c = *nibble_cursor;
uint8_t upper_nuc_index = nibble_c >> 4;
seq_cursor[0] = seq_nt16_str[upper_nuc_index];
else {
nibble2base = nibble2base_default;
}

return nibble2base(nib, seq, len);
}

static void (*nibble2base)(uint8_t *nib, char *seq, int len) = nibble2base_dispatch;

#else
static inline void nibble2base(uint8_t *nib, char *seq, int len) {
return nibble2base_default(nib, seq, len);
}
#endif
#ifdef __cplusplus
}
#endif
Expand Down

0 comments on commit 06e6545

Please sign in to comment.