Skip to content

Commit

Permalink
Remove SSSE3 inlined code and compile guards
Browse files Browse the repository at this point in the history
  • Loading branch information
rhpvorderman committed Sep 26, 2023
1 parent d739ecd commit 1037343
Showing 1 changed file with 0 additions and 51 deletions.
51 changes: 0 additions & 51 deletions sam_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ DEALINGS IN THE SOFTWARE. */
#include <errno.h>
#include <stdint.h>
#include "htslib/sam.h"
#ifdef __SSSE3__
#include "tmmintrin.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -94,54 +91,6 @@ static inline void nibble2base(uint8_t *nib, char *seq, int len) {
char *seq_cursor = seq;
const uint8_t *nibble_cursor = nib;
const char *seq_end_ptr_twoatatime = seq + (len & (~1ULL));
#ifdef __SSSE3__
const char *seq_vec_end_ptr = seq_end_ptr - (2 * sizeof(__m128i));
__m128i first_upper_shuffle = _mm_setr_epi8(
0, 0xff, 1, 0xff, 2, 0xff, 3, 0xff, 4, 0xff, 5, 0xff, 6, 0xff, 7, 0xff);
__m128i first_lower_shuffle = _mm_setr_epi8(
0xff, 0, 0xff, 1, 0xff, 2, 0xff, 3, 0xff, 4, 0xff, 5, 0xff, 6, 0xff, 7);
__m128i second_upper_shuffle = _mm_setr_epi8(
8, 0xff, 9, 0xff, 10, 0xff, 11, 0xff, 12, 0xff, 13, 0xff, 14, 0xff, 15, 0xff);
__m128i second_lower_shuffle = _mm_setr_epi8(
0xff, 8, 0xff, 9, 0xff, 10, 0xff, 11, 0xff, 12, 0xff, 13, 0xff, 14, 0xff, 15);
__m128i nuc_lookup_vec = _mm_lddqu_si128((__m128i *)seq_nt16_str);
/* Work on 16 encoded characters at the time resulting in 32 decoded characters
Examples are given for 8 encoded characters A until H to keep it readable.
Encoded stored as |AB|CD|EF|GH|
Shuffle into |AB|00|CD|00|EF|00|GH|00| and
|00|AB|00|CD|00|EF|00|GH|
Shift upper to the right resulting into
|0A|B0|0C|D0|0E|F0|0G|H0| and
|00|AB|00|CD|00|EF|00|GH|
Merge with or resulting into (X stands for garbage)
|0A|XB|0C|XD|0E|XF|0G|XH|
Bitwise and with 0b1111 leads to:
|0A|0B|0C|0D|0E|0F|0G|0H|
We can use the resulting 4-bit integers as indexes for the shuffle of
the nucleotide lookup. */
while (seq_cursor < seq_vec_end_ptr) {
__m128i encoded = _mm_lddqu_si128((__m128i *)nibble_cursor);

__m128i first_upper = _mm_shuffle_epi8(encoded, first_upper_shuffle);
__m128i first_lower = _mm_shuffle_epi8(encoded, first_lower_shuffle);
__m128i shifted_first_upper = _mm_srli_epi64(first_upper, 4);
__m128i first_merged = _mm_or_si128(shifted_first_upper, first_lower);
__m128i first_indexes = _mm_and_si128(first_merged, _mm_set1_epi8(0b1111));
__m128i first_nucleotides = _mm_shuffle_epi8(nuc_lookup_vec, first_indexes);
_mm_storeu_si128((__m128i *)seq_cursor, first_nucleotides);

__m128i second_upper = _mm_shuffle_epi8(encoded, second_upper_shuffle);
__m128i second_lower = _mm_shuffle_epi8(encoded, second_lower_shuffle);
__m128i shifted_second_upper = _mm_srli_epi64(second_upper, 4);
__m128i second_merged = _mm_or_si128(shifted_second_upper, second_lower);
__m128i second_indexes = _mm_and_si128(second_merged, _mm_set1_epi8(0b1111));
__m128i second_nucleotides = _mm_shuffle_epi8(nuc_lookup_vec, second_indexes);
_mm_storeu_si128((__m128i *)(seq_cursor + 16), second_nucleotides);

nibble_cursor += sizeof(__m128i);
seq_cursor += 2 * sizeof(__m128i);
}
#endif
while (seq_cursor < seq_end_ptr_twoatatime) {
// Note size_t cast helps gcc optimiser.
memcpy(seq_cursor, code2base + ((size_t)*nibble_cursor * 2), 2);
Expand Down

0 comments on commit 1037343

Please sign in to comment.