Skip to content

Commit

Permalink
Apply some minor optimizations to piz decode
Browse files Browse the repository at this point in the history
Signed-off-by: Kimball Thurston <kdt3rd@gmail.com>
  • Loading branch information
kdt3rd committed Oct 5, 2024
1 parent 4ea2af9 commit 621f2f8
Showing 1 changed file with 62 additions and 8 deletions.
70 changes: 62 additions & 8 deletions src/lib/OpenEXRCore/internal_piz.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,14 @@ forwardLutFromBitmap (const uint8_t* bitmap, uint16_t* lut)
return k - 1;
}

/* NO_ALIAS: qualifier for pointer parameters that do not alias each other.
 * Expands to the C `restrict` keyword when this file is compiled as C and
 * to nothing when compiled as C++, where `restrict` is not a keyword. */
#ifndef __cplusplus
# define NO_ALIAS restrict
#else
# define NO_ALIAS
#endif

static inline uint16_t
reverseLutFromBitmap (const uint8_t* bitmap, uint16_t* lut)
reverseLutFromBitmap (const uint8_t* NO_ALIAS bitmap, uint16_t* NO_ALIAS lut)
{
uint32_t n, k = 0;

Expand All @@ -86,8 +92,20 @@ reverseLutFromBitmap (const uint8_t* bitmap, uint16_t* lut)
}

/*
 * applyLut: remap nData 16-bit values in place, replacing each data[i]
 * with lut[data[i]].
 *
 *   lut   - lookup table indexed by the 16-bit value being remapped;
 *           declared with USHORT_RANGE entries
 *   data  - values to remap; overwritten in place
 *   nData - number of uint16_t elements in data
 *
 * NOTE(review): this diff view shows two signature lines below; the first
 * (plain pointers) appears to be the pre-change form and the second
 * (NO_ALIAS-qualified) the post-change form -- confirm against the file.
 */
static inline void
applyLut (const uint16_t* lut, uint16_t* data, uint64_t nData)
applyLut (const uint16_t lut[NO_ALIAS USHORT_RANGE], uint16_t* NO_ALIAS data, uint64_t nData)
{
/* partially unrolling is noticeably faster */
/* staging buffer for one group of 8 values */
uint16_t dvals[8];
/* strictly greater than 8: when exactly 8 values remain they are handled
 * by the scalar loop at the bottom, not by this loop */
while ( nData > 8 )
{
/* copy the group out, remap it in the local buffer, copy it back
 * (presumably so the compiler can treat the group independently of
 * data/lut -- TODO confirm) */
memcpy (dvals, data, sizeof(uint16_t)*8);
for ( int i = 0; i < 8; ++i )
dvals[i] = lut[dvals[i]];
memcpy (data, dvals, sizeof(uint16_t)*8);
data += 8;
nData -= 8;
}

/* remaining tail (at most 8 values), one element at a time */
for (uint64_t i = 0; i < nData; ++i)
data[i] = lut[data[i]];
}
Expand All @@ -113,14 +131,53 @@ wenc14 (uint16_t a, uint16_t b, uint16_t* l, uint16_t* h)
*h = (uint16_t) ds;
}

/*
 * wdec14_4: fused form of four chained wdec14 calls on four coefficients,
 * updated in place:
 *
 *     wdec14 (*px,  *p10, &i00, &i10);
 *     wdec14 (*p01, *p11, &i01, &i11);
 *     wdec14 (i00,  i01,  px,   p01);
 *     wdec14 (i10,  i11,  p10,  p11);
 *
 *   px, p01, p10, p11 - in: the four encoded values; out: the decoded values
 *
 * Each first-pass result is wrapped to int16_t before it feeds the second
 * pass, exactly as wdec14 does when it stores its outputs.  Without that
 * wrap, a first-pass value outside the int16 range changes the result of
 * the `>> 1` in the second pass and the output no longer matches the
 * chained wdec14 calls.
 */
static inline void
wdec14_4 (uint16_t* px, uint16_t* p01, uint16_t* p10, uint16_t* p11)
{
    /* Reinterpret the inputs as signed 16-bit, then widen to int.
     * Note the pairing: the first pass combines (px, p10) and (p01, p11). */
    int a = (int16_t) *px;
    int b = (int16_t) *p10;
    int c = (int16_t) *p01;
    int d = (int16_t) *p11;

    /* First pass; results wrapped to 16 bits like wdec14's outputs. */
    int     s0  = a + (b & 1) + (b >> 1);
    int     s1  = c + (d & 1) + (d >> 1);
    int16_t i00 = (int16_t) s0;
    int16_t i10 = (int16_t) (s0 - b);
    int16_t i01 = (int16_t) s1;
    int16_t i11 = (int16_t) (s1 - d);

    /* Second pass combines (i00, i01) and (i10, i11). */
    int ai = i00 + (i01 & 1) + (i01 >> 1);
    int bi = ai - i01;
    int ci = i10 + (i11 & 1) + (i11 >> 1);
    int di = ci - i11;

    /* Output order differs from the input pairing: px, p01, p10, p11. */
    *px  = (uint16_t) ai;
    *p01 = (uint16_t) bi;
    *p10 = (uint16_t) ci;
    *p11 = (uint16_t) di;
}

static inline void
wdec14 (uint16_t l, uint16_t h, uint16_t* a, uint16_t* b)
{
int16_t ls = (int16_t) l;
int16_t hs = (int16_t) h;

int hi = hs;
int ai = ls + (hi & 1) + (hi >> 1);
int hi = (int) hs;
int li = (int) ls;
int ai = li + (hi & 1) + (hi >> 1);

int16_t as = (int16_t) ai;
int16_t bs = (int16_t) (ai - hi);
Expand Down Expand Up @@ -339,10 +396,7 @@ wav_2D_decode (

if (w14)
{
wdec14 (*px, *p10, &i00, &i10);
wdec14 (*p01, *p11, &i01, &i11);
wdec14 (i00, i01, px, p01);
wdec14 (i10, i11, p10, p11);
wdec14_4 (px, p01, p10, p11);
}
else
{
Expand Down

0 comments on commit 621f2f8

Please sign in to comment.