Skip to content

Commit

Permalink
[Issue #99]: fix null value storage. (#102)
Browse files Browse the repository at this point in the history
We fixed the null value support in pixels-core, and optimized the decoding performance for isnull array.
  • Loading branch information
bianhq authored Apr 30, 2021
1 parent aeb08d9 commit 271a8d5
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
* pixels
*
* @author guodong
* @author hank
*/
public class BitUtils
{
Expand All @@ -38,14 +39,15 @@ private BitUtils()
public static byte[] bitWiseCompact(boolean[] values, int length)
{
ByteArrayOutputStream bitWiseOutput = new ByteArrayOutputStream();
int bitsToWrite = 1;
// Issue #99: remove to improve performance.
// int bitsToWrite = 1;
int bitsLeft = 8;
byte current = 0;

for (int i = 0; i < length; i++)
{
byte v = values[i] ? (byte) 1 : (byte) 0;
bitsLeft -= bitsToWrite;
bitsLeft --; // -= bitsToWrite;
current |= v << bitsLeft;
if (bitsLeft == 0)
{
Expand Down Expand Up @@ -94,14 +96,15 @@ public static byte[] bitWiseCompact(boolean[] values, int length)
public static byte[] bitWiseCompact(byte[] values, int length)
{
ByteArrayOutputStream bitWiseOutput = new ByteArrayOutputStream();
int bitsToWrite = 1;
// Issue #99: remove to improve performance.
// int bitsToWrite = 1;
int bitsLeft = 8;
byte current = 0;

for (int i = 0; i < length; i++)
{
byte v = values[i];
bitsLeft -= bitsToWrite;
bitsLeft --; // -= bitsToWrite;
current |= v << bitsLeft;
if (bitsLeft == 0)
{
Expand All @@ -127,20 +130,19 @@ public static byte[] bitWiseCompact(byte[] values, int length)
*/
public static void bitWiseDeCompact(byte[] bits, byte[] input)
{
int bitsToRead = 1;
int bitsLeft = 8;
int current;
byte mask = 0x01;

int index = 0;
/**
* Issue #99:
* Use as few variables as possible to reduce stack footprint
* and thus improve performance.
*/
byte bitsLeft = 8;
int index = 0;
for (byte b : input)
{
while (bitsLeft > 0)
{
bitsLeft -= bitsToRead;
current = mask & (b >> bitsLeft);
bits[index] = (byte) current;
index++;
bitsLeft --;
bits[index++] = (byte) (0x01 & (b >> bitsLeft));
}
bitsLeft = 8;
}
Expand All @@ -156,19 +158,19 @@ public static void bitWiseDeCompact(byte[] bits, byte[] input)
*/
public static void bitWiseDeCompact(byte[] bits, byte[] input, int offset, int length)
{
int bitsToRead = 1;
int bitsLeft = 8;
int current;
byte mask = 0x01;

/**
* Issue #99:
* Use as few variables as possible to reduce stack footprint
* and thus improve performance.
*/
byte bitsLeft = 8;
int index = 0;
for (int i = offset; i < offset + length; i++)
{
while (bitsLeft > 0)
{
bitsLeft -= bitsToRead;
current = mask & (input[i] >> bitsLeft);
bits[index++] = (byte) current;
bitsLeft --;
bits[index++] = (byte)(0x01 & (input[i] >> bitsLeft));
}
bitsLeft = 8;
}
Expand All @@ -184,23 +186,21 @@ public static void bitWiseDeCompact(byte[] bits, byte[] input, int offset, int l
*/
public static void bitWiseDeCompact(byte[] bits, ByteBuffer input, int offset, int length)
{
int bitsToRead = 1;
int bitsLeft = 8;
int current;
byte mask = 0x01;

/**
* Issue #99:
* Use as few variables as possible to reduce stack footprint
* and thus improve performance.
*/
byte bitsLeft = 8, b;
int index = 0;
byte b;
while (offset < length)
// loop condition fixed in Issue #99.
for (int i = offset; i < offset + length; ++i)
{
b = input.get(offset);
offset++;
b = input.get(i);
while (bitsLeft > 0)
{
bitsLeft -= bitsToRead;
current = mask & (b >> bitsLeft);
bits[index] = (byte) current;
index++;
bitsLeft --;
bits[index++] = (byte) (0x01 & (b >> bitsLeft));
}
bitsLeft = 8;
}
Expand All @@ -216,23 +216,21 @@ public static void bitWiseDeCompact(byte[] bits, ByteBuffer input, int offset, i
*/
public static void bitWiseDeCompact(byte[] bits, ByteBuf input, int offset, int length)
{
int bitsToRead = 1;
int bitsLeft = 8;
int current;
byte mask = 0x01;

/**
* Issue #99:
* Use as few variables as possible to reduce stack footprint
* and thus improve performance.
*/
byte bitsLeft = 8, b;
int index = 0;
byte b;
while (offset < length)
// loop condition fixed in Issue #99.
for (int i = offset; i < offset + length; ++i)
{
b = input.getByte(offset);
offset++;
b = input.getByte(i);
while (bitsLeft > 0)
{
bitsLeft -= bitsToRead;
current = mask & (b >> bitsLeft);
bits[index] = (byte) current;
index++;
bitsLeft --;
bits[index++] = (byte) (0x01 & (b >> bitsLeft));
}
bitsLeft = 8;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright 2021 PixelsDB.
*
* This file is part of Pixels.
*
* Pixels is free software: you can redistribute it and/or modify
* it under the terms of the Affero GNU General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* Pixels is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Affero GNU General Public License for more details.
*
* You should have received a copy of the Affero GNU General Public
* License along with Pixels. If not, see
* <https://www.gnu.org/licenses/>.
*/
package io.pixelsdb.pixels.core.utils;

import org.junit.Test;

/**
* Created at: 30/04/2021
* Author: hank
*/
public class TestBitUtils
{
    /** Number of times the whole input buffer is decoded. */
    private static final int ROUNDS = 1000000;

    /** Size, in bytes, of the (all-zero) input buffer that is decoded. */
    private static final int BUFFER_SIZE = 100;

    /**
     * Micro-benchmark for
     * {@code BitUtils.bitWiseDeCompact(byte[] bits, byte[] input, int offset, int length)}.
     * It decodes every byte of a {@value #BUFFER_SIZE}-byte buffer one byte at a
     * time ({@code length == 1}), repeats that {@value #ROUNDS} times, and prints
     * the elapsed time in seconds to standard output. It makes no assertions.
     *
     * <p>Background (Issue #99). Before issue #99, bitWiseDeCompact was
     * implemented as follows:
     *
     * <pre>
     * public static void bitWiseDeCompact(byte[] bits, byte[] input, int offset, int length)
     * {
     *     int bitsToRead = 1;
     *     int bitsLeft = 8;
     *     int current;
     *     byte mask = 0x01;
     *
     *     int index = 0;
     *     for (int i = offset; i &lt; offset + length; i++)
     *     {
     *         while (bitsLeft &gt; 0)
     *         {
     *             bitsLeft -= bitsToRead;
     *             current = 0x01 &amp; (input[i] &gt;&gt; bitsLeft);
     *             bits[index++] = (byte) current;
     *         }
     *         bitsLeft = 8;
     *     }
     * }
     * </pre>
     *
     * <p>According to the original author, the four variables declared at the
     * beginning of that method (three ints and one byte, 13 bytes in total) were
     * replaced by a single byte, and this made the method about 30% faster when
     * the length parameter is 1.
     * NOTE(review): the 13-byte/1-byte stack footprint and the 30% figure are the
     * author's claims; this test only prints a timing and cannot confirm them.
     *
     * <p>The author also reports that
     * <pre>
     * for (int i = offset; i &lt; offset + length; i++) {}
     * </pre>
     * is more efficient than
     * <pre>
     * length += offset;
     * for (int i = offset; i &lt; length; i++) {}
     * </pre>
     * giving as the reason that memory access/allocation is more expensive than
     * calculations in CPU registers, and that the compiler may be able to
     * optimize (offset + length) in the loop condition.
     * NOTE(review): the original note says "javac"; such an optimization would
     * presumably be done by the JIT compiler instead. Confirm before relying on it.
     *
     * <p>The same reasoning is said to apply to the other three overloads of
     * bitWiseDeCompact.
     */
    @Test
    public void testPerformance()
    {
        byte[] buffer = new byte[BUFFER_SIZE];
        // Each input byte is expanded into eight output elements, one per bit.
        byte[] bits = new byte[8];

        // System.nanoTime() is monotonic and intended for measuring elapsed time;
        // System.currentTimeMillis() follows the wall clock, which can be adjusted
        // while the benchmark is running.
        long start = System.nanoTime();

        for (int round = 0; round < ROUNDS; ++round)
        {
            for (int offset = 0; offset < BUFFER_SIZE; ++offset)
            {
                BitUtils.bitWiseDeCompact(bits, buffer, offset, 1);
            }
        }

        long elapsedNanos = System.nanoTime() - start;

        // Report the elapsed time in seconds.
        System.out.println(elapsedNanos / 1e9);
    }
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
/*
* Copyright 2020 PixelsDB.
*
* This file is part of Pixels.
*
* Pixels is free software: you can redistribute it and/or modify
* it under the terms of the Affero GNU General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* Pixels is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Affero GNU General Public License for more details.
*
* You should have received a copy of the Affero GNU General Public
* License along with Pixels. If not, see
* <https://www.gnu.org/licenses/>.
*/
package io.pixelsdb.pixels.core.utils;

import org.junit.Test;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,16 @@ public void testRead()
DateColumnVector fcv = (DateColumnVector) rowBatch.cols[5];
TimeColumnVector gcv = (TimeColumnVector) rowBatch.cols[6];
BinaryColumnVector hcv = (BinaryColumnVector) rowBatch.cols[7];
for (int i = 0; i < fcv.getLength(); ++i)
for (int i = 0, j = 0; i < rowBatch.size; ++i)
{
System.out.println(fcv.asScratchDate(i) + ", " + fcv.isNull[i]);
if (fcv.isNull[i])
{
System.out.println("null");
}
else
{
System.out.println(fcv.asScratchDate(j++));
}
}
}
catch (IOException e)
Expand Down

0 comments on commit 271a8d5

Please sign in to comment.