Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix decimal decoding #401

Merged
merged 4 commits into from
Jul 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 35 additions & 27 deletions tikv-client/src/main/java/com/pingcap/tikv/codec/MyDecimal.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

package com.pingcap.tikv.codec;

import com.google.common.annotations.VisibleForTesting;
import java.math.BigDecimal;
import java.util.Arrays;

Expand Down Expand Up @@ -58,12 +59,12 @@ public class MyDecimal {
/*
* Returns total precision of this decimal. Basically, it is sum of digitsInt and digitsFrac. But there
* are some special cases need to be token care of such as 000.001.
* Precision reflects the actual effective precision without leading zero
*/
public int precision() {
int frac = this.digitsFrac;
int digitsInt =
this.removeLeadingZeros()[
1]; /*this function return an array and the second element is digitsInt*/
this.removeLeadingZeros()[1]; /*this function return an array and the second element is digitsInt*/
int precision = digitsInt + frac;
// if no precision, it is just 0.
if (precision == 0) {
Expand All @@ -72,7 +73,10 @@ public int precision() {
return precision;
}

/** Returns fraction digits that counts how many digits after ".". */
/**
* Returns fraction digits that counts how many digits after ".".
* frac() reflects the actual effective fraction without trailing zero
*/
public int frac() {
return digitsFrac;
}
Expand All @@ -92,8 +96,7 @@ public void fromDecimal(double value) {
*
* @param precision precision specifies total digits that this decimal will be..
* @param frac frac specifies how many fraction digits
* @param bin bin is binary string which represents a decimal value. TODO: (zhexuany) overflow and
* truncated exception need to be done later.
* @param bin bin is binary string which represents a decimal value.
*/
public int fromBin(int precision, int frac, int[] bin) {
if (bin.length == 0) {
Expand Down Expand Up @@ -134,13 +137,13 @@ public int fromBin(int precision, int frac, int[] bin) {
wordsIntTo = wordBufLen;
wordsFracTo = 0;
overflow = true;
} else {
wordsIntTo = wordsInt;
wordsFracTo = wordBufLen - wordsInt;
truncated = true;
}
wordsIntTo = wordsInt;
wordsFracTo = wordBufLen - wordsInt;
truncated = true;
}
wordsIntTo = wordsInt;
wordsFracTo = wordsFrac;

if (overflow || truncated) {
if (wordsIntTo < oldWordsIntTo) {
binIdx += dig2bytes[leadingDigits] + (wordsInt - wordsIntTo) * wordSize;
Expand All @@ -151,8 +154,8 @@ public int fromBin(int precision, int frac, int[] bin) {
}

this.negative = mask != 0;
this.digitsInt = wordsInt * digitsPerWord + leadingDigits;
this.digitsFrac = wordsFrac * digitsPerWord + trailingDigits;
this.digitsInt = (byte)(wordsInt * digitsPerWord + leadingDigits);
this.digitsFrac = (byte)(wordsFrac * digitsPerWord + trailingDigits);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

type conversion seems unnecessary. Digits of int and fraction can never go beyond 127. tidb does this simply because it is static type language. It has to do such conversion otherwise compiler will not be happy.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To keep the same behavior as in TiDB which does a cap.


int wordIdx = 0;
if (leadingDigits > 0) {
Expand Down Expand Up @@ -264,25 +267,26 @@ private int digitsToWords(int digits) {
/**
* Reads a word from a array at given size.
*
* @param b b is source data.
* @param b b is source data of unsigned byte as int[]
* @param size is word size which can be used in switch statement.
* @param start start indicates the where start to read.
*/
private int readWord(int[] b, int size, int start) {
@VisibleForTesting
public static int readWord(int[] b, int size, int start) {
int x = 0;
switch (size) {
case 1:
x = b[start];
x = (byte)b[start];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same reason.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In java byte is signed. b[start] is unsigned in fact since we read as unsigned byte.
So actually convert to byte will cause it to be signed. It's not trivial.

break;
case 2:
x = (b[start] << 8) + b[start + 1];
x = (((byte)b[start]) << 8) + (b[start + 1] & 0xFF);
break;
case 3:
int sign = b[start] & 128;
if (sign > 0) {
x = 255 << 24 | (b[start] & 0xFF) << 16 | (b[start + 1] & 0xFF) << 8 | (b[start + 2]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a reminder: b's type is byte[] which shares a range from -128 to 127 whereas golang's byte shares range from 0 to 255. We are better take and operation with 0xFF.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, 255 << 24 could be overflow. We need convert 255 to a uint and then convert back to int.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my bad, java does not have uint..

Copy link
Contributor

@zhexuany zhexuany Jul 23, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

~(0x7F << 24 | b[start] << 16 | b[start + 1] << 8 | (b[start + 2]) + 1);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you may use ~(((b[start] & 7FFF) << 16 | (b[start + 1] & 0xFF) << 8 | (b[start + 2] & 0xFF)) + 1);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. b's type is int not byte[].
  2. 255 as const literal is int type.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

255 << 24 has correct binary form. Numeric overflow does not affect binary. ignore my comment.

x = 0xFF << 24 | (b[start] << 16) | (b[start + 1] << 8) | (b[start + 2]);
} else {
x = b[start] << 16 | b[start + 1] << 8 | b[start + 2];
x = b[start] << 16 | (b[start + 1] << 8) | b[start + 2];
}
break;
case 4:
Expand All @@ -292,6 +296,12 @@ private int readWord(int[] b, int size, int start) {
return x;
}

public static void main(String args[]) {
int[] b = new int[]{250,250,250};
int x = 255 << 24 | (b[0] << 16) | (b[0 + 1] << 8) | (b[0 + 2]);
System.out.println(x);
}

/**
* parser a decimal value from a string.
*
Expand Down Expand Up @@ -320,7 +330,7 @@ private void fromCharArray(char[] str) {
// [-, 1, 2, 3]
// [+, 1, 2, 3]
// for +/-, we need skip them and record sign information into negative field.
switch (str[0]) {
switch (str[startIdx]) {
case '-':
this.negative = true;
startIdx++;
Expand All @@ -335,8 +345,8 @@ private void fromCharArray(char[] str) {
}
// we initialize strIdx in case of sign notation, here we need substract startIdx from strIdx casue strIdx is used for counting the number of digits.
int digitsInt = strIdx - startIdx;
int digitsFrac = 0;
int endIdx = 0;
int digitsFrac;
int endIdx;
if (strIdx < str.length && str[strIdx] == '.') {
endIdx = strIdx + 1;
// detect where is the end index of this char array.
Expand All @@ -363,13 +373,12 @@ private void fromCharArray(char[] str) {
wordsInt = wordBufLen;
wordsFrac = 0;
overflow = true;
} else {
wordsFrac = wordBufLen - wordsInt;
truncated = true;
}
// wordsIntTo = wordsInt;
wordsFrac = wordBufLen - wordsInt;
truncated = true;

}
// wordsIntTo = wordsInt;
// wordsFracTo = wordsFrac;

if (overflow || truncated) {
digitsFrac = wordsFrac * digitsPerWord;
Expand Down Expand Up @@ -700,7 +709,6 @@ public int[] toBin(int precision, int frac) {
int originFracSize = fracSize;
int[] bin = new int[intSize + fracSize];
int binIdx = 0;
//TODO, overflow and truncated later
int[] res = this.removeLeadingZeros();
int wordIdxFrom = res[0];
int digitsIntFrom = res[1];
Expand Down
95 changes: 56 additions & 39 deletions tikv-client/src/test/java/com/pingcap/tikv/codec/MyDecimalTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,48 +27,66 @@
public class MyDecimalTest {
@Test
public void fromStringTest() throws Exception {
List<MyDecimalTestStruct> test = new ArrayList<>();
test.add(new MyDecimalTestStruct("12345", "12345", 5, 0));
test.add(new MyDecimalTestStruct("12345.", "12345", 5, 0));
test.add(new MyDecimalTestStruct("123.45", "123.45", 5, 2));
test.add(new MyDecimalTestStruct("-123.45", "-123.45", 5, 2));
test.add(new MyDecimalTestStruct(".00012345000098765", "0.00012345000098765", 17, 17));
test.add(new MyDecimalTestStruct(".12345000098765", "0.12345000098765", 14, 14));
test.add(
List<MyDecimalTestStruct> tests = new ArrayList<>();
tests.add(new MyDecimalTestStruct("1111111111111111111111111.111111111111111111111111111111",
"1111111111111111111111111.111111111111111111111111111111", 65, 30));
tests.add(new MyDecimalTestStruct("12345", "12345", 5, 0));
tests.add(new MyDecimalTestStruct("12345.", "12345", 5, 0));
tests.add(new MyDecimalTestStruct("123.45", "123.45", 5, 2));
tests.add(new MyDecimalTestStruct("-123.45", "-123.45", 5, 2));
tests.add(new MyDecimalTestStruct(".00012345000098765", "0.00012345000098765", 17, 17));
tests.add(new MyDecimalTestStruct(".12345000098765", "0.12345000098765", 14, 14));
tests.add(
new MyDecimalTestStruct("-.000000012345000098765", "-0.000000012345000098765", 21, 21));
test.add(new MyDecimalTestStruct("0000000.001", "0.001", 3, 3));
test.add(new MyDecimalTestStruct("1234500009876.5", "1234500009876.5", 14, 1));
test.forEach(
(a) -> {
MyDecimal dec = new MyDecimal();
dec.fromString(a.in);
assertEquals(a.precision, dec.precision());
assertEquals(a.frac, dec.frac());
assertEquals(a.out, dec.toString());
});
tests.add(
new MyDecimalTestStruct("-.000000012345000098765", "-0.000000012345000098765", 2, 21));
tests.add(
new MyDecimalTestStruct("-.000000012345000098765", "-0.000000012345000098765", 21, 2));
tests.add(new MyDecimalTestStruct("0000000.001", "0.001", 3, 3));
tests.add(new MyDecimalTestStruct("1234500009876.5", "1234500009876.5", 14, 1));
for(MyDecimalTestStruct t : tests) {
MyDecimal dec = new MyDecimal();
dec.fromString(t.in);
assertEquals(t.out, dec.toString());
}
}

@Test
public void readWordTest() throws Exception {
assertEquals(MyDecimal.readWord(new int[]{250}, 1, 0), -6);
assertEquals(MyDecimal.readWord(new int[]{50}, 1, 0), 50);

assertEquals(MyDecimal.readWord(new int[]{250, 250}, 2, 0), -1286);
assertEquals(MyDecimal.readWord(new int[]{50, 50}, 2, 0), 12850);

assertEquals(MyDecimal.readWord(new int[]{250, 250, 250}, 3, 0), -328966);
assertEquals(MyDecimal.readWord(new int[]{50, 50, 50}, 3, 0), 3289650);

assertEquals(MyDecimal.readWord(new int[]{250, 250, 250, 250}, 4, 0), -84215046);
assertEquals(MyDecimal.readWord(new int[]{50, 50, 50, 50}, 4, 0), 842150450);
}

@Test
public void toBinToBinFromBinTest() throws Exception {
List<MyDecimalTestStruct> test = new ArrayList<>();
test.add(new MyDecimalTestStruct("-10.55", "-10.55", 4, 2));
test.add(new MyDecimalTestStruct("12345", "12345", 5, 0));
test.add(new MyDecimalTestStruct("-12345", "-12345", 5, 0));
test.add(new MyDecimalTestStruct("0000000.001", "0.001", 3, 3));
test.add(new MyDecimalTestStruct("0.00012345000098765", "0.00012345000098765", 17, 17));
test.add(new MyDecimalTestStruct("-0.00012345000098765", "-0.00012345000098765", 17, 17));
test.forEach(
(a) -> {
MyDecimal dec = new MyDecimal();
dec.fromString(a.in);
assertEquals(a.out, dec.toString());
int[] bin = dec.toBin(dec.precision(), dec.frac());
dec.clear();
dec.fromBin(a.precision, a.frac, bin);
assertEquals(a.precision, dec.precision());
assertEquals(a.frac, dec.frac());
assertEquals(a.out, dec.toString());
});
public void toBinFromBinTest() throws Exception {
List<MyDecimalTestStruct> tests = new ArrayList<>();
String decValStr = "11111111111111111111111111111111111.111111111111111111111111111111";
tests.add(new MyDecimalTestStruct(decValStr, decValStr, 65, 30));
tests.add(new MyDecimalTestStruct("12345000098765", "12345000098765", 14, 0));
tests.add(new MyDecimalTestStruct("-10.55", "-10.55", 4, 2));
tests.add(new MyDecimalTestStruct("12345", "12345", 5, 0));
tests.add(new MyDecimalTestStruct("-12345", "-12345", 5, 0));
tests.add(new MyDecimalTestStruct("0000000.001", "0.001", 3, 3));
tests.add(new MyDecimalTestStruct("0.00012345000098765", "0.00012345000098765", 17, 17));
tests.add(new MyDecimalTestStruct("-0.00012345000098765", "-0.00012345000098765", 17, 17));
for (MyDecimalTestStruct a : tests) {
MyDecimal dec = new MyDecimal();
dec.fromString(a.in);
assertEquals(a.out, dec.toString());
int[] bin = dec.toBin(a.precision, a.frac);
dec.clear();
dec.fromBin(a.precision, a.frac, bin);
assertEquals(a.out, dec.toString());
}
}

@Test
Expand All @@ -80,7 +98,6 @@ public void toBinTest() throws Exception {
new int[] {
0x7E, 0xF2, 0x04, 0xC7, 0x2D, 0xFB, 0x2D,
};
// something wrong with toBin and fromBin
assertArrayEquals(expected, data);
}

Expand Down