Skip to content

Commit

Permalink
GH-40959: [JS] Store Timestamps in 64 bits (#40960)
Browse files Browse the repository at this point in the history
Merge after #40892.

This pull request also changes Dates to return timestamps instead of
Date instances (similar to Timestamps, and for the same reason).

* GitHub Issue: #40959
  • Loading branch information
domoritz authored Apr 16, 2024
1 parent 61dde71 commit 18876b2
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 121 deletions.
26 changes: 18 additions & 8 deletions js/src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,16 +333,28 @@ export class Decimal extends DataType<Type.Decimal> {
/** @ignore */
export type Dates = Type.Date | Type.DateDay | Type.DateMillisecond;
/** @ignore */
type DateType = {
    // Backing typed array per date type id: DateDay uses Int32Array,
    // DateMillisecond uses BigInt64Array, and the generic Date id may be either.
    [Type.Date]: { TArray: Int32Array | BigInt64Array };
    [Type.DateDay]: { TArray: Int32Array };
    [Type.DateMillisecond]: { TArray: BigInt64Array };
};
/** @ignore */
export interface Date_<T extends Dates = Dates> extends DataType<T> {
    // Single declaration only: a second `interface Date_` with different
    // `TArray`/`TValue` member types would conflict when the declarations merge.
    /** Backing storage, selected per date type through `DateType`. */
    TArray: DateType[T]['TArray'];
    /** Date values are exposed as plain numbers. */
    TValue: number;
}
/** @ignore */
export class Date_<T extends Dates = Dates> extends DataType<T> {
    /**
     * @param unit The date unit; it selects the backing array type and is
     * shown by `toString()`.
     */
    constructor(public readonly unit: DateUnit) {
        super(Type.Date as T);
    }

    /** Renders as `Date{bits}<{unit name}>`, where bits is `(unit + 1) * 32`. */
    public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; }

    /**
     * Typed-array constructor for the values buffer: `Int32Array` when the
     * unit is `DateUnit.DAY`, otherwise `BigInt64Array`.
     */
    public get ArrayType() {
        return this.unit === DateUnit.DAY ? Int32Array : BigInt64Array;
    }

    protected static [Symbol.toStringTag] = ((proto: Date_) => {
        (<any>proto).unit = null;
        // `ArrayType` is a getter-only accessor on the prototype, so it must not
        // be assigned here: class bodies are strict-mode code and the write
        // would throw a TypeError while the class is being defined.
        return proto[Symbol.toStringTag] = 'Date';
    })(Date_.prototype);
}
Expand Down Expand Up @@ -417,9 +429,9 @@ export class TimeNanosecond extends Time_<Type.TimeNanosecond> { constructor() {
type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond;
/** @ignore */
interface Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
    // Each member is declared exactly once; repeating `TArray`/`ArrayType`
    // with the 32-bit types is a duplicate-identifier error.
    /** Backing storage: a BigInt64Array. */
    TArray: BigInt64Array;
    /** Timestamp values are exposed as plain numbers. */
    TValue: number;
    /** Constructor used to allocate the backing array. */
    ArrayType: BigIntArrayConstructor<BigInt64Array>;
}

/** @ignore */
Expand All @@ -432,7 +444,7 @@ class Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
protected static [Symbol.toStringTag] = ((proto: Timestamp_) => {
(<any>proto).unit = null;
(<any>proto).timezone = null;
(<any>proto).ArrayType = Int32Array;
(<any>proto).ArrayType = BigInt64Array;
return proto[Symbol.toStringTag] = 'Timestamp';
})(Timestamp_.prototype);
}
Expand Down Expand Up @@ -483,7 +495,7 @@ type Durations = Type.Duration | Type.DurationSecond | Type.DurationMillisecond
export interface Duration<T extends Durations = Durations> extends DataType<T> {
    /** Backing storage: a BigInt64Array. */
    TArray: BigInt64Array;
    /** Duration values are exposed as bigints. */
    TValue: bigint;
    // Declared once, as a constructor type: `ArrayType` names the constructor
    // for the backing array, not an array instance.
    ArrayType: BigIntArrayConstructor<BigInt64Array>;
}

/** @ignore */
Expand Down Expand Up @@ -737,8 +749,6 @@ export function strideForType(type: DataType) {
const t: any = type;
switch (type.typeId) {
case Type.Decimal: return (type as Decimal).bitWidth / 32;
case Type.Timestamp: return 2;
case Type.Date: return 1 + (t as Date_).unit;
case Type.Interval: return 1 + (t as Interval_).unit;
// case Type.Int: return 1 + +((t as Int_).bitWidth > 32);
// case Type.Time: return 1 + +((t as Time_).bitWidth > 32);
Expand Down
13 changes: 13 additions & 0 deletions js/src/util/bigint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,16 @@ export function bigIntToNumber(number: bigint | number): number {
}
return Number(number);
}

/**
 * Divides one bigint by another and returns the quotient as a number,
 * keeping the fractional part.
 *
 * Bigint division discards the remainder, so the remainder is converted
 * separately and added back as a fraction of the divisor. The result is
 * assumed to fit into a number.
 *
 * @param number The number to divide.
 * @param divisor The divisor.
 * @returns The result of the division, including its fractional part, as a number.
 */
export function divideBigInts(number: bigint, divisor: bigint): number {
    const wholePart = bigIntToNumber(number / divisor);
    const remainder = bigIntToNumber(number % divisor);
    return wholePart + remainder / bigIntToNumber(divisor);
}
21 changes: 7 additions & 14 deletions js/src/visitor/get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { Vector } from '../vector.js';
import { Visitor } from '../visitor.js';
import { MapRow } from '../row/map.js';
import { StructRow, StructRowProxy } from '../row/struct.js';
import { bigIntToNumber } from '../util/bigint.js';
import { bigIntToNumber, divideBigInts } from '../util/bigint.js';
import { decodeUtf8 } from '../util/utf8.js';
import { TypeToDataType } from '../interfaces.js';
import { uint16ToFloat64 } from '../util/math.js';
Expand Down Expand Up @@ -106,13 +106,6 @@ function wrapGet<T extends DataType>(fn: (data: Data<T>, _1: any) => any) {
}

/** @ignore */const epochDaysToMs = (data: Int32Array, index: number) => 86400000 * data[index];
/** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0);
/** @ignore */const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000) + ((data[index] >>> 0) / 1000);
/** @ignore */const epochNanosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000000) + ((data[index] >>> 0) / 1000000);

/** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs);
/** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index));
/** @ignore */const epochMillisecondsLongToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochMillisecondsLongToMs(data, index));

/** @ignore */
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] => null;
Expand All @@ -139,9 +132,9 @@ type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Floa
type Numeric2X = Int64 | Uint64;

/** @ignore */
// Date getters return plain millisecond numbers. Each getter is declared once;
// a second `const` with the same name is a redeclaration error.
// DateDay: the stored day count is scaled to milliseconds by epochDaysToMs.
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => epochDaysToMs(values, index);
/**
 * DateMillisecond: the element at `index` is converted from bigint to number.
 * @ignore
 */
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => bigIntToNumber(values[index]);
/** @ignore */
const getNumeric = <T extends Numeric1X>({ stride, values }: Data<T>, index: number): T['TValue'] => values[stride * index];
/** @ignore */
Expand Down Expand Up @@ -178,13 +171,13 @@ const getDate = <T extends Date_>(data: Data<T>, index: number): T['TValue'] =>
);

/** @ignore */
// Timestamp getters read one bigint element per value (`values[index]`) and
// return milliseconds as a number. Each getter is declared once; a second
// `const` with the same name is a redeclaration error.
// Seconds: converted to a number, then scaled up by 1000.
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => 1000 * bigIntToNumber(values[index]);
/**
 * Milliseconds: converted to a number as-is.
 * @ignore
 */
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => bigIntToNumber(values[index]);
/**
 * Microseconds: divided by 1000, keeping the fractional milliseconds.
 * @ignore
 */
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000));
/**
 * Nanoseconds: divided by 1000000, keeping the fractional milliseconds.
 * @ignore
 */
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000000));
/* istanbul ignore next */
/** @ignore */
const getTimestamp = <T extends Timestamp>(data: Data<T>, index: number): T['TValue'] => {
Expand Down
9 changes: 5 additions & 4 deletions js/src/visitor/iterator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,11 @@ function vectorIterator<T extends DataType>(vector: Vector<T>): IterableIterator

// Fast case, defer to native iterators if possible
if (vector.nullCount === 0 && vector.stride === 1 && (
(type.typeId === Type.Timestamp) ||
(type instanceof Int && (type as Int).bitWidth !== 64) ||
(type instanceof Time && (type as Time).bitWidth !== 64) ||
(type instanceof Float && (type as Float).precision !== Precision.HALF)
// Don't defer to native iterator for timestamps since Numbers are expected
// (DataType.isTimestamp(type)) && type.unit === TimeUnit.MILLISECOND ||
(DataType.isInt(type) && type.bitWidth !== 64) ||
(DataType.isTime(type) && type.bitWidth !== 64) ||
(DataType.isFloat(type) && type.precision !== Precision.HALF)
)) {
return new ChunkedIterator(vector.data.length, (chunkIndex) => {
const data = vector.data[chunkIndex];
Expand Down
25 changes: 5 additions & 20 deletions js/src/visitor/set.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,6 @@ function wrapSet<T extends DataType>(fn: (data: Data<T>, _1: any, _2: any) => vo

/** @ignore */
export const setEpochMsToDays = (data: Int32Array, index: number, epochMs: number) => {
    // Convert milliseconds to whole days, rounding toward negative infinity.
    const millisecondsPerDay = 86400000;
    const days = Math.floor(epochMs / millisecondsPerDay);
    data[index] = days;
};
/** @ignore */
export const setEpochMsToMillisecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor(epochMs % 4294967296);
data[index + 1] = Math.floor(epochMs / 4294967296);
};
/** @ignore */
export const setEpochMsToMicrosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor((epochMs * 1000) % 4294967296);
data[index + 1] = Math.floor((epochMs * 1000) / 4294967296);
};
/** @ignore */
export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor((epochMs * 1000000) % 4294967296);
data[index + 1] = Math.floor((epochMs * 1000000) / 4294967296);
};

/** @ignore */
export const setVariableWidthBytes = <T extends Int32Array | BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
Expand Down Expand Up @@ -161,7 +146,7 @@ export const setAnyFloat = <T extends Float>(data: Data<T>, index: number, value
/** @ignore */
export const setDateDay = <T extends DateDay>({ values }: Data<T>, index: number, value: T['TValue']): void => { setEpochMsToDays(values, index, value.valueOf()); };
/** @ignore */
// Stores the millisecond value as a single bigint element. The value is
// floored first because BigInt() throws a RangeError for non-integer numbers;
// `valueOf()` mirrors setDateDay so Date instances are still accepted.
export const setDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(Math.floor(value.valueOf())); };
/** @ignore */
export const setFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: Data<T>, index: number, value: T['TValue']): void => { values.set(value.subarray(0, stride), stride * index); };

Expand All @@ -178,13 +163,13 @@ export const setDate = <T extends Date_>(data: Data<T>, index: number, value: T[
};

/** @ignore */
// Timestamp setters take a value in milliseconds and store it as one bigint
// element in the column's unit. BigInt() throws a RangeError for non-integer
// numbers, so every conversion is made integral first:
//   - scaling down or keeping the unit (seconds, milliseconds) floors, like setEpochMsToDays;
//   - scaling up (microseconds, nanoseconds) rounds to absorb floating-point
//     error (e.g. 1.001 * 1000 evaluates to 1000.9999999999999).
export const setTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(Math.floor(value / 1000)); };
/** @ignore */
export const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(Math.floor(value)); };
/** @ignore */
export const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(Math.round(value * 1000)); };
/** @ignore */
export const setTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(Math.round(value * 1000000)); };
/* istanbul ignore next */
/** @ignore */
export const setTimestamp = <T extends Timestamp>(data: Data<T>, index: number, value: T['TValue']): void => {
Expand Down
53 changes: 24 additions & 29 deletions js/test/generate-test-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,7 @@ function generateDate<T extends Date_>(this: TestDataVectorGenerator, type: T, l
const data = type.unit === DateUnit.DAY
? createDate32(length, nullBitmap, values)
: createDate64(length, nullBitmap, values);
return {
values: () => values.map((x) => x == null ? null : new Date(x)),
vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })])
};
return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) };
}

function generateTimestamp<T extends Timestamp>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector<T> {
Expand Down Expand Up @@ -649,6 +646,7 @@ type TypedArrayConstructor =


const rand = Math.random.bind(Math);
const randSign = () => rand() > 0.5 ? -1 : 1;
const randomBytes = (length: number) => fillRandom(Uint8Array, length);

const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))();
Expand All @@ -661,15 +659,15 @@ function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: numbe
const BPE = ArrayType.BYTES_PER_ELEMENT;
const array = new ArrayType(length);
const max = (2 ** (8 * BPE)) - 1;
for (let i = -1; ++i < length; array[i] = rand() * max * (rand() > 0.5 ? -1 : 1));
for (let i = -1; ++i < length; array[i] = rand() * max * randSign());
return array as InstanceType<T>;
}

/**
 * Allocates a 64-bit bigint typed array of the given length and fills it with
 * random values of random sign.
 *
 * @param ArrayType The BigInt64Array or BigUint64Array constructor.
 * @param length Number of elements to allocate and fill.
 * @returns The filled array.
 */
function fillRandomBigInt<T extends (typeof BigInt64Array) | (typeof BigUint64Array)>(ArrayType: T, length: number) {
    const BPE = ArrayType.BYTES_PER_ELEMENT;
    const array = new ArrayType(length);
    const max = (2 ** (8 * BPE)) - 1;
    // One fill pass only (a second pass would just overwrite the first).
    // Math.trunc guards the conversion: BigInt() throws a RangeError when
    // given a non-integer number.
    for (let i = 0; i < length; ++i) {
        array[i] = BigInt(Math.trunc(rand() * max * randSign()));
    }
    return array as InstanceType<T>;
}

Expand Down Expand Up @@ -735,47 +733,44 @@ function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offset
return bytes;
}

/**
 * Creates timestamps with the accuracy of days (86400000 millisecond).
 *
 * Returns an Int32Array of day counts, each starting from today's day count
 * and shifted by a random offset. For every valid slot the matching
 * millisecond timestamp (day count * 86400000) is written to `values`;
 * invalid slots get 0 in the data and `null` in `values`.
 */
function createDate32(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
    const today = Math.trunc(Date.now() / 86400000);
    const data = new Int32Array(length).fill(today);
    iterateBitmap(length, nullBitmap, (i, valid) => {
        if (!valid) {
            data[i] = 0;
            values[i] = null;
        } else {
            // Apply the random offset exactly once: up to 10000 days in either direction.
            data[i] = Math.trunc(data[i] + (rand() * 10000 * randSign()));
            values[i] = data[i] * 86400000;
        }
    });
    return data;
}

/**
 * Creates 64-bit date values in milliseconds.
 *
 * Day counts come from createDate32, which also fills `values` with the
 * expected millisecond timestamps (and `null` for invalid slots). Each day
 * count is then scaled to milliseconds and stored as one bigint element,
 * rather than as a pair of 32-bit lo/hi words.
 */
function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
    const data32 = createDate32(length, nullBitmap, values);
    return BigInt64Array.from(data32, x => BigInt(x * 86400000));
}

/**
 * Divides one bigint by another and returns the quotient as a number,
 * fractional part included: the truncated bigint quotient plus the remainder
 * expressed as a fraction of the divisor.
 */
function divideBigInts(number: bigint, divisor: bigint): number {
    const quotient = number / divisor;
    const remainder = number % divisor;
    return Number(quotient) + Number(remainder) / Number(divisor);
}

function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
const mult = 86400 * multiple;
const data = new Int32Array(length * 2).fill(0);
const data32 = createDate32(length, nullBitmap, values);
const data = new BigInt64Array(length).fill(0n);
const tenYears = 10 * 365 * 24 * 60 * 60 * multiple;
const now = Math.trunc(Date.now() / 1000 * multiple);
iterateBitmap(length, nullBitmap, (i, valid) => {
if (valid) {
const value = data32[i] * mult;
const hi = Math.trunc(value / 4294967296);
const lo = Math.trunc(value - 4294967296 * hi);
data[i * 2 + 0] = lo;
data[i * 2 + 1] = hi;
if (!valid) {
data[i] = 0n;
values[i] = null;
} else {
const value = BigInt(now + Math.trunc(rand() * randSign() * tenYears));
data[i] = value;
values[i] = divideBigInts(value * 1000n, BigInt(multiple));
}
});
return data;
Expand All @@ -788,7 +783,7 @@ function createTime32(length: number, nullBitmap: Uint8Array, multiple: number,
data[i] = 0;
values[i] = null;
} else {
values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * (rand() > 0.5 ? -1 : 1);
values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * randSign();
}
});
return data;
Expand Down
Loading

0 comments on commit 18876b2

Please sign in to comment.