Skip to content

Commit

Permalink
GH-40891: [JS] Store Dates as TimestampMillisecond (#40892)
Browse files Browse the repository at this point in the history
Fixes #40891

Tested with the following script:

```ts
// Manual check of how a JS Date is stored and read back through vectors and tables.
// Reference value: a fixed UTC instant with a non-zero time-of-day component.
const date = new Date("2023-03-29T12:34:56Z");
console.log("original", date)

// Vector built without an explicit type, so the type is inferred from the values.
console.log("=> vec")
const vec = arrow.vectorFromArray([date])
console.log(vec.toArray())
console.log(vec.toJSON())
console.log(vec.type)
console.log(vec.get(0))

// Vector built with an explicit DateMillisecond type.
console.log("=> vec2")
const vec2 = arrow.vectorFromArray([date], new arrow.DateMillisecond)
console.log(vec2.toArray())
// Log vec2 here (not vec) so this section reports the explicitly typed vector.
console.log(vec2.toJSON())
console.log(vec2.type)
console.log(vec2.get(0))

// Table built from plain JSON rows; the column type is inferred.
console.log("=> table")
const table = arrow.tableFromJSON([{ date }])
console.log(table.toArray())
console.log(table.schema.fields[0].type)
console.log(table.getChildAt(0)?.get(0))

// Same table after a serialize/deserialize round trip through IPC.
console.log("=> table2")
const table2 = arrow.tableFromIPC(arrow.tableToIPC(table));
console.log(table2.toArray())
console.log(table2.schema.fields[0].type)
console.log(table2.getChildAt(0)?.get(0))

// Table wrapping the explicitly typed DateMillisecond vector.
console.log("=> table3")
const table3 = new arrow.Table({ dates: vec2 })
console.log(table3.toArray())
console.log(table3.schema.fields[0].type)
console.log(table3.getChildAt(0)?.get(0))
```

```
=> table
[
  {"date": Wed Mar 29 2023 08:34:56 GMT-0400 (Eastern Daylight Time)}
]
TimestampMillisecond {
  typeId: 10,
  unit: 1,
  timezone: undefined,
  toString: [Function: toString],
  ArrayType: [class Int32Array],
  [Symbol(Symbol.toStringTag)]: "Timestamp",
  children: null,
  OffsetArrayType: [class Int32Array],
}
2023-03-29T12:34:56.000Z
=> table2
[
  {"date": Wed Mar 29 2023 08:34:56 GMT-0400 (Eastern Daylight Time)}
]
Timestamp_ {
  typeId: 10,
  unit: 1,
  timezone: null,
  toString: [Function: toString],
  ArrayType: [class Int32Array],
  children: null,
  OffsetArrayType: [class Int32Array],
}
2023-03-29T12:34:56.000Z
=> table3
[
  {"dates": Wed Mar 29 2023 08:34:56 GMT-0400 (Eastern Daylight Time)}
]
DateMillisecond {
  typeId: 8,
  unit: 1,
  toString: [Function: toString],
  ArrayType: [class Int32Array],
  [Symbol(Symbol.toStringTag)]: "Date",
  children: null,
  OffsetArrayType: [class Int32Array],
}
2023-03-29T12:34:56.000Z
```
* GitHub Issue: #40891
  • Loading branch information
domoritz authored Apr 3, 2024
1 parent 5b09059 commit 2caec86
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 8 deletions.
4 changes: 2 additions & 2 deletions js/src/factories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export function makeBuilder<T extends dtypes.DataType = any, TNull = any>(option
export function vectorFromArray(values: readonly (null | undefined)[], type?: dtypes.Null): Vector<dtypes.Null>;
export function vectorFromArray(values: readonly (null | undefined | boolean)[], type?: dtypes.Bool): Vector<dtypes.Bool>;
export function vectorFromArray<T extends dtypes.Utf8 | dtypes.Dictionary<dtypes.Utf8> = dtypes.Dictionary<dtypes.Utf8, dtypes.Int32>>(values: readonly (null | undefined | string)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Date_>(values: readonly (null | undefined | Date)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.TimestampMillisecond>(values: readonly (null | undefined | Date)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Int>(values: readonly (null | undefined | number)[], type: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Int64 | dtypes.Uint64 = dtypes.Int64>(values: readonly (null | undefined | bigint)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Float = dtypes.Float64>(values: readonly (null | undefined | number)[], type?: T): Vector<T>;
Expand Down Expand Up @@ -145,7 +145,7 @@ function inferType(value: readonly unknown[]): dtypes.DataType {
} else if (booleansCount + nullsCount === value.length) {
return new dtypes.Bool;
} else if (datesCount + nullsCount === value.length) {
return new dtypes.DateMillisecond;
return new dtypes.TimestampMillisecond;
} else if (arraysCount + nullsCount === value.length) {
const array = value as Array<unknown>[];
const childType = inferType(array[array.findIndex((ary) => ary != null)]);
Expand Down
14 changes: 13 additions & 1 deletion js/src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,19 @@ export class Date_<T extends Dates = Dates> extends DataType<T> {

/** @ignore */
export class DateDay extends Date_<Type.DateDay> { constructor() { super(DateUnit.DAY); } }
/** @ignore */
/**
* A signed 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) in milliseconds.
* According to the specification, this should be treated as the number of days, in milliseconds, since the UNIX epoch.
* Therefore, values must be evenly divisible by `86_400_000` (the number of milliseconds in a standard day).
*
* Practically, validation that values of this type are evenly divisible by `86_400_000` is not enforced by this library
* for performance and usability reasons.
*
* Users should prefer to use {@link DateDay} to cleanly represent the number of days. For JS dates,
* {@link TimestampMillisecond} is the preferred type.
*
* @ignore
*/
export class DateMillisecond extends Date_<Type.DateMillisecond> { constructor() { super(DateUnit.MILLISECOND); } }

/** @ignore */
Expand Down
19 changes: 14 additions & 5 deletions js/test/unit/vector/date-vector-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,19 @@
// specific language governing permissions and limitations
// under the License.

import { DateDay, DateMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow';
import { DateDay, DateMillisecond, TimestampMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow';

describe(`TimestampVector`, () => {
test(`Dates are stored in TimestampMillisecond`, () => {
const date = new Date('2023-02-01T12:34:56Z');
const vec = vectorFromArray([date]);
expect(vec.type).toBeInstanceOf(TimestampMillisecond);
expect(vec.get(0)).toBe(date.valueOf());
});
});

describe(`DateVector`, () => {
it('returns days since the epoch as correct JS Dates', () => {
test(`returns days since the epoch as correct JS Dates`, () => {
const table = new Table(RecordBatchReader.from(test_data));
const expectedMillis = expectedMillis32();
const date32 = table.getChildAt<DateDay>(0)!;
Expand All @@ -28,7 +37,7 @@ describe(`DateVector`, () => {
}
});

it('returns millisecond longs since the epoch as correct JS Dates', () => {
test(`returns millisecond longs since the epoch as correct JS Dates`, () => {
const table = new Table(RecordBatchReader.from(test_data));
const expectedMillis = expectedMillis64();
const date64 = table.getChildAt<DateMillisecond>(1)!;
Expand All @@ -38,9 +47,9 @@ describe(`DateVector`, () => {
}
});

it('returns the same date that was in the vector', () => {
test(`returns the same date that was in the vector`, () => {
const dates = [new Date(1950, 1, 0)];
const vec = vectorFromArray(dates);
const vec = vectorFromArray(dates, new DateMillisecond());
for (const date of vec) {
expect(date).toEqual(dates.shift());
}
Expand Down

0 comments on commit 2caec86

Please sign in to comment.