From 6c2665be6b99600e6359f4c441ba91e76073a6c7 Mon Sep 17 00:00:00 2001 From: Brendan Nee Date: Thu, 14 Nov 2024 10:51:04 -0800 Subject: [PATCH] Updates to time columns and timestamp columns --- CHANGELOG.md | 2 +- src/lib/export.ts | 13 +-- src/lib/import-gtfs.ts | 122 +++++++++++++++----------- src/lib/utils.ts | 11 +++ src/models/gtfs-ride/board-alight.ts | 14 +-- src/models/gtfs-ride/rider-trip.ts | 14 +-- src/models/gtfs-ride/ridership.ts | 14 +-- src/models/gtfs/booking-rules.ts | 14 +-- src/models/gtfs/frequencies.ts | 12 +-- src/models/gtfs/stop-times.ts | 21 +---- src/models/gtfs/timeframes.ts | 4 +- src/models/non-standard/timetables.ts | 12 +-- src/models/ods/deadhead-times.ts | 14 +-- 13 files changed, 102 insertions(+), 165 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4a222d5..06c34bf2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Updated -- Renamed time caching functions +- Updates to time columns and timestamp columns ### Fixed - Better GTFS export for currency diff --git a/src/lib/export.ts b/src/lib/export.ts index 248fc94b..0e0fd7dd 100644 --- a/src/lib/export.ts +++ b/src/lib/export.ts @@ -94,18 +94,7 @@ export const exportGtfs = async (initialConfig: Config) => { } if (model.filenameExtension === 'txt') { - const excludeColumns = [ - 'arrival_timestamp', - 'departure_timestamp', - 'start_timestamp', - 'end_timestamp', - 'service_arrival_timestamp', - 'service_departure_timestamp', - 'boarding_timestamp', - 'alighting_timestamp', - 'ridership_start_timestamp', - 'ridership_end_timestamp', - ]; + const excludeColumns = []; // If no routes have values for agency_id, add it to the excludeColumns list if (model.filenameBase === 'routes') { diff --git a/src/lib/import-gtfs.ts b/src/lib/import-gtfs.ts index aedd33ec..8129416a 100644 --- a/src/lib/import-gtfs.ts +++ b/src/lib/import-gtfs.ts @@ -18,9 +18,10 @@ import { updateGtfsRealtimeData } from './import-gtfs-realtime.ts'; import { log, logError, logWarning } from './log-utils.ts'; import { calculateSecondsFromMidnight, + getTimestampColumnName, + padLeadingZeros, setDefaultConfig, validateConfigForImport, - padLeadingZeros, } from './utils.ts'; import { Config, ConfigAgency, Model } from '../types/global_interfaces.ts'; @@ -81,21 +82,6 @@ const getTextFiles = async (folderPath: string): Promise => { return files.filter((filename) => filename.slice(-3) === 'txt'); }; -const TIME_COLUMN_NAMES = [ - 'start_time', - 'end_time', - 'arrival_time', - 'departure_time', - 'prior_notice_last_time', - 'prior_notice_start_time', - 'start_pickup_drop_off_window', -]; - -const TIME_COLUMN_PAIRS = TIME_COLUMN_NAMES.map((name) => [ - name, - name.endsWith('time') ? `${name}stamp` : `${name}_timestamp`, -]); - const downloadGtfsFiles = async (task: GtfsImportTask): Promise => { if (!task.url) { throw new Error('No `url` specified in config'); @@ -196,7 +182,9 @@ const createGtfsTables = (db: Database.Database): void => { return; } - const columns = model.schema.map((column) => { + const sqlColumnCreateStatements = []; + + for (const column of model.schema) { const checks = []; if (column.min !== undefined && column.max) { checks.push( @@ -212,9 +200,7 @@ const createGtfsTables = (db: Database.Database): void => { checks.push( `(TYPEOF(${column.name}) = 'integer' OR ${column.name} IS NULL)`, ); - } - - if (column.type === 'real') { + } else if (column.type === 'real') { checks.push( `(TYPEOF(${column.name}) = 'real' OR ${column.name} IS NULL)`, ); @@ -225,24 +211,32 @@ const createGtfsTables = (db: Database.Database): void => { const columnCollation = column.nocase ? 'COLLATE NOCASE' : ''; const checkClause = checks.length > 0 ? `CHECK(${checks.join(' AND ')})` : ''; - return `${column.name} ${column.type} ${checkClause} ${required} ${columnDefault} ${columnCollation}`; - }); + + sqlColumnCreateStatements.push( + `${column.name} ${column.type} ${checkClause} ${required} ${columnDefault} ${columnCollation}`, + ); + + // Add an additional timestamp column for time columns + if (column.type === 'time') { + sqlColumnCreateStatements.push( + `${getTimestampColumnName(column.name)} INTEGER`, + ); + } + } // Find Primary Key fields const primaryColumns = model.schema.filter((column) => column.primary); if (primaryColumns.length > 0) { - columns.push( - `PRIMARY KEY (${primaryColumns - .map((column) => column.name) - .join(', ')})`, + sqlColumnCreateStatements.push( + `PRIMARY KEY (${primaryColumns.map(({ name }) => name).join(', ')})`, ); } db.prepare(`DROP TABLE IF EXISTS ${model.filenameBase};`).run(); db.prepare( - `CREATE TABLE ${model.filenameBase} (${columns.join(', ')});`, + `CREATE TABLE ${model.filenameBase} (${sqlColumnCreateStatements.join(', ')});`, ).run(); } }; @@ -252,10 +246,20 @@ const createGtfsIndexes = (db: Database.Database): void => { if (!model.schema) { return; } - for (const column of model.schema.filter((column) => column.index)) { - db.prepare( - `CREATE INDEX idx_${model.filenameBase}_${column.name} ON ${model.filenameBase} (${column.name});`, - ).run(); + for (const column of model.schema) { + if (column.index) { + db.prepare( + `CREATE INDEX idx_${model.filenameBase}_${column.name} ON ${model.filenameBase} (${column.name});`, + ).run(); + } + + if (column.type === 'time') { + // Index all timestamp columns + const timestampColumnName = getTimestampColumnName(column.name); + db.prepare( + `CREATE INDEX idx_${model.filenameBase}_${timestampColumnName} ON ${model.filenameBase} (${timestampColumnName});`, + ).run(); + } } } }; @@ -272,13 +276,17 @@ const formatGtfsLine = ( const filenameBase = model.filenameBase; const filenameExtension = model.filenameExtension; - for (const columnSchema of model.schema) { - const { name, type, required, min, max } = columnSchema; + for (const { name, type, required } of model.schema) { let value = line[name]; // Early null check if (value === '' || value === undefined || value === null) { formattedLine[name] = null; + + if (type === 'time') { + formattedLine[getTimestampColumnName(name)] = null; + } + if (required) { throw new Error( `Missing required value in ${filenameBase}.${filenameExtension} for ${name} on line ${lineNumber}.`, @@ -295,20 +303,18 @@ const formatGtfsLine = ( `Invalid date in ${filenameBase}.${filenameExtension} for ${name} on line ${lineNumber}.`, ); } - } - - formattedLine[name] = value; - } - - // Process time columns - for (const [timeColumnName, timestampColumnName] of TIME_COLUMN_PAIRS) { - const value = formattedLine[timeColumnName]; - if (value) { + } else if (type === 'time') { + // Add an additional timestamp column for time columns const [timeAsSecondsFromMidnight, timeAsString] = formatAndCacheTime(value); - formattedLine[timestampColumnName] = timeAsSecondsFromMidnight; - formattedLine[timeColumnName] = timeAsString; + + value = timeAsString; + + formattedLine[getTimestampColumnName(name)] = + timeAsSecondsFromMidnight ?? null; } + + formattedLine[name] = value; } return formattedLine; @@ -354,22 +360,34 @@ const importGtfsFiles = ( task.log(`Importing - ${filename}\r`); - const placeholder = model.schema - .map(({ name }) => `@${name}`) - .join(', '); + // Create a list of all columns + const columns = model.schema.flatMap((column) => { + if (column.type === 'time') { + // Add an additional timestamp column for time columns + return [ + column, + { + name: getTimestampColumnName(column.name), + type: 'integer', + index: true, + }, + ]; + } + return column; + }); // Create a map of which columns need prefixing const prefixedColumns = new Set( - model.schema + columns .filter((column) => column.prefix) .map((column) => column.name), ); const prepareStatement = `INSERT ${task.ignoreDuplicates ? 'OR IGNORE' : ''} INTO ${ model.filenameBase - } (${model.schema - .map((column) => column.name) - .join(', ')}) VALUES (${placeholder})`; + } (${columns.map(({ name }) => name).join(', ')}) VALUES (${columns + .map(({ name }) => `@${name}`) + .join(', ')})`; const insert = db.prepare(prepareStatement); @@ -393,7 +411,7 @@ const importGtfsFiles = ( } } catch (error: any) { if (error.code === 'SQLITE_CONSTRAINT_PRIMARYKEY') { - const primaryColumns = model.schema.filter( + const primaryColumns = columns.filter( (column) => column.primary, ); task.logWarning( diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 2e80a225..132f72e0 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -324,3 +324,14 @@ export function formatCurrency(value: number, currency: string) { return `${integerPart}${fractionPart !== '' ? `.${fractionPart}` : ''}`; } + +/** + * Gets the timestamp column name for a given column name + * @param columnName The column name + * @returns The timestamp column name + */ +export function getTimestampColumnName(columnName: string) { + return columnName.endsWith('time') + ? `${columnName}stamp` + : `${columnName}_timestamp`; +} diff --git a/src/models/gtfs-ride/board-alight.ts b/src/models/gtfs-ride/board-alight.ts index 675dadb7..d433bfb1 100644 --- a/src/models/gtfs-ride/board-alight.ts +++ b/src/models/gtfs-ride/board-alight.ts @@ -104,21 +104,11 @@ export const boardAlight = { }, { name: 'service_arrival_time', - type: 'text', - }, - { - name: 'service_arrival_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'service_departure_time', - type: 'text', - }, - { - name: 'service_departure_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'source', diff --git a/src/models/gtfs-ride/rider-trip.ts b/src/models/gtfs-ride/rider-trip.ts index ad70d0d7..a1df18a8 100644 --- a/src/models/gtfs-ride/rider-trip.ts +++ b/src/models/gtfs-ride/rider-trip.ts @@ -53,21 +53,11 @@ export const riderTrip = { }, { name: 'boarding_time', - type: 'text', - }, - { - name: 'boarding_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'alighting_time', - type: 'text', - }, - { - name: 'alighting_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'rider_type', diff --git a/src/models/gtfs-ride/ridership.ts b/src/models/gtfs-ride/ridership.ts index 04974711..c7ad0e5e 100644 --- a/src/models/gtfs-ride/ridership.ts +++ b/src/models/gtfs-ride/ridership.ts @@ -28,21 +28,11 @@ export const ridership = { }, { name: 'ridership_start_time', - type: 'text', - }, - { - name: 'ridership_start_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'ridership_end_time', - type: 'text', - }, - { - name: 'ridership_end_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'service_id', diff --git a/src/models/gtfs/booking-rules.ts b/src/models/gtfs/booking-rules.ts index f89c295d..942d4ecd 100644 --- a/src/models/gtfs/booking-rules.ts +++ b/src/models/gtfs/booking-rules.ts @@ -32,12 +32,7 @@ export const bookingRules = { }, { name: 'prior_notice_last_time', - type: 'text', - }, - { - name: 'prior_notice_last_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'prior_notice_start_day', @@ -46,12 +41,7 @@ export const bookingRules = { }, { name: 'prior_notice_start_time', - type: 'text', - }, - { - name: 'prior_notice_start_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'prior_notice_service_id', diff --git a/src/models/gtfs/frequencies.ts b/src/models/gtfs/frequencies.ts index 2734eee2..285c1358 100644 --- a/src/models/gtfs/frequencies.ts +++ b/src/models/gtfs/frequencies.ts @@ -11,23 +11,15 @@ export const frequencies = { }, { name: 'start_time', - type: 'text', + type: 'time', required: true, primary: true, }, - { - name: 'start_timestamp', - type: 'integer', - }, { name: 'end_time', - type: 'text', + type: 'time', required: true, }, - { - name: 'end_timestamp', - type: 'integer', - }, { name: 'headway_secs', type: 'integer', diff --git a/src/models/gtfs/stop-times.ts b/src/models/gtfs/stop-times.ts index 6b96b8df..f5d26953 100644 --- a/src/models/gtfs/stop-times.ts +++ b/src/models/gtfs/stop-times.ts @@ -11,21 +11,11 @@ export const stopTimes = { }, { name: 'arrival_time', - type: 'text', - }, - { - name: 'arrival_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'departure_time', - type: 'text', - }, - { - name: 'departure_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'location_group_id', @@ -60,12 +50,7 @@ export const stopTimes = { }, { name: 'start_pickup_drop_off_window', - type: 'text', - }, - { - name: 'start_pickup_drop_off_window_timestamp', - type: 'integer', - index: true, + type: 'time', }, { name: 'pickup_type', diff --git a/src/models/gtfs/timeframes.ts b/src/models/gtfs/timeframes.ts index d1fd7e66..b4020ac7 100644 --- a/src/models/gtfs/timeframes.ts +++ b/src/models/gtfs/timeframes.ts @@ -10,12 +10,12 @@ export const timeframes = { }, { name: 'start_time', - type: 'text', + type: 'time', primary: true, }, { name: 'end_time', - type: 'text', + type: 'time', primary: true, }, { diff --git a/src/models/non-standard/timetables.ts b/src/models/non-standard/timetables.ts index ee80b5b8..3b2116df 100644 --- a/src/models/non-standard/timetables.ts +++ b/src/models/non-standard/timetables.ts @@ -82,19 +82,11 @@ export const timetables = { }, { name: 'start_time', - type: 'text', - }, - { - name: 'start_timestamp', - type: 'integer', + type: 'time', }, { name: 'end_time', - type: 'text', - }, - { - name: 'end_timestamp', - type: 'integer', + type: 'time', }, { name: 'timetable_label', diff --git a/src/models/ods/deadhead-times.ts b/src/models/ods/deadhead-times.ts index f70e728a..6eefebcf 100644 --- a/src/models/ods/deadhead-times.ts +++ b/src/models/ods/deadhead-times.ts @@ -14,24 +14,14 @@ export const deadheadTimes = { }, { name: 'arrival_time', - type: 'text', + type: 'time', required: true, }, - { - name: 'arrival_timestamp', - type: 'integer', - index: true, - }, { name: 'departure_time', - type: 'text', + type: 'time', required: true, }, - { - name: 'departure_timestamp', - type: 'integer', - index: true, - }, { name: 'ops_location_id', type: 'text',