From 83a8b88df826598acd9278948c1fab9aab0df8ef Mon Sep 17 00:00:00 2001 From: Aaron Claydon Date: Tue, 5 Nov 2024 23:29:33 +0000 Subject: [PATCH] yaml datasources --- data/datasources/de-gtfs.yaml | 21 + data/datasources/fr-ilevia-lille.yaml | 21 + data/datasources/gb-dft.yaml | 71 ++ data/datasources/gb-nationalrail.yaml | 22 + data/datasources/gb-networkrail.yaml | 16 + data/datasources/gb-traveline.yaml | 12 + data/datasources/ie-tfi.yaml | 24 + data/datasources/se-trafiklab.yaml | 107 +++ .../charts/travigo-data-importer/values.yaml | 52 +- pkg/dataimporter/datasets/dataset.go | 13 +- pkg/dataimporter/datasets/datasources.go | 8 + .../manager/customauthenticators.go | 53 ++ pkg/dataimporter/manager/manager.go | 50 +- pkg/dataimporter/manager/registeredsources.go | 802 +++--------------- 14 files changed, 563 insertions(+), 709 deletions(-) create mode 100644 data/datasources/de-gtfs.yaml create mode 100644 data/datasources/fr-ilevia-lille.yaml create mode 100644 data/datasources/gb-dft.yaml create mode 100644 data/datasources/gb-nationalrail.yaml create mode 100644 data/datasources/gb-networkrail.yaml create mode 100644 data/datasources/gb-traveline.yaml create mode 100644 data/datasources/ie-tfi.yaml create mode 100644 data/datasources/se-trafiklab.yaml create mode 100644 pkg/dataimporter/datasets/datasources.go create mode 100644 pkg/dataimporter/manager/customauthenticators.go diff --git a/data/datasources/de-gtfs.yaml b/data/datasources/de-gtfs.yaml new file mode 100644 index 0000000..3b58ef5 --- /dev/null +++ b/data/datasources/de-gtfs.yaml @@ -0,0 +1,21 @@ +identifier: de-gtfs +region: de +provider: + name: GTFS.de + website: "https://gtfs.de" +datasets: +- identifier: gtfs-schedule + format: gtfs-schedule + source: "https://download.gtfs.de/germany/free/latest.zip" + supportedobjects: + operators: true + stops: true + services: true + journeys: true +- identifier: gtfs-realtime + format: gtfs-realtime + source: "https://realtime.gtfs.de/realtime-free.pb" 
+ supportedobjects: + realtimejourneys: true + linkeddataset: de-gtfs-gtfs-schedule + importdestination: realtime-queue \ No newline at end of file diff --git a/data/datasources/fr-ilevia-lille.yaml b/data/datasources/fr-ilevia-lille.yaml new file mode 100644 index 0000000..21a8f0f --- /dev/null +++ b/data/datasources/fr-ilevia-lille.yaml @@ -0,0 +1,21 @@ +identifier: fr-ilevia-lille +region: fr +provider: + name: Ilévia + website: "http://www.ilevia.fr" +datasets: +- identifier: gtfs-schedule + format: gtfs-schedule + source: "https://media.ilevia.fr/opendata/gtfs.zip" + supportedobjects: + operators: true + stops: true + services: true + journeys: true +- identifier: gtfs-realtime + format: gtfs-realtime + source: "https://proxy.transport.data.gouv.fr/resource/ilevia-lille-gtfs-rt" + supportedobjects: + realtimejourneys: true + linkeddataset: fr-ilevia-lille-gtfs-schedule + importdestination: realtime-queue \ No newline at end of file diff --git a/data/datasources/gb-dft.yaml b/data/datasources/gb-dft.yaml new file mode 100644 index 0000000..f6f630a --- /dev/null +++ b/data/datasources/gb-dft.yaml @@ -0,0 +1,71 @@ +identifier: gb-dft +region: gb +provider: + name: Department for Transport + website: "https://www.gov.uk/government/organisations/department-for-transport" +datasets: +- identifier: naptan + format: gb-naptan + source: "https://naptan.api.dft.gov.uk/v1/access-nodes?dataFormat=xml" + supportedobjects: + stops: true + stopgroups: true +- identifier: bods-gtfs-schedule + format: gtfs-schedule + source: "https://data.bus-data.dft.gov.uk/timetable/download/gtfs-file/all/" + supportedobjects: + services: true + journeys: true + ignoreobjects: + services: + byoperator: + - gb-noc-NATX + journeys: + byoperator: + - gb-noc-NATX +- identifier: bods-transxchange-coach + format: gb-transxchange + source: "https://coach.bus-data.dft.gov.uk/TxC-2.4.zip" + unpackbundle: zip + supportedobjects: + services: true + journeys: true + ignoreobjects: + services: + 
byoperator: + - gb-noc-APBX + - gb-noc-BHAT + - gb-noc-FLIX + - gb-noc-MEGA + - gb-noc-SCLK + - gb-noc-ULSL + journeys: + byoperator: + - gb-noc-APBX + - gb-noc-BHAT + - gb-noc-FLIX + - gb-noc-MEGA + - gb-noc-SCLK + - gb-noc-ULSL +- identifier: bods-sirivm-all + format: eu-siri-vm + source: "https://data.bus-data.dft.gov.uk/avl/download/bulk_archive" + sourceauthentication: + query: + api_key: TRAVIGO_BODS_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: gb-dft-bods-gtfs-schedule + unpackbundle: zip + importdestination: realtime-queue +- identifier: bods-gtfs-realtime + format: gtfs-realtime + source: "https://data.bus-data.dft.gov.uk/avl/download/gtfsrt" + sourceauthentication: + query: + api_key: TRAVIGO_BODS_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: gb-dft-bods-gtfs-schedule + unpackbundle: zip + importdestination: realtime-queue diff --git a/data/datasources/gb-nationalrail.yaml b/data/datasources/gb-nationalrail.yaml new file mode 100644 index 0000000..c53df4d --- /dev/null +++ b/data/datasources/gb-nationalrail.yaml @@ -0,0 +1,22 @@ +identifier: gb-nationalrail +region: gb +provider: + name: National Rail + website: "https://nationalrail.co.uk" +datasets: +- identifier: toc + format: gb-nationalrailtoc + source: "https://opendata.nationalrail.co.uk/api/staticfeeds/4.0/tocs" + sourceauthentication: + custom: "gb-nationalrail-login" + supportedobjects: + operators: true + services: true +- identifier: timetable + format: gb-cif + source: "https://opendata.nationalrail.co.uk/api/staticfeeds/3.0/timetable" + sourceauthentication: + custom: "gb-nationalrail-login" + supportedobjects: + journeys: true + services: true \ No newline at end of file diff --git a/data/datasources/gb-networkrail.yaml b/data/datasources/gb-networkrail.yaml new file mode 100644 index 0000000..ae0133f --- /dev/null +++ b/data/datasources/gb-networkrail.yaml @@ -0,0 +1,16 @@ +identifier: gb-networkrail +region: gb +provider: + name: Network 
Rail + website: "https://networkrail.co.uk" +datasets: +- identifier: corpus + format: gb-networkrailcorpus + source: "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS" + sourceauthentication: + basic: + username: TRAVIGO_NETWORKRAIL_USERNAME + password: TRAVIGO_NETWORKRAIL_PASSWORD + supportedobjects: + stops: true + unpackbundle: gz diff --git a/data/datasources/gb-traveline.yaml b/data/datasources/gb-traveline.yaml new file mode 100644 index 0000000..00ad58d --- /dev/null +++ b/data/datasources/gb-traveline.yaml @@ -0,0 +1,12 @@ +identifier: gb-traveline +region: gb +provider: + name: Traveline + website: "https://www.travelinedata.org.uk/" +datasets: +- identifier: noc + format: gb-travelinenoc + source: "https://www.travelinedata.org.uk/noc/api/1.0/nocrecords.xml" + supportedobjects: + operators: true + operatorgroups: true \ No newline at end of file diff --git a/data/datasources/ie-tfi.yaml b/data/datasources/ie-tfi.yaml new file mode 100644 index 0000000..fd0b2e9 --- /dev/null +++ b/data/datasources/ie-tfi.yaml @@ -0,0 +1,24 @@ +identifier: ie-tfi +region: ie +provider: + name: Transport for Ireland + website: "https://www.transportforireland.ie" +datasets: +- identifier: gtfs-schedule + format: gtfs-schedule + source: "https://www.transportforireland.ie/transitData/Data/GTFS_Realtime.zip" + supportedobjects: + operators: true + stops: true + services: true + journeys: true +- identifier: gtfs-realtime + format: gtfs-realtime + source: "https://api.nationaltransport.ie/gtfsr/v2/gtfsr" + sourceauthentication: + header: + x-api-key: TRAVIGO_IE_NATIONALTRANSPORT_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: ie-tfi-gtfs-schedule + importdestination: realtime-queue \ No newline at end of file diff --git a/data/datasources/se-trafiklab.yaml b/data/datasources/se-trafiklab.yaml new file mode 100644 index 0000000..a8153ab --- /dev/null +++ b/data/datasources/se-trafiklab.yaml @@ -0,0 +1,107 @@ 
+identifier: se-trafiklab +region: se +provider: + name: Trafiklab + website: "https://trafiklab.se" +datasets: +- identifier: gtfs-schedule + format: gtfs-schedule + source: "https://opendata.samtrafiken.se/gtfs-sweden/sweden.zip" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_STATIC_API_KEY + supportedobjects: + operators: true + stops: true + services: true + journeys: true +- identifier: gtfs-realtime-sl-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/sl/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-ul-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/ul/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-otraf-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/otraf/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-klt-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/klt/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-skane-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/skane/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: 
TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-dt-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/dt/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-varm-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/varm/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-xt-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/xt/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue +- identifier: gtfs-realtime-vastmanland-trip + format: gtfs-realtime + source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/vastmanland/TripUpdatesSweden.pb" + sourceauthentication: + query: + key: TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY + supportedobjects: + realtimejourneys: true + linkeddataset: se-trafiklab-gtfs-schedule + importdestination: realtime-queue diff --git a/deploy/charts/travigo-data-importer/values.yaml b/deploy/charts/travigo-data-importer/values.yaml index ae60ec0..3505a53 100644 --- a/deploy/charts/travigo-data-importer/values.yaml +++ b/deploy/charts/travigo-data-importer/values.yaml @@ -61,19 +61,19 @@ cronjobs: args: ["data-importer", "dataset", "--id", "gb-dft-bods-transxchange-coach"] - name: ie-gtfs-schedule schedule: "50 6 * * *" - args: 
["data-importer", "dataset", "--id", "ie-gtfs-schedule"] - batchburst: true - - name: us-nyc-subway-schedule - schedule: "0 8 * * *" - args: ["data-importer", "dataset", "--id", "us-nyc-subway-schedule"] + args: ["data-importer", "dataset", "--id", "ie-tfi-gtfs-schedule"] batchburst: true + # - name: us-nyc-subway-schedule + # schedule: "0 8 * * *" + # args: ["data-importer", "dataset", "--id", "us-nyc-subway-schedule"] + # batchburst: true - name: de-gtfs-full-schedule schedule: "0 7 * * *" args: ["data-importer", "dataset", "--id", "de-gtfs-full-schedule"] batchburst: true - - name: se-gtfs-schedule + - name: se-trafiklab-gtfs-schedule schedule: "30 7 * * *" - args: ["data-importer", "dataset", "--id", "se-gtfs-schedule"] + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-schedule"] batchburst: true - name: fr-ilevia-lille-gtfs-schedule schedule: "0 8 * * *" @@ -92,29 +92,29 @@ deployments: - name: gb-bods-gtfs-rt args: ["data-importer", "dataset", "--id", "gb-dft-bods-gtfs-realtime", "--repeat-every", "10s"] - name: ie-gtfs-realtime - args: ["data-importer", "dataset", "--id", "ie-gtfs-realtime", "--repeat-every", "90s"] + args: ["data-importer", "dataset", "--id", "ie-tfi-gtfs-realtime", "--repeat-every", "90s"] - name: fr-ilevia-lille-gtfs-rt args: ["data-importer", "dataset", "--id", "fr-ilevia-lille-gtfs-realtime", "--repeat-every", "90s"] - name: de-gtfs-full-realtime args: ["data-importer", "dataset", "--id", "de-gtfs-full-realtime", "--repeat-every", "90s"] - - name: se-gtfs-realtime-sl-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-sl-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-ul-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-ul-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-otraf-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-otraf-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-klt-trip - args: ["data-importer", "dataset", "--id", 
"se-gtfs-realtime-klt-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-skane-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-skane-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-dt-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-dt-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-varm-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-varm-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-xt-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-xt-trip", "--repeat-every", "90s"] - - name: se-gtfs-realtime-vastmanland-trip - args: ["data-importer", "dataset", "--id", "se-gtfs-realtime-vastmanland-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-sl-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-sl-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-ul-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-ul-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-otraf-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-otraf-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-klt-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-klt-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-skane-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-skane-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-dt-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-dt-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-varm-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-varm-trip", "--repeat-every", "90s"] + - name: se-trafiklab-gtfs-realtime-xt-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-xt-trip", "--repeat-every", "90s"] + - name: 
se-trafiklab-gtfs-realtime-vastmanland-trip + args: ["data-importer", "dataset", "--id", "se-trafiklab-gtfs-realtime-vastmanland-trip", "--repeat-every", "90s"] # - name: us-nyc-subway-realtime-numbers # args: ["data-importer", "dataset", "--id", "us-nyc-subway-relatime-1-2-3-4-5-6-7", "--repeat-every", "60s"] diff --git a/pkg/dataimporter/datasets/dataset.go b/pkg/dataimporter/datasets/dataset.go index c6fac21..6638714 100644 --- a/pkg/dataimporter/datasets/dataset.go +++ b/pkg/dataimporter/datasets/dataset.go @@ -12,7 +12,8 @@ type DataSet struct { Provider Provider - Source string + Source string + SourceAuthentication SourceAuthentication UnpackBundle BundleFormat SupportedObjects SupportedObjects @@ -27,6 +28,16 @@ type DataSet struct { Queue *rmq.Queue } +type SourceAuthentication struct { + Query map[string]string + Header map[string]string + Basic struct { + Username string + Password string + } + Custom string +} + type DataSetFormat string const ( diff --git a/pkg/dataimporter/datasets/datasources.go b/pkg/dataimporter/datasets/datasources.go new file mode 100644 index 0000000..adbf69e --- /dev/null +++ b/pkg/dataimporter/datasets/datasources.go @@ -0,0 +1,8 @@ +package datasets + +type DataSource struct { + Identifier string + Region string + Provider Provider + Datasets []DataSet +} diff --git a/pkg/dataimporter/manager/customauthenticators.go b/pkg/dataimporter/manager/customauthenticators.go new file mode 100644 index 0000000..3d82c63 --- /dev/null +++ b/pkg/dataimporter/manager/customauthenticators.go @@ -0,0 +1,53 @@ +package manager + +import ( + "encoding/json" + "io" + "net/http" + "net/url" + "strings" + + "github.com/rs/zerolog/log" + "github.com/travigo/travigo/pkg/util" +) + +func customAuthNationalRailLogin() string { + env := util.GetEnvironmentVariables() + if env["TRAVIGO_NATIONALRAIL_USERNAME"] == "" { + log.Fatal().Msg("TRAVIGO_NATIONALRAIL_USERNAME must be set") + } + if env["TRAVIGO_NATIONALRAIL_PASSWORD"] == "" { + 
log.Fatal().Msg("TRAVIGO_NATIONALRAIL_PASSWORD must be set") + } + + formData := url.Values{ + "username": {env["TRAVIGO_NATIONALRAIL_USERNAME"]}, + "password": {env["TRAVIGO_NATIONALRAIL_PASSWORD"]}, + } + + client := &http.Client{} + req, err := http.NewRequest("POST", "https://opendata.nationalrail.co.uk/authenticate", strings.NewReader(formData.Encode())) + if err != nil { + log.Fatal().Err(err).Msg("Failed to create auth HTTP request") + } + + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + resp, err := client.Do(req) + if err != nil { + log.Fatal().Err(err).Msg("Failed to perform auth HTTP request") + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + log.Fatal().Err(err).Msg("Failed to read auth HTTP request") + } + + var loginResponse struct { + Token string `json:"token"` + } + json.Unmarshal(body, &loginResponse) + + return loginResponse.Token +} diff --git a/pkg/dataimporter/manager/manager.go b/pkg/dataimporter/manager/manager.go index ff7dd81..bbdda0b 100644 --- a/pkg/dataimporter/manager/manager.go +++ b/pkg/dataimporter/manager/manager.go @@ -28,6 +28,7 @@ import ( "github.com/travigo/travigo/pkg/dataimporter/formats/transxchange" "github.com/travigo/travigo/pkg/dataimporter/formats/travelinenoc" "github.com/travigo/travigo/pkg/redis_client" + "github.com/travigo/travigo/pkg/util" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo/options" ) @@ -156,7 +157,7 @@ func ImportDataset(dataset *datasets.DataSet, forceImport bool) error { } switch dataset.UnpackBundle { - case datasets.BundleFormatNone: + case datasets.BundleFormatNone, "": sourceFileReaders = append(sourceFileReaders, file) case datasets.BundleFormatGZ: gzipDecoder, err := gzip.NewReader(file) @@ -264,6 +265,45 @@ func tempDownloadFile(dataset *datasets.DataSet, etag string) (bool, *os.File, s req.Header.Set("If-None-Match", etag) } + //// Handle authentication //// + env := util.GetEnvironmentVariables() + // 
Query parameters + for queryKey, queryValue := range dataset.SourceAuthentication.Query { + if env[queryValue] == "" { + log.Fatal().Msgf("%s must be set", queryValue) + } + + q := req.URL.Query() + q.Add(queryKey, env[queryValue]) + req.URL.RawQuery = q.Encode() + } + // Basic auth + if dataset.SourceAuthentication.Basic.Username != "" && dataset.SourceAuthentication.Basic.Password != "" { + if env[dataset.SourceAuthentication.Basic.Username] == "" { + log.Fatal().Msgf("%s must be set", dataset.SourceAuthentication.Basic.Username) + } + if env[dataset.SourceAuthentication.Basic.Password] == "" { + log.Fatal().Msgf("%s must be set", dataset.SourceAuthentication.Basic.Password) + } + + req.SetBasicAuth(env[dataset.SourceAuthentication.Basic.Username], env[dataset.SourceAuthentication.Basic.Password]) + } + // Headers + for headerKey, headerValue := range dataset.SourceAuthentication.Header { + if env[headerValue] == "" { + log.Fatal().Msgf("%s must be set", headerValue) + } + + req.Header.Set(headerKey, env[headerValue]) + } + // Customs + switch dataset.SourceAuthentication.Custom { + case "gb-nationalrail-login": + token := customAuthNationalRailLogin() + req.Header.Set("X-Auth-Token", token) + } + + // TODO delete me later if dataset.DownloadHandler != nil { dataset.DownloadHandler(req) } @@ -271,15 +311,15 @@ func tempDownloadFile(dataset *datasets.DataSet, etag string) (bool, *os.File, s client := &http.Client{} resp, err := client.Do(req) - if resp.StatusCode == http.StatusNotModified { - return false, nil, "" - } - if err != nil { log.Fatal().Err(err).Msg("Download file") } defer resp.Body.Close() + if resp.StatusCode == http.StatusNotModified { + return false, nil, "" + } + tmpFile, err := os.CreateTemp(os.TempDir(), "travigo-data-importer-") if err != nil { log.Fatal().Err(err).Msg("Cannot create temporary file") diff --git a/pkg/dataimporter/manager/registeredsources.go b/pkg/dataimporter/manager/registeredsources.go index 9f6d59a..9298723 100644 --- 
a/pkg/dataimporter/manager/registeredsources.go +++ b/pkg/dataimporter/manager/registeredsources.go @@ -1,704 +1,152 @@ package manager import ( - "encoding/json" - "io" - "net/http" - "net/url" - "strings" + "bytes" + "fmt" + "os" + "path/filepath" "github.com/rs/zerolog/log" "github.com/travigo/travigo/pkg/dataimporter/datasets" - "github.com/travigo/travigo/pkg/util" + "gopkg.in/yaml.v3" ) // Just a static list for now func GetRegisteredDataSets() []datasets.DataSet { - return []datasets.DataSet{ - { - Identifier: "gb-traveline-noc", - Format: datasets.DataSetFormatTravelineNOC, - Provider: datasets.Provider{ - Name: "Traveline", - Website: "https://www.travelinedata.org.uk/", - }, - Source: "https://www.travelinedata.org.uk/noc/api/1.0/nocrecords.xml", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - OperatorGroups: true, - }, - }, - { - Identifier: "gb-dft-naptan", - Format: datasets.DataSetFormatNaPTAN, - Provider: datasets.Provider{ - Name: "Department for Transport", - Website: "https://www.gov.uk/government/organisations/department-for-transport", - }, - Source: "https://naptan.api.dft.gov.uk/v1/access-nodes?dataFormat=xml", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Stops: true, - StopGroups: true, - }, - }, - { - Identifier: "gb-nationalrail-toc", - Format: datasets.DataSetFormatNationalRailTOC, - Provider: datasets.Provider{ - Name: "National Rail", - Website: "https://nationalrail.co.uk", - }, - Source: "https://opendata.nationalrail.co.uk/api/staticfeeds/4.0/tocs", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - Services: true, - }, + var registeredDatasets []datasets.DataSet - DownloadHandler: func(r *http.Request) { - token := nationalRailLogin() - r.Header.Set("X-Auth-Token", token) - }, - }, - { - // Import STANOX Stop IDs to Stops from Network Rail CORPUS dataset - 
Identifier: "gb-networkrail-corpus", - Format: datasets.DataSetFormatNetworkRailCorpus, - Provider: datasets.Provider{ - Name: "Network Rail", - Website: "https://networkrail.co.uk", - }, - Source: "https://publicdatafeeds.networkrail.co.uk/ntrod/SupportingFileAuthenticate?type=CORPUS", - UnpackBundle: datasets.BundleFormatGZ, - SupportedObjects: datasets.SupportedObjects{ - Stops: true, - }, + err := filepath.Walk("data/datasources/", + func(path string, fileInfo os.FileInfo, err error) error { + if err != nil { + return err + } - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_NETWORKRAIL_USERNAME"] == "" { - log.Fatal().Msg("TRAVIGO_NETWORKRAIL_USERNAME must be set") - } - if env["TRAVIGO_NETWORKRAIL_PASSWORD"] == "" { - log.Fatal().Msg("TRAVIGO_NETWORKRAIL_PASSWORD must be set") - } - - r.SetBasicAuth(env["TRAVIGO_NETWORKRAIL_USERNAME"], env["TRAVIGO_NETWORKRAIL_PASSWORD"]) - }, - }, - { - Identifier: "gb-dft-bods-sirivm-all", - Format: datasets.DataSetFormatSiriVM, - Provider: datasets.Provider{ - Name: "Department for Transport", - Website: "https://www.gov.uk/government/organisations/department-for-transport", - }, - Source: "https://data.bus-data.dft.gov.uk/avl/download/bulk_archive", - UnpackBundle: datasets.BundleFormatZIP, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - - LinkedDataset: "gb-dft-bods-gtfs-schedule", - - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_BODS_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_BODS_API_KEY must be set") - } - - q := r.URL.Query() - q.Add("api_key", env["TRAVIGO_BODS_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "gb-dft-bods-gtfs-realtime", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Department for Transport", - Website: 
"https://www.gov.uk/government/organisations/department-for-transport", - }, - Source: "https://data.bus-data.dft.gov.uk/avl/download/gtfsrt", - UnpackBundle: datasets.BundleFormatZIP, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - - LinkedDataset: "gb-dft-bods-gtfs-schedule", - - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_BODS_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_BODS_API_KEY must be set") - } - - q := r.URL.Query() - q.Add("api_key", env["TRAVIGO_BODS_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "gb-dft-bods-gtfs-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "Department for Transport", - Website: "https://www.gov.uk/government/organisations/department-for-transport", - }, - Source: "https://data.bus-data.dft.gov.uk/timetable/download/gtfs-file/all/", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Services: true, - Journeys: true, - }, - IgnoreObjects: datasets.IgnoreObjects{ - Services: datasets.IgnoreObjectServiceJourney{ - ByOperator: []string{"gb-noc-NATX"}, - }, - Journeys: datasets.IgnoreObjectServiceJourney{ - ByOperator: []string{"gb-noc-NATX"}, - }, - }, - - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_BODS_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_BODS_API_KEY must be set") - } - - q := r.URL.Query() - q.Add("api_key", env["TRAVIGO_BODS_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "gb-dft-bods-transxchange-coach", - Format: datasets.DataSetFormatTransXChange, - Provider: datasets.Provider{ - Name: "Department for Transport", - Website: "https://www.gov.uk/government/organisations/department-for-transport", - }, - Source: "https://coach.bus-data.dft.gov.uk/TxC-2.4.zip", - UnpackBundle: 
datasets.BundleFormatZIP, - SupportedObjects: datasets.SupportedObjects{ - Services: true, - Journeys: true, - }, - IgnoreObjects: datasets.IgnoreObjects{ - Services: datasets.IgnoreObjectServiceJourney{ - ByOperator: []string{"gb-noc-APBX", "gb-noc-BHAT", "gb-noc-FLIX", "gb-noc-MEGA", "gb-noc-SCLK", "gb-noc-ULSL"}, - }, - Journeys: datasets.IgnoreObjectServiceJourney{ - ByOperator: []string{"gb-noc-APBX", "gb-noc-BHAT", "gb-noc-FLIX", "gb-noc-MEGA", "gb-noc-SCLK", "gb-noc-ULSL"}, - }, - }, - }, - { - Identifier: "gb-nationalrail-timetable", - Format: datasets.DataSetFormatCIF, - Provider: datasets.Provider{ - Name: "National Rail", - Website: "https://nationalrail.co.uk", - }, - Source: "https://opendata.nationalrail.co.uk/api/staticfeeds/3.0/timetable", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Services: true, - Journeys: true, - }, - - DownloadHandler: func(r *http.Request) { - token := nationalRailLogin() - r.Header.Set("X-Auth-Token", token) - }, - }, - { - Identifier: "ie-gtfs-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "Transport for Ireland", - Website: "https://www.transportforireland.ie", - }, - Source: "https://www.transportforireland.ie/transitData/Data/GTFS_Realtime.zip", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - Stops: true, - Services: true, - Journeys: true, - }, - }, - { - Identifier: "ie-gtfs-realtime", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Transport for Ireland", - Website: "https://www.transportforireland.ie", - }, - Source: "https://api.nationaltransport.ie/gtfsr/v2/gtfsr", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "ie-gtfs-schedule", - - DownloadHandler: func(r 
*http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_IE_NATIONALTRANSPORT_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_IE_NATIONALTRANSPORT_API_KEY must be set") - } - - r.Header.Set("x-api-key", env["TRAVIGO_IE_NATIONALTRANSPORT_API_KEY"]) - }, - }, - { - Identifier: "us-nyc-subway-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "Metropolitan Transportation Authority", - Website: "https://mta.info", - }, - Source: "http://web.mta.info/developers/data/nyct/subway/google_transit.zip", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - Stops: true, - Services: true, - Journeys: true, - }, - }, - // { - // Identifier: "us-nyc-subway-relatime-1-2-3-4-5-6-7", - // Format: datasets.DataSetFormatGTFSRealtime, - // Provider: datasets.Provider{ - // Name: "Metropolitan Transportation Authority", - // Website: "https://mta.info", - // }, - // Source: "https://api-endpoint.mta.info/Dataservice/mtagtfsfeeds/nyct%2Fgtfs", - // UnpackBundle: datasets.BundleFormatNone, - // SupportedObjects: datasets.SupportedObjects{ - // RealtimeJourneys: true, - // }, - // ImportDestination: datasets.ImportDestinationRealtimeQueue, - // LinkedDataset: "us-nyc-subway-schedule", + if !fileInfo.IsDir() { + log.Debug().Str("path", path).Msg("Loading transforms file") - // DownloadHandler: func(r *http.Request) { - // env := util.GetEnvironmentVariables() - // if env["TRAVIGO_US_NYC_MTA_API_KEY"] == "" { - // log.Fatal().Msg("TRAVIGO_US_NYC_MTA_API_KEY must be set") - // } + extension := filepath.Ext(path) - // r.Header.Set("x-api-key", env["TRAVIGO_US_NYC_MTA_API_KEY"]) - // }, - // }, - { - Identifier: "eu-flixbus-gtfs-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "FlixBus", - Website: "https://global.flixbus.com", - }, - Source: "http://gtfs.gis.flix.tech/gtfs_generic_eu.zip", - UnpackBundle: datasets.BundleFormatNone, 
- SupportedObjects: datasets.SupportedObjects{ - Operators: false, - Stops: false, - Services: false, - Journeys: true, - }, - }, - { - Identifier: "us-bart-gtfs-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "BART", - Website: "http://www.bart.gov", - }, - Source: "https://www.bart.gov/dev/schedules/google_transit.zip", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - Stops: true, - Services: true, - Journeys: true, - }, - }, - { - Identifier: "us-bart-gtfs-realtime", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "BART", - Website: "http://www.bart.gov", - }, - Source: "https://api.bart.gov/gtfsrt/tripupdate.aspx", // https://api.bart.gov/gtfsrt/alerts.aspx - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "us-bart-gtfs-schedule", - }, - { - Identifier: "fr-ilevia-lille-gtfs-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "Ilévia", - Website: "http://www.ilevia.fr", - }, - Source: "https://media.ilevia.fr/opendata/gtfs.zip", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - Stops: true, - Services: true, - Journeys: true, - }, - }, - { - Identifier: "fr-ilevia-lille-gtfs-realtime", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Ilévia", - Website: "http://www.ilevia.fr", - }, - Source: "https://proxy.transport.data.gouv.fr/resource/ilevia-lille-gtfs-rt", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "fr-ilevia-lille-gtfs-schedule", - }, - // Germany - { - Identifier: 
"de-gtfs-full-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "GTFS.de", - Website: "https://gtfs.de", - }, - Source: "https://download.gtfs.de/germany/free/latest.zip", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - Stops: true, - Services: true, - Journeys: true, - }, - }, - { - Identifier: "de-gtfs-full-realtime", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "GTFS.de", - Website: "https://gtfs.de", - }, - Source: "https://realtime.gtfs.de/realtime-free.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "de-gtfs-full-schedule", - }, - // Sweden - { - Identifier: "se-gtfs-schedule", - Format: datasets.DataSetFormatGTFSSchedule, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-sweden/sweden.zip", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - Operators: true, - Stops: true, - Services: true, - Journeys: true, - }, - - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_STATIC_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_STATIC_API_KEY must be set") - } - - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_STATIC_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-sl-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/sl/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - 
ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") + if extension != ".yaml" { + return nil } - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-ul-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/ul/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") + transformYaml, err := os.ReadFile(path) + if err != nil { + return err } - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-otraf-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/otraf/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] 
== "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") - } + decoder := yaml.NewDecoder(bytes.NewReader(transformYaml)) - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-klt-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/klt/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") - } + for { + var datasource datasets.DataSource + if decoder.Decode(&datasource) != nil { + break + } - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-skane-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/skane/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") - } + for _, dataset := range datasource.Datasets { + dataset.Identifier = fmt.Sprintf("%s-%s", 
datasource.Identifier, dataset.Identifier) + dataset.Provider = datasource.Provider - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-dt-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/dt/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") + registeredDatasets = append(registeredDatasets, dataset) + } } + } - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-varm-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/varm/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") - } - - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-xt-trip", - Format: 
datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/xt/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") - } - - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - { - Identifier: "se-gtfs-realtime-vastmanland-trip", - Format: datasets.DataSetFormatGTFSRealtime, - Provider: datasets.Provider{ - Name: "Trafiklab", - Website: "https://trafiklab.se", - }, - Source: "https://opendata.samtrafiken.se/gtfs-rt-sweden/vastmanland/TripUpdatesSweden.pb", - UnpackBundle: datasets.BundleFormatNone, - SupportedObjects: datasets.SupportedObjects{ - RealtimeJourneys: true, - }, - ImportDestination: datasets.ImportDestinationRealtimeQueue, - LinkedDataset: "se-gtfs-schedule", - DownloadHandler: func(r *http.Request) { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"] == "" { - log.Fatal().Msg("TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY must be set") - } - - q := r.URL.Query() - q.Add("key", env["TRAVIGO_SE_TRAFIKLAB_REALTIME_API_KEY"]) - r.URL.RawQuery = q.Encode() - }, - }, - } -} - -func nationalRailLogin() string { - env := util.GetEnvironmentVariables() - if env["TRAVIGO_NATIONALRAIL_USERNAME"] == "" { - log.Fatal().Msg("TRAVIGO_NATIONALRAIL_USERNAME must be set") - } - if env["TRAVIGO_NATIONALRAIL_PASSWORD"] == "" { - log.Fatal().Msg("TRAVIGO_NATIONALRAIL_PASSWORD must be set") - } - - formData := url.Values{ - "username": 
{env["TRAVIGO_NATIONALRAIL_USERNAME"]}, - "password": {env["TRAVIGO_NATIONALRAIL_PASSWORD"]}, - } - - client := &http.Client{} - req, err := http.NewRequest("POST", "https://opendata.nationalrail.co.uk/authenticate", strings.NewReader(formData.Encode())) - if err != nil { - log.Fatal().Err(err).Msg("Failed to create auth HTTP request") - } - - req.Header.Set("Content-Type", "application/x-www-form-urlencoded") - - resp, err := client.Do(req) + return nil + }) if err != nil { - log.Fatal().Err(err).Msg("Failed to perform auth HTTP request") - } - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - log.Fatal().Err(err).Msg("Failed to read auth HTTP request") - } - - var loginResponse struct { - Token string `json:"token"` + log.Fatal().Err(err).Msg("Failed to load transforms directory") } - json.Unmarshal(body, &loginResponse) - return loginResponse.Token + return registeredDatasets + + // return []datasets.DataSet{ + // { + // Identifier: "us-nyc-subway-schedule", + // Format: datasets.DataSetFormatGTFSSchedule, + // Provider: datasets.Provider{ + // Name: "Metropolitan Transportation Authority", + // Website: "https://mta.info", + // }, + // Source: "http://web.mta.info/developers/data/nyct/subway/google_transit.zip", + // UnpackBundle: datasets.BundleFormatNone, + // SupportedObjects: datasets.SupportedObjects{ + // Operators: true, + // Stops: true, + // Services: true, + // Journeys: true, + // }, + // }, + // { + // Identifier: "us-nyc-subway-relatime-1-2-3-4-5-6-7", + // Format: datasets.DataSetFormatGTFSRealtime, + // Provider: datasets.Provider{ + // Name: "Metropolitan Transportation Authority", + // Website: "https://mta.info", + // }, + // Source: "https://api-endpoint.mta.info/Dataservice/mtagtfsfeeds/nyct%2Fgtfs", + // UnpackBundle: datasets.BundleFormatNone, + // SupportedObjects: datasets.SupportedObjects{ + // RealtimeJourneys: true, + // }, + // ImportDestination: datasets.ImportDestinationRealtimeQueue, + // 
LinkedDataset: "us-nyc-subway-schedule", + + // DownloadHandler: func(r *http.Request) { + // env := util.GetEnvironmentVariables() + // if env["TRAVIGO_US_NYC_MTA_API_KEY"] == "" { + // log.Fatal().Msg("TRAVIGO_US_NYC_MTA_API_KEY must be set") + // } + + // r.Header.Set("x-api-key", env["TRAVIGO_US_NYC_MTA_API_KEY"]) + // }, + // }, + // { + // Identifier: "eu-flixbus-gtfs-schedule", + // Format: datasets.DataSetFormatGTFSSchedule, + // Provider: datasets.Provider{ + // Name: "FlixBus", + // Website: "https://global.flixbus.com", + // }, + // Source: "http://gtfs.gis.flix.tech/gtfs_generic_eu.zip", + // UnpackBundle: datasets.BundleFormatNone, + // SupportedObjects: datasets.SupportedObjects{ + // Operators: false, + // Stops: false, + // Services: false, + // Journeys: true, + // }, + // }, + // { + // Identifier: "us-bart-gtfs-schedule", + // Format: datasets.DataSetFormatGTFSSchedule, + // Provider: datasets.Provider{ + // Name: "BART", + // Website: "http://www.bart.gov", + // }, + // Source: "https://www.bart.gov/dev/schedules/google_transit.zip", + // UnpackBundle: datasets.BundleFormatNone, + // SupportedObjects: datasets.SupportedObjects{ + // Operators: true, + // Stops: true, + // Services: true, + // Journeys: true, + // }, + // }, + // { + // Identifier: "us-bart-gtfs-realtime", + // Format: datasets.DataSetFormatGTFSRealtime, + // Provider: datasets.Provider{ + // Name: "BART", + // Website: "http://www.bart.gov", + // }, + // Source: "https://api.bart.gov/gtfsrt/tripupdate.aspx", // https://api.bart.gov/gtfsrt/alerts.aspx + // UnpackBundle: datasets.BundleFormatNone, + // SupportedObjects: datasets.SupportedObjects{ + // RealtimeJourneys: true, + // }, + // ImportDestination: datasets.ImportDestinationRealtimeQueue, + // LinkedDataset: "us-bart-gtfs-schedule", + // }, + // } }