Skip to content

Commit

Permalink
Remontée en base de fichier GTFS et calculs des prochains départs (#2105
Browse files Browse the repository at this point in the history
)

* create tables and schemas

* playing around with Unzip

* properly fill stops table from file

* format

* change table name

* clean temp file

* add calendar table

* streaming all the way ❤️

* create gtfs_stop_times table and schema

* add nimblecsv dep

* fill the stop_times table

* format files

* fix credo warnings

* add calendar dates import

* fill data_import_id

* add trips

* add an array for dow in gtfs_calendar

* add query for next departures

* add some factories

* update factory

* add a simple test for next departure

* add tests for exceptions

* correct the query, thanks to the tests

* new test with exception on top of regular calendar

* additional tests

* real definition of the datetime (for time changes)

* format

* make location_type integer

* extract functions

* create gtfs file for db import test

* add gtfs to db import tests

* extract function and test it

* ignore file for dialyzer

* add gtfs reference links

Co-authored-by: Thibaut Barrère <thibaut.barrere@gmail.com>
  • Loading branch information
fchabouis and thbar authored Feb 28, 2022
1 parent 3fc7a4e commit 5e0b724
Show file tree
Hide file tree
Showing 21 changed files with 975 additions and 3 deletions.
4 changes: 3 additions & 1 deletion .dialyzer_ignore.exs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[
# temporary fix for https://github.com/elixir-ecto/postgrex/issues/549
~r/deps\/postgrex\/lib\/postgrex\/type_module.ex/,
~r/lib\/postgrex\/type_module.ex/
~r/lib\/postgrex\/type_module.ex/,
# EctoInterval raises an unknown_type error
~r/lib\/db\/gtfs_stop_times.ex/
]
11 changes: 11 additions & 0 deletions apps/db/lib/db/data_import.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
defmodule DB.DataImport do
  @moduledoc """
  Ecto schema for the `data_import` table.

  Each row references the `DB.ResourceHistory` it relates to.

  NOTE(review): `DB.GtfsImport` declares a schema on the same `data_import`
  table with the same association; confirm whether both modules are needed.
  """
  use Ecto.Schema
  use TypedEctoSchema

  typed_schema "data_import" do
    # Adds the `resource_history_id` foreign key and the `:resource_history` association.
    belongs_to(:resource_history, DB.ResourceHistory)
  end
end
16 changes: 16 additions & 0 deletions apps/db/lib/db/gtfs_calendar dates.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
defmodule DB.GtfsCalendarDates do
  @moduledoc """
  Ecto schema for the `gtfs_calendar_dates` table.

  It holds the information present in GTFS calendar_dates.txt files, i.e. the
  exceptions to the regular service calendar.
  https://developers.google.com/transit/gtfs/reference?hl=fr#calendar_datestxt
  """
  use Ecto.Schema
  use TypedEctoSchema

  typed_schema "gtfs_calendar_dates" do
    # Import this row belongs to (foreign key `data_import_id`).
    belongs_to(:data_import, DB.GtfsImport)

    # Identifier of the service the exception applies to.
    field(:service_id, :binary)
    # Day on which the exception occurs.
    field(:date, :date)
    # Per the GTFS reference: 1 = service added for that date, 2 = service removed.
    # No validation of the value happens in this schema.
    field(:exception_type, :integer)
  end
end
24 changes: 24 additions & 0 deletions apps/db/lib/db/gtfs_calendar.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
defmodule DB.GtfsCalendar do
  @moduledoc """
  Ecto schema for the `gtfs_calendar` table.

  It holds the information present in GTFS calendar.txt files, i.e. the regular
  weekly service pattern and its validity period.
  https://developers.google.com/transit/gtfs/reference?hl=fr#calendartxt
  """
  use Ecto.Schema
  use TypedEctoSchema

  typed_schema "gtfs_calendar" do
    # Import this row belongs to (foreign key `data_import_id`).
    belongs_to(:data_import, DB.GtfsImport)

    field(:service_id, :binary)
    # One integer flag per weekday; per the GTFS reference, 1 means the service
    # runs on that weekday and 0 means it does not.
    field(:monday, :integer)
    field(:tuesday, :integer)
    field(:wednesday, :integer)
    field(:thursday, :integer)
    field(:friday, :integer)
    field(:saturday, :integer)
    field(:sunday, :integer)
    # Days of week stored as an array of integers.
    # NOTE(review): presumably derived from the seven flags above at import time;
    # the day numbering convention is not visible here - confirm against the importer.
    field(:days, {:array, :integer})
    # Validity period of the service.
    field(:start_date, :date)
    field(:end_date, :date)
  end
end
12 changes: 12 additions & 0 deletions apps/db/lib/db/gtfs_import.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
defmodule DB.GtfsImport do
  @moduledoc """
  GtfsImport lists the imports done for each Resource History.
  It will be a good place to add information about which import is currently in use, published, etc.
  """
  use Ecto.Schema
  use TypedEctoSchema

  # The schema is backed by the `data_import` table (not a `gtfs_import` table).
  typed_schema "data_import" do
    # Adds the `resource_history_id` foreign key and the `:resource_history` association.
    belongs_to(:resource_history, DB.ResourceHistory)
  end
end
17 changes: 17 additions & 0 deletions apps/db/lib/db/gtfs_stop.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
defmodule DB.GtfsStops do
  @moduledoc """
  Ecto schema for the `gtfs_stops` table.

  It holds the information present in GTFS stops.txt files.
  https://developers.google.com/transit/gtfs/reference?hl=fr#stopstxt
  """
  use Ecto.Schema
  use TypedEctoSchema

  typed_schema "gtfs_stops" do
    # Import this row belongs to (foreign key `data_import_id`).
    belongs_to(:data_import, DB.GtfsImport)
    field(:stop_id, :binary)
    field(:stop_name, :binary)
    # Coordinates of the stop, stored as floats.
    field(:stop_lat, :float)
    field(:stop_lon, :float)
    # GTFS location type code, stored as an integer
    # (per the GTFS reference, e.g. 0 = stop/platform, 1 = station).
    field(:location_type, :integer)
  end
end
17 changes: 17 additions & 0 deletions apps/db/lib/db/gtfs_stop_times.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
defmodule DB.GtfsStopTimes do
  @moduledoc """
  Ecto schema for the `gtfs_stop_times` table.

  It holds the information present in GTFS stop_times.txt files.
  https://developers.google.com/transit/gtfs/reference?hl=fr#stop_timestxt
  """
  use Ecto.Schema
  use TypedEctoSchema

  typed_schema "gtfs_stop_times" do
    # Import this row belongs to (foreign key `data_import_id`).
    belongs_to(:data_import, DB.GtfsImport)
    field(:trip_id, :binary)
    field(:stop_id, :binary)
    # Position of the stop within the trip.
    field(:stop_sequence, :integer)
    # Times are stored as intervals through the `EctoInterval` type.
    # NOTE(review): presumably an interval rather than a time of day because GTFS
    # allows values past 24:00:00 for trips running after midnight - confirm.
    field(:arrival_time, EctoInterval)
    field(:departure_time, EctoInterval)
  end
end
16 changes: 16 additions & 0 deletions apps/db/lib/db/gtfs_trips.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
defmodule DB.GtfsTrips do
  @moduledoc """
  Ecto schema for the `gtfs_trips` table.

  It holds the information present in GTFS trips.txt files.
  https://developers.google.com/transit/gtfs/reference?hl=fr#tripstxt
  """
  use Ecto.Schema
  use TypedEctoSchema

  typed_schema "gtfs_trips" do
    # Import this row belongs to (foreign key `data_import_id`).
    belongs_to(:data_import, DB.GtfsImport)

    # Route the trip runs on.
    field(:route_id, :binary)
    # Service (calendar) identifier of the trip; the calendar schemas carry a
    # `service_id` field as well.
    field(:service_id, :binary)
    # Identifier of the trip; the stop times schema carries a `trip_id` field as well.
    field(:trip_id, :binary)
  end
end
3 changes: 2 additions & 1 deletion apps/db/mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ defmodule Db.MixProject do
{:sentry, ">= 0.0.0"},
{:typed_ecto_schema, ">= 0.1.1"},
{:ex_machina, "~> 2.4", only: :test},
{:oban, "~> 2.9"}
{:oban, "~> 2.9"},
{:ecto_interval, git: "https://github.com/etalab/ecto_interval", ref: "master"}
]
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
defmodule DB.Repo.Migrations.CreateGtfsStopsTable do
  use Ecto.Migration

  # Creates the `data_import` table first, then the GTFS tables
  # (stops, calendar, calendar_dates, trips) that each reference it.
  def change do
    create table(:data_import) do
      add :resource_history_id, references(:resource_history)
    end

    # Content of stops.txt
    create table(:gtfs_stops) do
      add :data_import_id, references(:data_import)
      add :stop_id, :binary
      add :stop_name, :binary
      add :stop_lat, :float
      add :stop_lon, :float
      add :location_type, :integer
    end

    # Content of calendar.txt, plus a `days` integer array
    create table(:gtfs_calendar) do
      add :data_import_id, references(:data_import)
      add :service_id, :binary
      add :monday, :integer
      add :tuesday, :integer
      add :wednesday, :integer
      add :thursday, :integer
      add :friday, :integer
      add :saturday, :integer
      add :sunday, :integer
      add :days, {:array, :integer}
      add :start_date, :date
      add :end_date, :date
    end

    # Content of calendar_dates.txt
    create table(:gtfs_calendar_dates) do
      add :data_import_id, references(:data_import)
      add :service_id, :binary
      add :date, :date
      add :exception_type, :integer
    end

    # Content of trips.txt
    create table(:gtfs_trips) do
      add :data_import_id, references(:data_import)
      add :route_id, :binary
      add :service_id, :binary
      add :trip_id, :binary
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
defmodule DB.Repo.Migrations.AddTableGtfsStopTimes do
  use Ecto.Migration

  # Creates `gtfs_stop_times`, then adds its two time columns with raw SQL.
  def up do
    create table(:gtfs_stop_times) do
      add :data_import_id, references(:data_import)
      add :trip_id, :binary
      add :stop_id, :binary
      add :stop_sequence, :integer
    end

    add_interval_column("arrival_time")
    add_interval_column("departure_time")
  end

  # Explicit rollback: `up` uses raw SQL statements, so the table is dropped by hand.
  def down do
    execute "drop table gtfs_stop_times;"
  end

  # Adds an `interval hour to second` column named `column` to `gtfs_stop_times`.
  # NOTE(review): presumably done in raw SQL to get the restricted interval type - confirm.
  defp add_interval_column(column) do
    execute """
    alter table gtfs_stop_times add column #{column} interval hour to second;
    """
  end
end
20 changes: 20 additions & 0 deletions apps/db/test/support/factory.ex
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,26 @@ defmodule DB.Factory do
}
end

# Factory for `DB.DataImport`: a bare struct, no attribute is preset.
def data_import_factory, do: %DB.DataImport{}

# Factory for `DB.GtfsStopTimes`: a bare struct, no attribute is preset.
def gtfs_stop_times_factory, do: %DB.GtfsStopTimes{}

# Factory for `DB.GtfsTrips`: a bare struct, no attribute is preset.
def gtfs_trips_factory, do: %DB.GtfsTrips{}

# Factory for `DB.GtfsCalendar`: a bare struct, no attribute is preset.
def gtfs_calendar_factory, do: %DB.GtfsCalendar{}

# Factory for `DB.GtfsCalendarDates`: a bare struct, no attribute is preset.
def gtfs_calendar_dates_factory, do: %DB.GtfsCalendarDates{}

# Factory for `DB.Validation`: a bare struct, no attribute is preset.
def validation_factory, do: %DB.Validation{}
Expand Down
File renamed without changes.
52 changes: 52 additions & 0 deletions apps/transport/lib/S3/unzip.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
defmodule Transport.Unzip.S3File do
  @moduledoc """
  Handle on a zip archive stored in a S3 bucket, so that it can be read remotely
  by the `unzip` library, as explained here
  https://hexdocs.pm/unzip/readme.html
  """

  defstruct [:path, :bucket, :s3_config]

  @doc """
  Builds the struct describing the zip stored at `path` in `bucket`, to be accessed
  with the given ExAws configuration `s3_config`.
  """
  def new(path, bucket, s3_config),
    do: %__MODULE__{path: path, bucket: bucket, s3_config: s3_config}

  @doc """
  Returns the stream of the entry `file_name` contained in the zip `zip_name`
  stored in the bucket `bucket_name`.

  The S3 credentials are read from the `:ex_aws` application environment.
  A `MatchError` is raised when `Unzip.new/1` does not return `{:ok, _}`.
  """
  def get_file_stream(file_name, zip_name, bucket_name) do
    {:ok, archive} =
      zip_name
      |> new(bucket_name, s3_config())
      |> Unzip.new()

    Unzip.file_stream!(archive, file_name)
  end

  # ExAws S3 configuration: each credential is a list made of the configured
  # value followed by `:instance_role`.
  defp s3_config do
    access_key_id = Application.fetch_env!(:ex_aws, :access_key_id)
    secret_access_key = Application.fetch_env!(:ex_aws, :secret_access_key)

    ExAws.Config.new(:s3,
      access_key_id: [access_key_id, :instance_role],
      secret_access_key: [secret_access_key, :instance_role]
    )
  end
end

defimpl Unzip.FileAccess, for: Transport.Unzip.S3File do
  # Implementation of the protocol used by the `unzip` library to read an archive:
  # it needs the total size of the file and the ability to read arbitrary byte ranges.
  alias ExAws.S3

  # Returns `{:ok, size_in_bytes}`, read from the `content-length` header of a HEAD
  # request on the object.
  #
  # Returns `{:error, :content_length_not_found}` when the header is absent and
  # `{:error, {:invalid_content_length, value}}` when it is not a non-negative
  # integer, instead of crashing on `elem(nil, 1)` / `String.to_integer/1`.
  # The request itself still raises on failure (`ExAws.request!/2`).
  def size(file) do
    %{headers: headers} =
      file.bucket
      |> S3.head_object(file.path)
      |> ExAws.request!(file.s3_config)

    case content_length(headers) do
      nil -> {:error, :content_length_not_found}
      raw -> parse_size(raw)
    end
  end

  # Returns `{:ok, binary}` with `length` bytes of the object starting at `offset`.
  def pread(file, offset, length) do
    # The requested range is expressed with an inclusive end byte, hence the `- 1`.
    # The first element of the returned tuple is ignored.
    {_, chunk} =
      S3.Download.get_chunk(
        %S3.Download{bucket: file.bucket, path: file.path, dest: nil},
        %{start_byte: offset, end_byte: offset + length - 1},
        file.s3_config
      )

    {:ok, chunk}
  end

  # Value of the `content-length` header (name compared case-insensitively),
  # or `nil` when the header is absent.
  defp content_length(headers) do
    Enum.find_value(headers, fn {name, value} ->
      if String.downcase(name) == "content-length", do: value
    end)
  end

  # Strict parsing: the whole header value must be a non-negative integer.
  defp parse_size(raw) do
    case Integer.parse(raw) do
      {size, ""} when size >= 0 -> {:ok, size}
      _ -> {:error, {:invalid_content_length, raw}}
    end
  end
end
Loading

0 comments on commit 5e0b724

Please sign in to comment.