Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add pg13 + pg_partman docker #43

Merged
merged 3 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@

Extract Matomo data from the [`Live.getLastVisitsDetails`](https://developer.matomo.org/api-reference/reporting-api) API and push event and visit information to Postgres.

Use [pg_partman](https://github.com/pgpartman/pg_partman) to partition data by month.

## Usage

Create the initial database table with [initial.sql](./initial.sql), then run the following job with the correct environment variables.

```sh
npx @socialgouv/matomo-postgres
```
Expand All @@ -31,7 +35,7 @@ export MATOMO_URL=
export MATOMO_SITE=
export MATOMO_KEY=
export DESTINATION_TABLE= # optional
export STARTDATE= # optional
export STARTDATE= # optional
export OFFSET= # optional
export PGDATABASE=postgres://postgres:postgres@127.0.0.1:5455/postgres
yarn start
Expand Down
10 changes: 9 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
version: "3.0"
services:
postgres:
image: postgres:11
build:
context: ./docker
dockerfile: ./Dockerfile
volumes:
- postgres_data:/var/lib/postgresql/data
environment:
# The official postgres image reads POSTGRES_USER (not POSTGRES_USERNAME);
# docker/initdb.sh also relies on POSTGRES_USER.
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
TZ: "Europe/Paris"
ports:
- 5455:5432


volumes:
postgres_data:
43 changes: 43 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# PostgreSQL 13 (Alpine) image with the pg_partman extension compiled from source.
FROM postgres:13-alpine

# Git tag of pg_partman to download and build.
ENV PG_PARTMAN_VERSION=v4.7.0

# Build and install pg_partman.
# Everything runs in a single RUN instruction so that the temporary fetch/build
# packages removed by the final `apk del` never persist in an image layer
# (deleting them in a later layer would not shrink the image).
RUN set -ex \
    \
    && apk add --no-cache --virtual .fetch-deps \
        ca-certificates \
        openssl \
        tar \
    \
    && apk add --no-cache --virtual .build-deps \
        autoconf \
        automake \
        g++ \
        clang \
        llvm \
        libtool \
        libxml2-dev \
        make \
        perl \
    \
    && wget -O pg_partman.tar.gz "https://github.com/pgpartman/pg_partman/archive/$PG_PARTMAN_VERSION.tar.gz" \
    && mkdir -p /usr/src/pg_partman \
    && tar \
        --extract \
        --file pg_partman.tar.gz \
        --directory /usr/src/pg_partman \
        --strip-components 1 \
    && rm pg_partman.tar.gz \
    && cd /usr/src/pg_partman \
    && make \
    && make install \
    && cd / \
    && rm -rf /usr/src/pg_partman \
    && apk del .fetch-deps .build-deps

# Copy the init script.
# The Docker Postgres entrypoint runs anything placed in
# /docker-entrypoint-initdb.d when the database is first initialised.
COPY initdb.sh /docker-entrypoint-initdb.d/initdb.sh
13 changes: 13 additions & 0 deletions docker/initdb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash -e
# Database init script: creates the pg_partman extension in its own schema,
# then enables the pg_partman background worker in postgresql.conf.
# Reads POSTGRES_USER, POSTGRES_DB and PGDATA from the environment.

echo "Creating partman extension"
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
CREATE SCHEMA partman;
CREATE EXTENSION pg_partman SCHEMA partman;
EOSQL

echo "ADDING pg_partman_bgw TO postgresql.conf"
# The path is quoted so a PGDATA containing spaces or glob characters is not
# word-split, and all settings are appended through a single redirection.
{
  echo "shared_preload_libraries = 'pg_partman_bgw'"
  echo "pg_partman_bgw.interval = 3600"
  echo "pg_partman_bgw.role = '$POSTGRES_USER'"
  echo "pg_partman_bgw.dbname = '$POSTGRES_DB'"
} >> "$PGDATA/postgresql.conf"
73 changes: 73 additions & 0 deletions initial.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
-- Converts an existing matomo table into a table partitioned by month with pg_partman.
-- Usage: psql -v ON_ERROR_STOP=1 < initial.sql

--- pg_partman setup

CREATE SCHEMA IF NOT EXISTS partman;
CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;

--- Back up the current matomo table and recreate it as a partitioned table

-- Copy the current rows into matomo_tmp; they are re-imported into the partitioned table below.
-- NOTE(review): this selects from matomo, so it presumably fails when no matomo
-- table exists yet (fresh database) -- confirm before using this script for a first install.
CREATE TABLE IF NOT EXISTS matomo_tmp as (select * from matomo);

-- Keep the original table under the name matomo_backup.
ALTER TABLE IF EXISTS matomo RENAME TO matomo_backup;

-- New parent table, range-partitioned on action_timestamp.
CREATE TABLE IF NOT EXISTS matomo
(
idsite text,
idvisit text,
actions text,
country text,
region text,
city text,
operatingsystemname text,
devicemodel text,
devicebrand text,
visitduration text,
dayssincefirstvisit text,
visitortype text,
sitename text,
userid text,
serverdateprettyfirstaction date,
action_id text,
action_type text,
action_eventcategory text,
action_eventaction text,
action_eventname text,
action_eventvalue decimal,
action_timespent text,
action_timestamp timestamp with time zone DEFAULT now(),
usercustomproperties json,
usercustomdimensions json,
dimension1 text,
dimension2 text,
dimension3 text,
dimension4 text,
dimension5 text,
dimension6 text,
dimension7 text,
dimension8 text,
dimension9 text,
dimension10 text,
action_url text,
sitesearchkeyword text,
action_title text
) PARTITION BY RANGE (action_timestamp);

-- Uniqueness is enforced on (action_id, action_timestamp): the constraint
-- includes the partition key column alongside action_id.
ALTER TABLE IF EXISTS matomo ADD CONSTRAINT unique_action_id UNIQUE (action_id, action_timestamp);
-- NOTE(review): action_eventvalue is already declared decimal above, so this looks like a no-op -- confirm.
ALTER TABLE IF EXISTS matomo ALTER COLUMN action_eventvalue TYPE decimal USING action_eventvalue::decimal;
-- Indexes on the partitioned table.
CREATE INDEX IF NOT EXISTS idx_action_timestamp_matomo ON matomo (action_timestamp);
CREATE INDEX IF NOT EXISTS idx_idvisit_matomo ON matomo(idvisit);
CREATE INDEX IF NOT EXISTS idx_action_eventcategory_matomo ON matomo(action_eventcategory);
CREATE INDEX IF NOT EXISTS idx_action_type_matomo ON matomo(action_type);
CREATE INDEX IF NOT EXISTS idx_action_eventaction_matomo ON matomo(action_eventaction);

-- Register public.matomo with pg_partman: native partitioning, monthly interval, keyed on action_timestamp.
SELECT partman.create_parent('public.matomo', 'action_timestamp', 'native', 'monthly');

-- Import the data from the standard (non-partitioned) copy into the partitioned table
CALL partman.partition_data_proc('public.matomo', p_source_table := 'public.matomo_tmp', p_order:= 'DESC');

VACUUM ANALYZE public.matomo;

-- The temporary copy is no longer needed once its rows have been imported.
DROP TABLE if exists matomo_tmp;

15 changes: 10 additions & 5 deletions src/__tests__/__snapshots__/index.test.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@

exports[`run: should create table 1`] = `
Array [
"CREATE TABLE IF NOT EXISTS matomo
(
"

CREATE SCHEMA IF NOT EXISTS partman;
CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;
CREATE TABLE IF NOT EXISTS matomo
(
idsite text,
idvisit text,
actions text,
Expand All @@ -19,14 +23,14 @@ Array [
sitename text,
userid text,
serverdateprettyfirstaction date,
action_id text UNIQUE,
action_id text,
action_type text,
action_eventcategory text,
action_eventaction text,
action_eventname text,
action_eventvalue decimal,
action_timespent text,
action_timestamp timestamp with time zone,
action_timestamp timestamp with time zone DEFAULT now(),
usercustomproperties json,
usercustomdimensions json,
dimension1 text,
Expand All @@ -42,7 +46,8 @@ Array [
action_url text,
sitesearchkeyword text,
action_title text
)",
) PARTITION BY RANGE (action_timestamp);
",
Array [],
]
`;
7 changes: 1 addition & 6 deletions src/__tests__/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const matomoVisit = require("./visit.json");

const run = require("../index");

const NB_REQUEST_TO_INIT_DB = 21; // Number of query to init DB (createTable.js)
const NB_REQUEST_TO_INIT_DB = 1; // Number of query to init DB (createTable.js)
const TEST_DATE = new Date();

// @ts-ignore
Expand Down Expand Up @@ -86,11 +86,6 @@ test("run: should fetch the latest event date if no date provided", async () =>

// check db queries
expect(mock_pgQuery.mock.calls[NB_REQUEST_TO_INIT_DB][0]).toEqual(
// call 0 is create table
// call 1 is add column usercustomdimension
// call 2 is add column action_url
// ...
//
"select action_timestamp from matomo order by action_timestamp desc limit 1"
);
});
Expand Down
43 changes: 11 additions & 32 deletions src/createTable.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ const { DESTINATION_TABLE } = require("./config");
*/
async function createTable(client) {
const table = client.escapeIdentifier(DESTINATION_TABLE);
const text = `CREATE TABLE IF NOT EXISTS ${table}
(
const text = `

CREATE SCHEMA IF NOT EXISTS partman;
CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;
CREATE TABLE IF NOT EXISTS ${table}
(
idsite text,
idvisit text,
actions text,
Expand All @@ -25,14 +29,14 @@ async function createTable(client) {
sitename text,
userid text,
serverdateprettyfirstaction date,
action_id text UNIQUE,
action_id text,
action_type text,
action_eventcategory text,
action_eventaction text,
action_eventname text,
action_eventvalue decimal,
action_timespent text,
action_timestamp timestamp with time zone,
action_timestamp timestamp with time zone DEFAULT now(),
usercustomproperties json,
usercustomdimensions json,
dimension1 text,
Expand All @@ -48,37 +52,12 @@ async function createTable(client) {
action_url text,
sitesearchkeyword text,
action_title text
)`;
) PARTITION BY RANGE (action_timestamp);
`;

await client.query(text, []);

const migrations = [
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "usercustomdimensions" json;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "action_url" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "sitesearchkeyword" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "action_title" text;`,
`ALTER TABLE IF EXISTS ${table} ALTER COLUMN action_eventvalue TYPE decimal USING action_eventvalue::decimal;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension1" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension2" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension3" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension4" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension5" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension6" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension7" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension8" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension9" text;`,
`ALTER TABLE IF EXISTS ${table} ADD COLUMN IF NOT EXISTS "dimension10" text;`,
`CREATE INDEX IF NOT EXISTS idx_action_timestamp ON ${table} (action_timestamp);`,
`CREATE INDEX IF NOT EXISTS idx_idvisit ON ${table}(idvisit);`,
`CREATE INDEX IF NOT EXISTS idx_action_eventcategory ON ${table}(action_eventcategory);`,
`CREATE INDEX IF NOT EXISTS idx_action_type ON ${table}(action_type);`,
`CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_action_eventaction ON ${table}(action_eventaction);`,
];

// --------------------------------------------- //
// If you add new query: Don't forget to update //
// const `NB_REQUEST_TO_INIT_DB` (index.test.js) //
// --------------------------------------------- //
const migrations = [];

for (const query of migrations) {
await client.query(query, []);
Expand Down