Skip to content

Commit

Permalink
Merge pull request #6 from nlnwa/schema-v002
Browse files Browse the repository at this point in the history
Update schema to v002
  • Loading branch information
maeb authored Sep 29, 2021
2 parents 93a15ea + 0e999d8 commit 6fdae88
Show file tree
Hide file tree
Showing 10 changed files with 542 additions and 137 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Release workflow: builds the Docker image and pushes it to the GitHub
# container registry (ghcr.io) on every push to main and on every v* tag.
name: release

on:
  push:
    branches:
      - main
    tags:
      - v*

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build:
    runs-on: ubuntu-latest

    # Declare the GITHUB_TOKEN scopes explicitly: pushing to ghcr.io needs
    # packages:write, and the build reads the repository contents. Without
    # this the push fails in repositories/organisations whose default token
    # permission is read-only.
    permissions:
      contents: read
      packages: write

    steps:
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # Derives image tags and labels from the git ref that triggered the run
      # (semver version for tags, branch name for branch pushes).
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v3
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=semver,pattern={{version}}
            type=ref,event=branch
            type=ref,event=pr

      - name: Log in to the container registry
        uses: docker/login-action@v1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # No checkout step: build-push-action builds from the git context of
      # the triggering ref by default.
      - name: Build and push Docker image
        uses: docker/build-push-action@v2
        with:
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
36 changes: 36 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Pull-request workflow: verifies that the Docker image builds.
# Nothing is pushed to the registry from this workflow.
name: test

on:
  pull_request: {}

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # Same tag/label rules as the release workflow so the build inputs match.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v3
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=semver,pattern={{version}}
            type=ref,event=branch
            type=ref,event=pr

      # Build only (push: false) to validate the Dockerfile and the code.
      - name: Build Docker image
        uses: docker/build-push-action@v2
        with:
          push: false
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
FROM golang:1.15 as build
FROM golang:1.16 as build

WORKDIR /build

COPY go.mod .
COPY go.sum .

RUN go mod download
RUN GOPROXY=proxy.golang.org go mod download

COPY . .

Expand Down
31 changes: 20 additions & 11 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,24 +1,33 @@
module github.com/nlnwa/veidemann-log-service

go 1.15
go 1.16

require (
github.com/gocql/gocql v0.0.0-20210129204804-4364a4b9cfdd
github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/gocql/gocql v0.0.0-20210817081954-bc256bbb90de
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.2.0
github.com/nlnwa/veidemann-api/go v0.0.0-20210413093311-7ff38e848604
github.com/nlnwa/veidemann-api/go v0.0.0-20210414094839-b36ce92632fe
github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e
github.com/opentracing/opentracing-go v1.2.0
github.com/prometheus/client_golang v1.7.1
github.com/prometheus/common v0.10.0
github.com/rs/zerolog v1.20.0
github.com/scylladb/gocqlx/v2 v2.3.0
github.com/prometheus/client_golang v1.11.0
github.com/prometheus/common v0.31.1
github.com/prometheus/procfs v0.7.3 // indirect
github.com/rs/zerolog v1.25.0
github.com/scylladb/gocqlx/v2 v2.4.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.7.1
github.com/spf13/viper v1.9.0
github.com/testcontainers/testcontainers-go v0.10.0
github.com/uber/jaeger-client-go v2.27.0+incompatible
github.com/uber/jaeger-client-go v2.29.1+incompatible
github.com/uber/jaeger-lib v2.4.1+incompatible // indirect
google.golang.org/grpc v1.33.2
google.golang.org/protobuf v1.26.0
go.uber.org/atomic v1.9.0 // indirect
golang.org/x/net v0.0.0-20210928044308-7d9f5e0b762b // indirect
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6 // indirect
golang.org/x/text v0.3.7 // indirect
google.golang.org/genproto v0.0.0-20210927142257-433400c27d05 // indirect
google.golang.org/grpc v1.41.0
google.golang.org/protobuf v1.27.1
)

replace github.com/gocql/gocql => github.com/scylladb/gocql v1.5.0
462 changes: 346 additions & 116 deletions go.sum

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions internal/logservice/logserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func TestMain(m *testing.M) {

scyllaC, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
ContainerRequest: testcontainers.ContainerRequest{
Image: "scylladb/scylla:4.4.1",
Image: "scylladb/scylla:4.4.4",
ExposedPorts: []string{"9042/tcp", "19042/tcp"},
Networks: []string{networkName},
NetworkAliases: map[string][]string{
Expand All @@ -61,9 +61,12 @@ func TestMain(m *testing.M) {

if _, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
ContainerRequest: testcontainers.ContainerRequest{
Image: "norsknettarkiv/veidemann-log-schema:v1.0.0",
Image: "ghcr.io/nlnwa/veidemann-log-schema:2.0.0",
AutoRemove: true,
Networks: []string{networkName},
Env: map[string]string{
"CQLSH_HOST": "scylla",
},
WaitingFor: wait.ForLog("Schema initialized"),
},
Started: true,
Expand All @@ -83,7 +86,7 @@ func TestMain(m *testing.M) {
panic(err)
}

cfg := scylla.CreateCluster(gocql.LocalQuorum, "v7n_v1_dc1", cqlshHost)
cfg := scylla.CreateCluster(gocql.LocalQuorum, "v7n_v2_dc1", cqlshHost)
cfg.Port = cqlshPort.Int()

session, err = scylla.Connect(cfg)
Expand Down
5 changes: 3 additions & 2 deletions schema/scylladb/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
FROM scylladb/scylla:4.4.1
FROM scylladb/scylla:4.4.4

COPY schema/* /schema/

ENV CQLSH_HOST=scylla
ENV TEMPLATE=/schema/v002.cql.tmpl
ENV CQLSH_HOST=scylla-client
ENTRYPOINT ["/schema/docker.sh"]
4 changes: 2 additions & 2 deletions schema/scylladb/schema/create.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ function usage {
>&2 echo "The following parameters can be set via environment:"
>&2 echo " MODE - prod or test. Test keyspace is usable on a single node cluster (no replication)"
>&2 echo " DATACENTER - datacenter name for network topology used in prod (optional in MODE=test)"
>&2 echo " KEYSPACE - keyspace (default: v7n_v1_{datacenter})"
>&2 echo " KEYSPACE - keyspace (default: v7n_v2_{datacenter})"
>&2 echo " REPLICATION_FACTOR - replication factor for prod (default: 2 for prod, 1 for test)"
>&2 echo ""
>&2 echo "The template-file argument must be fully qualified path to a v00#.cql.tmpl template file."
Expand Down Expand Up @@ -42,7 +42,7 @@ else
usage "invalid MODE=$MODE, expecting 'prod' or 'test'"
fi

keyspace=${KEYSPACE:-"v7n_v1_${datacenter}"}
keyspace=${KEYSPACE:-"v7n_v2_${datacenter}"}

if [[ $keyspace =~ [^a-zA-Z0-9_] ]]; then
usage "invalid characters in KEYSPACE=$keyspace parameter, please use letters, digits or underscores"
Expand Down
2 changes: 1 addition & 1 deletion schema/scylladb/schema/docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ CQLSH_PORT=${CQLSH_PORT:-"9042"}
CQLSH_SSL=${CQLSH_SSL:-""}
CASSANDRA_WAIT_TIMEOUT=${CASSANDRA_WAIT_TIMEOUT:-"60"}
DATACENTER=${DATACENTER:-"dc1"}
KEYSPACE=${KEYSPACE:-"v7n_v1_${DATACENTER}"}
KEYSPACE=${KEYSPACE:-"v7n_v2_${DATACENTER}"}
MODE=${MODE:-"test"}
TEMPLATE=${TEMPLATE:-""}
USER=${CASSANDRA_USERNAME:-""}
Expand Down
79 changes: 79 additions & 0 deletions schema/scylladb/schema/v002.cql.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
--
-- Creates the v7n keyspace together with the user-defined types, tables and
-- secondary indexes used for crawl logs and page logs.
--
-- Required parameters:
--
--   keyspace
--     name of the keyspace
--   replication
--     replication strategy for the keyspace, such as
--       for prod environments
--         {'class': 'NetworkTopologyStrategy', '${datacenter}': '${replication_factor}'}
--       for test environments
--         {'class': 'SimpleStrategy', 'replication_factor': '1'}
--
-- Every statement in this template uses IF NOT EXISTS, so applying it again
-- does not fail on objects that already exist.

CREATE KEYSPACE IF NOT EXISTS ${keyspace} WITH replication = ${replication};

-- User-defined type `error`: an integer code plus `msg` and `detail` text.
-- It is embedded as frozen<error> in the `resource` type and in the
-- `crawl_log` table of this template.
CREATE TYPE IF NOT EXISTS ${keyspace}.error (
code int,
msg text,
detail text,
);

-- User-defined type `resource`: one entry of the `resource` list column of
-- `page_log` (declared there as list<frozen<resource>>).
-- It embeds frozen<error>, so the `error` type has to exist before this
-- statement runs.
CREATE TYPE IF NOT EXISTS ${keyspace}.resource (
warc_id uuid,
uri text,
referrer text,
resource_type text,
content_type text,
discovery_path text,
method text,
from_cache boolean,
from_renderable boolean,
status_code int,
error frozen<error>,
);

-- Table `page_log`: one row per warc_id (the sole primary-key column).
-- `outlink` is a set of text values and `resource` is a list of frozen
-- `resource` UDT values.
CREATE TABLE IF NOT EXISTS ${keyspace}.page_log (
warc_id uuid,
execution_id uuid,
job_execution_id uuid,
collection_final_name text,
uri text,
referrer text,
method text,
outlink set<text>,
resource list<frozen<resource>>,
PRIMARY KEY (warc_id)
);

-- Secondary index (unnamed, so the server picks the index name) on
-- execution_id, which is not part of the primary key.
CREATE INDEX IF NOT EXISTS ON ${keyspace}.page_log (execution_id);

-- Table `crawl_log`: one row per warc_id (the sole primary-key column).
-- The `error` column stores a frozen `error` UDT value.
CREATE TABLE IF NOT EXISTS ${keyspace}.crawl_log (
warc_id uuid,
execution_id uuid,
job_execution_id uuid,
requested_uri text,
response_uri text,
referrer text,
collection_final_name text,
method text,
time_stamp timestamp,
fetch_time_stamp timestamp,
retries int,
ip_address text,
warc_refers_to text,
record_type text,
content_type text,
discovery_path text,
status_code int,
error frozen<error>,
size bigint,
fetch_time_ms bigint,
block_digest text,
payload_digest text,
storage_ref text,
PRIMARY KEY (warc_id)
);

-- Secondary index (unnamed, so the server picks the index name) on
-- execution_id, which is not part of the primary key.
CREATE INDEX IF NOT EXISTS ON ${keyspace}.crawl_log (execution_id);

0 comments on commit 6fdae88

Please sign in to comment.