Skip to content

Commit

Permalink
Check PREMIS validity before preservation
Browse files Browse the repository at this point in the history
[skip-codecov]
  • Loading branch information
mcantelon committed Nov 13, 2024
1 parent 92feb35 commit 6798c16
Show file tree
Hide file tree
Showing 15 changed files with 1,584 additions and 22 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
!go.mod
!go.sum
!main.go
!hack/xsd/premis.xsd
15 changes: 15 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
ARG TARGET=enduro
ARG GO_VERSION

# Helper stage: installs libxml2-utils so that the worker stages can copy the
# xmllint binary and the shared libraries it needs via COPY --from=build-libxml.
FROM alpine:3.20 AS build-libxml
RUN apk add --no-cache libxml2-utils

FROM golang:${GO_VERSION}-alpine AS build-go
WORKDIR /src
ENV CGO_ENABLED=0
Expand Down Expand Up @@ -61,10 +64,22 @@ FROM base AS enduro-a3m-worker
COPY --from=build-enduro-a3m-worker --link /out/enduro-a3m-worker /home/enduro/bin/enduro-a3m-worker
COPY --from=build-enduro-a3m-worker --link /src/enduro.toml /home/enduro/.config/enduro.toml
# PREMIS XSD schema; the destination matches validatePremis.xsdPath in enduro.toml.
COPY hack/xsd/premis.xsd /home/enduro/premis.xsd
# xmllint and its shared-library dependencies, taken from the build-libxml stage.
# Presumably invoked by the xmlvalidate activity's XMLLint validator - confirm.
COPY --from=build-libxml /usr/bin/xmllint /usr/bin/xmllint
COPY --from=build-libxml /usr/lib/libxml2.so.2 /usr/lib/libxml2.so.2
COPY --from=build-libxml /lib/libz.so.1 /lib/libz.so.1
COPY --from=build-libxml /usr/lib/liblzma.so.5 /usr/lib/liblzma.so.5
# The musl loader file name embeds the CPU architecture (ld-musl-x86_64.so.1,
# ld-musl-aarch64.so.1, ...). A wildcard source keeps the COPY working on
# non-x86_64 builds; a wildcard source requires a directory destination ending
# in "/".
# NOTE(review): if `base` is already a musl/Alpine image this copy is redundant - confirm.
COPY --from=build-libxml /lib/ld-musl-*.so.1 /lib/
CMD ["/home/enduro/bin/enduro-a3m-worker", "--config", "/home/enduro/.config/enduro.toml"]

FROM base AS enduro-am-worker
COPY --from=build-enduro-am-worker --link /out/enduro-am-worker /home/enduro/bin/enduro-am-worker
COPY --from=build-enduro-am-worker --link /src/enduro.toml /home/enduro/.config/enduro.toml
# PREMIS XSD schema; the destination matches validatePremis.xsdPath in enduro.toml.
COPY hack/xsd/premis.xsd /home/enduro/premis.xsd
# xmllint and its shared-library dependencies, taken from the build-libxml stage.
# Presumably invoked by the xmlvalidate activity's XMLLint validator - confirm.
COPY --from=build-libxml /usr/bin/xmllint /usr/bin/xmllint
COPY --from=build-libxml /usr/lib/libxml2.so.2 /usr/lib/libxml2.so.2
COPY --from=build-libxml /lib/libz.so.1 /lib/libz.so.1
COPY --from=build-libxml /usr/lib/liblzma.so.5 /usr/lib/liblzma.so.5
# The musl loader file name embeds the CPU architecture (ld-musl-x86_64.so.1,
# ld-musl-aarch64.so.1, ...). A wildcard source keeps the COPY working on
# non-x86_64 builds; a wildcard source requires a directory destination ending
# in "/".
# NOTE(review): if `base` is already a musl/Alpine image this copy is redundant - confirm.
COPY --from=build-libxml /lib/ld-musl-*.so.1 /lib/
CMD ["/home/enduro/bin/enduro-am-worker", "--config", "/home/enduro/.config/enduro.toml"]

FROM ${TARGET}
5 changes: 5 additions & 0 deletions cmd/enduro-a3m-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/artefactual-sdps/temporal-activities/bagvalidate"
"github.com/artefactual-sdps/temporal-activities/bucketupload"
"github.com/artefactual-sdps/temporal-activities/removepaths"
"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
"github.com/hashicorp/go-cleanhttp"
"github.com/oklog/run"
"github.com/prometheus/client_golang/prometheus/promhttp"
Expand Down Expand Up @@ -252,6 +253,10 @@ func main() {
archiveextract.New(cfg.ExtractActivity).Execute,
temporalsdk_activity.RegisterOptions{Name: archiveextract.Name},
)
w.RegisterActivityWithOptions(
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
)
w.RegisterActivityWithOptions(
activities.NewClassifyPackageActivity().Execute,
temporalsdk_activity.RegisterOptions{Name: activities.ClassifyPackageActivityName},
Expand Down
5 changes: 5 additions & 0 deletions cmd/enduro-am-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/artefactual-sdps/temporal-activities/bagvalidate"
"github.com/artefactual-sdps/temporal-activities/bucketupload"
"github.com/artefactual-sdps/temporal-activities/removepaths"
"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
"github.com/hashicorp/go-cleanhttp"
"github.com/jonboulle/clockwork"
"github.com/oklog/run"
Expand Down Expand Up @@ -329,6 +330,10 @@ func main() {
bucketupload.New(failedPIPs).Execute,
temporalsdk_activity.RegisterOptions{Name: activities.SendToFailedPIPsName},
)
w.RegisterActivityWithOptions(
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
)

g.Add(
func() error {
Expand Down
4 changes: 4 additions & 0 deletions enduro.toml
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ namespace = "default"
taskQueue = "preprocessing"
workflowName = "preprocessing"

# PREMIS validation settings. xsdPath is the location the Dockerfile copies
# hack/xsd/premis.xsd to inside the worker images.
# NOTE(review): hack/kube/base/enduro.yaml sets ENDURO_VALIDATEPREMIS_ENABLED to
# "false", which presumably overrides `enabled` in that deployment - confirm.
[validatePremis]
enabled = true
xsdPath = "/home/enduro/premis.xsd"

[failedSips]
endpoint = "http://minio.enduro-sdps:9000"
pathStyle = true
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/XSAM/otelsql v0.29.0
github.com/alicebob/miniredis/v2 v2.32.1
github.com/artefactual-labs/bagit-gython v0.2.0
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4
github.com/coreos/go-oidc/v3 v3.10.0
github.com/cyphar/filepath-securejoin v0.2.4
github.com/dolmen-go/contextio v1.0.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -442,8 +442,8 @@ github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew
github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4=
github.com/artefactual-labs/bagit-gython v0.2.0 h1:Zje4Lb1goZVUPoxpc/k65sWtYpNgK9Rvphvaok5cYzE=
github.com/artefactual-labs/bagit-gython v0.2.0/go.mod h1:C+hFZQMDnji1hjGt3nrlMK3BahaBhvo/hU2uqd+Q9Z4=
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b h1:kTOc2pbkdII6/Z84Bus1q52z5KAOaT8vLpfRoOs1l1I=
github.com/artefactual-sdps/temporal-activities v0.0.0-20240821162351-47302711bc7b/go.mod h1:FVh79rCGNlUU1QnioAU+lrSjLqrA1PJFYKIhWPsmyug=
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4 h1:WF95IOkZRVSCST/26SAqPYsUrtUuJpavBht6lvdeKl0=
github.com/artefactual-sdps/temporal-activities v0.0.0-20241018212855-8ea34d29bdf4/go.mod h1:FVh79rCGNlUU1QnioAU+lrSjLqrA1PJFYKIhWPsmyug=
github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
github.com/aws/aws-sdk-go-v2 v1.30.3 h1:jUeBtG0Ih+ZIFH0F4UkmL9w3cSpaMv9tYYDbzILP8dY=
Expand Down
2 changes: 2 additions & 0 deletions hack/kube/base/enduro.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ spec:
value: "grafana-alloy.enduro-sdps:4317"
- name: ENDURO_TELEMETRY_TRACES_SAMPLING_RATIO
value: "1.0"
- name: ENDURO_VALIDATEPREMIS_ENABLED
value: "false"
ports:
- containerPort: 9000
- containerPort: 9002
Expand Down
Loading

0 comments on commit 6798c16

Please sign in to comment.