# Dockerfile (97 lines, 74 loc, 2.46 KB)
# SPDX-FileCopyrightText: 2024 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
# Common foundation shared by the builder stage and the final runtime image.
FROM ubuntu:24.04 AS base

# Locale and timezone inherited by every stage built FROM base.
ENV LANG=C.UTF-8
ENV TZ=UTC

# Preselect debconf's non-interactive frontend so package installation never
# stops to prompt during the build.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# Python interpreter plus the `python` -> `python3` alias.
# The apt package lists are deleted in the same layer: the final image is built
# FROM this stage, so anything left behind here would ship in it. Every later
# stage runs its own `apt-get update` before installing packages.
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    python-is-python3 \
  && rm -rf /var/lib/apt/lists/*
# Build stage: compiles native code and assembles the Python environment.
FROM base AS builder

# Toolchain, download tools, and development headers.
# libleptonica-dev and zlib1g-dev are required by the jbig2 build below.
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
    autoconf \
    automake \
    build-essential \
    ca-certificates \
    curl \
    git \
    libcairo2-dev \
    libffi-dev \
    libleptonica-dev \
    libtool \
    pkg-config \
    zlib1g-dev
# Compile and install jbig2 (jbig2enc) from a pinned upstream commit.
# Needs libleptonica-dev, zlib1g-dev.
#
# The archive is downloaded to a file instead of being piped into tar: the
# default /bin/sh has no pipefail, so a failing curl on the left-hand side of a
# pipe would not by itself fail the step. `-f` makes curl exit non-zero on an
# HTTP error rather than saving the error page as if it were the tarball.
# The source tree and the archive are removed in the same layer.
RUN \
  mkdir jbig2 \
  && curl -fsSL -o jbig2enc.tar.gz \
       https://github.com/agl/jbig2enc/archive/c0141bf.tar.gz \
  && tar xzf jbig2enc.tar.gz -C jbig2 --strip-components=1 \
  && rm jbig2enc.tar.gz \
  && cd jbig2 \
  && ./autogen.sh && ./configure && make && make install \
  && cd .. \
  && rm -rf jbig2
WORKDIR /app

# Copy the uv and uvx binaries from the pinned upstream image.
COPY --from=ghcr.io/astral-sh/uv:0.5.5 /uv /uvx /bin/

# Compile bytecode at install time; copy files into the environment instead of
# linking them, since the uv cache lives on a build-time cache mount.
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy

# Install the project's dependencies using the lockfile and settings.
# Only uv.lock and pyproject.toml are bind-mounted, so this layer is rebuilt
# only when they change. The extras and package exclusion match the project
# install below so that the extras' dependencies are also part of this cached
# layer rather than being reinstalled after every source change.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project \
      --extra test --extra webservice --extra watcher --no-dev \
      --no-install-package pyarrow

# Then, add the rest of the project source code and install it.
# Installing separately from its dependencies allows optimal layer caching.
COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen \
      --extra test --extra webservice --extra watcher --no-dev \
      --no-install-package pyarrow

# Drop version-control metadata once the project is installed, so it is not
# carried along when /app is copied out of this stage. Deleting it only after
# the copy would leave it stored in the copying layer.
RUN rm -rf /app/.git
# Final runtime image.
FROM base

# Register the Tesseract 5 PPA and install the runtime tools in ONE layer.
# Files deleted in a later layer still occupy space in the earlier one, so the
# package lists fetched for software-properties-common can only be reclaimed by
# the trailing `rm -rf` if everything happens in the same RUN.
# The second `apt-get update` picks up the index of the newly added PPA.
RUN apt-get update \
  && apt-get install -y software-properties-common \
  && add-apt-repository -y ppa:alex-p/tesseract-ocr5 \
  && apt-get update \
  && apt-get install -y --no-install-recommends \
    fonts-droid-fallback \
    ghostscript \
    jbig2dec \
    pngquant \
    tesseract-ocr \
    tesseract-ocr-chi-sim \
    tesseract-ocr-deu \
    tesseract-ocr-eng \
    tesseract-ocr-fra \
    tesseract-ocr-por \
    tesseract-ocr-spa \
    ttyd \
    unpaper \
  && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Bring over what the builder stage installed under /usr/local.
COPY --from=builder /usr/local/lib/ /usr/local/lib/
COPY --from=builder /usr/local/bin/ /usr/local/bin/

# Application source tree and its virtual environment.
# The previous `--chown=app:app` was removed: this Dockerfile never creates an
# `app` user or group, so the flag referred to an account that does not exist
# in the image. Files keep the default (root) ownership and the container still
# runs as root, as before.
# NOTE(review): if a non-root runtime user is wanted, create it explicitly
# (groupadd/useradd), restore --chown and add a USER instruction - and check
# the effect on bind-mounted input/output directories first.
COPY --from=builder /app /app

# - ldconfig: refresh the dynamic linker cache, because the libraries above
#   were copied into /usr/local/lib after the last package installation.
# - remove VCS metadata if it is present (no-op otherwise).
# - expose the helper scripts from misc/ at the top of /app.
RUN ldconfig && \
    rm -rf /app/.git && \
    ln -s /app/misc/webservice.py /app/webservice.py && \
    ln -s /app/misc/watcher.py /app/watcher.py

# Put the virtual environment's executables first on PATH.
ENV PATH="/app/.venv/bin:${PATH}"

# Exec form: ocrmypdf runs as PID 1 and receives the `docker run` arguments.
ENTRYPOINT ["/app/.venv/bin/ocrmypdf"]