Skip to content

Commit

Permalink
(sakuli/sakuli-ocr#7) Added tesseract 4.1.1 port for ubuntu 18.04
Browse files Browse the repository at this point in the history
  • Loading branch information
svettwer committed Dec 18, 2020
1 parent 32cd319 commit 625905d
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 11 deletions.
17 changes: 12 additions & 5 deletions .test/goss.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,7 @@ package:
installed: true
dnsutils:
installed: true
tesseract-ocr:
installed: true
tesseract-ocr-deu:
installed: true
tesseract-ocr-eng:
tesseract:
installed: true
port:
tcp:5901:
Expand Down Expand Up @@ -92,3 +88,14 @@ command:
exec: "echo ${LOG_MODE}"
stdout:
- "/^ci$/"
tesseract-languages:
exit-status: 0
exec: "tesseract --list-langs"
stdout:
- "deu"
- "eng"
tesseract-version:
exit-status: 0
exec: "tesseract -v"
stdout:
- "tesseract 4.1.1"
24 changes: 18 additions & 6 deletions Dockerfile.sakuli-base
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This Dockerfile is used to build a headless VNC image based on Ubuntu
# Used to run CI jobs for nut.js
ARG BASE_IMAGE_VERSION=20.04
ARG BASE_IMAGE_VERSION=18.04
FROM ubuntu:$BASE_IMAGE_VERSION

ARG NODE_VERSION
Expand All @@ -27,7 +27,8 @@ ENV REFRESHED_AT=${BUILD_DATE} \
VNC_RESOLUTION=1280x1024 \
VNC_PW=vncpassword \
VNC_VIEW_ONLY=false \
IMG=taconsol/sakuli-base
IMG=taconsol/sakuli-base \
TESSDATA_PREFIX=/usr/local/share/tessdata

EXPOSE $VNC_PORT $NO_VNC_PORT

Expand All @@ -37,6 +38,9 @@ WORKDIR $HOME
COPY ./src/common/install/ $INST_SCRIPTS/
RUN find $INST_SCRIPTS -name '*.sh' -exec chmod a+x {} +

### Copy tesseract
# Stages the contents of ./src/tesseract (the tesseract_4.1.1-1_amd64.deb
# package and the *.traineddata language files) in /tmp/tesseract. The RUN
# step below installs the package with dpkg, copies the language data to
# /usr/local/share/tessdata and then removes /tmp/tesseract.
# NOTE(review): files added by COPY stay in this layer even though a later
# RUN deletes them, so that cleanup does not shrink the image; consider a
# build-time bind mount instead.
COPY ./src/tesseract /tmp/tesseract

### Install required packages and add configuration
RUN apt-get update && \
apt-get install -y \
Expand All @@ -60,6 +64,7 @@ RUN apt-get update && \
firefox-geckodriver \
openbox \
obconf \
obmenu \
xterm \
x11-xkb-utils \
libnss-wrapper \
Expand All @@ -68,9 +73,12 @@ RUN apt-get update && \
iputils-ping \
iputils-tracepath \
dnsutils \
tesseract-ocr \
tesseract-ocr-deu \
tesseract-ocr-eng && \
libleptonica-dev \
libpng-dev \
libjpeg8-dev \
libtiff5-dev \
zlib1g-dev \
libtool && \
apt-get clean -y && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* /var/cache/apt/* && \
Expand All @@ -86,7 +94,11 @@ RUN apt-get update && \
chmod +x -v $NO_VNC_HOME/utils/*.sh && \
ln -s $NO_VNC_HOME/vnc_lite.html $NO_VNC_HOME/index.html && \
ln -s /usr/bin/chromium-browser /usr/bin/google-chrome && \
echo "CHROMIUM_FLAGS='--no-sandbox --user-data-dir'" > $HOME/.chromium-browser.init
echo "CHROMIUM_FLAGS='--no-sandbox --user-data-dir'" > $HOME/.chromium-browser.init && \
dpkg -i /tmp/tesseract/tesseract_4.1.1-1_amd64.deb && \
ldconfig && \
cp /tmp/tesseract/*traineddata /usr/local/share/tessdata && \
rm -rf /tmp/tesseract

COPY ./src/common/wm/ $HOME/
COPY ./src/common/config/openbox /etc/xdg/openbox
Expand Down
Binary file added src/tesseract/deu.traineddata
Binary file not shown.
Binary file added src/tesseract/eng.traineddata
Binary file not shown.
Binary file added src/tesseract/tesseract_4.1.1-1_amd64.deb
Binary file not shown.

0 comments on commit 625905d

Please sign in to comment.