diff --git a/Dockerfile b/Dockerfile
new file mode 100644
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,56 @@
+FROM ubuntu:18.04
+
+# System packages, installed in one layer ahead of the source COPY.
+# `apt-get update` shares the layer with `install` so a cached, stale package
+# index is never reused, and the index is deleted in the same layer so it does
+# not end up in the image. DEBIAN_FRONTEND is set inline so it applies to the
+# whole install without persisting into the runtime environment.
+#   cmake                      - xgboost build requires cmake
+#   default-jre-headless, perl - some tools are in perl or java
+#   foma-bin                   - flookup, symlinked into bin/foma/ below
+#   wget                       - downloads maltparser below
+# NOTE(review): recommended packages are left enabled on purpose; confirm
+# nothing from them is needed before adding --no-install-recommends.
+RUN apt-get update \
+ && DEBIAN_FRONTEND=noninteractive apt-get -y upgrade \
+ && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        cmake \
+        default-jre-headless \
+        foma-bin \
+        perl \
+        python3 \
+        python3-dev \
+        python3-numpy \
+        python3-pandas \
+        python3-pip \
+        python3-requests \
+        wget \
+ && rm -rf /var/lib/apt/lists/*
+
+# Use pip to install less-stable dependencies. Kept as separate steps, in the
+# original order, so each package sees the ones installed before it.
+RUN pip3 install --no-cache-dir scikit-learn==0.20.2
+RUN pip3 install --no-cache-dir xgboost==0.90
+RUN pip3 install --no-cache-dir joblib depedit
+
+WORKDIR /usr/src/coptic-nlp
+
+# Download, verify, unpack and delete the maltparser tarball in a single layer
+# so the archive is not kept in the image. This runs before the source COPY so
+# the download stays cached when only the source tree changes.
+RUN mkdir -p bin \
+ && wget -O bin/maltparser-1.8.tar.gz http://maltparser.org/dist/maltparser-1.8.tar.gz \
+ && echo '456548f7cc7b84aec6b639826d9a3ca76ad72b310247bf744780b8f6a28c1aee  bin/maltparser-1.8.tar.gz' \
+      | sha256sum -c - \
+ && tar xf bin/maltparser-1.8.tar.gz -C bin \
+ && rm bin/maltparser-1.8.tar.gz
+
+COPY . .
+
+# Steps that need files from the copied source tree (bin/foma/, bin/coptic.mco).
+RUN ln -s /usr/bin/flookup bin/foma/ \
+ && cp bin/coptic.mco bin/maltparser-1.8/coptic.mco
+
+RUN python3 run_tests.py
+
+CMD [ "python3", "coptic_nlp.py" ]
diff --git a/README.md b/README.md index 57e73e8..1cddf64 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,10 @@ The NLP pipeline will run on Python 2.7+ or Python 3.5+ (2.6 and lower are not s * requests * numpy * pandas - * scikit-learn==0.19.0 + * scikit-learn==0.20.2 + * xgboost==0.90 -You should be able to install these manually via pip if necessary (i.e. `pip install scikit-learn==0.19.0`). +You should be able to install these manually via pip if necessary (i.e. `pip install scikit-learn==0.20.2`). 
Note that some versions of Python + Windows do not install numpy correctly from pip, in which case you can download compiled binaries for your version of Python + Windows here: https://www.lfd.uci.edu/~gohlke/pythonlibs/, then run for example: diff --git a/requirements.txt b/requirements.txt index be9f786..4ca467f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ requests pandas -scikit-learn==0.19.0 -xgboost +scikit-learn==0.20.2 +xgboost==0.90 joblib -depedit \ No newline at end of file +depedit