Skip to content

Commit

Permalink
Merge branch 'release-2.3.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
menshikh-iv committed Jul 25, 2017
2 parents 1b8fdaf + b4d2cd2 commit 2cdf685
Show file tree
Hide file tree
Showing 140 changed files with 14,409 additions and 6,640 deletions.
12 changes: 4 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,12 @@ before_install:
- export PATH=/home/travis/miniconda2/bin:$PATH
- conda update --yes conda
install:
- conda create --yes -n gensim-test python=$TRAVIS_PYTHON_VERSION pip atlas numpy scipy
- conda create --yes -n gensim-test python=$TRAVIS_PYTHON_VERSION pip atlas numpy==1.11.3 scipy==0.18.1
- source activate gensim-test
- pip install pyemd
- pip install annoy
- pip install testfixtures
- pip install unittest2
- pip install scikit-learn
- pip install Morfessor==2.0.2a4
- python setup.py install
script:
- pip install .[test]
script:
- pip freeze
- python setup.py test
- pip install flake8
- continuous_integration/travis/flake8_diff.sh
391 changes: 251 additions & 140 deletions CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ Adopters
| Stillwater Supercomputing | <img src="http://www.stillwater-sc.com/img/stillwater-logo.png" width="100"> | [stillwater-sc.com](http://www.stillwater-sc.com/) | Document comprehension and association with word2vec |
| Channel 4 | <img src="http://www.channel4.com/static/info/images/lib/c4logo_2015_info_corporate.jpg" width="100"> | [channel4.com](http://www.channel4.com/) | Recommendation engine |
| Amazon | <img src="http://g-ec2.images-amazon.com/images/G/01/social/api-share/amazon_logo_500500._V323939215_.png" width="100"> | [amazon.com](http://www.amazon.com/) | Document similarity|
| SiteGround Hosting | <img src="https://www.siteground.com/img/knox/logos/siteground.png" width="100"> | [siteground.com](https://www.siteground.com/) | An ensemble search engine which uses different embeddings models and similarities, including word2vec, WMD, and LDA. |
| Juju | <img src="https://d5k1a84rm5hwo.cloudfront.net/img/juju_home_logo.png" width="100"> | [www.juju.com](http://www.juju.com/) | Provide non-obvious related job suggestions. |

-------

Expand Down
4 changes: 2 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ test_script:
# installed library.
- "mkdir empty_folder"
- "cd empty_folder"
- "pip install pyemd testfixtures unittest2 sklearn Morfessor==2.0.2a4"

- "pip install pyemd testfixtures sklearn Morfessor==2.0.2a4"
- "pip freeze"
- "python -c \"import nose; nose.main()\" -s -v gensim"
# Move back to the project folder
- "cd .."
Expand Down
4 changes: 2 additions & 2 deletions continuous_integration/appveyor/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# fix the versions of numpy to force the use of numpy and scipy to use the whl
# of the rackspace folder instead of trying to install from more recent
# source tarball published on PyPI
numpy==1.9.3
scipy==0.16.0
numpy==1.11.3
scipy==0.18.1
cython
six >= 1.5.0
smart_open >= 1.2.1
Expand Down
5 changes: 3 additions & 2 deletions continuous_integration/travis/flake8_diff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@ echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \
echo '--------------------------------------------------------------------------------'

# We ignore files from sklearn/externals.
# Excluding vec files since they contain non-utf8 content and flake8 raises exception for non-utf8 input
# We need the following command to exit with 0 hence the echo in case
# there is no match
MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")"
MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE -- . ':(exclude)*.vec' || echo "no_match")"

check_files() {
files="$1"
Expand All @@ -133,6 +134,6 @@ check_files() {
if [[ "$MODIFIED_FILES" == "no_match" ]]; then
echo "No file has been modified"
else
check_files "$(echo "$MODIFIED_FILES" )" "--ignore=E501,E731,E12,W503 --exclude=*.sh,*.md,*.yml,*.rst,*.ipynb"
check_files "$(echo "$MODIFIED_FILES" )" "--ignore=E501,E731,E12,W503 --exclude=*.sh,*.md,*.yml,*.rst,*.ipynb,*.txt,*.csv,*.vec,Dockerfile*,*.c,*.pyx,*.inc"
fi
echo -e "No problem detected by flake8\n"
163 changes: 163 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
FROM ubuntu:16.04

# MAINTAINER is deprecated; the same information is recorded as a label
LABEL maintainer="Parul Sethi <parul1sethi@gmail.com>"

# Repository and branch that gensim is cloned from later in this file
ENV GENSIM_REPOSITORY=https://github.com/RaRe-Technologies/gensim.git
ENV GENSIM_BRANCH=develop

# Install the system packages, each pinned to a fixed version:
# Python 2 and 3 with pip and setuptools, build tools (g++, cmake, ant, JDK),
# VCS clients (git, mercurial, subversion), wget/unzip, locales, and the
# Boost, GSL, OpenBLAS and zlib development libraries.
RUN apt-get update \
&& apt-get install -y \
ant=1.9.6-1ubuntu1 \
cmake=3.5.1-1ubuntu3 \
default-jdk=2:1.8-56ubuntu2 \
g++=4:5.3.1-1ubuntu1 \
git=1:2.7.4-0ubuntu1 \
libboost-all-dev=1.58.0.1ubuntu1 \
libgsl-dev=2.1+dfsg-2 \
mercurial=3.7.3-1ubuntu1 \
python3=3.5.1-3 \
python3-pip=8.1.1-2ubuntu0.4 \
python3-setuptools=20.7.0-1 \
python=2.7.11-1 \
python-pip=8.1.1-2ubuntu0.4 \
python-setuptools=20.7.0-1 \
unzip=6.0-20ubuntu1 \
wget=1.17.1-1ubuntu1.1 \
subversion=1.9.3-2ubuntu1 \
locales=2.23-0ubuntu9 \
libopenblas-dev=0.2.18-1ubuntu1 \
libboost-program-options-dev=1.58.0.1ubuntu1 \
zlib1g-dev=1:1.2.8.dfsg-2ubuntu4.1

# Generate the en_US.UTF-8 locale and export it through the locale variables
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 \
    LC_CTYPE=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8

# Upgrade pip, capped below 21.0: that release dropped Python 2.7 and 3.5,
# the interpreters installed above. The distro pip (8.1.1) predates
# Requires-Python support, so an uncapped upgrade could install a pip that
# cannot run on these interpreters.
RUN pip2 install --upgrade 'pip<21'
RUN pip3 install --upgrade 'pip<21'

# Install Python 2 dependencies: pinned PyPI releases, blocks at a fixed
# commit, and the packages listed in blocks' requirements file.
# NOTE(review): the requirements file is fetched from the `stable` branch while
# blocks itself is pinned to a commit - confirm the two stay compatible.
RUN pip2 install \
cython==0.25.2 \
jupyter==1.0.0 \
matplotlib==2.0.0 \
nltk==3.2.2 \
pandas==0.19.2 \
spacy==1.8.1 \
git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \
-r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt

# Same dependency set for Python 3
RUN pip3 install \
cython==0.25.2 \
jupyter==1.0.0 \
matplotlib==2.0.0 \
nltk==3.2.2 \
pandas==0.19.2 \
spacy==1.8.1 \
git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \
-r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt

# Upgrade numpy for both interpreters, to avoid using the old numpy version
# installed by the blocks requirements above
RUN pip2 install -U numpy
RUN pip3 install -U numpy

# Download the English spaCy model for both interpreters
RUN python2 -m spacy download en
RUN python3 -m spacy download en

# Clone gensim from GitHub into /gensim, check out $GENSIM_BRANCH and install
# it (with its test extras) for both Python 2 and Python 3.
# The clone target is explicit so the following `cd /gensim` does not depend on
# the working directory being /, and '.[test]' is quoted so the shell cannot
# interpret the brackets as a glob pattern.
RUN git clone "$GENSIM_REPOSITORY" /gensim \
 && cd /gensim \
 && git checkout "$GENSIM_BRANCH" \
 && pip2 install '.[test]' \
 && python2 setup.py install \
 && pip3 install '.[test]' \
 && python3 setup.py install

# Directory that will hold the external tools installed below
RUN mkdir /gensim/gensim_dependencies

# Paths of the external tools, exported for the gensim wrappers
ENV WR_HOME=/gensim/gensim_dependencies/wordrank \
    FT_HOME=/gensim/gensim_dependencies/fastText \
    MALLET_HOME=/gensim/gensim_dependencies/mallet \
    DTM_PATH=/gensim/gensim_dependencies/dtm/dtm/main \
    VOWPAL_WABBIT_PATH=/gensim/gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw

# Fixed commits / release versions of the wrapper dependencies
ENV WORDRANK_VERSION=44f3f7786f76c79c083dfad9d64e20bacfb4a0b0 \
    FASTTEXT_VERSION=f24a781021862f0e475a5fb9c55b7c1cec3b6e2e \
    MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION=ec2e37a3bcb8bd7b56b75b043c47076bc5decf22 \
    DTM_VERSION=67139e6f526b2bc33aef56dc36176a1b8b210056 \
    MALLET_VERSION=2.0.8 \
    VOWPAL_WABBIT_VERSION=69ecc2847fa0c876c6e0557af409f386f0ced59a

# Install custom dependencies

# Install mpich (a wordrank dependency) and remove openmpi to avoid mpirun conflict.
# `apt-get update` runs in the same layer as the install, so a stale package
# index cached in an earlier layer cannot make the mpich download fail.
RUN apt-get update \
 && apt-get purge -y openmpi-common openmpi-bin libopenmpi1.10 \
 && apt-get install -y mpich

# Install wordrank: clone it, check out $WORDRANK_VERSION, uncomment the
# `export CC=gcc CXX=g++` line in install.sh, then run that script
RUN cd /gensim/gensim_dependencies \
&& git clone https://bitbucket.org/shihaoji/wordrank \
&& cd /gensim/gensim_dependencies/wordrank \
&& git checkout $WORDRANK_VERSION \
&& sed -i -e 's/#export CC=gcc CXX=g++/export CC=gcc CXX=g++/g' install.sh \
&& sh ./install.sh

# Install fastText: clone it, check out $FASTTEXT_VERSION and build with make
RUN cd /gensim/gensim_dependencies \
&& git clone https://github.com/facebookresearch/fastText.git \
&& cd /gensim/gensim_dependencies/fastText \
&& git checkout $FASTTEXT_VERSION \
&& make

# Install MorphologicalPriorsForWordEmbeddings: the repository is only cloned
# and checked out at the pinned commit; no build step is run
RUN cd /gensim/gensim_dependencies \
&& git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git \
&& cd /gensim/gensim_dependencies/MorphologicalPriorsForWordEmbeddings \
&& git checkout $MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION

# Install DTM: clone it, check out $DTM_VERSION and run make inside dtm/dtm
# NOTE(review): DTM_PATH points at dtm/dtm/main, presumably the binary this
# make produces - confirm
RUN cd /gensim/gensim_dependencies \
&& git clone https://github.com/blei-lab/dtm.git \
&& cd /gensim/gensim_dependencies/dtm/dtm \
&& git checkout $DTM_VERSION \
&& make

# Install Mallet: download the release zip into a scratch directory, unpack it,
# move its contents into gensim_dependencies/mallet, delete the scratch
# directory, then build with ant
# NOTE(review): the archive is fetched over plain http and is not
# checksum-verified
RUN mkdir /gensim/gensim_dependencies/mallet \
&& mkdir /gensim/gensim_dependencies/download \
&& cd /gensim/gensim_dependencies/download \
&& wget --quiet http://mallet.cs.umass.edu/dist/mallet-$MALLET_VERSION.zip \
&& unzip mallet-$MALLET_VERSION.zip \
&& mv ./mallet-$MALLET_VERSION/* /gensim/gensim_dependencies/mallet \
&& rm -rf /gensim/gensim_dependencies/download \
&& cd /gensim/gensim_dependencies/mallet \
&& ant

# Install Vowpal Wabbit: clone it, check out $VOWPAL_WABBIT_VERSION, build it
# and install it with `make install`
RUN cd /gensim/gensim_dependencies \
&& git clone https://github.com/JohnLangford/vowpal_wabbit.git \
&& cd /gensim/gensim_dependencies/vowpal_wabbit \
&& git checkout $VOWPAL_WABBIT_VERSION \
&& make \
&& make install

# Start gensim

# Run check script for both interpreters; a non-zero exit from the script
# aborts the image build
RUN python2 /gensim/docker/check_fast_version.py
RUN python3 /gensim/docker/check_fast_version.py

# Add running permission to startup script
RUN chmod +x /gensim/docker/start_jupyter_notebook.sh

# Expose the notebook port and define the starting command for this container.
# Exec form runs the startup script directly instead of wrapping it in
# `/bin/sh -c "sh -c '...'"`.
EXPOSE 9000
CMD ["/gensim/docker/start_jupyter_notebook.sh", "9000"]
21 changes: 21 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Build gensim image

From the `docker` directory, run the following command to build the image locally:

```
docker build -t gensim .
```

# Run a Jupyter notebook with gensim installed

Just execute:

```
docker run -p 9000:9000 gensim
```

# Run an interactive bash session

```
docker run -it gensim /bin/bash
```
10 changes: 10 additions & 0 deletions docker/check_fast_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Check that gensim's fast word2vec module can be imported.

Prints FAST_VERSION and exits with status 0 when
``gensim.models.word2vec_inner`` is importable; otherwise prints a warning
and exits with a non-zero status.
"""
import sys

# Only the import is guarded, so nothing else can be mistaken for a missing
# extension module.
try:
    from gensim.models.word2vec_inner import FAST_VERSION
except ImportError:
    print('Failed... fall back to plain numpy (20-80x slower training than the above)')
    sys.exit(-1)

# A single pre-formatted string prints the same text under Python 2 and 3;
# print(a, b) without the print_function import shows a tuple on Python 2.
print('FAST_VERSION ok ! Retrieved with value %s' % FAST_VERSION)
sys.exit()
7 changes: 7 additions & 0 deletions docker/start_jupyter_notebook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Start a Jupyter notebook server rooted at the gensim notebooks directory.
#
# Usage: start_jupyter_notebook.sh [PORT]
#   PORT - port the server listens on (defaults to 9000 when omitted)

PORT=${1:-9000}
NOTEBOOK_DIR=/gensim/docs/notebooks
DEFAULT_URL=/notebooks/gensim%20Quick%20Start.ipynb

# '*' is quoted so the shell can never expand it as a glob.
# The token argument is still passed as a literal pair of double quotes.
# NOTE(review): presumably Jupyter evaluates that to an empty token - confirm.
jupyter notebook \
  --no-browser \
  --ip='*' \
  --port="$PORT" \
  --allow-root \
  --notebook-dir="$NOTEBOOK_DIR" \
  --NotebookApp.token=\"\" \
  --NotebookApp.default_url="$DEFAULT_URL"
Loading

0 comments on commit 2cdf685

Please sign in to comment.