Subpackage refactor. Fix 1584 #1618

Closed · wants to merge 9 commits
2 changes: 1 addition & 1 deletion docs/notebooks/Tensorboard_visualizations.ipynb
@@ -896,7 +896,7 @@
"source": [
"import pandas as pd\n",
"import re\n",
"from gensim.parsing.preprocessing import remove_stopwords, strip_punctuation\n",
"from gensim.utils.text_utils import remove_stopwords, strip_punctuation\n",
"from gensim.models import ldamodel\n",
"from gensim.corpora.dictionary import Dictionary\n",
"\n",
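The hunks in these notebooks change only an import path: `remove_stopwords` and `strip_punctuation` move from `gensim.parsing.preprocessing` to `gensim.utils.text_utils`. For intuition about what the two helpers do, here is a simplified, self-contained stand-in — not gensim's actual implementation, and the tiny `STOPWORDS` set is an assumption (the real frozenset is far larger):

```python
import re
import string

# Small stand-in for gensim's STOPWORDS frozenset (assumption: the real
# list contains hundreds of entries).
STOPWORDS = {"the", "a", "an", "and", "of", "to", "in", "is"}

# Replace each run of punctuation characters with a single space.
RE_PUNCT = re.compile(r"([%s])+" % re.escape(string.punctuation))


def strip_punctuation(s):
    return RE_PUNCT.sub(" ", s)


def remove_stopwords(s):
    return " ".join(w for w in s.split() if w.lower() not in STOPWORDS)


print(remove_stopwords(strip_punctuation("The Matrix is a 1999 film.")))
# -> Matrix 1999 film
```

Because only the module path changes, code like the above keeps working after the refactor once the import line is updated.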
2 changes: 1 addition & 1 deletion docs/notebooks/Topic_dendrogram.ipynb
@@ -161,7 +161,7 @@
"source": [
"from gensim.models.ldamodel import LdaModel\n",
"from gensim.corpora import Dictionary\n",
"from gensim.parsing.preprocessing import remove_stopwords, strip_punctuation\n",
"from gensim.utils.text_utils import remove_stopwords, strip_punctuation\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
2 changes: 1 addition & 1 deletion docs/notebooks/Training_visualizations.ipynb
@@ -48,7 +48,7 @@
"from gensim.corpora import Dictionary\n",
"import pandas as pd\n",
"import re\n",
"from gensim.parsing.preprocessing import remove_stopwords, strip_punctuation\n",
"from gensim.utils.text_utils import remove_stopwords, strip_punctuation\n",
"\n",
"import numpy as np\n",
"\n",
2 changes: 1 addition & 1 deletion docs/notebooks/Wordrank_comparisons.ipynb
@@ -38,7 +38,7 @@
],
"source": [
"import nltk\n",
"from gensim.parsing.preprocessing import strip_punctuation, strip_multiple_whitespaces\n",
"from gensim.utils.text_utils import strip_punctuation, strip_multiple_whitespaces\n",
"\n",
"# Only the brown corpus is needed in case you don't have it.\n",
"nltk.download('brown') \n",
123 changes: 61 additions & 62 deletions docs/notebooks/summarization_tutorial.ipynb
@@ -23,25 +23,21 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-09-19 12:45:22,358 : INFO : Pattern library is not installed, lemmatization won't be available.\n",
"2016-09-19 12:45:22,361 : INFO : Could not import Theano, will use standard float for default ShardedCorpus dtype.\n",
"2016-09-19 12:45:22,372 : INFO : 'pattern' package not found; tag filters are not available for English\n"
"2017-10-10 10:19:01,237 : INFO : 'pattern' package not found; tag filters are not available for English\n"
]
}
],
"source": [
"import logging\n",
"logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)\n",
"\n",
"from gensim.summarization import summarize"
"from gensim.models import summarize"
]
},
{
@@ -54,9 +50,7 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@@ -99,18 +93,16 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-09-19 12:45:22,405 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2016-09-19 12:45:22,405 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2016-09-19 12:45:22,406 : INFO : built Dictionary(53 unique tokens: ['realiti', 'averag', 'polic', 'legendari', 'hacker']...) from 6 documents (total 68 corpus positions)\n",
"2016-09-19 12:45:22,406 : WARNING : Input corpus is expected to have at least 10 documents.\n"
"2017-10-10 10:19:01,259 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2017-10-10 10:19:01,260 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2017-10-10 10:19:01,261 : INFO : built Dictionary(53 unique tokens: [u'electrochem', u'real', u'captur', u'mind', u'agent']...) from 6 documents (total 68 corpus positions)\n",
"2017-10-10 10:19:01,262 : WARNING : Input corpus is expected to have at least 10 documents.\n"
]
},
{
@@ -137,18 +129,16 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-09-19 12:45:22,428 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2016-09-19 12:45:22,428 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2016-09-19 12:45:22,429 : INFO : built Dictionary(53 unique tokens: ['realiti', 'averag', 'polic', 'legendari', 'hacker']...) from 6 documents (total 68 corpus positions)\n",
"2016-09-19 12:45:22,430 : WARNING : Input corpus is expected to have at least 10 documents.\n"
"2017-10-10 10:19:01,270 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2017-10-10 10:19:01,271 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2017-10-10 10:19:01,272 : INFO : built Dictionary(53 unique tokens: [u'electrochem', u'real', u'captur', u'mind', u'agent']...) from 6 documents (total 68 corpus positions)\n",
"2017-10-10 10:19:01,272 : WARNING : Input corpus is expected to have at least 10 documents.\n"
]
},
{
@@ -173,18 +163,16 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-09-19 12:45:22,446 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2016-09-19 12:45:22,446 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2016-09-19 12:45:22,447 : INFO : built Dictionary(53 unique tokens: ['realiti', 'averag', 'polic', 'legendari', 'hacker']...) from 6 documents (total 68 corpus positions)\n",
"2016-09-19 12:45:22,447 : WARNING : Input corpus is expected to have at least 10 documents.\n"
"2017-10-10 10:19:01,280 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2017-10-10 10:19:01,281 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2017-10-10 10:19:01,282 : INFO : built Dictionary(53 unique tokens: [u'electrochem', u'real', u'captur', u'mind', u'agent']...) from 6 documents (total 68 corpus positions)\n",
"2017-10-10 10:19:01,283 : WARNING : Input corpus is expected to have at least 10 documents.\n"
]
},
{
@@ -213,18 +201,16 @@
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-09-19 12:45:22,463 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2016-09-19 12:45:22,464 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2016-09-19 12:45:22,464 : INFO : built Dictionary(53 unique tokens: ['realiti', 'averag', 'polic', 'legendari', 'hacker']...) from 6 documents (total 68 corpus positions)\n",
"2016-09-19 12:45:22,465 : WARNING : Input corpus is expected to have at least 10 documents.\n"
"2017-10-10 10:19:01,290 : WARNING : Input text is expected to have at least 10 sentences.\n",
"2017-10-10 10:19:01,291 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2017-10-10 10:19:01,292 : INFO : built Dictionary(53 unique tokens: [u'electrochem', u'real', u'captur', u'mind', u'agent']...) from 6 documents (total 68 corpus positions)\n",
"2017-10-10 10:19:01,293 : WARNING : Input corpus is expected to have at least 10 documents.\n"
]
},
{
@@ -251,9 +237,7 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@@ -265,13 +249,13 @@
"neo\n",
"humans body\n",
"super\n",
"reality\n",
"hacker\n"
"hacker\n",
"reality\n"
]
}
],
"source": [
"from gensim.summarization import keywords\n",
"from gensim.models import keywords\n",
"\n",
"print ('Keywords:')\n",
"print (keywords(text))"
@@ -290,18 +274,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-09-19 12:45:22,510 : INFO : Starting new HTTP connection (1): rare-technologies.com\n",
"2016-09-19 12:45:23,035 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2016-09-19 12:45:23,042 : INFO : built Dictionary(1093 unique tokens: ['realiti', 'keanu', 'miseri', 'vestig', 'massiv']...) from 416 documents (total 2985 corpus positions)\n"
"2017-10-10 10:19:02,079 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2017-10-10 10:19:02,089 : INFO : built Dictionary(1093 unique tokens: [u'code', u'squiddi', u'relai', u'dinosaur', u'electron']...) from 416 documents (total 2985 corpus positions)\n"
]
},
{
@@ -355,16 +336,34 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-09-19 12:45:25,227 : INFO : Starting new HTTP connection (1): rare-technologies.com\n"
"2017-10-10 10:19:05,119 : INFO : adding document #0 to Dictionary(0 unique tokens: [])\n",
"2017-10-10 10:19:05,127 : INFO : built Dictionary(1054 unique tokens: [u'fawn', u'windi', u'concept', u'doctor', u'gant']...) from 227 documents (total 2434 corpus positions)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Summary:\n",
"The answering machine records a woman introducing herself as Maude Lebowski and saying that she is the one who took his rug and has sent a car to pick Dude up at his apartment.\n",
"As he climbs out of bed to make a White Russian, Maude asks about the apartment and Dude explains that Treehorn's thugs most likely vandalized it looking for Lebowski's money.\n",
"\n",
"Keywords:\n",
"dude\n",
"dudes\n",
"walter\n",
"lebowski\n",
"brandt\n",
"maude\n",
"donny\n",
"bunny\n"
]
}
],
@@ -413,23 +412,23 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 2",
"language": "python",
"name": "python3"
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}
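The tutorial above exercises `summarize()` and `keywords()`, which in gensim are graph-based (TextRank over sentence and word similarity). For intuition only, here is a toy frequency-based extractive summarizer — a deliberate simplification that only shows the extractive idea (score sentences, keep the best, preserve document order), not the PR's `models/summarization` code:

```python
import re
from collections import Counter


def toy_summarize(text, n_sentences=1):
    """Pick the n highest-scoring sentences by summed word frequency.

    A toy stand-in for gensim's summarize(), which actually builds a
    TextRank graph over sentence similarity.
    """
    # Naive sentence split: break after ., !, or ? followed by whitespace.
    sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s.strip()]
    # Corpus-wide word frequencies act as a crude importance signal.
    freqs = Counter(w.lower() for w in re.findall(r"[a-zA-Z']+", text))
    scored = sorted(
        range(len(sentences)),
        key=lambda i: sum(freqs[w.lower()] for w in re.findall(r"[a-zA-Z']+", sentences[i])),
        reverse=True,
    )
    keep = sorted(scored[:n_sentences])  # restore document order
    return " ".join(sentences[i] for i in keep)


text = ("Thomas is a computer programmer. "
        "Thomas leads a double life as a hacker. "
        "The police look for the hacker.")
print(toy_summarize(text, 1))
# -> Thomas leads a double life as a hacker.
```

The real implementation differs mainly in the scoring step: TextRank iterates a PageRank-style random walk over a sentence-similarity graph instead of summing raw frequencies.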
2 changes: 1 addition & 1 deletion docs/notebooks/topic_network.ipynb
@@ -27,7 +27,7 @@
"from gensim.corpora import Dictionary\n",
"import pandas as pd\n",
"import re\n",
"from gensim.parsing.preprocessing import remove_stopwords, strip_punctuation\n",
"from gensim.utils.text_utils import remove_stopwords, strip_punctuation\n",
"\n",
"import numpy as np"
]
Expand Down
39 changes: 17 additions & 22 deletions docs/src/apiref.rst
@@ -9,7 +9,8 @@ Modules:
:maxdepth: 0

interfaces
utils
utils/utils
utils/text_utils
matutils
corpora/bleicorpus
corpora/csvcorpus
@@ -45,6 +46,20 @@
models/fasttext
models/phrases
models/coherencemodel
models/_coherence/aggregation
models/_coherence/direct_confirmation_measure
models/_coherence/indirect_confirmation_measure
models/_coherence/probability_estimation
models/_coherence/segmentation
models/_coherence/text_analysis
models/summarization/bm25
models/summarization/commons
models/summarization/graph
models/summarization/keywords
models/summarization/pagerank_weighted
models/summarization/summariser
models/summarization/syntactic_unit
models/summarization/textcleaner
models/basemodel
models/callbacks
models/wrappers/ldamallet
@@ -66,26 +81,6 @@
sklearn_api/text2bow
sklearn_api/tfidf
sklearn_api/w2vmodel
topic_coherence/aggregation
topic_coherence/direct_confirmation_measure
topic_coherence/indirect_confirmation_measure
topic_coherence/probability_estimation
topic_coherence/segmentation
topic_coherence/text_analysis
scripts/glove2word2vec
scripts/make_wikicorpus
scripts/word2vec_standalone
scripts/make_wiki_online
scripts/make_wiki_online_lemma
scripts/make_wiki_online_nodebug
scripts/make_wiki
scripts/word2vec2tensor
parsing/porter
parsing/preprocessing
summarization/bm25
summarization/commons
summarization/graph
summarization/keywords
summarization/pagerank_weighted
summarization/summariser
summarization/syntactic_unit
summarization/textcleaner
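Among the relocated API reference entries, `models/summarization/bm25` points at gensim's Okapi BM25 module. A compact sketch of the BM25 scoring formula such a module implements — the parameter defaults (`k1`, `b`) and the exact IDF smoothing here are assumptions, not necessarily gensim's choices:

```python
import math
from collections import Counter


def bm25_scores(query, docs, k1=1.5, b=0.75):
    """Score each tokenized document in docs against a tokenized query
    with Okapi BM25: sum over query terms of
    idf(t) * tf * (k1 + 1) / (tf + k1 * (1 - b + b * |d| / avgdl)).
    """
    N = len(docs)
    avgdl = sum(len(d) for d in docs) / N
    df = Counter()  # document frequency per term
    for d in docs:
        for term in set(d):
            df[term] += 1
    scores = []
    for d in docs:
        tf = Counter(d)
        s = 0.0
        for t in query:
            if t not in tf:
                continue
            # Smoothed IDF; kept non-negative via the 1 + ... form.
            idf = math.log(1 + (N - df[t] + 0.5) / (df[t] + 0.5))
            s += idf * tf[t] * (k1 + 1) / (tf[t] + k1 * (1 - b + b * len(d) / avgdl))
        scores.append(s)
    return scores


docs = [["hacker", "matrix", "neo"],
        ["police", "report"],
        ["neo", "hacker", "hacker"]]
print(bm25_scores(["hacker"], docs))
```

Documents with more occurrences of a query term score higher, with diminishing returns controlled by `k1` and length normalization controlled by `b`.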
9 changes: 9 additions & 0 deletions docs/src/models/_coherence/aggregation.rst
@@ -0,0 +1,9 @@
:mod:`models._coherence.aggregation` -- Aggregation module
==========================================================

.. automodule:: gensim.models._coherence.aggregation
:synopsis: Aggregation module
:members:
:inherited-members:
:undoc-members:
:show-inheritance:
@@ -1,7 +1,7 @@
:mod:`topic_coherence.direct_confirmation_measure` -- Direct confirmation measure module
========================================================================================
:mod:`models._coherence.direct_confirmation_measure` -- Direct confirmation measure module
==========================================================================================

.. automodule:: gensim.topic_coherence.direct_confirmation_measure
.. automodule:: gensim.models._coherence.direct_confirmation_measure
:synopsis: Direct confirmation measure module
:members:
:inherited-members:
@@ -1,7 +1,7 @@
:mod:`topic_coherence.indirect_confirmation_measure` -- Indirect confirmation measure module
============================================================================================
:mod:`models._coherence.indirect_confirmation_measure` -- Indirect confirmation measure module
==============================================================================================

.. automodule:: gensim.topic_coherence.indirect_confirmation_measure
.. automodule:: gensim.models._coherence.indirect_confirmation_measure
:synopsis: Indirect confirmation measure module
:members:
:inherited-members: