From f67b095435ab91e37bb07c5139e33ded631bea6b Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Sun, 18 Jun 2017 13:39:08 -0700
Subject: [PATCH 1/8] updated ipynb for new sklearn wrappers

---
 docs/notebooks/sklearn_wrapper.ipynb | 308 +++++++++++++--------------
 1 file changed, 150 insertions(+), 158 deletions(-)

diff --git a/docs/notebooks/sklearn_wrapper.ipynb b/docs/notebooks/sklearn_wrapper.ipynb
index cc5e85d3a2..6bf336b08e 100644
--- a/docs/notebooks/sklearn_wrapper.ipynb
+++ b/docs/notebooks/sklearn_wrapper.ipynb
@@ -18,17 +18,21 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The wrapper available (as of now) are :\n",
-    "* LdaModel (```gensim.sklearn_integration.sklearn_wrapper_gensim_ldaModel.SklearnWrapperLdaModel```),which implements gensim's ```LdaModel``` in a scikit-learn interface\n",
+    "The wrappers available (as of now) are:\n",
+    "* LdaModel (```gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel.SklLdaModel```), which implements gensim's ```LDA Model``` in a scikit-learn interface\n",
     "\n",
-    "* LsiModel (```gensim.sklearn_integration.sklearn_wrapper_gensim_lsiModel.SklearnWrapperLsiModel```),which implements gensim's ```LsiModel``` in a scikit-learn interface"
+    "* LsiModel (```gensim.sklearn_integration.sklearn_wrapper_gensim_lsimodel.SklLsiModel```), which implements gensim's ```LSI Model``` in a scikit-learn interface\n",
+    "\n",
+    "* RpModel (```gensim.sklearn_integration.sklearn_wrapper_gensim_rpmodel.SklRpModel```), which implements gensim's ```Random Projections Model``` in a scikit-learn interface\n",
+    "\n",
+    "* LDASeq Model (```gensim.sklearn_integration.sklearn_wrapper_gensim_ldaseqmodel.SklLdaSeqModel```), which implements gensim's ```LdaSeqModel``` in a scikit-learn interface"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### LdaModel"
+    "### LDA Model"
    ]
   },
   {
@@ -40,13 +44,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
    "outputs": [],
    "source": [
-    "from gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel import SklearnWrapperLdaModel"
+    "from gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel import SklLdaModel"
    ]
   },
   {
@@ -58,7 +62,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -89,41 +93,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "array([[ 0.85275314,  0.14724686],\n",
-       "       [ 0.12390183,  0.87609817],\n",
-       "       [ 0.4612995 ,  0.5387005 ],\n",
-       "       [ 0.84924177,  0.15075823],\n",
-       "       [ 0.49180096,  0.50819904],\n",
-       "       [ 0.40086923,  0.59913077],\n",
-       "       [ 0.28454427,  0.71545573],\n",
-       "       [ 0.88776198,  0.11223802],\n",
-       "       [ 0.84210373,  0.15789627]])"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model=SklearnWrapperLdaModel(num_topics=2, id2word=dictionary, iterations=20, random_state=1)\n",
+   "outputs": [],
+   "source": [
+    "model = SklLdaModel(num_topics=2, id2word=dictionary, iterations=20, random_state=1)\n",
     "model.fit(corpus)\n",
-    "model.print_topics(2)\n",
     "model.transform(corpus)"
    ]
   },
@@ -145,7 +122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -156,12 +133,12 @@
     "from gensim.models.ldamodel import LdaModel\n",
     "from sklearn.datasets import fetch_20newsgroups\n",
     "from sklearn.feature_extraction.text import CountVectorizer\n",
-    "from gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel import SklearnWrapperLdaModel"
+    "from gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel import SklLdaModel"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -181,7 +158,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -204,35 +181,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[(0,\n",
-       "  u'0.025*\"456\" + 0.021*\"argue\" + 0.016*\"bitnet\" + 0.015*\"beastmaster\" + 0.014*\"cryptography\" + 0.013*\"false\" + 0.012*\"digex\" + 0.011*\"cover\" + 0.011*\"classified\" + 0.010*\"disk\"'),\n",
-       " (1,\n",
-       "  u'0.142*\"abroad\" + 0.113*\"asking\" + 0.088*\"cryptography\" + 0.044*\"ciphertext\" + 0.043*\"arithmetic\" + 0.032*\"courtesy\" + 0.030*\"facts\" + 0.021*\"argue\" + 0.019*\"amolitor\" + 0.018*\"agree\"'),\n",
-       " (2,\n",
-       "  u'0.034*\"certain\" + 0.027*\"69\" + 0.025*\"book\" + 0.025*\"demand\" + 0.024*\"87\" + 0.024*\"cracking\" + 0.021*\"farm\" + 0.019*\"fierkelab\" + 0.015*\"face\" + 0.011*\"abroad\"'),\n",
-       " (3,\n",
-       "  u'0.017*\"decipher\" + 0.017*\"example\" + 0.016*\"cases\" + 0.016*\"follow\" + 0.008*\"considering\" + 0.006*\"forgot\" + 0.006*\"cellular\" + 0.005*\"evans\" + 0.005*\"computed\" + 0.005*\"cia\"'),\n",
-       " (4,\n",
-       "  u'0.022*\"accurate\" + 0.021*\"corporate\" + 0.013*\"chance\" + 0.012*\"clark\" + 0.009*\"consideration\" + 0.009*\"candidates\" + 0.008*\"dawson\" + 0.008*\"authentication\" + 0.008*\"assess\" + 0.008*\"attempt\"')]"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "obj = SklearnWrapperLdaModel(id2word=id2word, num_topics=5, passes=20)\n",
-    "lda = obj.fit(X)\n",
-    "lda.print_topics()"
+   "outputs": [],
+   "source": [
+    "obj = SklLdaModel(id2word=id2word, num_topics=5, passes=20)\n",
+    "lda = obj.fit(X)"
    ]
   },
   {
@@ -246,19 +202,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
    "outputs": [],
    "source": [
-    "from sklearn.model_selection  import GridSearchCV\n",
+    "from sklearn.model_selection import GridSearchCV\n",
     "from gensim.models.coherencemodel import CoherenceModel"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -271,33 +227,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "GridSearchCV(cv=5, error_score='raise',\n",
-       "       estimator=SklearnWrapperLdaModel(alpha='symmetric', chunksize=2000, corpus=None,\n",
-       "            decay=0.5, eta=None, eval_every=10, gamma_threshold=0.001,\n",
-       "            id2word=<gensim.corpora.dictionary.Dictionary object at 0x7f42ccbebd10>,\n",
-       "            iterations=50, minimum_probability=0.01, num_topics=5,\n",
-       "            offset=1.0, passes=20, random_state=None, update_every=1),\n",
-       "       fit_params={}, iid=True, n_jobs=1,\n",
-       "       param_grid={'num_topics': (2, 3, 5, 10), 'iterations': (1, 20, 50)},\n",
-       "       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
-       "       scoring=<function scorer at 0x7f42cad12230>, verbose=0)"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "obj = SklearnWrapperLdaModel(id2word=dictionary, num_topics=5, passes=20)\n",
+   "outputs": [],
+   "source": [
+    "obj = SklLdaModel(id2word=dictionary, num_topics=5, passes=20)\n",
     "parameters = {'num_topics': (2, 3, 5, 10), 'iterations': (1, 20, 50)}\n",
     "model = GridSearchCV(obj, parameters, scoring=scorer, cv=5)\n",
     "model.fit(corpus)"
@@ -305,22 +241,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'iterations': 20, 'num_topics': 3}"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "model.best_params_"
    ]
@@ -334,7 +259,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -343,7 +268,6 @@
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn import linear_model\n",
     "\n",
-    "\n",
     "def print_features_pipe(clf, vocab, n=10):\n",
     "    ''' Better printing for sorted list '''\n",
     "    coef = clf.named_steps['classifier'].coef_[0]\n",
@@ -354,7 +278,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -366,46 +290,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[ -2.95020466e-01  -1.04115352e-01   5.19570267e-01   1.03817059e-01\n",
-      "   2.72881013e-02   1.35738501e-02   1.89246630e-13   1.89246630e-13\n",
-      "   1.89246630e-13   1.89246630e-13   1.89246630e-13   1.89246630e-13\n",
-      "   1.89246630e-13   1.89246630e-13   1.89246630e-13]\n",
-      "Positive features: Fame,:0.52 Keach:0.10 comp.org.eff.talk,:0.03 comp.org.eff.talk.:0.01 >Pat:0.00 dome.:0.00 internet...:0.00 trawling:0.00 hanging:0.00 red@redpoll.neoucom.edu:0.00\n",
-      "Negative features: Fame.:-0.30 considered,:-0.10\n",
-      "0.531040268456\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = SklearnWrapperLdaModel(num_topics=15, id2word=id2word, iterations=50, random_state=37)\n",
+   "outputs": [],
+   "source": [
+    "model = SklLdaModel(num_topics=15, id2word=id2word, iterations=50, random_state=37)\n",
     "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
     "pipe = Pipeline((('features', model,), ('classifier', clf)))\n",
     "pipe.fit(corpus, data.target)\n",
     "print_features_pipe(pipe, id2word.values())\n",
-    "print pipe.score(corpus, data.target)"
+    "print(pipe.score(corpus, data.target))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### LsiModel"
+    "### LSI Model"
    ]
   },
   {
@@ -417,13 +320,61 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from gensim.sklearn_integration.sklearn_wrapper_gensim_lsimodel import SklLsiModel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Example of Using Pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "model = SklLsiModel(num_topics=15, id2word=id2word)\n",
+    "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
+    "pipe = Pipeline((('features', model,), ('classifier', clf)))\n",
+    "pipe.fit(corpus, data.target)\n",
+    "print_features_pipe(pipe, id2word.values())\n",
+    "print(pipe.score(corpus, data.target))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Random Projections Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To use RpModel, begin by importing the RpModel wrapper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
-    "from gensim.sklearn_integration.sklearn_wrapper_gensim_lsimodel import SklearnWrapperLsiModel"
+    "from gensim.sklearn_integration.sklearn_wrapper_gensim_rpmodel import SklRpModel"
    ]
   },
   {
@@ -435,31 +386,72 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[ 0.13652819  0.00383696  0.02635504 -0.08454895 -0.02356143  0.60020084\n",
-      "  1.07026252 -0.04072257  0.43732847  0.54913549 -0.20242834 -0.21855402\n",
-      " -1.30546283 -0.08690711  0.17606255]\n",
-      "Positive features: 01101001B:1.07 comp.org.eff.talk.:0.60 red@redpoll.neoucom.edu:0.55 circuitry:0.44 >Pat:0.18 Fame.:0.14 Fame,:0.03 considered,:0.00\n",
-      "Negative features: internet...:-1.31 trawling:-0.22 hanging:-0.20 dome.:-0.09 Keach:-0.08 *best*:-0.04 comp.org.eff.talk,:-0.02\n",
-      "0.865771812081\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = SklearnWrapperLsiModel(num_topics=15, id2word=id2word)\n",
+   "outputs": [],
+   "source": [
+    "model = SklRpModel(num_topics=2)\n",
+    "numpy.random.mtrand.RandomState(1)  # set seed for getting same result\n",
     "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
     "pipe = Pipeline((('features', model,), ('classifier', clf)))\n",
     "pipe.fit(corpus, data.target)\n",
     "print_features_pipe(pipe, id2word.values())\n",
-    "print pipe.score(corpus, data.target)"
+    "print(pipe.score(corpus, data.target))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### LDASeq Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To use LdaSeqModel, begin by importing the LdaSeqModel wrapper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from gensim.sklearn_integration.sklearn_wrapper_gensim_ldaseqmodel import SklLdaSeqModel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Example of Using Pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "test_data = data.data[0:2]\n",
+    "test_target = data.target[0:2]\n",
+    "id2word = Dictionary(map(lambda x: x.split(), test_data))\n",
+    "corpus = [id2word.doc2bow(i.split()) for i in test_data]\n",
+    "\n",
+    "model = SklLdaSeqModel(id2word=id2word, num_topics=2, time_slice=[1, 1, 1], initialize='gensim')\n",
+    "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
+    "pipe = Pipeline((('features', model,), ('classifier', clf)))\n",
+    "pipe.fit(corpus, test_target)\n",
+    "print_features_pipe(pipe, id2word.values())\n",
+    "print(pipe.score(corpus, test_target))"
    ]
   }
  ],

From 861dc8bd1739de3d0ee84eea9f9cef868b00813f Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Tue, 20 Jun 2017 01:36:49 +0530
Subject: [PATCH 2/8] included skl wrapper classes in '__init__.py'

---
 gensim/sklearn_integration/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/gensim/sklearn_integration/__init__.py b/gensim/sklearn_integration/__init__.py
index c3227cbdef..bad381a306 100644
--- a/gensim/sklearn_integration/__init__.py
+++ b/gensim/sklearn_integration/__init__.py
@@ -8,3 +8,10 @@
 See [1] for complete set of conventions.
 [1] http://scikit-learn.org/stable/developers/
 """
+
+
+from .base_sklearn_wrapper import BaseSklearnWrapper
+from .sklearn_wrapper_gensim_ldamodel import SklearnWrapperLdaModel
+from .sklearn_wrapper_gensim_lsimodel import SklearnWrapperLsiModel
+from .sklearn_wrapper_gensim_rpmodel import SklRpModel
+from .sklearn_wrapper_gensim_ldaseqmodel import SklLdaSeqModel

From 539adb4aeacc7e0ff8197ba690dc438b3593fb05 Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Tue, 20 Jun 2017 01:37:30 +0530
Subject: [PATCH 3/8] shortened import statements for skl wrapper classes

---
 docs/notebooks/sklearn_wrapper.ipynb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/notebooks/sklearn_wrapper.ipynb b/docs/notebooks/sklearn_wrapper.ipynb
index 6bf336b08e..8340239669 100644
--- a/docs/notebooks/sklearn_wrapper.ipynb
+++ b/docs/notebooks/sklearn_wrapper.ipynb
@@ -50,7 +50,7 @@
    },
    "outputs": [],
    "source": [
-    "from gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel import SklLdaModel"
+    "from gensim.sklearn_integration import SklLdaModel"
    ]
   },
   {
@@ -326,7 +326,7 @@
    },
    "outputs": [],
    "source": [
-    "from gensim.sklearn_integration.sklearn_wrapper_gensim_lsimodel import SklLsiModel"
+    "from gensim.sklearn_integration import SklLsiModel"
    ]
   },
   {
@@ -374,7 +374,7 @@
    },
    "outputs": [],
    "source": [
-    "from gensim.sklearn_integration.sklearn_wrapper_gensim_rpmodel import SklRpModel"
+    "from gensim.sklearn_integration import SklRpModel"
    ]
   },
   {
@@ -423,7 +423,7 @@
    },
    "outputs": [],
    "source": [
-    "from gensim.sklearn_integration.sklearn_wrapper_gensim_ldaseqmodel import SklLdaSeqModel"
+    "from gensim.sklearn_integration import SklLdaSeqModel"
    ]
   },
   {

From 554f941c1ac3d36255ff99064518a563936bdbc1 Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Tue, 20 Jun 2017 14:55:35 -0700
Subject: [PATCH 4/8] changes for __init__.py file

---
 gensim/sklearn_integration/__init__.py                        | 4 ++--
 gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py | 4 ++--
 .../sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py | 4 ++--
 gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py | 4 ++--
 gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py  | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gensim/sklearn_integration/__init__.py b/gensim/sklearn_integration/__init__.py
index bad381a306..f7f5e749c5 100644
--- a/gensim/sklearn_integration/__init__.py
+++ b/gensim/sklearn_integration/__init__.py
@@ -11,7 +11,7 @@
 
 
 from .base_sklearn_wrapper import BaseSklearnWrapper
-from .sklearn_wrapper_gensim_ldamodel import SklearnWrapperLdaModel
-from .sklearn_wrapper_gensim_lsimodel import SklearnWrapperLsiModel
+from .sklearn_wrapper_gensim_ldamodel import SklLdaModel
+from .sklearn_wrapper_gensim_lsimodel import SklLsiModel
 from .sklearn_wrapper_gensim_rpmodel import SklRpModel
 from .sklearn_wrapper_gensim_ldaseqmodel import SklLdaSeqModel
diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py
index a158e5f71d..3e5cae4c9c 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py
@@ -16,10 +16,10 @@
 
 from gensim import models
 from gensim import matutils
-from gensim.sklearn_integration import base_sklearn_wrapper
+from gensim.sklearn_integration import BaseSklearnWrapper
 
 
-class SklLdaModel(base_sklearn_wrapper.BaseSklearnWrapper, TransformerMixin, BaseEstimator):
+class SklLdaModel(BaseSklearnWrapper, TransformerMixin, BaseEstimator):
     """
     Base LDA module
     """
diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py
index fdf9e58a10..e783784d31 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py
@@ -14,10 +14,10 @@
 from sklearn.exceptions import NotFittedError
 
 from gensim import models
-from gensim.sklearn_integration import base_sklearn_wrapper
+from gensim.sklearn_integration import BaseSklearnWrapper
 
 
-class SklLdaSeqModel(base_sklearn_wrapper.BaseSklearnWrapper, TransformerMixin, BaseEstimator):
+class SklLdaSeqModel(BaseSklearnWrapper, TransformerMixin, BaseEstimator):
     """
     Base LdaSeq module
     """
diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py
index 9b93e3a37f..b5f2809a74 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py
@@ -16,10 +16,10 @@
 
 from gensim import models
 from gensim import matutils
-from gensim.sklearn_integration import base_sklearn_wrapper
+from gensim.sklearn_integration import BaseSklearnWrapper
 
 
-class SklLsiModel(base_sklearn_wrapper.BaseSklearnWrapper, TransformerMixin, BaseEstimator):
+class SklLsiModel(BaseSklearnWrapper, TransformerMixin, BaseEstimator):
     """
     Base LSI module
     """
diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py
index e98d64aa97..5ece879bed 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py
@@ -14,10 +14,10 @@
 from sklearn.exceptions import NotFittedError
 
 from gensim import models
-from gensim.sklearn_integration import base_sklearn_wrapper
+from gensim.sklearn_integration import BaseSklearnWrapper
 
 
-class SklRpModel(base_sklearn_wrapper.BaseSklearnWrapper, TransformerMixin, BaseEstimator):
+class SklRpModel(BaseSklearnWrapper, TransformerMixin, BaseEstimator):
     """
     Base RP module
     """

From 088d3d7b89e761c6f5c3fc4024d0631abc51aec8 Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Tue, 20 Jun 2017 14:56:06 -0700
Subject: [PATCH 5/8] added output in sklearn wrappers ipynb

---
 docs/notebooks/sklearn_wrapper.ipynb | 169 ++++++++++++++++++++++-----
 1 file changed, 138 insertions(+), 31 deletions(-)

diff --git a/docs/notebooks/sklearn_wrapper.ipynb b/docs/notebooks/sklearn_wrapper.ipynb
index 8340239669..4f4635389f 100644
--- a/docs/notebooks/sklearn_wrapper.ipynb
+++ b/docs/notebooks/sklearn_wrapper.ipynb
@@ -44,11 +44,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
    "source": [
     "from gensim.sklearn_integration import SklLdaModel"
    ]
@@ -62,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {
     "collapsed": true
    },
@@ -93,11 +101,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 0.85275314,  0.14724686],\n",
+       "       [ 0.12390183,  0.87609817],\n",
+       "       [ 0.4612995 ,  0.5387005 ],\n",
+       "       [ 0.84924177,  0.15075823],\n",
+       "       [ 0.49180096,  0.50819904],\n",
+       "       [ 0.40086923,  0.59913077],\n",
+       "       [ 0.28454427,  0.71545573],\n",
+       "       [ 0.88776198,  0.11223802],\n",
+       "       [ 0.84210373,  0.15789627]])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "model = SklLdaModel(num_topics=2, id2word=dictionary, iterations=20, random_state=1)\n",
     "model.fit(corpus)\n",
@@ -122,7 +156,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {
     "collapsed": false
    },
@@ -138,7 +172,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "collapsed": false
    },
@@ -158,7 +192,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "collapsed": false
    },
@@ -181,13 +215,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
-   "source": [
-    "obj = SklLdaModel(id2word=id2word, num_topics=5, passes=20)\n",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n"
+     ]
+    }
+   ],
+   "source": [
+    "obj = SklLdaModel(id2word=id2word, num_topics=5, iterations=20)\n",
     "lda = obj.fit(X)"
    ]
   },
@@ -202,7 +244,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {
     "collapsed": false
    },
@@ -214,14 +256,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "metadata": {
     "collapsed": true
    },
    "outputs": [],
    "source": [
     "def scorer(estimator, X, y=None):\n",
-    "    goodcm = CoherenceModel(model=estimator, texts= texts, dictionary=estimator.id2word, coherence='c_v')\n",
+    "    goodcm = CoherenceModel(model=estimator.gensim_model, texts= texts, dictionary=estimator.gensim_model.id2word, coherence='c_v')\n",
     "    return goodcm.get_coherence()"
    ]
   },
@@ -233,9 +275,9 @@
    },
    "outputs": [],
    "source": [
-    "obj = SklLdaModel(id2word=dictionary, num_topics=5, passes=20)\n",
+    "obj = SklLdaModel(id2word=dictionary, num_topics=5, iterations=20)\n",
     "parameters = {'num_topics': (2, 3, 5, 10), 'iterations': (1, 20, 50)}\n",
-    "model = GridSearchCV(obj, parameters, scoring=scorer, cv=5)\n",
+    "model = GridSearchCV(obj, parameters, scoring=scorer, cv=2)\n",
     "model.fit(corpus)"
    ]
   },
@@ -259,7 +301,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {
     "collapsed": false
    },
@@ -278,7 +320,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {
     "collapsed": false
    },
@@ -290,11 +332,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ -2.95020466e-01  -1.04115352e-01   5.19570267e-01   1.03817059e-01\n",
+      "   2.72881013e-02   1.35738501e-02   1.89246630e-13   1.89246630e-13\n",
+      "   1.89246630e-13   1.89246630e-13   1.89246630e-13   1.89246630e-13\n",
+      "   1.89246630e-13   1.89246630e-13   1.89246630e-13]\n",
+      "Positive features: Fame,:0.52 Keach:0.10 comp.org.eff.talk,:0.03 comp.org.eff.talk.:0.01 >Pat:0.00 dome.:0.00 internet...:0.00 trawling:0.00 hanging:0.00 red@redpoll.neoucom.edu:0.00\n",
+      "Negative features: Fame.:-0.30 considered,:-0.10\n",
+      "0.531040268456\n"
+     ]
+    }
+   ],
    "source": [
     "model = SklLdaModel(num_topics=15, id2word=id2word, iterations=50, random_state=37)\n",
     "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
@@ -320,7 +383,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {
     "collapsed": false
    },
@@ -338,11 +401,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 0.13651844 -0.0038155   0.0264238   0.08494405 -0.02384796 -0.60051921\n",
+      " -1.07079081  0.04000798  0.43845983 -0.54894361  0.2017333  -0.21800463\n",
+      "  1.3045325   0.08672903 -0.17578455]\n",
+      "Positive features: internet...:1.30 circuitry:0.44 hanging:0.20 Fame.:0.14 dome.:0.09 Keach:0.08 *best*:0.04 Fame,:0.03\n",
+      "Negative features: 01101001B:-1.07 comp.org.eff.talk.:-0.60 red@redpoll.neoucom.edu:-0.55 trawling:-0.22 >Pat:-0.18 comp.org.eff.talk,:-0.02 considered,:-0.00\n",
+      "0.865771812081\n"
+     ]
+    }
+   ],
    "source": [
     "model = SklLsiModel(num_topics=15, id2word=id2word)\n",
     "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
@@ -368,7 +444,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {
     "collapsed": true
    },
@@ -386,14 +462,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 0.01241958 -0.01335879]\n",
+      "Positive features: Fame.:0.01\n",
+      "Negative features: considered,:-0.01\n",
+      "0.59144295302\n"
+     ]
+    }
+   ],
    "source": [
     "model = SklRpModel(num_topics=2)\n",
-    "numpy.random.mtrand.RandomState(1)  # set seed for getting same result\n",
+    "np.random.mtrand.RandomState(1)  # set seed for getting same result\n",
     "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
     "pipe = Pipeline((('features', model,), ('classifier', clf)))\n",
     "pipe.fit(corpus, data.target)\n",
@@ -417,7 +504,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {
     "collapsed": true
    },
@@ -435,11 +522,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 0.04877324 -0.04877324]\n",
+      "Positive features: What:0.05\n",
+      "Negative features: NLCS:-0.05\n",
+      "1.0\n"
+     ]
+    }
+   ],
    "source": [
     "test_data = data.data[0:2]\n",
     "test_target = data.target[0:2]\n",
@@ -453,6 +551,15 @@
     "print_features_pipe(pipe, id2word.values())\n",
     "print(pipe.score(corpus, test_target))"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From e2f2a22b0c10b58c927f706410e5a4368c29d3ac Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Tue, 20 Jun 2017 17:02:27 -0700
Subject: [PATCH 6/8] updated 'set_params' function in skl wrappers

---
 gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py    | 1 +
 gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py | 1 +
 gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py    | 1 +
 gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py     | 1 +
 4 files changed, 4 insertions(+)

diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py
index 3e5cae4c9c..1ad1fabccf 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldamodel.py
@@ -64,6 +64,7 @@ def set_params(self, **parameters):
         Set all parameters.
         """
         super(SklLdaModel, self).set_params(**parameters)
+        return self
 
     def fit(self, X, y=None):
         """
diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py
index e783784d31..32a732f145 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_ldaseqmodel.py
@@ -61,6 +61,7 @@ def set_params(self, **parameters):
         Set all parameters.
         """
         super(SklLdaSeqModel, self).set_params(**parameters)
+        return self
 
     def fit(self, X, y=None):
         """
diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py
index b5f2809a74..5bd4fd0362 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_lsimodel.py
@@ -51,6 +51,7 @@ def set_params(self, **parameters):
         Set all parameters.
         """
         super(SklLsiModel, self).set_params(**parameters)
+        return self
 
     def fit(self, X, y=None):
         """
diff --git a/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py b/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py
index 5ece879bed..19e5739b33 100644
--- a/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py
+++ b/gensim/sklearn_integration/sklearn_wrapper_gensim_rpmodel.py
@@ -41,6 +41,7 @@ def set_params(self, **parameters):
         Set all parameters.
         """
         super(SklRpModel, self).set_params(**parameters)
+        return self
 
     def fit(self, X, y=None):
         """

From 7aa385b5693f083ddcf10955accd8114f3637313 Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Tue, 20 Jun 2017 17:02:59 -0700
Subject: [PATCH 7/8] updated sklearn ipynb for GridSearch

---
 docs/notebooks/sklearn_wrapper.ipynb | 147 ++++++++++++++++++++++-----
 1 file changed, 122 insertions(+), 25 deletions(-)

diff --git a/docs/notebooks/sklearn_wrapper.ipynb b/docs/notebooks/sklearn_wrapper.ipynb
index 4f4635389f..fb2008df72 100644
--- a/docs/notebooks/sklearn_wrapper.ipynb
+++ b/docs/notebooks/sklearn_wrapper.ipynb
@@ -256,7 +256,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 9,
    "metadata": {
     "collapsed": true
    },
@@ -269,25 +269,123 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n",
+      "WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "GridSearchCV(cv=5, error_score='raise',\n",
+       "       estimator=SklLdaModel(alpha='symmetric', chunksize=2000, decay=0.5, eta=None,\n",
+       "      eval_every=10, gamma_threshold=0.001,\n",
+       "      id2word=<gensim.corpora.dictionary.Dictionary object at 0x7ffa190461d0>,\n",
+       "      iterations=20, minimum_probability=0.01, num_topics=5, offset=1.0,\n",
+       "      passes=1, random_state=None, update_every=1),\n",
+       "       fit_params={}, iid=True, n_jobs=1,\n",
+       "       param_grid={'num_topics': (2, 3, 5, 10), 'iterations': (1, 20, 50)},\n",
+       "       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
+       "       scoring=<function scorer at 0x7ffa1756f6e0>, verbose=0)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "obj = SklLdaModel(id2word=dictionary, num_topics=5, iterations=20)\n",
     "parameters = {'num_topics': (2, 3, 5, 10), 'iterations': (1, 20, 50)}\n",
-    "model = GridSearchCV(obj, parameters, scoring=scorer, cv=2)\n",
+    "model = GridSearchCV(obj, parameters, scoring=scorer, cv=5)\n",
     "model.fit(corpus)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'iterations': 20, 'num_topics': 10}"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "model.best_params_"
    ]
@@ -348,18 +446,17 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[ -2.95020466e-01  -1.04115352e-01   5.19570267e-01   1.03817059e-01\n",
-      "   2.72881013e-02   1.35738501e-02   1.89246630e-13   1.89246630e-13\n",
-      "   1.89246630e-13   1.89246630e-13   1.89246630e-13   1.89246630e-13\n",
-      "   1.89246630e-13   1.89246630e-13   1.89246630e-13]\n",
-      "Positive features: Fame,:0.52 Keach:0.10 comp.org.eff.talk,:0.03 comp.org.eff.talk.:0.01 >Pat:0.00 dome.:0.00 internet...:0.00 trawling:0.00 hanging:0.00 red@redpoll.neoucom.edu:0.00\n",
-      "Negative features: Fame.:-0.30 considered,:-0.10\n",
-      "0.531040268456\n"
+      "[-0.91085778 -0.48036135 -0.41265981 -0.66310168 -0.01339967 -0.12794711\n",
+      "  0.01611456  0.15208847  0.21579624  0.25621594  0.54796235  0.43618653\n",
+      "  0.56767608  0.39267377  0.27554429]\n",
+      "Positive features: internet...:0.57 hanging:0.55 trawling:0.44 dome.:0.39 >Pat:0.28 red@redpoll.neoucom.edu:0.26 circuitry:0.22 *best*:0.15 01101001B:0.02\n",
+      "Negative features: Fame.:-0.91 Keach:-0.66 considered,:-0.48 Fame,:-0.41 comp.org.eff.talk.:-0.13 comp.org.eff.talk,:-0.01\n",
+      "0.640939597315\n"
      ]
     }
    ],
    "source": [
-    "model = SklLdaModel(num_topics=15, id2word=id2word, iterations=50, random_state=37)\n",
+    "model = SklLdaModel(num_topics=15, id2word=id2word, iterations=10, random_state=37)\n",
     "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)  # l2 penalty used\n",
     "pipe = Pipeline((('features', model,), ('classifier', clf)))\n",
     "pipe.fit(corpus, data.target)\n",
@@ -401,7 +498,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
    "metadata": {
     "collapsed": false
    },
@@ -410,11 +507,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[ 0.13651844 -0.0038155   0.0264238   0.08494405 -0.02384796 -0.60051921\n",
-      " -1.07079081  0.04000798  0.43845983 -0.54894361  0.2017333  -0.21800463\n",
-      "  1.3045325   0.08672903 -0.17578455]\n",
-      "Positive features: internet...:1.30 circuitry:0.44 hanging:0.20 Fame.:0.14 dome.:0.09 Keach:0.08 *best*:0.04 Fame,:0.03\n",
-      "Negative features: 01101001B:-1.07 comp.org.eff.talk.:-0.60 red@redpoll.neoucom.edu:-0.55 trawling:-0.22 >Pat:-0.18 comp.org.eff.talk,:-0.02 considered,:-0.00\n",
+      "[ 0.13650375 -0.00382155 -0.0264042  -0.08478659 -0.02379243 -0.6006137\n",
+      "  1.07099917  0.03998737  0.43831279 -0.54905248  0.20204591 -0.2185433\n",
+      " -1.3051437  -0.08704868  0.17599105]\n",
+      "Positive features: 01101001B:1.07 circuitry:0.44 hanging:0.20 >Pat:0.18 Fame.:0.14 *best*:0.04\n",
+      "Negative features: internet...:-1.31 comp.org.eff.talk.:-0.60 red@redpoll.neoucom.edu:-0.55 trawling:-0.22 dome.:-0.09 Keach:-0.08 Fame,:-0.03 comp.org.eff.talk,:-0.02 considered,:-0.00\n",
       "0.865771812081\n"
      ]
     }
@@ -444,7 +541,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 18,
    "metadata": {
     "collapsed": true
    },
@@ -471,10 +568,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[ 0.01241958 -0.01335879]\n",
-      "Positive features: Fame.:0.01\n",
-      "Negative features: considered,:-0.01\n",
-      "0.59144295302\n"
+      "[-0.00071555  0.00913274]\n",
+      "Positive features: considered,:0.01\n",
+      "Negative features: Fame.:-0.00\n",
+      "0.543624161074\n"
      ]
     }
    ],

From 4547c1e5f9f89d611fa9169559259d08198c3ad3 Mon Sep 17 00:00:00 2001
From: Chinmaya Pancholi <chinmayapancholi13@gmail.com>
Date: Tue, 20 Jun 2017 17:07:35 -0700
Subject: [PATCH 8/8] added comments to skip flake8 checks

---
 gensim/sklearn_integration/__init__.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gensim/sklearn_integration/__init__.py b/gensim/sklearn_integration/__init__.py
index f7f5e749c5..d351e625fc 100644
--- a/gensim/sklearn_integration/__init__.py
+++ b/gensim/sklearn_integration/__init__.py
@@ -10,8 +10,8 @@
 """
 
 
-from .base_sklearn_wrapper import BaseSklearnWrapper
-from .sklearn_wrapper_gensim_ldamodel import SklLdaModel
-from .sklearn_wrapper_gensim_lsimodel import SklLsiModel
-from .sklearn_wrapper_gensim_rpmodel import SklRpModel
-from .sklearn_wrapper_gensim_ldaseqmodel import SklLdaSeqModel
+from .base_sklearn_wrapper import BaseSklearnWrapper  # noqa: F401
+from .sklearn_wrapper_gensim_ldamodel import SklLdaModel  # noqa: F401
+from .sklearn_wrapper_gensim_lsimodel import SklLsiModel  # noqa: F401
+from .sklearn_wrapper_gensim_rpmodel import SklRpModel  # noqa: F401
+from .sklearn_wrapper_gensim_ldaseqmodel import SklLdaSeqModel  # noqa: F401