From 0157ab885a11a06d3a4cfb862bd27fe931b5aaa0 Mon Sep 17 00:00:00 2001
From: anmol01gulati <anmol01gulati@gmail.com>
Date: Sun, 25 Sep 2016 23:57:07 +0530
Subject: [PATCH 1/6] Added check in summarize_corpus to fix bug in summarizer

---
 gensim/summarization/summarizer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py
index 234dcec377..3d0ae91656 100644
--- a/gensim/summarization/summarizer.py
+++ b/gensim/summarization/summarizer.py
@@ -158,6 +158,11 @@ def summarize_corpus(corpus, ratio=0.2):
     _set_graph_edge_weights(graph)
     _remove_unreachable_nodes(graph)
 
+    # If the number of nodes < 3, the function ends.
+    if len(graph.nodes) < 3:
+        logger.warning("Please add more sentences to the text. The number of reachable nodes is below 3")
+        return
+
     pagerank_scores = _pagerank(graph)
 
     hashable_corpus.sort(key=lambda doc: pagerank_scores.get(doc, 0), reverse=True)

From d74dc16dcf2e3c0c17b1e396dc57990ad750685e Mon Sep 17 00:00:00 2001
From: anmol01gulati <anmol01gulati@gmail.com>
Date: Mon, 26 Sep 2016 00:38:03 +0530
Subject: [PATCH 2/6] Fix #805: Added check in summarizing text

---
 gensim/summarization/summarizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py
index 3d0ae91656..703f2b8115 100644
--- a/gensim/summarization/summarizer.py
+++ b/gensim/summarization/summarizer.py
@@ -159,7 +159,7 @@ def summarize_corpus(corpus, ratio=0.2):
     _remove_unreachable_nodes(graph)
 
     # If the number of nodes < 3, the function ends.
-    if len(graph.nodes) < 3:
+    if len(graph.nodes()) < 3:
         logger.warning("Please add more sentences to the text. The number of reachable nodes is below 3")
         return
 

From f24533bb8e681dbe5c1c4df29f221a6dab8cf9fe Mon Sep 17 00:00:00 2001
From: anmol01gulati <anmol01gulati@gmail.com>
Date: Wed, 28 Sep 2016 08:37:22 +0530
Subject: [PATCH 3/6] Added test for checking low number of distinct words in
 text

---
 gensim/summarization/summarizer.py             |  2 +-
 gensim/test/test_data/testlowdistinctwords.txt | 10 ++++++++++
 gensim/test/test_summarization.py              | 14 ++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 gensim/test/test_data/testlowdistinctwords.txt

diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py
index 703f2b8115..11b1e48d95 100644
--- a/gensim/summarization/summarizer.py
+++ b/gensim/summarization/summarizer.py
@@ -158,7 +158,7 @@ def summarize_corpus(corpus, ratio=0.2):
     _set_graph_edge_weights(graph)
     _remove_unreachable_nodes(graph)
 
-    # If the number of nodes < 3, the function ends.
+    # PageRank's eigenvector computation fails with fewer than 3 reachable graph nodes (sentences, not words), so warn and return None.
     if len(graph.nodes()) < 3:
         logger.warning("Please add more sentences to the text. The number of reachable nodes is below 3")
         return
diff --git a/gensim/test/test_data/testlowdistinctwords.txt b/gensim/test/test_data/testlowdistinctwords.txt
new file mode 100644
index 0000000000..70e20fa3d3
--- /dev/null
+++ b/gensim/test/test_data/testlowdistinctwords.txt
@@ -0,0 +1,10 @@
+here here.
+there there. 
+here here.
+there there.
+here here.
+there there.
+here here.
+there there.
+here here.
+there there.
\ No newline at end of file
diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py
index fde845dc93..0535e09e32 100644
--- a/gensim/test/test_summarization.py
+++ b/gensim/test/test_summarization.py
@@ -160,6 +160,20 @@ def test_keywords_runs(self):
         kwds_lst = keywords(text, split=True)
         self.assertTrue(len(kwds_lst))
 
+    def test_low_distinct_words_corpus_summarization_is_none(self):
+        pre_path = os.path.join(os.path.dirname(__file__), 'test_data')
+
+        with utils.smart_open(os.path.join(pre_path, "testlowdistinctwords.txt")) as f:
+            text = f.read()
+
+        # Generate the corpus.
+        sentences = text.split('\n')
+        tokens = [sentence.split() for sentence in sentences]
+        dictionary = Dictionary(tokens)
+        corpus = [dictionary.doc2bow(sentence_tokens) for sentence_tokens in tokens]
+
+        self.assertTrue(summarize_corpus(corpus) is None)
+
 if __name__ == '__main__':
     logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
     unittest.main()

From 990f70c72d9f3d8bbc6bce63da954d669de62707 Mon Sep 17 00:00:00 2001
From: anmol01gulati <anmol01gulati@gmail.com>
Date: Wed, 28 Sep 2016 09:32:16 +0530
Subject: [PATCH 4/6] Text split method changed to allow running in Python 3.3
 and above.

---
 gensim/test/test_summarization.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py
index 0535e09e32..0af739be03 100644
--- a/gensim/test/test_summarization.py
+++ b/gensim/test/test_summarization.py
@@ -163,11 +163,11 @@ def test_keywords_runs(self):
     def test_low_distinct_words_corpus_summarization_is_none(self):
         pre_path = os.path.join(os.path.dirname(__file__), 'test_data')
 
-        with utils.smart_open(os.path.join(pre_path, "testlowdistinctwords.txt")) as f:
+        with utils.smart_open(os.path.join(pre_path, "testlowdistinctwords.txt"), mode="r") as f:
             text = f.read()
 
         # Generate the corpus.
-        sentences = text.split('\n')
+        sentences = text.split(b'\n')
         tokens = [sentence.split() for sentence in sentences]
         dictionary = Dictionary(tokens)
         corpus = [dictionary.doc2bow(sentence_tokens) for sentence_tokens in tokens]

From 343b9a0f8712c889b45953182bf8b859b3d9ad23 Mon Sep 17 00:00:00 2001
From: anmol01gulati <anmol01gulati@gmail.com>
Date: Wed, 28 Sep 2016 11:06:46 +0530
Subject: [PATCH 5/6] Change to fix test in python versions 3.3 and higher

---
 gensim/test/test_summarization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py
index 0af739be03..70d2dcada4 100644
--- a/gensim/test/test_summarization.py
+++ b/gensim/test/test_summarization.py
@@ -167,7 +167,7 @@ def test_low_distinct_words_corpus_summarization_is_none(self):
             text = f.read()
 
         # Generate the corpus.
-        sentences = text.split(b'\n')
+        sentences = text.split("\n")
         tokens = [sentence.split() for sentence in sentences]
         dictionary = Dictionary(tokens)
         corpus = [dictionary.doc2bow(sentence_tokens) for sentence_tokens in tokens]

From df238ef1bc71568819ba92502f0e9df46b933698 Mon Sep 17 00:00:00 2001
From: anmol01gulati <anmol01gulati@gmail.com>
Date: Thu, 29 Sep 2016 18:56:14 +0530
Subject: [PATCH 6/6] Added blank line to test_wikicorpus.py file

Added blank line to fix issue with travis CI
---
 gensim/test/test_wikicorpus.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gensim/test/test_wikicorpus.py b/gensim/test/test_wikicorpus.py
index 77c4212831..39e205002b 100644
--- a/gensim/test/test_wikicorpus.py
+++ b/gensim/test/test_wikicorpus.py
@@ -27,10 +27,11 @@ class TestWikiCorpus(unittest.TestCase):
     def setUp(self):
         wc = WikiCorpus(datapath(FILENAME))
 
-
     def test_get_texts_returns_generator_of_lists(self):
+        
         if sys.version_info < (2, 7, 0):
             return
+        
         wc = WikiCorpus(datapath(FILENAME))
         l = wc.get_texts()
         self.assertEqual(type(l), types.GeneratorType)