Skip to content

Commit

Permalink
Warn and return raw text if only one sentence given to summarizer. Fi…
Browse files Browse the repository at this point in the history
  • Loading branch information
metalaman authored and harshuljain13 committed Sep 30, 2016
1 parent f958d46 commit cc931b7
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Changes
- bigram construction can now support multiple bigrams within one sentence
* Fixed issue #838, RuntimeWarning: overflow encountered in exp (@markroxor, [#895](https://github.com/RaRe-Technologies/gensim/pull/895))
* Changed some log messages to warnings as suggested in issue #828. (@rhnvrm, [#884](https://github.com/RaRe-Technologies/gensim/pull/884))
* Fixed issue #851: summarizer.py now checks for single-sentence input to avoid a ZeroDivisionError; added test cases in test/test_summarization.py. (@metalaman, #887)


0.13.2, 2016-08-19
Expand Down
5 changes: 5 additions & 0 deletions gensim/summarization/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ def summarize(text, ratio=0.2, word_count=None, split=False):
logger.warning("Input text is empty.")
return

# If only one sentence is present, the function returns the input text unchanged (avoids a ZeroDivisionError).
if len(sentences) == 1:
logger.warning("Summarization not performed since the document has only one sentence.")
return text

# Warns if the text is too short.
if len(sentences) < INPUT_MIN_LENGTH:
logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.")
Expand Down
11 changes: 11 additions & 0 deletions gensim/test/test_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,17 @@ def test_text_summarization_raises_exception_on_short_input_text(self):

self.assertTrue(summarize(text) is not None)

def test_text_summarization_returns_input_on_single_input_sentence(self):
pre_path = os.path.join(os.path.dirname(__file__), 'test_data')

with utils.smart_open(os.path.join(pre_path, "testsummarization_unrelated.txt"), mode="r") as f:
text = f.read()

# Keeps the first sentence only.
text = text.split('\n')[0]

self.assertEqual(summarize(text),text)

def test_corpus_summarization_raises_exception_on_short_input_text(self):
pre_path = os.path.join(os.path.dirname(__file__), 'test_data')

Expand Down
3 changes: 3 additions & 0 deletions gensim/test/test_wikicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
import os
import sys
import types

import unittest

from gensim.corpora.wikicorpus import WikiCorpus



module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder
datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
FILENAME = 'enwiki-latest-pages-articles1.xml-p000000010p000030302-shortened.bz2'
Expand All @@ -29,6 +31,7 @@ def setUp(self):


def test_get_texts_returns_generator_of_lists(self):

if sys.version_info < (2, 7, 0):
return
wc = WikiCorpus(datapath(FILENAME))
Expand Down

0 comments on commit cc931b7

Please sign in to comment.