diff --git a/gensim/summarization/__init__.py b/gensim/summarization/__init__.py index 57c9a7c815..c7efb84d4a 100644 --- a/gensim/summarization/__init__.py +++ b/gensim/summarization/__init__.py @@ -1,4 +1,4 @@ # bring model classes directly into package namespace, to save some typing from .summarizer import summarize, summarize_corpus -from .keywords import keywords \ No newline at end of file +from .keywords import keywords diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py index 6704146d54..d634a32b54 100644 --- a/gensim/summarization/bm25.py +++ b/gensim/summarization/bm25.py @@ -40,7 +40,7 @@ def initialize(self): self.df[word] += 1 for word, freq in iteritems(self.df): - self.idf[word] = math.log(self.corpus_size-freq+0.5) - math.log(freq+0.5) + self.idf[word] = math.log(self.corpus_size - freq + 0.5) - math.log(freq + 0.5) def get_score(self, document, index, average_idf): score = 0 @@ -48,8 +48,8 @@ def get_score(self, document, index, average_idf): if word not in self.f[index]: continue idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf - score += (idf*self.f[index][word]*(PARAM_K1+1) - / (self.f[index][word] + PARAM_K1*(1 - PARAM_B+PARAM_B*self.corpus_size / self.avgdl))) + score += (idf * self.f[index][word] * (PARAM_K1 + 1) + / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl))) return score def get_scores(self, document, average_idf): diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py index fe09ae1947..b24e6f1f04 100644 --- a/gensim/summarization/keywords.py +++ b/gensim/summarization/keywords.py @@ -164,7 +164,7 @@ def _get_combined_keywords(_keywords, split_text): result.append(word) # appends last word if keyword and doesn't iterate for j in xrange(i + 1, len_text): other_word = _strip_word(split_text[j]) - if other_word in _keywords and other_word == split_text[j] and not other_word in combined_word: + if other_word in _keywords and other_word == split_text[j] and other_word not in combined_word: combined_word.append(other_word) else: for keyword in combined_word: diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py index 0779011999..e749b4cc66 100644 --- a/gensim/summarization/summarizer.py +++ b/gensim/summarization/summarizer.py @@ -198,10 +198,10 @@ def summarize(text, ratio=0.2, word_count=None, split=False): logger.warning("Input text is empty.") return - # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError). + # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError). if len(sentences) == 1: raise ValueError("input must have more than one sentence") - + # Warns if the text is too short. if len(sentences) < INPUT_MIN_LENGTH: logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.")