Skip to content

Commit

Permalink
nltk.data.find argument needs to be a path, not just a filename
Browse files Browse the repository at this point in the history
  • Loading branch information
Gaboose authored and gunthercox committed Jan 2, 2017
1 parent 6c44daa commit 305e22b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
8 changes: 4 additions & 4 deletions chatterbot/chatterbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ def initialize(self):
from .utils import nltk_download_corpus

# Download required NLTK corpora if they have not already been downloaded
nltk_download_corpus('stopwords')
nltk_download_corpus('wordnet')
nltk_download_corpus('punkt')
nltk_download_corpus('vader_lexicon')
nltk_download_corpus('corpora/stopwords')
nltk_download_corpus('corpora/wordnet')
nltk_download_corpus('tokenizers/punkt')
nltk_download_corpus('sentiment/vader_lexicon')

def get_response(self, input_item, session_id=None):
"""
Expand Down
17 changes: 14 additions & 3 deletions chatterbot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def input_function():
return user_input


def nltk_download_corpus(corpus_name):
def nltk_download_corpus(resource_path):
"""
Download the specified NLTK corpus file
unless it has already been downloaded.
Expand All @@ -155,13 +155,24 @@ def nltk_download_corpus(corpus_name):
"""
from nltk.data import find
from nltk import download
from os.path import split

# Download the requested NLTK resource only if it is not already downloaded
zip_file = '{}.zip'.format(corpus_name)
_, corpus_name = split(resource_path)

## From http://www.nltk.org/api/nltk.html ##
# When using find() to locate a directory contained in a zipfile,
# the resource name must end with the forward slash character.
# Otherwise, find() will not locate the directory.
####
# Helps when resource_path == 'sentiment/vader_lexicon'
if not resource_path.endswith('/'):
resource_path = resource_path + '/'

downloaded = False

try:
find(zip_file)
find(resource_path)
except LookupError:
download(corpus_name)
downloaded = True
Expand Down

0 comments on commit 305e22b

Please sign in to comment.