Skip to content

Commit

Permalink
Fixing punctuation thing
Browse files (browse the repository at this point in the history)
  • Loading branch information
AG committed Mar 5, 2024
1 parent 8bf8c2b commit e122c99
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions train.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -128,7 +128,6 @@ def main():
if os.path.exists('training'):
shutil.rmtree('training')
print("Previous training data cleared.")
- trie_store = {'tries': {'3_words': {}, '2_words': {}, '1_word': {}}, 'scores': {}}

# Get the total size of the file to calculate the number of iterations needed
total_size = os.path.getsize(training_data_file)
Expand Down Expand Up @@ -194,7 +193,7 @@ def main():
internal_punctuation = {"'", "-"}
additional_punctuation = {"“", "”", "–", "—"}
# Create a set of punctuation that signals the end of a word, excluding the internal punctuation
- ending_punctuation = set(string.punctuation) - internal_punctuation # + additional_punctuation
+ ending_punctuation = (set(string.punctuation) | additional_punctuation) - internal_punctuation

# Check for and remove ending punctuation from the word
cleaned_word = ''.join(char for char in word if char not in ending_punctuation)
Expand Down

0 comments on commit e122c99

Please sign in to comment.