💫 Update training examples and use minibatching #2830

Merged · 2 commits · Oct 9, 2018
17 changes: 11 additions & 6 deletions examples/training/train_intent_parser.py
@@ -21,8 +21,9 @@
 
 import plac
 import random
-import spacy
 from pathlib import Path
+import spacy
+from spacy.util import minibatch, compounding
 
 
 # training data: texts, heads and dependency labels
@@ -63,7 +64,7 @@
     model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
     output_dir=("Optional output directory", "option", "o", Path),
     n_iter=("Number of training iterations", "option", "n", int))
-def main(model=None, output_dir=None, n_iter=5):
+def main(model=None, output_dir=None, n_iter=15):
     """Load the model, set up the pipeline and train the parser."""
     if model is not None:
         nlp = spacy.load(model)  # load existing spaCy model
@@ -89,9 +90,12 @@ def main(model=None, output_dir=None, n_iter=5):
     for itn in range(n_iter):
         random.shuffle(TRAIN_DATA)
         losses = {}
-        for text, annotations in TRAIN_DATA:
-            nlp.update([text], [annotations], sgd=optimizer, losses=losses)
-        print(losses)
+        # batch up the examples using spaCy's minibatch
+        batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
+        for batch in batches:
+            texts, annotations = zip(*batch)
+            nlp.update(texts, annotations, sgd=optimizer, losses=losses)
+        print('Losses', losses)
 
     # test the trained model
     test_model(nlp)
@@ -135,7 +139,8 @@ def test_model(nlp):
     # [
     #     ('find', 'ROOT', 'find'),
     #     ('cheapest', 'QUALITY', 'gym'),
-    #     ('gym', 'PLACE', 'find')
+    #     ('gym', 'PLACE', 'find'),
+    #     ('near', 'ATTRIBUTE', 'gym'),
     #     ('work', 'LOCATION', 'near')
     # ]
     # show me the best hotel in berlin
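For reference, compounding(4., 32., 1.001) is an infinite generator of batch sizes: it starts at 4, multiplies by 1.001 on every yield, and is capped at 32. minibatch draws one size per batch, so batches grow very gradually and only approach the 32 ceiling after roughly 2,000 batches. A minimal sketch (printed values are approximate):

    from spacy.util import compounding

    sizes = compounding(4., 32., 1.001)
    print([next(sizes) for _ in range(3)])  # -> [4.0, 4.004, 4.008004], growing 0.1% per step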
12 changes: 8 additions & 4 deletions examples/training/train_ner.py
@@ -15,6 +15,7 @@
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding
 
 
 # training data
@@ -62,14 +63,17 @@ def main(model=None, output_dir=None, n_iter=100):
     for itn in range(n_iter):
         random.shuffle(TRAIN_DATA)
         losses = {}
-        for text, annotations in TRAIN_DATA:
+        # batch up the examples using spaCy's minibatch
+        batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
+        for batch in batches:
+            texts, annotations = zip(*batch)
             nlp.update(
-                [text],  # batch of texts
-                [annotations],  # batch of annotations
+                texts,  # batch of texts
+                annotations,  # batch of annotations
                 drop=0.5,  # dropout - make it harder to memorise data
                 sgd=optimizer,  # callable to update weights
                 losses=losses)
-        print(losses)
+        print('Losses', losses)
 
     # test the trained model
     for text, _ in TRAIN_DATA:
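The texts, annotations = zip(*batch) idiom simply transposes a batch of (text, annotations) pairs into the two parallel sequences nlp.update expects. A minimal sketch with illustrative data in the same format:

    from spacy.util import minibatch

    # illustrative data in the same (text, annotations) format as TRAIN_DATA
    data = [
        ("Who is Shaka Khan?", {'entities': [(7, 17, 'PERSON')]}),
        ("I like London.", {'entities': [(7, 13, 'LOC')]}),
    ]

    for batch in minibatch(data, size=2):  # a plain int size also works
        texts, annotations = zip(*batch)
        print(texts)        # ('Who is Shaka Khan?', 'I like London.')
        print(annotations)  # ({'entities': [(7, 17, 'PERSON')]}, {'entities': [(7, 13, 'LOC')]})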
12 changes: 8 additions & 4 deletions examples/training/train_new_entity_type.py
@@ -31,6 +31,7 @@
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding
 
 
 # new entity label
@@ -73,7 +74,7 @@
     new_model_name=("New model name for model meta.", "option", "nm", str),
     output_dir=("Optional output directory", "option", "o", Path),
     n_iter=("Number of training iterations", "option", "n", int))
-def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
+def main(model=None, new_model_name='animal', output_dir=None, n_iter=10):
     """Set up the pipeline and entity recognizer, and train the new entity."""
     if model is not None:
         nlp = spacy.load(model)  # load existing spaCy model
Expand Down Expand Up @@ -104,10 +105,13 @@ def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
for itn in range(n_iter):
random.shuffle(TRAIN_DATA)
losses = {}
for text, annotations in TRAIN_DATA:
nlp.update([text], [annotations], sgd=optimizer, drop=0.35,
# batch up the examples using spaCy's minibatch
batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
for batch in batches:
texts, annotations = zip(*batch)
nlp.update(texts, annotations, sgd=optimizer, drop=0.35,
losses=losses)
print(losses)
print('Losses', losses)

# test the trained model
test_text = 'Do you like horses?'
Expand Down
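Because the examples use plac, the annotated parameters above double as command-line flags. A hypothetical invocation (model name and output path are placeholders, and the -m flag is assumed to be declared as in the other scripts):

    python examples/training/train_new_entity_type.py -m en_core_web_sm -nm animal -o /tmp/animal_model -n 10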
10 changes: 7 additions & 3 deletions examples/training/train_parser.py
@@ -13,6 +13,7 @@
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding
 
 
 # training data
@@ -62,9 +63,12 @@ def main(model=None, output_dir=None, n_iter=10):
     for itn in range(n_iter):
         random.shuffle(TRAIN_DATA)
         losses = {}
-        for text, annotations in TRAIN_DATA:
-            nlp.update([text], [annotations], sgd=optimizer, losses=losses)
-        print(losses)
+        # batch up the examples using spaCy's minibatch
+        batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
+        for batch in batches:
+            texts, annotations = zip(*batch)
+            nlp.update(texts, annotations, sgd=optimizer, losses=losses)
+        print('Losses', losses)
 
     # test the trained model
     test_text = "I like securities."
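Note that losses is an ordinary dict which nlp.update fills in-place, keyed by pipeline component name, so the new print('Losses', losses) reports one accumulated figure per component per epoch instead of one line per example. Typical output would look something like this (the number is illustrative):

    Losses {'parser': 12.84}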
10 changes: 7 additions & 3 deletions examples/training/train_tagger.py
@@ -16,6 +16,7 @@
 import random
 from pathlib import Path
 import spacy
+from spacy.util import minibatch, compounding
 
 
 # You need to define a mapping from your data's part-of-speech tag names to the
@@ -63,9 +64,12 @@ def main(lang='en', output_dir=None, n_iter=25):
     for i in range(n_iter):
         random.shuffle(TRAIN_DATA)
         losses = {}
-        for text, annotations in TRAIN_DATA:
-            nlp.update([text], [annotations], sgd=optimizer, losses=losses)
-        print(losses)
+        # batch up the examples using spaCy's minibatch
+        batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
+        for batch in batches:
+            texts, annotations = zip(*batch)
+            nlp.update(texts, annotations, sgd=optimizer, losses=losses)
+        print('Losses', losses)
 
     # test the trained model
     test_text = "I like blue eggs"
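Taken together, all five scripts now share the same epoch loop. A self-contained sketch of the common pattern, assuming a placeholder pipeline and dataset (it runs as-is but learns nothing until TRAIN_DATA is filled in):

    import random
    import spacy
    from spacy.util import minibatch, compounding

    nlp = spacy.blank('en')  # placeholder: each example sets up its own pipeline
    TRAIN_DATA = []          # placeholder: list of (text, annotations) pairs

    optimizer = nlp.begin_training()
    for itn in range(10):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # batch up the examples using spaCy's minibatch
        for batch in minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001)):
            texts, annotations = zip(*batch)
            nlp.update(texts, annotations, drop=0.5, sgd=optimizer, losses=losses)
        print('Losses', losses)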