Skip to content

Commit

Permalink
Fix Issue #599, by considering empty documents to be parsed and tagged. Implementation is a bit dodgy.
Browse files Browse the repository at this point in the history
  • Loading branch information
honnibal committed Nov 2, 2016
1 parent b6b01d4 commit f292f7f
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions spacy/tokens/doc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ cdef class Doc:
# must be created.
self.push_back(
<const LexemeC*>self.vocab.get(self.mem, orth), has_space)
# Tough to decide on policy for this. Is an empty doc tagged and parsed?
# There's no information we'd like to add to it, so I guess so?
if self.length == 0:
self.is_tagged = True
self.is_parsed = True

def __getitem__(self, object i):
'''
Expand Down Expand Up @@ -430,6 +435,10 @@ cdef class Doc:
yield Span(self, start, self.length)

cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
if self.length == 0:
# Flip these to false when we see the first token.
self.is_tagged = False
self.is_parsed = False
if self.length == self.max_length:
self._realloc(self.length * 2)
cdef TokenC* t = &self.c[self.length]
Expand Down

0 comments on commit f292f7f

Please sign in to comment.