Skip to content

Commit

Permalink
handle overlapping tokens: FCFS (first-come, first-served), i.e. a later token only gets the leftover range
Browse files Browse the repository at this point in the history
  • Loading branch information
swissiety committed Mar 15, 2022
1 parent 61c54d8 commit 91bff6e
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions lsplexer/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,17 +197,10 @@ def get_tokens_unprocessed(self, text):
while lineNo < startLine:
firstCharIdx = text.find('\n', printedCharIdx)+1 # beginning idx of the newline
lineNo += 1
#yield printedCharIdx, pygments.token.Text, text[printedCharIdx:firstCharIdx] # fill gap of a whole line
# print("line token"+ str(printedCharIdx) + " to " + str(firstCharIdx));
printedCharIdx = firstCharIdx

tokenStartIdx = firstCharIdx+startCharInLine

# fill gap of a text range that has no token assigned
#if printedCharIdx < tokenStartIdx: # is already on the same line
# yield printedCharIdx, pygments.token.Text, text[ printedCharIdx:tokenStartIdx]
# print("gap token" + str(printedCharIdx) + " to " + str(tokenStartIdx));

if tokenStartIdx >= printedCharIdx: # filter overlaps which would result in more output than there was input -> skip token then
# print real token
printedCharIdx = tokenStartIdx + length
Expand All @@ -221,10 +214,21 @@ def get_tokens_unprocessed(self, text):
result.sort(key=lambda x: x[0] )
printedIdx = 0
for tokenStartIdx, tokenType, endIdx in result:
# fill gaps
# skip possible overlapping tokens
if tokenStartIdx <= printedIdx:
if endIdx > printedIdx:
#add rest of token
yield printedIdx, tokenType, text[ printedIdx:endIdx]
printedIdx = endIdx

continue


# fill gap
if printedIdx < tokenStartIdx:
yield printedIdx, pygments.token.Text, text[ printedIdx:tokenStartIdx]

# add token
yield tokenStartIdx, tokenType, text[tokenStartIdx:endIdx]
printedIdx = endIdx

Expand Down

0 comments on commit 91bff6e

Please sign in to comment.