Skip to content

Commit

Permalink
fix(python) Fix recognition of numeric literals followed by keywords …
Browse files Browse the repository at this point in the history
…without whitespace

Fixes highlightjs#2985
  • Loading branch information
gibson042 committed Oct 19, 2021
1 parent a6a7925 commit a71d89f
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## Version 11.3.2 (most likely)

Grammars:

- fix(python) Match numeric literals per the language reference [Richard Gibson][]

## Version 11.3.1

Build:
Expand Down
18 changes: 12 additions & 6 deletions src/languages/python.js
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,12 @@ export default function(hljs) {
// Regex fragments for Python numeric literals, named after the grammar productions in
// https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals
// digitpart: one digit, then any number of further digits, each optionally
// preceded by a single underscore
const digitpart = '[0-9](_?[0-9])*';
// pointfloat: either an optional integer part, a dot and a fraction,
// or an integer part followed by a bare trailing dot.
// Whenever an integer part is present it must start at a word boundary.
const pointfloat = [
  `(\\b(${digitpart}))?\\.(${digitpart})`,
  `\\b(${digitpart})\\.`
].join('|');
// Whitespace after a number (or any lexical token) is needed only if its absence
// would change the tokenization
// https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens
// We deviate slightly, requiring a word boundary or a keyword
// to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`)
//
// highlight.js keyword entries may carry a `|relevance` suffix (e.g. `word|10`).
// Joining such an entry verbatim would add the bare relevance digits as a
// lookahead alternative and re-admit the numeric prefixes this lookahead is meant
// to reject, so only the keyword itself is kept.
// NOTE(review): RESERVED_WORDS is defined outside this hunk -- confirm whether any
// entry uses the suffix; the stripping is a no-op otherwise.
//
// The result contains a top-level `|`, so it must only be interpolated inside a
// group, as in the `(?=${lookahead})` uses in the NUMBER variants.
const lookahead = `\\b|${RESERVED_WORDS.map((word) => word.split('|')[0]).join('|')}`;
const NUMBER = {
className: 'number',
relevance: 0,
Expand All @@ -270,7 +276,7 @@ export default function(hljs) {
// because both MUST contain a decimal point and so cannot be confused with
// the interior part of an identifier
{
begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b`
begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`
},
{
begin: `(${pointfloat})[jJ]?`
Expand All @@ -283,22 +289,22 @@ export default function(hljs) {
// decinteger is optionally imaginary
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b'
begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`
},
{
begin: '\\b0[bB](_?[01])+[lL]?\\b'
begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`
},
{
begin: '\\b0[oO](_?[0-7])+[lL]?\\b'
begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`
},
{
begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b'
begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`
},

// imagnumber (digitpart-based)
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{
begin: `\\b(${digitpart})[jJ]\\b`
begin: `\\b(${digitpart})[jJ](?=${lookahead})`
}
]
};
Expand Down

0 comments on commit a71d89f

Please sign in to comment.