Skip to content

Commit

Permalink
(chore) deprecate C-like (#2954)
Browse files Browse the repository at this point in the history
- `c` and `cpp` are now individual grammars and can diverge over time
  • Loading branch information
joshgoebel authored Jan 19, 2021
1 parent fff8b86 commit 871c00b
Show file tree
Hide file tree
Showing 4 changed files with 573 additions and 293 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ Language grammar improvements:
- enh(swift) Improved highlighting for operator and precedencegroup declarations. (#2938) [Steven Van Impe][]
- fix(xml) Support single-character namespaces. (#2957) [Jan Pilzer][]

Grammar changes:

- Deprecate `c-like`, though you should not be using it directly anyways.
- will be removed in v11.
- `c` and `cpp` are now wholly unique grammars that will diverge over time

Parser:

- allow `keywords` to be an array of strings [Josh Goebel][]
Expand Down
301 changes: 27 additions & 274 deletions src/languages/c-like.js
Original file line number Diff line number Diff line change
@@ -1,293 +1,46 @@
/*
Language: C-like foundation grammar for C/C++ grammars
Language: C-like (deprecated, use C and C++ instead)
Author: Ivan Sagalaev <maniac@softwaremaniacs.org>
Contributors: Evgeny Stepanischev <imbolk@gmail.com>, Zaven Muradyan <megalivoithos@gmail.com>, Roel Deckers <admin@codingcat.nl>, Sam Wu <samsam2310@gmail.com>, Jordi Petit <jordi.petit@gmail.com>, Pieter Vantorre <pietervantorre@gmail.com>, Google Inc. (David Benjamin) <davidben@google.com>
*/

/* In the future the intention is to split out the C/C++ grammars distinctly
since they are separate languages. They will likely share a common foundation
though, and this file sets the groundwork for that - so that we get the breaking
change in v10 and don't have to change the requirements again later.
/*
C and C++ have now been fully split into `c.js` and `cpp.js`.
This file only exists for legacy purposes to support v10.
TODO: Remove this in v11.
See: https://github.com/highlightjs/highlight.js/issues/2146
*/

import * as regex from '../lib/regex.js';
import cPlusPlus from './cpp.js';

/** @type LanguageFn */
export default function(hljs) {
// added for historic reasons because `hljs.C_LINE_COMMENT_MODE` does
// not include such support nor can we be sure all the grammars depending
// on it would desire this behavior
const C_LINE_COMMENT_MODE = hljs.COMMENT('//', '$', {
contains: [
{
begin: /\\\n/
}
]
});
const DECLTYPE_AUTO_RE = 'decltype\\(auto\\)';
const NAMESPACE_RE = '[a-zA-Z_]\\w*::';
const TEMPLATE_ARGUMENT_RE = '<[^<>]+>';
const FUNCTION_TYPE_RE = '(' +
DECLTYPE_AUTO_RE + '|' +
regex.optional(NAMESPACE_RE) +
'[a-zA-Z_]\\w*' + regex.optional(TEMPLATE_ARGUMENT_RE) +
')';
const CPP_PRIMITIVE_TYPES = {
className: 'keyword',
begin: '\\b[a-z\\d_]*_t\\b'
};

// https://en.cppreference.com/w/cpp/language/escape
// \\ \x \xFF \u2837 \u00323747 \374
const CHARACTER_ESCAPES = '\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4,8}|[0-7]{3}|\\S)';
const STRINGS = {
className: 'string',
variants: [
{
begin: '(u8?|U|L)?"',
end: '"',
illegal: '\\n',
contains: [ hljs.BACKSLASH_ESCAPE ]
},
{
begin: '(u8?|U|L)?\'(' + CHARACTER_ESCAPES + "|.)",
end: '\'',
illegal: '.'
},
hljs.END_SAME_AS_BEGIN({
begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/,
end: /\)([^()\\ ]{0,16})"/
})
]
};

const NUMBERS = {
className: 'number',
variants: [
{
begin: '\\b(0b[01\']+)'
},
{
begin: '(-?)\\b([\\d\']+(\\.[\\d\']*)?|\\.[\\d\']+)((ll|LL|l|L)(u|U)?|(u|U)(ll|LL|l|L)?|f|F|b|B)'
},
{
begin: '(-?)(\\b0[xX][a-fA-F0-9\']+|(\\b[\\d\']+(\\.[\\d\']*)?|\\.[\\d\']+)([eE][-+]?[\\d\']+)?)'
}
],
relevance: 0
};

const PREPROCESSOR = {
className: 'meta',
begin: /#\s*[a-z]+\b/,
end: /$/,
keywords: {
'meta-keyword':
'if else elif endif define undef warning error line ' +
'pragma _Pragma ifdef ifndef include'
},
contains: [
{
begin: /\\\n/,
relevance: 0
},
hljs.inherit(STRINGS, {
className: 'meta-string'
}),
{
className: 'meta-string',
begin: /<.*?>/,
end: /$/,
illegal: '\\n'
},
C_LINE_COMMENT_MODE,
hljs.C_BLOCK_COMMENT_MODE
]
};
const lang = cPlusPlus(hljs);

const TITLE_MODE = {
className: 'title',
begin: regex.optional(NAMESPACE_RE) + hljs.IDENT_RE,
relevance: 0
};

const FUNCTION_TITLE = regex.optional(NAMESPACE_RE) + hljs.IDENT_RE + '\\s*\\(';

const CPP_KEYWORDS = {
keyword: 'int float while private char char8_t char16_t char32_t catch import module export virtual operator sizeof ' +
'dynamic_cast|10 typedef const_cast|10 const for static_cast|10 union namespace ' +
'unsigned long volatile static protected bool template mutable if public friend ' +
'do goto auto void enum else break extern using asm case typeid wchar_t ' +
'short reinterpret_cast|10 default double register explicit signed typename try this ' +
'switch continue inline delete alignas alignof constexpr consteval constinit decltype ' +
'concept co_await co_return co_yield requires ' +
'noexcept static_assert thread_local restrict final override ' +
'atomic_bool atomic_char atomic_schar ' +
'atomic_uchar atomic_short atomic_ushort atomic_int atomic_uint atomic_long atomic_ulong atomic_llong ' +
'atomic_ullong new throw return ' +
'and and_eq bitand bitor compl not not_eq or or_eq xor xor_eq',
built_in: 'std string wstring cin cout cerr clog stdin stdout stderr stringstream istringstream ostringstream ' +
'auto_ptr deque list queue stack vector map set pair bitset multiset multimap unordered_set ' +
'unordered_map unordered_multiset unordered_multimap priority_queue make_pair array shared_ptr abort terminate abs acos ' +
'asin atan2 atan calloc ceil cosh cos exit exp fabs floor fmod fprintf fputs free frexp ' +
'fscanf future isalnum isalpha iscntrl isdigit isgraph islower isprint ispunct isspace isupper ' +
'isxdigit tolower toupper labs ldexp log10 log malloc realloc memchr memcmp memcpy memset modf pow ' +
'printf putchar puts scanf sinh sin snprintf sprintf sqrt sscanf strcat strchr strcmp ' +
'strcpy strcspn strlen strncat strncmp strncpy strpbrk strrchr strspn strstr tanh tan ' +
'vfprintf vprintf vsprintf endl initializer_list unique_ptr _Bool complex _Complex imaginary _Imaginary',
literal: 'true false nullptr NULL'
};
const C_ALIASES = [
"c",
"h"
];

const EXPRESSION_CONTAINS = [
PREPROCESSOR,
CPP_PRIMITIVE_TYPES,
C_LINE_COMMENT_MODE,
hljs.C_BLOCK_COMMENT_MODE,
NUMBERS,
STRINGS
const CPP_ALIASES = [
'cc',
'c++',
'h++',
'hpp',
'hh',
'hxx',
'cxx'
];

const EXPRESSION_CONTEXT = {
// This mode covers expression context where we can't expect a function
// definition and shouldn't highlight anything that looks like one:
// `return some()`, `else if()`, `(x*sum(1, 2))`
variants: [
{
begin: /=/,
end: /;/
},
{
begin: /\(/,
end: /\)/
},
{
beginKeywords: 'new throw return else',
end: /;/
}
],
keywords: CPP_KEYWORDS,
contains: EXPRESSION_CONTAINS.concat([
{
begin: /\(/,
end: /\)/,
keywords: CPP_KEYWORDS,
contains: EXPRESSION_CONTAINS.concat([ 'self' ]),
relevance: 0
}
]),
relevance: 0
};
lang.disableAutodetect = true;
lang.aliases = [];
// support users only loading c-like (legacy)
if (!hljs.getLanguage("c")) lang.aliases.push(...C_ALIASES);
if (!hljs.getLanguage("cpp")) lang.aliases.push(...CPP_ALIASES);

const FUNCTION_DECLARATION = {
className: 'function',
begin: '(' + FUNCTION_TYPE_RE + '[\\*&\\s]+)+' + FUNCTION_TITLE,
returnBegin: true,
end: /[{;=]/,
excludeEnd: true,
keywords: CPP_KEYWORDS,
illegal: /[^\w\s\*&:<>.]/,
contains: [
{ // to prevent it from being confused as the function title
begin: DECLTYPE_AUTO_RE,
keywords: CPP_KEYWORDS,
relevance: 0
},
{
begin: FUNCTION_TITLE,
returnBegin: true,
contains: [ TITLE_MODE ],
relevance: 0
},
{
className: 'params',
begin: /\(/,
end: /\)/,
keywords: CPP_KEYWORDS,
relevance: 0,
contains: [
C_LINE_COMMENT_MODE,
hljs.C_BLOCK_COMMENT_MODE,
STRINGS,
NUMBERS,
CPP_PRIMITIVE_TYPES,
// Count matching parentheses.
{
begin: /\(/,
end: /\)/,
keywords: CPP_KEYWORDS,
relevance: 0,
contains: [
'self',
C_LINE_COMMENT_MODE,
hljs.C_BLOCK_COMMENT_MODE,
STRINGS,
NUMBERS,
CPP_PRIMITIVE_TYPES
]
}
]
},
CPP_PRIMITIVE_TYPES,
C_LINE_COMMENT_MODE,
hljs.C_BLOCK_COMMENT_MODE,
PREPROCESSOR
]
};
// if c and cpp are loaded after then they will reclaim these
// aliases for themselves

return {
aliases: [
'c',
'cc',
'h',
'c++',
'h++',
'hpp',
'hh',
'hxx',
'cxx'
],
keywords: CPP_KEYWORDS,
// the base c-like language will NEVER be auto-detected, rather the
// derivitives: c, c++, arduino turn auto-detect back on for themselves
disableAutodetect: true,
illegal: '</',
contains: [].concat(
EXPRESSION_CONTEXT,
FUNCTION_DECLARATION,
EXPRESSION_CONTAINS,
[
PREPROCESSOR,
{ // containers: ie, `vector <int> rooms (9);`
begin: '\\b(deque|list|queue|priority_queue|pair|stack|vector|map|set|bitset|multiset|multimap|unordered_map|unordered_set|unordered_multiset|unordered_multimap|array)\\s*<',
end: '>',
keywords: CPP_KEYWORDS,
contains: [
'self',
CPP_PRIMITIVE_TYPES
]
},
{
begin: hljs.IDENT_RE + '::',
keywords: CPP_KEYWORDS
},
{
className: 'class',
beginKeywords: 'enum class struct union',
end: /[{;:<>=]/,
contains: [
{
beginKeywords: "final class struct"
},
hljs.TITLE_MODE
]
}
]),
exports: {
preprocessor: PREPROCESSOR,
strings: STRINGS,
keywords: CPP_KEYWORDS
}
};
return lang;
}
Loading

0 comments on commit 871c00b

Please sign in to comment.