Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better markdown support and basic syntax highlighting; see #68 #88

Merged
merged 1 commit into from
Dec 13, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 260 additions & 8 deletions llama.cpp/server/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,27 @@
}

/* Code rendered inside a <pre> element (multi-line code blocks). */
pre code {
/* NOTE(review): background-color is declared twice in this rule; the later
   declaration (#222) is the one that takes effect. */
background-color: #22211f;
padding: 0.3em 0.3em;
display: block;
background-color: #222;
color: #ddd;
/* Long lines scroll horizontally inside the block. */
overflow-x: auto;
tab-size: 4;
}

/* Base style for every <code> element. */
code {
font-family: monospace;
padding: 0.1em 0.3em;
border-radius: 3px;
}
/* Syntax highlighting colors: one sh-<type> class per token type. */
code span.sh-c { color: #999; } /* comment */
code span.sh-s { color: #ae2; } /* string */
code span.sh-r { color: #b8f; } /* regexp */
code span.sh-p { color: #5df; } /* punctuation */
code span.sh-k { color: #f07; } /* keyword */
code span.sh-n { color: #fa4; } /* number */
code span.sh-d { color: #a8f; } /* definition */

fieldset label {
margin: 0.5em 0;
Expand Down Expand Up @@ -828,20 +839,261 @@
});
}

// poor man's markdown replacement
// Syntax Highlighting, condensed down from https://github.com/speed-highlight/core
// Following is a number of regexps that capture some generic highlightable syntax
// such as "strings" or /* comments */. These regexps are then utilized by the
// language definitions below, assigning each regexp to a `type`. The type maps
// to a sh-* css class.

// Generic token patterns shared by the language definitions. Every pattern
// carries the `g` flag, so `exec()` is stateful: callers must set `lastIndex`
// before each use.
const SH_REGEXP = {
  // "..." or '...' on a single line, backslash escapes allowed; the closing
  // quote is optional so unterminated strings still match.
  STRING: /("|')(\\[^]|(?!\1)[^\r\n\\])*\1?/g,
  // `...` possibly spanning lines; closing backtick optional.
  STRING_BACKTICK: /`((?!`)[^]|\\[^])*`?/g,
  // `// ...` to end of line, or `/* ... */` with an optional terminator.
  COMMENT: /\/\/.*\n?|\/\*((?!\*\/)[^])*(\*\/)?/g,
  // `# ...` to end of line.
  COMMENT_HASH: /#.*\n?/g,
  // Triple-quoted block ("""...""" or '''...'''); closing quotes optional.
  COMMENT_PYTHON: /("""|''')(\\[^]|(?!\1)[^])*\1?/g,
  // `#word ...` to end of line, honoring backslash-newline continuations.
  PREPROC: /#\s*\w+(\\\n|[^\n])*\n?/g,
  // `-- ...` to end of line.
  COMMENT_DASH: /--.*\n?/g,
  // `<!-- ... -->`; unlike the other comment patterns the terminator is required.
  COMMENT_XML: /<!--((?!-->)[^])*-->/g,
  KW: /\b(set|get|as|break|case|const|continue|default|delete|do|else|export|for|from|function|goto|if|import|extern|in|let|var|null|of|package|return|static|switch|typeof|void|while)\b/g,
  CLASS: /\b(class|constructor|extends|implements|interface|new|private|protected|public|super|this|abstract|final|virtual|instanceof)\b/g,
  // Fixed: the keyword was misspelled `finnaly`, so `finally` never matched.
  EXCEPTION: /\b(try|throw|throws|catch|finally)\b/g,
  BOOL: /\b(true|false)\b/g,
  BOOL_PYTHON: /\b(True|False)\b/g,
  KW_ASYNC: /\b(async|await|yield)\b/g,
  KW_JS: /\b(with|NaN|debugger|undefined)\b/g,
  KW_RS: /\b(crate|fn|impl|loop|match|mod|move|mut|pub|ref|self|Self|trait|type|unsafe|use|where|dyn|become|box|macro|override|priv|unsized)\b/g,
  KW_LUA: /\b(and|elseif|end|local|nil|not|or|repeat|then|until)\b/g,
  KW_PYTHON: /\b(and|as|assert|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b/g,
  TYPES: /\b(char|double|enum|float|int|signed|unsigned|struct|union)\b/g,
  TYPES_SQL: /\b(varchar|(tiny|medium|long|big)(text|blob|int)|integer|decimal|boolean)\b/g,
  // `/.../flags` on a single line.
  REGEX: /\/((?!\/)[^\r\n\\]|\\.)+\/[dgimsuy]*/g,
  // Starts with a digit; continues with digits, `.`, `_`, hex letters, `o`, `x` or `e-`.
  NUM: /(\.e?|\b)\d(e-|[\d.oxa-fA-F_])*(\.|\b)/g,
  // Identifiers made only of uppercase letters and underscores.
  DEF: /\b([A-Z][A-Z_]*)\b/g,
  // Runs of operator / punctuation characters.
  OP: /(&|<|>|[/*+:?|%^~=!,.^-])+/g,
  // Opening tag name, closing tag name, or a lone `>`.
  XML_ELEM: /<\w+|<\/\w+|>/g
};

// Language definitions: for each language id, an ordered list of
// {type, match} rules. `type` selects the sh-* css class and `match` is one
// of the shared SH_REGEXP patterns. Order matters: when two rules match at
// the same offset, Syntaxhighlightish keeps the one listed first.
const SH_LANGS = (() => {
  const R = SH_REGEXP;
  // Turns a list of [type, regexp] pairs into a fresh array of rule objects.
  const table = (...pairs) => pairs.map(([type, match]) => ({type, match}));

  const langs = {
    generic: table(
      ['c', R.COMMENT],
      ['s', R.STRING],
      ['k', R.BOOL],
      ['k', R.KW],
      ['n', R.NUM],
      ['n', R.DEF],
      ['p', R.OP],
    ),
    py: table(
      ['c', R.COMMENT_PYTHON],
      ['c', R.COMMENT_HASH],
      ['s', R.STRING],
      ['k', R.BOOL_PYTHON],
      ['k', R.KW_PYTHON],
      ['n', R.NUM],
      ['n', R.DEF],
      ['p', R.OP],
    ),
    js: table(
      ['c', R.COMMENT],
      ['s', R.STRING],
      ['s', R.STRING_BACKTICK],
      ['k', R.KW],
      ['k', R.EXCEPTION],
      ['k', R.BOOL],
      ['k', R.CLASS],
      ['k', R.KW_ASYNC],
      ['k', R.KW_JS],
      ['r', R.REGEX],
      ['n', R.NUM],
      ['n', R.DEF],
      ['p', R.OP],
    ),
    json: table(
      ['c', R.COMMENT],
      ['s', R.STRING],
      ['n', R.NUM],
      ['k', R.BOOL],
    ),
    bash: table(
      ['s', R.STRING],
      ['c', R.COMMENT_HASH],
      ['p', R.OP],
    ),
    c: table(
      ['c', R.COMMENT],
      ['s', R.STRING],
      ['k', R.KW],
      ['k', R.BOOL],
      ['k', R.TYPES],
      ['n', R.NUM],
      ['n', R.PREPROC],
      ['n', R.DEF],
      ['p', R.OP],
    ),
    cpp: table(
      ['c', R.COMMENT],
      ['s', R.STRING],
      ['k', R.KW],
      ['k', R.EXCEPTION],
      ['k', R.BOOL],
      ['k', R.CLASS],
      ['k', R.TYPES],
      ['n', R.NUM],
      ['n', R.DEF],
      ['p', R.OP],
    ),
    rs: table(
      ['c', R.COMMENT],
      ['s', R.STRING],
      ['k', R.KW],
      ['r', R.REGEX],
      ['k', R.EXCEPTION],
      ['k', R.BOOL],
      ['k', R.CLASS],
      ['k', R.TYPES],
      ['k', R.KW_ASYNC],
      ['k', R.KW_RS],
      ['n', R.NUM],
      ['n', R.DEF],
      ['p', R.OP],
    ),
    lua: table(
      ['c', R.COMMENT_DASH],
      ['c', R.COMMENT_HASH],
      ['s', R.STRING],
      ['k', R.BOOL],
      ['k', R.KW],
      ['k', R.KW_LUA],
      ['n', R.NUM],
      ['n', R.DEF],
      ['p', R.OP],
    ),
    java: table(
      ['c', R.COMMENT],
      ['s', R.STRING],
      ['n', R.NUM],
      ['n', R.DEF],
      ['k', R.KW],
      ['k', R.EXCEPTION],
      ['k', R.BOOL],
      ['k', R.CLASS],
      ['k', R.TYPES],
    ),
    sql: table(
      ['c', R.COMMENT_DASH],
      ['s', R.STRING],
      ['s', R.STRING_BACKTICK],
      ['r', R.TYPES],
      ['r', R.TYPES_SQL],
      ['n', R.NUM],
      ['k', R.DEF],
      ['p', R.OP],
    ),
    html: table(
      ['s', R.STRING],
      ['c', R.COMMENT_XML],
      ['k', R.XML_ELEM],
    ),
    xml: table(
      ['s', R.STRING],
      ['c', R.COMMENT_XML],
      ['k', R.XML_ELEM],
    ),
  };

  // Long-form aliases share the very same rule arrays as their short ids.
  langs.python = langs.py;
  langs.rust = langs.rs;
  langs.javascript = langs.js;
  return langs;
})();

// Wraps the tokens found in `src` in <span class="sh-TYPE"> elements, using
// the rule list registered for `lang` in SH_LANGS (or the `generic` list when
// `lang` is unknown).
//
//   src  - code text whose `&`, `<` and `>` are already HTML-escaped
//          (this is what Markdownish passes in).
//   lang - language id as written after the opening code fence.
//
// Returns an HTML string with `&`, `<` and `>` escaped.
const Syntaxhighlightish = (src, lang) => {
  // The rules are written against raw source text (`<`, `&`, ...), so the
  // entities are decoded before tokenizing and every emitted piece is encoded
  // again. Matching on the escaped text would split entities such as `&lt;`
  // in the middle and keep the `<`-based rules from ever matching.
  const decode = (s) => s
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');
  const encode = (s) => s
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

  // Own-property check: ids such as `constructor` or `__proto__` would
  // otherwise resolve to inherited Object members, which have no `.slice()`.
  const known = Object.prototype.hasOwnProperty.call(SH_LANGS, lang);
  // Copy the list; rules without any remaining match are removed from it below.
  const rules = (known ? SH_LANGS[lang] : SH_LANGS.generic).slice();

  const text = decode(src);
  const cache = []; // cache[r]: most recent match of rules[r]
  let out = '';

  let i = 0;
  while (i < text.length) {
    let fm = null; // earliest match at or after i
    let ft = '';   // type of the rule that produced `fm`
    // Walk backwards so that splicing does not disturb the indices still to
    // be visited, and so that on equal offsets the earlier rule wins (`<=`).
    for (let r = rules.length; r-- > 0;) {
      const token = rules[r];

      if (cache[r] === undefined || cache[r].index < i) {
        // The patterns are global and shared, so always reset lastIndex.
        token.match.lastIndex = i;
        const match = token.match.exec(text);
        if (match === null) {
          rules.splice(r, 1);
          cache.splice(r, 1);
          continue;
        }
        cache[r] = match;
      }
      // Empty matches are ignored; they would never advance `i`.
      if (cache[r][0] && (fm === null || cache[r].index <= fm.index)) {
        fm = cache[r];
        ft = token.type;
      }
    }
    if (fm === null) {
      break;
    }
    if (i !== fm.index) {
      out += encode(text.substring(i, fm.index));
    }
    i = fm.index + fm[0].length;
    out += '<span class="sh-'+ft+'">'+encode(fm[0])+'</span>';
  }
  out += encode(text.substring(i, text.length));
  return out;
};

// This transforms _some_ markdown to html by replacing code blocks and
// urls with a placeholder, so that any markdown within these already
// matched blocks won't be processed again.

// Renders a small subset of markdown as HTML.
//
//   params.text - raw (untrusted) message text.
//
// Returns a <span> element whose inner HTML is the converted text. `&`, `<`
// and `>` are escaped up front; code blocks, inline code and links are moved
// into `blocks` and replaced by `<blockN>` placeholders so the later passes
// cannot touch their contents. Because `<` is escaped first, the input itself
// can never contain a placeholder.
const Markdownish = (params) => {
  const blocks = [];
  const md = params.text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')

    // Multiline code - be liberal with the closing ``` here: we just assume
    // it will be closed eventually. This makes the code formatting and
    // highlighting work while we still receive more tokens.
    .replace(/```(\w*)\n([\s\S]*?)(```|$)/g, (m, lang, code) => {
      const id = '<block'+blocks.length+'>';
      const block = lang.length ? Syntaxhighlightish(code, lang) : code;
      blocks.push('<pre><code class="shl-'+lang+'">'+block+'</code></pre>');
      return id;
    })

    // Inline code
    .replace(/`(.*?)`/g, (m, code) => {
      const id = '<block'+blocks.length+'>';
      blocks.push('<code class="inline">'+code+'</code>');
      return id;
    })

    // Urls. These are often wrapped in <> angle brackets
    .replace(/(\b|&lt;)((https?:\/\/(?:www\.)?|www\.)([^\s]+\([^\s]+[^!,.:\s]|[^(\s]+[^)!,.:\s]))/ig, (m, pre, url, httpwww, hostandpath) => {
      const id = '<block'+blocks.length+'>';
      if (httpwww === 'www.') {
        url = 'http://' + url;
      }
      if (pre === '&lt;') {
        hostandpath = hostandpath.replace(/&gt;$/g, '');
        url = url.replace(/&gt;$/g, '');
      }
      // The url may contain `"`, which would end the href attribute early and
      // let the text inject further attributes; escape it.
      const href = url.replace(/"/g, '&quot;');
      blocks.push('<a href="'+href+'">'+hostandpath+'</a>');
      return id;
    })

    // Headlines, emphasis and line breaks. Every headline level becomes <h3>.
    .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
    .replace(/(__|\*\*)(.*?)\1/g, '<strong>$2</strong>')
    .replace(/(_|\*)(.*?)\1/g, '<em>$2</em>')
    .replace(/\n/gim, '<br />')

    // Paste the extracted blocks back in again
    .replace(/<block(\d+)>/g, (m, index) => blocks[index]);
  return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
};

Expand Down