Skip to content

Commit

Permalink
Fixes issue #130
Browse files: browse the repository at this point in the history
Adds support for OSC hyperlink sequences.
  • Loading branch information
hakonhagland committed Sep 1, 2021
1 parent 93beaaf commit 596f42c
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 27 deletions.
101 changes: 74 additions & 27 deletions ansi2html/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
\\usepackage{fancyvrb}
\\usepackage[usenames,dvipsnames]{xcolor}
%% \\definecolor{red-sd}{HTML}{7ed2d2}
%(hyperref)s
\\title{%(title)s}
\\fvset{commandchars=\\\\\\{\\}}
Expand Down Expand Up @@ -116,6 +116,7 @@

class _State:
def __init__(self):
self.inside_span = False
self.reset()

def reset(self):
Expand Down Expand Up @@ -220,15 +221,10 @@ def append_color_unless_default(
return css_classes


# Pattern for bare URLs: http(s)/ftp(s)/gopher/telnet/nntp "scheme://" URLs
# and "mailto:"/"news:" URIs.  Group 1 spans the entire match.  Compiled once
# at import time instead of on every linkify() call.
_URL_MATCHER = re.compile(
    r"(((((https?|ftps?|gopher|telnet|nntp)://)|"
    r"(mailto:|news:))(%[0-9A-Fa-f]{2}|[-()_.!~*"
    r"\';/?:@&=+$,A-Za-z0-9])+)([).!\';/?:,][\s])?)"
)


def linkify(line, latex_mode):
    """Wrap every URL found in *line* in link markup.

    :param line: text to scan for URLs.
    :param latex_mode: when true, emit ``\\url{...}``; otherwise emit an
        HTML ``<a href="...">...</a>`` anchor.
    :returns: *line* with each matched URL replaced by its markup; text that
        contains no URL is returned unchanged.
    """
    if latex_mode:
        return _URL_MATCHER.sub(r"\\url{\1}", line)
    return _URL_MATCHER.sub(r'<a href="\1">\1</a>', line)
class OSC_Link:
    """A hyperlink extracted from an OSC hyperlink escape sequence.

    Holds the link target and its visible text so the link can be rendered
    to markup at a later stage.
    """

    def __init__(self, url, text):
        """Store the link target *url* and the displayed *text*."""
        self.url = url
        self.text = text

    def __repr__(self):
        return "OSC_Link(url=%r, text=%r)" % (self.url, self.text)


def map_vt100_box_code(char):
Expand Down Expand Up @@ -282,6 +278,7 @@ def __init__(
self.scheme = scheme
self.title = title
self._attrs = None
self.hyperref = False

if inline:
self.styles = dict(
Expand All @@ -293,26 +290,55 @@ def __init__(

self.vt100_box_codes_prog = re.compile("\033\\(([B0])")
self.ansi_codes_prog = re.compile("\033\\[" "([\\d;]*)" "([a-zA-z])")
self.url_matcher = re.compile(
r"(((((https?|ftps?|gopher|telnet|nntp)://)|"
r"(mailto:|news:))(%[0-9A-Fa-f]{2}|[-()_.!~*"
r"\';/?:@&=+$,A-Za-z0-9])+)([).!\';/?:,][\s])?)"
)
self.osc_link_re = re.compile("\033\\]8;;(.*?)\007(.*?)\033\\]8;;\007")

def do_linkify(self, line):
    """Wrap every URL matched by ``self.url_matcher`` in link markup.

    :param line: a text fragment, or a non-string object.
    :returns: for a string, the text with each match replaced by
        ``\\url{...}`` when ``self.latex`` is true, or by an HTML anchor
        otherwise; a non-string object is returned untouched.
    """
    if not isinstance(line, str):
        # Objects such as OSC_Link are expanded to a string later, so they
        # must pass through here unchanged.
        return line
    template = r"\\url{\1}" if self.latex else r'<a href="\1">\1</a>'
    return self.url_matcher.sub(template, line)

def handle_osc_links(self, part):
    """Render a hyperlink object as LaTeX or HTML markup.

    :param part: object exposing ``url`` and ``text`` attributes (an
        ``OSC_Link``).
    :returns: ``\\href{url}{text}`` when ``self.latex`` is true, otherwise
        ``<a href="url">text</a>``.

    Side effect: in LaTeX mode ``self.hyperref`` is set to True.
    """
    if self.latex:
        # Remember that \href was emitted; the flag is stored on the
        # converter so other code can react to it.
        self.hyperref = True
        return """\\href{%s}{%s}""" % (part.url, part.text)
    # The URL lands inside a double-quoted attribute, so a literal '"' would
    # terminate the attribute early and let the rest of the URL inject
    # arbitrary attributes.  Only the quote is escaped here.
    # NOTE(review): '&', '<' and '>' are presumably escaped before this
    # point - confirm against the caller before adding further escaping.
    url = part.url.replace('"', "&quot;")
    return """<a href="%s">%s</a>""" % (url, part.text)

def apply_regex(self, ansi):
    """Convert *ansi* to markup and report which styles were used.

    The parts produced by ``self._apply_regex`` are passed through
    ``self._collapse_cursor``; each resulting part is then rendered:

    * strings are linkified via ``self.do_linkify`` when ``self.linkify``
      is set, otherwise kept as they are;
    * ``OSC_Link`` objects are expanded via ``self.handle_osc_links``;
    * anything else is passed through unchanged.

    :returns: ``(combined, styles_used)`` where *combined* is the joined
        output and *styles_used* is the set filled in by
        ``self._apply_regex``.
    """
    styles_used = set()
    parts = self._apply_regex(ansi, styles_used)
    parts = self._collapse_cursor(parts)

    # Links are handled in exactly one pass.  Running the old module-level
    # linkify() over every part first would hand non-string OSC_Link
    # objects to re.sub and would linkify plain strings twice.
    rendered = []
    for part in parts:
        if isinstance(part, str):
            rendered.append(self.do_linkify(part) if self.linkify else part)
        elif isinstance(part, OSC_Link):
            rendered.append(self.handle_osc_links(part))
        else:
            rendered.append(part)
    combined = "".join(rendered)

    if self.markup_lines and not self.latex:
        # Wrap each output line in a span carrying its zero-based index.
        combined = "\n".join(
            """<span id="line-%i">%s</span>""" % (i, line)
            for i, line in enumerate(combined.split("\n"))
        )

    return combined, styles_used

def _apply_regex(self, ansi, styles_used):
Expand Down Expand Up @@ -347,9 +373,36 @@ def _vt100_box_drawing():
yield ansi[last_end:]

ansi = "".join(_vt100_box_drawing())
def _osc_link(ansi):
last_end = 0
for match in self.osc_link_re.finditer(ansi):
trailer = ansi[last_end : match.start()]
yield trailer
url = match.groups()[0]
text = match.groups()[1]
yield OSC_Link(url, text)
last_end = match.end()
yield ansi[last_end:]

state = _State()
inside_span = False
for part in _osc_link(ansi):
if isinstance(part, OSC_Link):
yield part
else:
if ((sys.version_info.major == 3 and sys.version_info.minor >= 3)
or sys.version_info.major > 3):
# yield from requires python >= 3.3
yield from self._handle_ansi_code(part, styles_used, state)
else:
for sub_part in self._handle_ansi_code(part, styles_used, state):
yield sub_part
if state.inside_span:
if self.latex:
yield "}"
else:
yield "</span>"

def _handle_ansi_code(self, ansi, styles_used, state):
last_end = 0 # the index of the last end of a code we've seen
for match in self.ansi_codes_prog.finditer(ansi):
yield ansi[last_end : match.start()]
Expand Down Expand Up @@ -385,8 +438,8 @@ def _vt100_box_drawing():
# Process reset marker, drop everything before
if last_null_index is not None:
params = params[last_null_index + 1 :]
if inside_span:
inside_span = False
if state.inside_span:
state.inside_span = False
if self.latex:
yield "}"
else:
Expand All @@ -412,12 +465,12 @@ def _vt100_box_drawing():
parameter = None
state.adjust(v, parameter=parameter)

if inside_span:
if state.inside_span:
if self.latex:
yield "}"
else:
yield "</span>"
inside_span = False
state.inside_span = False

css_classes = state.to_css_classes()
if not css_classes:
Expand All @@ -444,15 +497,8 @@ def _vt100_box_drawing():
yield "\\textcolor{%s}{" % " ".join(css_classes)
else:
yield '<span class="%s">' % " ".join(css_classes)
inside_span = True

state.inside_span = True
yield ansi[last_end:]
if inside_span:
if self.latex:
yield "}"
else:
yield "</span>"
inside_span = False

def _collapse_cursor(self, parts):
"""Act on any CursorMoveUp commands by deleting preceding tokens"""
Expand Down Expand Up @@ -523,6 +569,7 @@ def convert(self, ansi, full=True, ensure_trailing_newline=False):
"font_size": self.font_size,
"content": attrs["body"],
"output_encoding": self.output_encoding,
"hyperref" : "\\usepackage{hyperref}" if self.hyperref else ""
}

def produce_headers(self):
Expand Down
12 changes: 12 additions & 0 deletions tests/test_ansi2html.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,18 @@ def test_not_linkify(self):
html = Ansi2HTMLConverter().convert(ansi)
assert target not in html

def test_osc_link(self):
    """An OSC hyperlink inside a coloured span is rendered as an HTML anchor."""
    ansi = "[\x1b[01;35m\x1b[K\x1b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits\x07-Wtype-limits\x1b]8;;\x07\x1b[m\x1b[K]\n"
    target = '[<span class="ansi1 ansi35"><a href="https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits">-Wtype-limits</a></span>]'
    html = Ansi2HTMLConverter().convert(ansi)
    assert target in html

def test_osc_link_latex(self):
    """An OSC hyperlink inside a coloured span is rendered as a LaTeX \\href."""
    ansi = "[\x1b[01;35m\x1b[K\x1b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits\x07-Wtype-limits\x1b]8;;\x07\x1b[m\x1b[K]\n"
    target = '[\\textcolor{ansi1 ansi35}{\\href{https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits}{-Wtype-limits}}]'
    # The converter runs in LaTeX mode, so the result is LaTeX rather than HTML.
    latex = Ansi2HTMLConverter(latex=True).convert(ansi)
    assert target in latex

def test_conversion(self):
for input_filename, expected_output_filename in (
("ansicolor.txt", "ansicolor.html"),
Expand Down

0 comments on commit 596f42c

Please sign in to comment.