Skip to content

Commit

Permalink
gguf_dump.py: fix markdown kv array print (#8588)
Browse files Browse the repository at this point in the history
* gguf_dump.py: fix markdown kv array print

* Update gguf-py/scripts/gguf_dump.py

Co-authored-by: compilade <git@compilade.net>

* gguf_dump.py: refactor kv array string handling

* gguf_dump.py: escape backticks inside of strings

* gguf_dump.py: inline code markdown escape handler added

>>> escape_markdown_inline_code("hello world")
'`hello world`'
>>> escape_markdown_inline_code("hello ` world")
'``hello ` world``'

* gguf_dump.py: handle edge case about backticks on start or end of a string

---------

Co-authored-by: compilade <git@compilade.net>
  • Loading branch information
mofosyne and compilade authored Jul 20, 2024
1 parent 87e397d commit c3776ca
Showing 1 changed file with 38 additions and 5 deletions.
43 changes: 38 additions & 5 deletions gguf-py/scripts/gguf_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import argparse
import os
import re
import sys
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -244,26 +245,58 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
else:
pretty_type = str(field.types[-1].name)

def escape_markdown_inline_code(value_string: str) -> str:
    """Wrap *value_string* in a Markdown inline code span.

    The delimiter is one backtick longer than the longest run of backticks
    found inside the string, so no inner run can close the span early.

    The content is padded with one space on each side when it starts or
    ends with a backtick, or when it both starts and ends with a space
    without being made up of spaces only.

    >>> escape_markdown_inline_code("hello world")
    '`hello world`'
    >>> escape_markdown_inline_code("hello ` world")
    '``hello ` world``'
    >>> escape_markdown_inline_code(" a ")
    '`  a  `'
    """
    # Longest contiguous run of backticks inside the value (0 when there is none).
    longest_run = max((len(run) for run in re.findall(r'`+', value_string)), default=0)
    inline_code_marker = '`' * (longest_run + 1)

    # A backtick on either edge would merge with the delimiter, so it has to
    # be separated from it by a space.
    touches_backtick = value_string.startswith('`') or value_string.endswith('`')

    # Markdown renderers following CommonMark strip one space from each side
    # of a code span whose content both starts and ends with a space (unless
    # it is all spaces). Padding compensates so the original spaces survive.
    space_wrapped = (
        value_string.startswith(' ')
        and value_string.endswith(' ')
        and value_string.strip(' ') != ''
    )

    if touches_backtick or space_wrapped:
        value_string = f" {value_string} "

    return f"{inline_code_marker}{value_string}{inline_code_marker}"

total_elements = len(field.data)
value = ""
if len(field.types) == 1:
curr_type = field.types[0]
if curr_type == GGUFValueType.STRING:
value = repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60])
truncate_length = 60
value_string = str(bytes(field.parts[-1]), encoding='utf-8')
if len(value_string) > truncate_length:
head = escape_markdown_inline_code(value_string[:truncate_length // 2])
tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
value = "{head}...{tail}".format(head=head, tail=tail)
else:
value = escape_markdown_inline_code(value_string)
elif curr_type in reader.gguf_scalar_to_np:
value = str(field.parts[-1][0])
else:
if field.types[0] == GGUFValueType.ARRAY:
curr_type = field.types[1]
array_elements = []

if curr_type == GGUFValueType.STRING:
render_element = min(5, total_elements)
for element_pos in range(render_element):
value += repr(str(bytes(field.parts[-1 - element_pos]), encoding='utf-8')[:5]) + (", " if total_elements > 1 else "")
truncate_length = 30
value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
if len(value_string) > truncate_length:
head = escape_markdown_inline_code(value_string[:truncate_length // 2])
tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
value = "{head}...{tail}".format(head=head, tail=tail)
else:
value = escape_markdown_inline_code(value_string)
array_elements.append(value)

elif curr_type in reader.gguf_scalar_to_np:
render_element = min(7, total_elements)
for element_pos in range(render_element):
value += str(field.parts[-1 - element_pos][0]) + (", " if total_elements > 1 else "")
value = f'[ {value}{" ..." if total_elements > 1 else ""} ]'
array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))

value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'

kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})

kv_dump_table_header_map = [
Expand Down Expand Up @@ -382,7 +415,7 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
markdown_content += "\n\n"

print(markdown_content) # noqa: NP100
print(markdown_content) # noqa: NP100


def main() -> None:
Expand Down

0 comments on commit c3776ca

Please sign in to comment.