Skip to content

Commit

Permalink
Merge pull request #27 from ivg/robust-comment-parser
Browse files Browse the repository at this point in the history
simplifies quotation in the comment parser
  • Loading branch information
ivg authored Feb 2, 2017
2 parents 07ad00c + 17e6c67 commit f9a2255
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 17 deletions.
12 changes: 8 additions & 4 deletions plugins/bap/plugins/bap_comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,14 @@ def run(self, arg):
for addr in ida.addresses():
comm = idaapi.get_cmt(addr, 0)
if comm:
parsed = bap_comment.parse(comm)
if parsed:
for (name, data) in parsed.items():
comms[(addr, name)] = data
try:
parsed = bap_comment.parse(comm)
if parsed:
for (name, data) in parsed.items():
comms[(addr, name)] = data
except:
idc.Message("BAP> failed to parse string {0}\n{1}".
format(comm, str(sys.exc_info()[1])))
comms = [(name, addr, data)
for ((addr, name), data) in comms.items()]
attrs = Attributes(comms)
Expand Down
19 changes: 8 additions & 11 deletions plugins/bap/utils/bap_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Basically, the comment string includes an arbitrary amount of
key=value pairs. If a value contains whitespace, punctuation or any
non-word character, then it should be delimited with double quotes. If
a value contains quote character, then it should be escaped with the
a value contains a quote character, then it should be escaped with the
backslash character (the backslash character can escape
itself). Properties that don't have values (or basically have a
property of a unit type, so-called boolean properties) are represented
Expand Down Expand Up @@ -96,15 +96,17 @@
WORDCHARS = ''.join(['-:', string.ascii_letters, string.digits])


def parse(comment):
def parse(comment, debug=0):
""" Parse comment string.
Returns a dictionary that maps properties to their values.
Raises SyntaxError if the comment is syntactically incorrect.
Returns None if comment doesn't start with the `BAP:` prefix.
"""
lexer = shlex(comment)
lexer = shlex(comment, posix=True)
lexer.wordchars = WORDCHARS
lexer.debug = debug
lexer.quotes = '"'
result = {}
key = ''
values = []
Expand Down Expand Up @@ -193,14 +195,9 @@ def quote(token):
>>> quote('hello, world')
'"hello, world"'
"""
if set(token) - set(WORDCHARS):
if "'" not in token:
return "'{}'".format(token)
elif '"' not in token:
return '"{}"'.format(token)
else: # we ran out of quotes, so we need
return "'{}'".format(''.join('\\'+c if c == "'" else c
for c in token))
if not token.startswith('"') and set(token) - set(WORDCHARS):
return '"{}"'.format(''.join('\\'+c if c == '"' else c
for c in token))
else:
return token

Expand Down
9 changes: 7 additions & 2 deletions tests/test_bap_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_dumps():
assert 'BAP:' in dumps({'hello': []})
assert dumps({'hello': ['cruel', 'world'], 'nice': [], 'thing': []}) == \
'BAP: nice,thing hello=cruel,world'
assert dumps({'hello': ["world\'"]}) == 'BAP: hello="world\'"'
assert dumps({'hello': ["world'"]}) == 'BAP: hello="world\'"'


def test_is_valid():
Expand All @@ -39,6 +39,11 @@ def test_roundup():


def test_quotation():
data = 'BAP: chars=\'{"a", "b", "c"}\''
data = 'BAP: chars="{\\\"a\\\", \\\"b\\\", \\\"c\\\"}"'
assert parse(data) == {'chars': ['{"a", "b", "c"}']}
assert parse(data) == parse(dumps(parse(data)))


def test_single_quote():
data = 'BAP: key="{can\\\'t do}"'
assert parse(data) == {'key': ["{can\\'t do}"]}

0 comments on commit f9a2255

Please sign in to comment.