Skip to content

Commit

Permalink
simplifies quotation in the comment parser
Browse files Browse the repository at this point in the history
This commit makes the comment parser more robust. Instead of trying to
support both kinds of quotes (single and double), we now use double
quotes as the only way to delimit a token that contains non-word
characters. A single quote is no longer considered a special symbol and
should not be escaped.

For future pioneers of comment parsing, I added a debug option that
is passed to the underlying shlex lexer (where all the problems
usually happen). If `debug=3`, it will print every single state.

In case even this more robust version fails, I've added a final line
of defense: if a comment cannot be parsed, the BAP comments View will
not fail, but will just ignore the failed comment (of course, a
diagnostic message will be printed into the message box).
  • Loading branch information
ivg committed Feb 2, 2017
1 parent 07ad00c commit 17e6c67
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 17 deletions.
12 changes: 8 additions & 4 deletions plugins/bap/plugins/bap_comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,14 @@ def run(self, arg):
for addr in ida.addresses():
comm = idaapi.get_cmt(addr, 0)
if comm:
parsed = bap_comment.parse(comm)
if parsed:
for (name, data) in parsed.items():
comms[(addr, name)] = data
try:
parsed = bap_comment.parse(comm)
if parsed:
for (name, data) in parsed.items():
comms[(addr, name)] = data
except:
idc.Message("BAP> failed to parse string {0}\n{1}".
format(comm, str(sys.exc_info()[1])))
comms = [(name, addr, data)
for ((addr, name), data) in comms.items()]
attrs = Attributes(comms)
Expand Down
19 changes: 8 additions & 11 deletions plugins/bap/utils/bap_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Basically, the comment string includes an arbitrary amount of
key=value pairs. If a value contains whitespaces, punctuation or any
non-word character, then it should be delimited with double quotes. If
a value contains quote character, then it should be escaped with the
a value contains a quote character, then it should be escaped with the
backslash character (the backslash character can escape
itself). Properties that doesn't have values (or basically has a
property of a unit type, so called boolean properties) are represented
Expand Down Expand Up @@ -96,15 +96,17 @@
WORDCHARS = ''.join(['-:', string.ascii_letters, string.digits])


def parse(comment):
def parse(comment, debug=0):
""" Parse comment string.
Returns a dictionary that maps properties to their values.
Raises SyntaxError if the comment is syntactically incorrect.
Returns None if comment doesn't start with the `BAP:` prefix.
"""
lexer = shlex(comment)
lexer = shlex(comment, posix=True)
lexer.wordchars = WORDCHARS
lexer.debug = debug
lexer.quotes = '"'
result = {}
key = ''
values = []
Expand Down Expand Up @@ -193,14 +195,9 @@ def quote(token):
>>> quote('hello, world')
'"hello, world"'
"""
if set(token) - set(WORDCHARS):
if "'" not in token:
return "'{}'".format(token)
elif '"' not in token:
return '"{}"'.format(token)
else: # we ran out of quotes, so we need
return "'{}'".format(''.join('\\'+c if c == "'" else c
for c in token))
if not token.startswith('"') and set(token) - set(WORDCHARS):
return '"{}"'.format(''.join('\\'+c if c == '"' else c
for c in token))
else:
return token

Expand Down
9 changes: 7 additions & 2 deletions tests/test_bap_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_dumps():
assert 'BAP:' in dumps({'hello': []})
assert dumps({'hello': ['cruel', 'world'], 'nice': [], 'thing': []}) == \
'BAP: nice,thing hello=cruel,world'
assert dumps({'hello': ["world\'"]}) == 'BAP: hello="world\'"'
assert dumps({'hello': ["world'"]}) == 'BAP: hello="world\'"'


def test_is_valid():
Expand All @@ -39,6 +39,11 @@ def test_roundup():


def test_quotation():
data = 'BAP: chars=\'{"a", "b", "c"}\''
data = 'BAP: chars="{\\\"a\\\", \\\"b\\\", \\\"c\\\"}"'
assert parse(data) == {'chars': ['{"a", "b", "c"}']}
assert parse(data) == parse(dumps(parse(data)))


def test_single_quote():
data = 'BAP: key="{can\\\'t do}"'
assert parse(data) == {'key': ["{can\\'t do}"]}

0 comments on commit 17e6c67

Please sign in to comment.