From 42bf58bb137992b876be37f8b2e683c49bc2abed Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 3 Sep 2019 22:04:32 +0200 Subject: [PATCH] rust-demangle.c (unescape): Remove. * rust-demangle.c (unescape): Remove. (parse_lower_hex_nibble): New function. (parse_legacy_escape): New function. (is_prefixed_hash): Use parse_lower_hex_nibble. (looks_like_rust): Use parse_legacy_escape. (rust_demangle_sym): Use parse_legacy_escape. * testsuite/rust-demangle-expected: Add 'llv$u6d$' test. From-SVN: r275353 --- libiberty/ChangeLog | 10 + libiberty/rust-demangle.c | 281 ++++++++++----------- libiberty/testsuite/rust-demangle-expected | 4 + 3 files changed, 148 insertions(+), 147 deletions(-) diff --git a/libiberty/ChangeLog b/libiberty/ChangeLog index cf101e7f8a12f..712c4fe55493d 100644 --- a/libiberty/ChangeLog +++ b/libiberty/ChangeLog @@ -1,3 +1,13 @@ +2019-09-03 Eduard-Mihai Burtescu + + * rust-demangle.c (unescape): Remove. + (parse_lower_hex_nibble): New function. + (parse_legacy_escape): New function. + (is_prefixed_hash): Use parse_lower_hex_nibble. + (looks_like_rust): Use parse_legacy_escape. + (rust_demangle_sym): Use parse_legacy_escape. + * testsuite/rust-demangle-expected: Add 'llv$u6d$' test. 
+ 2019-08-27 Martin Liska PR lto/91478 diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c index 2302db45b6fbc..da591902db11c 100644 --- a/libiberty/rust-demangle.c +++ b/libiberty/rust-demangle.c @@ -50,7 +50,7 @@ extern void *memset(void *s, int c, size_t n); #include "rust-demangle.h" -/* Mangled Rust symbols look like this: +/* Mangled (legacy) Rust symbols look like this: _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a The original symbol is: @@ -74,16 +74,7 @@ extern void *memset(void *s, int c, size_t n); ">" => $GT$ "(" => $LP$ ")" => $RP$ - " " => $u20$ - "\"" => $u22$ - "'" => $u27$ - "+" => $u2b$ - ";" => $u3b$ - "[" => $u5b$ - "]" => $u5d$ - "{" => $u7b$ - "}" => $u7d$ - "~" => $u7e$ + "\u{XY}" => $uXY$ A double ".." means "::" and a single "." means "-". @@ -95,7 +86,8 @@ static const size_t hash_len = 16; static int is_prefixed_hash (const char *start); static int looks_like_rust (const char *sym, size_t len); -static int unescape (const char **in, char **out, const char *seq, char value); +static int parse_lower_hex_nibble (char nibble); +static char parse_legacy_escape (const char **in); /* INPUT: sym: symbol that has been through C++ (gnu v3) demangling @@ -149,7 +141,7 @@ is_prefixed_hash (const char *str) const char *end; char seen[16]; size_t i; - int count; + int count, nibble; if (strncmp (str, hash_prefix, hash_prefix_len)) return 0; @@ -157,12 +149,12 @@ is_prefixed_hash (const char *str) memset (seen, 0, sizeof(seen)); for (end = str + hash_len; str < end; str++) - if (*str >= '0' && *str <= '9') - seen[*str - '0'] = 1; - else if (*str >= 'a' && *str <= 'f') - seen[*str - 'a' + 10] = 1; - else - return 0; + { + nibble = parse_lower_hex_nibble (*str); + if (nibble < 0) + return 0; + seen[nibble] = 1; + } /* Count how many distinct digits seen */ count = 0; @@ -179,57 +171,17 @@ looks_like_rust (const char *str, size_t len) const char *end = str + len; while (str < end) - switch (*str) - { - 
case '$': - if (!strncmp (str, "$C$", 3)) - str += 3; - else if (!strncmp (str, "$SP$", 4) - || !strncmp (str, "$BP$", 4) - || !strncmp (str, "$RF$", 4) - || !strncmp (str, "$LT$", 4) - || !strncmp (str, "$GT$", 4) - || !strncmp (str, "$LP$", 4) - || !strncmp (str, "$RP$", 4)) - str += 4; - else if (!strncmp (str, "$u20$", 5) - || !strncmp (str, "$u22$", 5) - || !strncmp (str, "$u27$", 5) - || !strncmp (str, "$u2b$", 5) - || !strncmp (str, "$u3b$", 5) - || !strncmp (str, "$u5b$", 5) - || !strncmp (str, "$u5d$", 5) - || !strncmp (str, "$u7b$", 5) - || !strncmp (str, "$u7d$", 5) - || !strncmp (str, "$u7e$", 5)) - str += 5; - else - return 0; - break; - case '.': - /* Do not allow three or more consecutive dots */ - if (!strncmp (str, "...", 3)) - return 0; - /* Fall through */ - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case '_': - case ':': - str++; - break; - default: - return 0; - } + { + if (*str == '$') + { + if (!parse_legacy_escape (&str)) + return 0; + } + else if (*str == '.' 
|| *str == '_' || *str == ':' || ISALNUM (*str)) + str++; + else + return 0; + } return 1; } @@ -246,6 +198,7 @@ rust_demangle_sym (char *sym) const char *in; char *out; const char *end; + char unescaped; if (!sym) return; @@ -255,75 +208,49 @@ rust_demangle_sym (char *sym) end = sym + strlen (sym) - (hash_prefix_len + hash_len); while (in < end) - switch (*in) - { - case '$': - if (!(unescape (&in, &out, "$C$", ',') - || unescape (&in, &out, "$SP$", '@') - || unescape (&in, &out, "$BP$", '*') - || unescape (&in, &out, "$RF$", '&') - || unescape (&in, &out, "$LT$", '<') - || unescape (&in, &out, "$GT$", '>') - || unescape (&in, &out, "$LP$", '(') - || unescape (&in, &out, "$RP$", ')') - || unescape (&in, &out, "$u20$", ' ') - || unescape (&in, &out, "$u22$", '\"') - || unescape (&in, &out, "$u27$", '\'') - || unescape (&in, &out, "$u2b$", '+') - || unescape (&in, &out, "$u3b$", ';') - || unescape (&in, &out, "$u5b$", '[') - || unescape (&in, &out, "$u5d$", ']') - || unescape (&in, &out, "$u7b$", '{') - || unescape (&in, &out, "$u7d$", '}') - || unescape (&in, &out, "$u7e$", '~'))) { - /* unexpected escape sequence, not looks_like_rust. */ - goto fail; - } - break; - case '_': - /* If this is the start of a path component and the next - character is an escape sequence, ignore the underscore. The - mangler inserts an underscore to make sure the path - component begins with a XID_Start character. */ - if ((in == sym || in[-1] == ':') && in[1] == '$') - in++; - else - *out++ = *in++; - break; - case '.': - if (in[1] == '.') - { - /* ".." becomes "::" */ - *out++ = ':'; - *out++ = ':'; - in += 2; - } - else - { - /* "." 
becomes "-" */ - *out++ = '-'; - in++; - } - break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case ':': - *out++ = *in++; - break; - default: - /* unexpected character in symbol, not looks_like_rust. */ - goto fail; - } + { + if (*in == '$') + { + unescaped = parse_legacy_escape (&in); + if (unescaped) + *out++ = unescaped; + else + /* unexpected escape sequence, not looks_like_rust. */ + goto fail; + } + else if (*in == '_') + { + /* If this is the start of a path component and the next + character is an escape sequence, ignore the underscore. The + mangler inserts an underscore to make sure the path + component begins with a XID_Start character. */ + if ((in == sym || in[-1] == ':') && in[1] == '$') + in++; + else + *out++ = *in++; + } + else if (*in == '.') + { + if (in[1] == '.') + { + /* ".." becomes "::" */ + *out++ = ':'; + *out++ = ':'; + in += 2; + } + else + { + /* "." becomes "-" */ + *out++ = '-'; + in++; + } + } + else if (*in == ':' || ISALNUM (*in)) + *out++ = *in++; + else + /* unexpected character in symbol, not looks_like_rust. */ + goto fail; + } goto done; fail: @@ -332,18 +259,78 @@ rust_demangle_sym (char *sym) *out = '\0'; } +/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. 
*/ static int -unescape (const char **in, char **out, const char *seq, char value) +parse_lower_hex_nibble (char nibble) { - size_t len = strlen (seq); + if ('0' <= nibble && nibble <= '9') + return nibble - '0'; + if ('a' <= nibble && nibble <= 'f') + return 0xa + (nibble - 'a'); + return -1; +} - if (strncmp (*in, seq, len)) - return 0; +/* Return the unescaped character for a "$...$" escape, or 0 if invalid. */ +static char +parse_legacy_escape (const char **in) +{ + char c = 0; + const char *e; + size_t escape_len = 0; + int lo_nibble = -1, hi_nibble = -1; - **out = value; + if ((*in)[0] != '$') + return 0; - *in += len; - *out += 1; + e = *in + 1; + + if (e[0] == 'C') + { + escape_len = 1; + + c = ','; + } + else + { + escape_len = 2; + + if (e[0] == 'S' && e[1] == 'P') + c = '@'; + else if (e[0] == 'B' && e[1] == 'P') + c = '*'; + else if (e[0] == 'R' && e[1] == 'F') + c = '&'; + else if (e[0] == 'L' && e[1] == 'T') + c = '<'; + else if (e[0] == 'G' && e[1] == 'T') + c = '>'; + else if (e[0] == 'L' && e[1] == 'P') + c = '('; + else if (e[0] == 'R' && e[1] == 'P') + c = ')'; + else if (e[0] == 'u') + { + escape_len = 3; + + hi_nibble = parse_lower_hex_nibble (e[1]); + if (hi_nibble < 0) + return 0; + lo_nibble = parse_lower_hex_nibble (e[2]); + if (lo_nibble < 0) + return 0; + + /* Only allow non-control ASCII characters. 
*/ + if (hi_nibble > 7) + return 0; + c = (hi_nibble << 4) | lo_nibble; + if (c < 0x20) + return 0; + } + } + + if (!c || e[escape_len] != '$') + return 0; - return 1; + *in += 2 + escape_len; + return c; } diff --git a/libiberty/testsuite/rust-demangle-expected b/libiberty/testsuite/rust-demangle-expected index 0b4288fc37d70..c3b03f9f02d01 100644 --- a/libiberty/testsuite/rust-demangle-expected +++ b/libiberty/testsuite/rust-demangle-expected @@ -159,3 +159,7 @@ _ZN68_$LT$core..nonzero..NonZero$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref1 --format=rust _ZN63_$LT$core..ptr..Unique$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref17h19f2ad4920655e85E <core::ptr::Unique<T> as core::ops::Deref>::deref +# +--format=rust +_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE +issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo