From de855c50aa4435fa54510e299e10c1481ef59ea0 Mon Sep 17 00:00:00 2001 From: "John J. Aylward" Date: Sat, 19 Aug 2017 18:21:56 -0400 Subject: [PATCH] Fixes #361. * Removes unescape from the XML class calls * fixes bug with unescape method * moves unescape logic into the XMLTokener class for more consistency --- JSONML.java | 2 +- XML.java | 38 +++++--------------------------------- XMLTokener.java | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/JSONML.java b/JSONML.java index c1d50b351..be16693e2 100644 --- a/JSONML.java +++ b/JSONML.java @@ -174,7 +174,7 @@ private static Object parse( if (!(token instanceof String)) { throw x.syntaxError("Missing value"); } - newjo.accumulate(attribute, keepStrings ? XML.unescape((String)token) :XML.stringToValue((String)token)); + newjo.accumulate(attribute, keepStrings ? ((String)token) :XML.stringToValue((String)token)); token = null; } else { newjo.accumulate(attribute, ""); diff --git a/XML.java b/XML.java index 36f44c80e..faa5b65e1 100644 --- a/XML.java +++ b/XML.java @@ -141,7 +141,7 @@ public static String escape(String string) { if (mustEscape(cp)) { sb.append("&#x"); sb.append(Integer.toHexString(cp)); - sb.append(";"); + sb.append(';'); } else { sb.appendCodePoint(cp); } @@ -191,31 +191,7 @@ public static String unescape(String string) { final int semic = string.indexOf(';', i); if (semic > i) { final String entity = string.substring(i + 1, semic); - if (entity.charAt(0) == '#') { - int cp; - if (entity.charAt(1) == 'x') { - // hex encoded unicode - cp = Integer.parseInt(entity.substring(2), 16); - } else { - // decimal encoded unicode - cp = Integer.parseInt(entity.substring(1)); - } - sb.appendCodePoint(cp); - } else { - if ("quot".equalsIgnoreCase(entity)) { - sb.append('"'); - } else if ("amp".equalsIgnoreCase(entity)) { - sb.append('&'); - } else if ("apos".equalsIgnoreCase(entity)) { - sb.append('\''); - } else if ("lt".equalsIgnoreCase(entity)) { - 
sb.append('<'); - } else if ("gt".equalsIgnoreCase(entity)) { - sb.append('>'); - } else {// unsupported xml entity. leave encoded - sb.append('&').append(entity).append(';'); - } - } + sb.append(XMLTokener.unescapeEntity(entity)); // skip past the entity we just parsed. i += entity.length() + 1; } else { @@ -364,7 +340,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool throw x.syntaxError("Missing value"); } jsonobject.accumulate(string, - keepStrings ? unescape((String)token) : stringToValue((String) token)); + keepStrings ? ((String)token) : stringToValue((String) token)); token = null; } else { jsonobject.accumulate(string, ""); @@ -396,7 +372,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool string = (String) token; if (string.length() > 0) { jsonobject.accumulate("content", - keepStrings ? unescape(string) : stringToValue(string)); + keepStrings ? string : stringToValue(string)); } } else if (token == LT) { @@ -430,11 +406,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool * @return JSON value of this string or the string */ public static Object stringToValue(String string) { - Object ret = JSONObject.stringToValue(string); - if(ret instanceof String){ - return unescape((String)ret); - } - return ret; + return JSONObject.stringToValue(string); } /** diff --git a/XMLTokener.java b/XMLTokener.java index 1c5f2b59d..fb54da389 100644 --- a/XMLTokener.java +++ b/XMLTokener.java @@ -138,8 +138,37 @@ public Object nextEntity(char ampersand) throws JSONException { } } String string = sb.toString(); - Object object = entity.get(string); - return object != null ? 
object : ampersand + string + ";";
+        return unescapeEntity(string);
+    }
+
+    /**
+     * Unescapes an XML entity encoding.
+     * @param e entity (only the actual entity value, not the preceding & or ending ;)
+     * @return the decoded text; unknown or malformed entities are returned still encoded
+     */
+    static String unescapeEntity(String e) {
+        // validate
+        if (e == null || e.isEmpty()) {
+            return "";
+        }
+        // if our entity is an encoded unicode point, parse it.
+        if (e.charAt(0) == '#') {
+            try {
+                // "#x..." is hex encoded unicode, anything else is decimal encoded unicode
+                final boolean hex = e.length() > 1 && e.charAt(1) == 'x';
+                final int cp = hex ? Integer.parseInt(e.substring(2), 16) : Integer.parseInt(e.substring(1));
+                return new String(new int[] {cp},0,1);
+            } catch (IllegalArgumentException malformed) {
+                // "#", "#x", non-digits or an invalid code point: keep it encoded
+                return '&' + e + ';';
+            }
+        }
+        Character knownEntity = entity.get(e);
+        if(knownEntity==null) {
+            // we don't know the entity so keep it encoded
+            return '&' + e + ';';
+        }
+        return knownEntity.toString();
     }