Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes XML Unescaping #362

Merged
merged 1 commit into from
Aug 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion JSONML.java
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ private static Object parse(
if (!(token instanceof String)) {
throw x.syntaxError("Missing value");
}
newjo.accumulate(attribute, keepStrings ? XML.unescape((String)token) :XML.stringToValue((String)token));
newjo.accumulate(attribute, keepStrings ? ((String)token) :XML.stringToValue((String)token));
token = null;
} else {
newjo.accumulate(attribute, "");
Expand Down
38 changes: 5 additions & 33 deletions XML.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public static String escape(String string) {
if (mustEscape(cp)) {
sb.append("&#x");
sb.append(Integer.toHexString(cp));
sb.append(";");
sb.append(';');
} else {
sb.appendCodePoint(cp);
}
Expand Down Expand Up @@ -191,31 +191,7 @@ public static String unescape(String string) {
final int semic = string.indexOf(';', i);
if (semic > i) {
final String entity = string.substring(i + 1, semic);
if (entity.charAt(0) == '#') {
int cp;
if (entity.charAt(1) == 'x') {
// hex encoded unicode
cp = Integer.parseInt(entity.substring(2), 16);
} else {
// decimal encoded unicode
cp = Integer.parseInt(entity.substring(1));
}
sb.appendCodePoint(cp);
} else {
if ("quot".equalsIgnoreCase(entity)) {
sb.append('"');
} else if ("amp".equalsIgnoreCase(entity)) {
sb.append('&');
} else if ("apos".equalsIgnoreCase(entity)) {
sb.append('\'');
} else if ("lt".equalsIgnoreCase(entity)) {
sb.append('<');
} else if ("gt".equalsIgnoreCase(entity)) {
sb.append('>');
} else {// unsupported xml entity. leave encoded
sb.append('&').append(entity).append(';');
}
}
sb.append(XMLTokener.unescapeEntity(entity));
// skip past the entity we just parsed.
i += entity.length() + 1;
} else {
Expand Down Expand Up @@ -364,7 +340,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
throw x.syntaxError("Missing value");
}
jsonobject.accumulate(string,
keepStrings ? unescape((String)token) : stringToValue((String) token));
keepStrings ? ((String)token) : stringToValue((String) token));
token = null;
} else {
jsonobject.accumulate(string, "");
Expand Down Expand Up @@ -396,7 +372,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
string = (String) token;
if (string.length() > 0) {
jsonobject.accumulate("content",
keepStrings ? unescape(string) : stringToValue(string));
keepStrings ? string : stringToValue(string));
}

} else if (token == LT) {
Expand Down Expand Up @@ -430,11 +406,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
* @return JSON value of this string or the string
*/
public static Object stringToValue(String string) {
Object ret = JSONObject.stringToValue(string);
if(ret instanceof String){
return unescape((String)ret);
}
return ret;
return JSONObject.stringToValue(string);
}

/**
Expand Down
33 changes: 31 additions & 2 deletions XMLTokener.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,37 @@ public Object nextEntity(char ampersand) throws JSONException {
}
}
String string = sb.toString();
Object object = entity.get(string);
return object != null ? object : ampersand + string + ";";
return unescapeEntity(string);
}

/**
 * Unescapes a single XML entity reference.
 *
 * <p>Numeric character references are decoded to the code point they name:
 * {@code #NNN} is read as decimal and {@code #xHHH} as hexadecimal. Any other
 * value is looked up in the {@code entity} table of known named entities.
 * A reference that cannot be decoded (unknown name, missing or malformed
 * digits, or a value that is not a valid Unicode code point) is returned
 * re-encoded as {@code &amp;e;} so that no input text is lost and malformed
 * input does not raise an unchecked exception.
 *
 * @param e the entity value only, without the preceding {@code &amp;} or the
 *        trailing {@code ;}
 * @return the decoded text; the empty string if {@code e} is {@code null} or
 *         empty; or {@code '&' + e + ';'} if the entity cannot be decoded
 */
static String unescapeEntity(String e) {
    // validate
    if (e == null || e.isEmpty()) {
        return "";
    }
    // if our entity is an encoded unicode point, parse it.
    if (e.charAt(0) == '#') {
        // Guard the length first: a bare "#" has no second character to inspect.
        final boolean hex = e.length() > 1 && e.charAt(1) == 'x';
        try {
            final int cp = hex
                    ? Integer.parseInt(e.substring(2), 16) // hex encoded unicode
                    : Integer.parseInt(e.substring(1));    // decimal encoded unicode
            // new String(int[], int, int) rejects invalid code points, so check first.
            if (Character.isValidCodePoint(cp)) {
                return new String(new int[] {cp}, 0, 1);
            }
        } catch (NumberFormatException ignored) {
            // Missing or non-numeric digits: treated like an unknown entity below.
        }
        // not a usable character reference, so keep it encoded
        return '&' + e + ';';
    }
    Character knownEntity = entity.get(e);
    if (knownEntity == null) {
        // we don't know the entity so keep it encoded
        return '&' + e + ';';
    }
    return knownEntity.toString();
}


Expand Down