Skip to content

Commit

Permalink
Update json_extract to produce canonicalized output
Browse files Browse the repository at this point in the history
  • Loading branch information
duxiao1212 committed Feb 24, 2025
1 parent abb3399 commit 01e3b68
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,24 @@
*/
package com.facebook.presto.operator.scalar;

import com.facebook.airlift.json.JsonObjectMapperProvider;
import com.facebook.presto.spi.PrestoException;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.io.SerializedString;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UncheckedIOException;

import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
import static com.facebook.presto.util.JsonUtil.createJsonGenerator;
import static com.facebook.presto.util.JsonUtil.createJsonParser;
import static com.fasterxml.jackson.core.JsonFactory.Feature.CANONICALIZE_FIELD_NAMES;
import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
Expand All @@ -38,6 +39,7 @@
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.VALUE_NULL;
import static com.fasterxml.jackson.databind.SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS;
import static io.airlift.slice.Slices.utf8Slice;
import static java.util.Objects.requireNonNull;

Expand Down Expand Up @@ -121,6 +123,8 @@ public final class JsonExtract
// Jackson factory shared by this class, created with CANONICALIZE_FIELD_NAMES disabled.
private static final JsonFactory JSON_FACTORY = new JsonFactory()
.disable(CANONICALIZE_FIELD_NAMES);

// Mapper obtained from JsonObjectMapperProvider with ORDER_MAP_ENTRIES_BY_KEYS enabled.
// It is used to read an extracted value as a generic Object and write it back out,
// so that map entries are serialized ordered by key.
// NOTE(review): round-tripping through Object.class presumably also normalizes number
// formatting and collapses duplicate keys — confirm this is intended.
private static final ObjectMapper SORTED_MAPPER = new JsonObjectMapperProvider().get().configure(ORDER_MAP_ENTRIES_BY_KEYS, true);

// Private constructor: prevents instantiation from outside this class.
private JsonExtract() {}

public static <T> T extract(Slice jsonInput, JsonExtractor<T> jsonExtractor)
Expand Down Expand Up @@ -302,11 +306,12 @@ public Slice extract(JsonParser jsonParser)
if (!jsonParser.hasCurrentToken()) {
throw new JsonParseException(jsonParser, "Unexpected end of value");
}

DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE);
try (JsonGenerator jsonGenerator = createJsonGenerator(JSON_FACTORY, dynamicSliceOutput)) {
jsonGenerator.copyCurrentStructure(jsonParser);
}
// Write the JSON to output stream with sorted keys
SORTED_MAPPER.writeValue((OutputStream) dynamicSliceOutput, SORTED_MAPPER.readValue(jsonParser, Object.class));
// nextToken() returns null if the input is parsed correctly,
// but will throw an exception if there are trailing characters.
jsonParser.nextToken();
return dynamicSliceOutput.slice();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,47 @@ public void testJsonExtract()
assertInvalidFunction(format("JSON_EXTRACT('%s', '%s')", json, "$...invalid"), "Invalid JSON path: '$...invalid'");
}

@Test
public void testExtractJsonWithCanonicalOutput()
{
    // Flat object extracted at the root: the expected output lists the keys in sorted order
    // and without the whitespace present in the input.
    assertFunction(
            format("JSON_EXTRACT('%s', '%s')", "{\"key_2\": 2, \"key_3\": 3, \"key_1\": 1}", "$"),
            JSON,
            "{\"key_1\":1,\"key_2\":2,\"key_3\":3}");

    // Nested object selected by path: the keys of the extracted sub-object are sorted as well.
    assertFunction(
            format(
                    "JSON_EXTRACT('%s', '%s')",
                    "{\"key_1\": {\"nested_key_2\": \"value_2\", \"nested_key_1\": \"value_1\"}, \"key_2\": 2}",
                    "$.key_1"),
            JSON,
            "{\"nested_key_1\":\"value_1\",\"nested_key_2\":\"value_2\"}");

    // Object selected out of an array by index: same canonical key ordering is expected.
    assertFunction(
            format(
                    "JSON_EXTRACT('%s', '%s')",
                    "[{\"key_b\":\"v_b\",\"key_a\":\"v_a\"}, {\"key_2\": \"value_2\"}]",
                    "$[0]"),
            JSON,
            "{\"key_a\":\"v_a\",\"key_b\":\"v_b\"}");
}

@Test
public void testParseNullIfJsonInvalid()
{
    // Each case feeds malformed JSON to JSON_EXTRACT and expects a null result.

    // A stray quote inside a string value leaves the quotes unbalanced.
    assertFunction(
            format("JSON_EXTRACT('%s', '%s')", "{ \"key_2\": 2, \"key_1\": \"z\"a1\" }", "$.key_1"),
            JSON,
            null);

    // The colon between a field name and its value is missing.
    assertFunction(
            format("JSON_EXTRACT('%s', '%s')", "{ \"key_2\" 2, \"key_1\": \"value_1\" }", "$.key_2"),
            JSON,
            null);

    // A trailing comma follows the last field.
    assertFunction(
            format("JSON_EXTRACT('%s', '%s')", "{ \"key_2\": 2, \"key_1\": \"value_1\", }", "$.key_1"),
            JSON,
            null);
}

@Test
public void testJsonSize()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,8 @@ public void testJsonToRow()

// invalid cast
assertInvalidCast("CAST(json_extract('{\"1\":[{\"key1\": \"John\", \"KEY1\":\"Johnny\"}]}', '$') AS MAP<bigint, ARRAY<ROW(key1 VARCHAR)>>)",
"Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: KEY1\n" +
"{\"1\":[{\"key1\":\"John\",\"KEY1\":\"Johnny\"}]}");
"Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: key1\n" +
"{\"1\":[{\"KEY1\":\"Johnny\",\"key1\":\"John\"}]}");
assertInvalidCast("CAST(unchecked_to_json('{\"a\":1,\"b\":2,\"a\":3}') AS ROW(a BIGINT, b BIGINT))", "Cannot cast to row(a bigint,b bigint). Duplicate field: a\n{\"a\":1,\"b\":2,\"a\":3}");
assertInvalidCast("CAST(unchecked_to_json('[{\"a\":1,\"b\":2,\"a\":3}]') AS ARRAY<ROW(a BIGINT, b BIGINT)>)", "Cannot cast to array(row(a bigint,b bigint)). Duplicate field: a\n[{\"a\":1,\"b\":2,\"a\":3}]");
}
Expand Down

0 comments on commit 01e3b68

Please sign in to comment.