diff --git a/pom.xml b/pom.xml index 954f4d897e311..2f3b006a38853 100644 --- a/pom.xml +++ b/pom.xml @@ -234,28 +234,28 @@ com.fasterxml.jackson.core jackson-core - 2.4.2 + 2.4.4 compile com.fasterxml.jackson.dataformat jackson-dataformat-smile - 2.4.2 + 2.4.4 compile com.fasterxml.jackson.dataformat jackson-dataformat-yaml - 2.4.2 + 2.4.4 compile com.fasterxml.jackson.dataformat jackson-dataformat-cbor - 2.4.2 + 2.4.4 compile diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java b/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java index ca3e2f7bcd25b..2eb1b16c6bbd9 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java +++ b/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java @@ -208,14 +208,16 @@ public static XContentType xContentType(byte[] data) { * Guesses the content type based on the provided input stream. */ public static XContentType xContentType(InputStream si) throws IOException { - int first = si.read(); - if (first == -1) { + int iFirst = si.read(); + if (iFirst == -1) { return null; } - int second = si.read(); - if (second == -1) { + byte first = (byte) iFirst; + int iSecond = si.read(); + if (iSecond == -1) { return null; } + byte second = (byte) iSecond; if (first == SmileConstants.HEADER_BYTE_1 && second == SmileConstants.HEADER_BYTE_2) { int third = si.read(); if (third == SmileConstants.HEADER_BYTE_3) { @@ -231,9 +233,27 @@ public static XContentType xContentType(InputStream si) throws IOException { return XContentType.YAML; } } - if (first == (CBORConstants.BYTE_OBJECT_INDEFINITE & 0xff)){ + // CBOR logic similar to CBORFactory#hasCBORFormat + if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){ + return XContentType.CBOR; + } + if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first)) { + // Actually, specific "self-describe tag" is a very good indicator. NOTE(review): the third byte read here is consumed from the stream and is not visible to the header scan loop below (which starts at i = 2) - confirm this is intended + int iThird = si.read(); + if (iThird == -1) { + return null; + } + byte third = (byte) 
iThird; + if (first == (byte) 0xD9 && second == (byte) 0xD9 && third == (byte) 0xF7) { + return XContentType.CBOR; + } + } + // for small objects, some encoders just encode as major type object, we can safely + // say it's CBOR since it doesn't contradict SMILE or JSON, and it's a last resort + if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, first)) { return XContentType.CBOR; } + for (int i = 2; i < GUESS_HEADER_LENGTH; i++) { int val = si.read(); if (val == -1) { @@ -279,9 +299,23 @@ public static XContentType xContentType(BytesReference bytes) { if (length > 2 && first == '-' && bytes.get(1) == '-' && bytes.get(2) == '-') { return XContentType.YAML; } - if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){ + // CBOR logic similar to CBORFactory#hasCBORFormat + if (first == CBORConstants.BYTE_OBJECT_INDEFINITE && length > 1){ return XContentType.CBOR; } + if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first) && length > 2) { + // Actually, specific "self-describe tag" is a very good indicator + if (first == (byte) 0xD9 && bytes.get(1) == (byte) 0xD9 && bytes.get(2) == (byte) 0xF7) { + return XContentType.CBOR; + } + } + // for small objects, some encoders just encode as major type object, we can safely + // say it's CBOR since it doesn't contradict SMILE or JSON, and it's a last resort + if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, first)) { + return XContentType.CBOR; + } + + // a last chance for JSON for (int i = 0; i < length; i++) { if (bytes.get(i) == '{') { return XContentType.JSON; diff --git a/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java b/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java index e76293322535d..076c3e2629d0c 100644 --- a/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java +++ b/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.common.xcontent; +import 
com.fasterxml.jackson.dataformat.cbor.CBORConstants; +import com.fasterxml.jackson.dataformat.smile.SmileConstants; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.io.stream.BytesStreamInput; import org.elasticsearch.test.ElasticsearchTestCase; @@ -69,4 +71,21 @@ private void testGuessType(XContentType type) throws IOException { assertThat(XContentFactory.xContentType(builder.string()), equalTo(type)); } } + + public void testCBORBasedOnMajorObjectDetection() { + // a Perl encoder, for example, serializes {"foo" => 5} as the bytes below ('o' is written as 0x6f: a bare 6f would be the float literal 6.0f and cast to 6) + byte[] bytes = new byte[] {(byte) 0xA1, (byte) 0x43, (byte) 0x66, (byte) 0x6f, (byte) 0x6f, (byte) 5}; + assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR)); + + // also make sure the major type check doesn't collide with the SMILE, JSON and YAML lead bytes, just in case + assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, SmileConstants.HEADER_BYTE_1), equalTo(false)); + assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '{'), equalTo(false)); + assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) ' '), equalTo(false)); + assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '-'), equalTo(false)); + } + + public void testCBORBasedOnMagicHeaderDetection() { + byte[] bytes = new byte[] {(byte) 0xd9, (byte) 0xd9, (byte) 0xf7}; + assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR)); + } }