Skip to content

Commit

Permalink
Better detection of CBOR
Browse files Browse the repository at this point in the history
CBOR has an optional special header that, when present, allows for exact detection. Also, since we know which formats we support in ES, we can additionally treat a leading byte with the object major type as CBOR.
closes elastic#7640
  • Loading branch information
kimchy committed Mar 6, 2015
1 parent 583c492 commit 860b31b
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 10 deletions.
8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -234,28 +234,28 @@
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.4.2</version>
<version>2.4.4</version>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-smile</artifactId>
<version>2.4.2</version>
<version>2.4.4</version>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-yaml</artifactId>
<version>2.4.2</version>
<version>2.4.4</version>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-cbor</artifactId>
<version>2.4.2</version>
<version>2.4.4</version>
<scope>compile</scope>
</dependency>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,16 @@ public static XContentType xContentType(byte[] data) {
* Guesses the content type based on the provided input stream.
*/
public static XContentType xContentType(InputStream si) throws IOException {
int first = si.read();
if (first == -1) {
int iFirst = si.read();
if (iFirst == -1) {
return null;
}
int second = si.read();
if (second == -1) {
byte first = (byte) iFirst;
int iSecond = si.read();
if (iSecond == -1) {
return null;
}
byte second = (byte) iSecond;
if (first == SmileConstants.HEADER_BYTE_1 && second == SmileConstants.HEADER_BYTE_2) {
int third = si.read();
if (third == SmileConstants.HEADER_BYTE_3) {
Expand All @@ -231,9 +233,27 @@ public static XContentType xContentType(InputStream si) throws IOException {
return XContentType.YAML;
}
}
if (first == (CBORConstants.BYTE_OBJECT_INDEFINITE & 0xff)){
// CBOR logic similar to CBORFactory#hasCBORFormat
if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){
return XContentType.CBOR;
}
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first)) {
// Actually, specific "self-describe tag" is a very good indicator
int iThird = si.read();
if (iThird == -1) {
return null;
}
byte third = (byte) iThird;
if (first == (byte) 0xD9 && second == (byte) 0xD9 && third == (byte) 0xF7) {
return XContentType.CBOR;
}
}
// for small objects, some encoders just encode as major type object, we can safely
// say its CBOR since it doesn't contradict SMILE or JSON, and its a last resort
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, first)) {
return XContentType.CBOR;
}

for (int i = 2; i < GUESS_HEADER_LENGTH; i++) {
int val = si.read();
if (val == -1) {
Expand Down Expand Up @@ -279,9 +299,23 @@ public static XContentType xContentType(BytesReference bytes) {
if (length > 2 && first == '-' && bytes.get(1) == '-' && bytes.get(2) == '-') {
return XContentType.YAML;
}
if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){
// CBOR logic similar to CBORFactory#hasCBORFormat
if (first == CBORConstants.BYTE_OBJECT_INDEFINITE && length > 1){
return XContentType.CBOR;
}
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first) && length > 2) {
// Actually, specific "self-describe tag" is a very good indicator
if (first == (byte) 0xD9 && bytes.get(1) == (byte) 0xD9 && bytes.get(2) == (byte) 0xF7) {
return XContentType.CBOR;
}
}
// for small objects, some encoders just encode as major type object, we can safely
// say its CBOR since it doesn't contradict SMILE or JSON, and its a last resort
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, first)) {
return XContentType.CBOR;
}

// a last chance for JSON
for (int i = 0; i < length; i++) {
if (bytes.get(i) == '{') {
return XContentType.JSON;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.elasticsearch.common.xcontent;

import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
import com.fasterxml.jackson.dataformat.smile.SmileConstants;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.test.ElasticsearchTestCase;
Expand Down Expand Up @@ -69,4 +71,21 @@ private void testGuessType(XContentType type) throws IOException {
assertThat(XContentFactory.xContentType(builder.string()), equalTo(type));
}
}

/**
 * Verifies that content starting with a CBOR "object" major-type byte is detected as CBOR
 * even without the indefinite-object marker or the self-describe tag, and that this
 * major-type check cannot be triggered by the leading bytes of the other supported formats.
 */
public void testCBORBasedOnMajorObjectDetection() {
    // for this {"foo"=> 5} perl encoder for example generates:
    //   0xA1            object (map) major type, one key/value pair
    //   0x43            3-byte string header for the key
    //   0x66 0x6f 0x6f  'f' 'o' 'o'
    //   0x05            the integer value 5
    // NOTE: the 'o' bytes must be written as hex (0x6f); a bare `6f` is a float literal
    // (6.0f) which casts to the byte 0x06 and silently corrupts the fixture.
    byte[] bytes = new byte[] {(byte) 0xA1, (byte) 0x43, (byte) 0x66, (byte) 0x6f, (byte) 0x6f, (byte) 5};
    assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR));

    // also make sure major type check doesn't collide with SMILE and JSON, just in case
    assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, SmileConstants.HEADER_BYTE_1), equalTo(false));
    assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '{'), equalTo(false));
    assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) ' '), equalTo(false));
    assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '-'), equalTo(false));
}

/**
 * Verifies that a payload consisting only of the three bytes 0xD9 0xD9 0xF7
 * (the CBOR "self-describe tag" sequence the detector looks for) is reported as CBOR.
 */
public void testCBORBasedOnMagicHeaderDetection() {
    final byte[] selfDescribeHeader = {(byte) 0xd9, (byte) 0xd9, (byte) 0xf7};
    final XContentType detected = XContentFactory.xContentType(selfDescribeHeader);
    assertThat(detected, equalTo(XContentType.CBOR));
}
}

0 comments on commit 860b31b

Please sign in to comment.