Skip to content

Commit

Permalink
Re-design of ProgressiveStringDecoder to utilize CharsetDecoder API
Browse files Browse the repository at this point in the history
  • Loading branch information
dryganets committed Jan 11, 2018
1 parent c319a85 commit 4a6510e
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 135 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -413,42 +413,21 @@ private void readWithProgress(
Charset charset = responseBody.contentType() == null ? StandardCharsets.UTF_8 :
responseBody.contentType().charset(StandardCharsets.UTF_8);

if (StandardCharsets.UTF_8.equals(charset)) {
ProgressiveUTF8StreamDecoder streamDecoder = new ProgressiveUTF8StreamDecoder();
InputStream inputStream = responseBody.byteStream();
try {
byte[] buffer = new byte[MAX_CHUNK_SIZE_BETWEEN_FLUSHES];
int read;
while ((read = inputStream.read(buffer)) != -1) {
ResponseUtil.onIncrementalDataReceived(
eventEmitter,
requestId,
streamDecoder.decodeNext(buffer, read),
totalBytesRead,
contentLength);
}
} finally {
inputStream.close();
}
} else {
// TODO: in UTF-16 some symbols took 4 bytes or 2 chars (HIGH and LOW surrogates)
// Ideally we need to take care of this but it's way more complex task as it involves handling
// of Byte Order Mark and little/big endian of UTF-16. Let's keep it in sync with iOS for now.
Reader reader = responseBody.charStream();
try {
char[] buffer = new char[MAX_CHUNK_SIZE_BETWEEN_FLUSHES];
int read;
while ((read = reader.read(buffer)) != -1) {
ResponseUtil.onIncrementalDataReceived(
eventEmitter,
requestId,
new String(buffer, 0, read),
totalBytesRead,
contentLength);
}
} finally {
reader.close();
ProgressiveStringDecoder streamDecoder = new ProgressiveStringDecoder(charset);
InputStream inputStream = responseBody.byteStream();
try {
byte[] buffer = new byte[MAX_CHUNK_SIZE_BETWEEN_FLUSHES];
int read;
while ((read = inputStream.read(buffer)) != -1) {
ResponseUtil.onIncrementalDataReceived(
eventEmitter,
requestId,
streamDecoder.decodeNext(buffer, read),
totalBytesRead,
contentLength);
}
} finally {
inputStream.close();
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
package com.facebook.react.modules.network;

import com.facebook.common.logging.FLog;
import com.facebook.react.common.ReactConstants;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;

/**
 * Incrementally decodes text that arrives as a sequence of byte-array chunks.
 *
 * <p>A chunk boundary may fall in the middle of a character: one character can occupy up to
 * 4 bytes (multi-byte UTF-8 sequences, UTF-16 surrogate pairs, UTF-32 code units). Bytes of such
 * an incomplete trailing character are held back and prepended to the next chunk, so every
 * character is decoded exactly once and never split.
 *
 * <p>A single {@link CharsetDecoder} is used for the whole stream via its incremental
 * {@code decode(in, out, endOfInput = false)} API, so decoder state established by earlier chunks
 * (for example the byte order selected by a UTF-16 byte order mark) carries over to later chunks.
 *
 * <p>Malformed or unmappable input is replaced with the decoder's replacement string instead of
 * causing the surrounding, valid data to be dropped.
 *
 * <p>There is no end-of-stream call: an incomplete character at the very end of the stream stays
 * buffered and is never emitted. Instances hold mutable decoding state and must not be shared
 * between streams.
 */
public class ProgressiveStringDecoder {

  private final CharsetDecoder mDecoder;

  /** Unconsumed bytes of an incomplete trailing character, or {@code null} if there are none. */
  private byte[] mRemainder = null;

  /**
   * @param charset expected charset of the data
   */
  public ProgressiveStringDecoder(Charset charset) {
    mDecoder = charset.newDecoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
  }

  /**
   * Decodes the next chunk of the stream.
   *
   * <p>If the chunk ends with a partial multi-byte character, those bytes are saved and decoded
   * together with the data passed to the following {@code decodeNext} call.
   *
   * @param data buffer holding the chunk; only the first {@code length} bytes are read
   * @param length number of valid bytes in {@code data}
   * @return the characters that could be fully decoded so far; possibly an empty string
   */
  public String decodeNext(byte[] data, int length) {
    ByteBuffer input = withRemainder(data, length);

    // maxCharsPerByte is the decoder's own worst-case estimate; the loop below still grows the
    // buffer on overflow so correctness never depends on that estimate.
    int estimatedChars = (int) Math.ceil(input.remaining() * (double) mDecoder.maxCharsPerByte());
    CharBuffer output = CharBuffer.allocate(Math.max(1, estimatedChars));

    // endOfInput is false: a trailing incomplete character is reported as underflow and its
    // bytes are left in `input` rather than being treated as malformed. With REPLACE actions the
    // only other possible result is overflow.
    while (mDecoder.decode(input, output, false).isOverflow()) {
      output = grow(output);
    }

    if (input.hasRemaining()) {
      mRemainder = new byte[input.remaining()];
      input.get(mRemainder);
    } else {
      mRemainder = null;
    }

    output.flip();
    return output.toString();
  }

  /** Wraps the new chunk, prefixed by any bytes held back from the previous call. */
  private ByteBuffer withRemainder(byte[] data, int length) {
    if (mRemainder == null) {
      return ByteBuffer.wrap(data, 0, length);
    }
    byte[] joined = new byte[mRemainder.length + length];
    System.arraycopy(mRemainder, 0, joined, 0, mRemainder.length);
    System.arraycopy(data, 0, joined, mRemainder.length, length);
    return ByteBuffer.wrap(joined);
  }

  /** Returns a buffer of twice the capacity containing everything written to {@code full}. */
  private static CharBuffer grow(CharBuffer full) {
    CharBuffer bigger = CharBuffer.allocate(full.capacity() * 2);
    full.flip();
    bigger.put(full);
    return bigger;
  }
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
*/
package com.facebook.react.modules.network;

import com.facebook.react.common.StandardCharsets;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand All @@ -17,7 +19,7 @@


@RunWith(RobolectricTestRunner.class)
public class ProgressiveUTF8StreamDecoderTest {
public class ProgressiveStringDecoderTest {

private static String TEST_DATA_1_BYTE = "Lorem ipsum dolor sit amet, ea ius viris laoreet gloriatur, ea enim illud mel. Ea eligendi erroribus inciderint sea, id nemore sensibus contentiones qui. Eos et nulla abhorreant, noluisse adipiscing reprehendunt an sit. Harum iriure meliore ne nec, clita semper voluptaria at sea. Ius civibus vituperata reprehendunt ut.\n" +
"\n" +
Expand Down Expand Up @@ -71,30 +73,58 @@ public class ProgressiveUTF8StreamDecoderTest {
"\uD800\uDE80\uD800\uDE80\uD800\uDE80";

@Test
public void testUnicode1Byte() {
chunkString(TEST_DATA_1_BYTE, 64);
public void testUTF8SingleByteSymbols() {
chunkString(TEST_DATA_1_BYTE, StandardCharsets.UTF_8, 64);
}

@Test
public void testUTF8twoBytesSymbols() {
chunkString(TEST_DATA_2_BYTES, StandardCharsets.UTF_8, 63);
}

@Test
public void testUTF8ThreeBytesSymbols() throws Exception {
chunkString(TEST_DATA_3_BYTES, StandardCharsets.UTF_8, 64);
}

@Test
public void testUTF8FourBytesSymbols() throws Exception {
chunkString(TEST_DATA_4_BYTES, StandardCharsets.UTF_8, 111);
}

@Test
public void testUTF16LEStandard() throws Exception {
chunkString(TEST_DATA_3_BYTES, StandardCharsets.UTF_16LE, 47);
}

@Test
public void testUTF16LESurrogates() throws Exception {
// 4 bytes UTF-8 symbols are encoded as two 2 byte surrogate symbols in UTF-16
chunkString(TEST_DATA_4_BYTES, StandardCharsets.UTF_16LE, 47);
}

@Test
public void testUnicode2Bytes() {
chunkString(TEST_DATA_2_BYTES, 63);
public void testUTF16BEStandard() throws Exception {
chunkString(TEST_DATA_3_BYTES, StandardCharsets.UTF_16BE, 47);
}

@Test
public void testUnicode3Bytes() throws Exception {
chunkString(TEST_DATA_3_BYTES, 64);
public void testUTF16BESurrogates() throws Exception {
// 4 bytes UTF-8 symbols are encoded as two 2 byte surrogate symbols in UTF-16
chunkString(TEST_DATA_4_BYTES, StandardCharsets.UTF_16BE, 47);
}

@Test
public void testUnicode4Bytes() throws Exception {
chunkString(TEST_DATA_4_BYTES, 111);
public void testUTF32() throws Exception {
// In UTF-32 every symbol is always encoded as exactly 4 bytes
chunkString(TEST_DATA_4_BYTES, Charset.forName("UTF-32"), 65);
}

private void chunkString(String originalString, int chunkSize) {
byte data [] = originalString.getBytes(Charset.forName("UTF-8"));
private void chunkString(String originalString, Charset charset, int chunkSize) {
byte data [] = originalString.getBytes(charset);

StringBuilder builder = new StringBuilder();
ProgressiveUTF8StreamDecoder collector = new ProgressiveUTF8StreamDecoder();
ProgressiveStringDecoder collector = new ProgressiveStringDecoder(charset);
byte[] buffer = new byte[chunkSize];
for (int i = 0; i < data.length; i+= chunkSize) {
int bytesRead = Math.min(chunkSize, data.length - i);
Expand Down

0 comments on commit 4a6510e

Please sign in to comment.