Re-design of ProgressiveStringDecoder to utilize CharsetDecoder API

facebook · Jan 11, 2018 · 4a6510e · 4a6510e
1 parent c319a85
commit 4a6510e
Show file tree

Hide file tree

Showing 4 changed files with 147 additions and 135 deletions.
diff --git a/ReactAndroid/src/main/java/com/facebook/react/modules/network/NetworkingModule.java b/ReactAndroid/src/main/java/com/facebook/react/modules/network/NetworkingModule.java
@@ -413,42 +413,21 @@ private void readWithProgress(
     Charset charset = responseBody.contentType() == null ? StandardCharsets.UTF_8 :
       responseBody.contentType().charset(StandardCharsets.UTF_8);
 
-    if (StandardCharsets.UTF_8.equals(charset)) {
-      ProgressiveUTF8StreamDecoder streamDecoder = new ProgressiveUTF8StreamDecoder();
-      InputStream inputStream = responseBody.byteStream();
-      try {
-        byte[] buffer = new byte[MAX_CHUNK_SIZE_BETWEEN_FLUSHES];
-        int read;
-        while ((read = inputStream.read(buffer)) != -1) {
-          ResponseUtil.onIncrementalDataReceived(
-            eventEmitter,
-            requestId,
-            streamDecoder.decodeNext(buffer, read),
-            totalBytesRead,
-            contentLength);
-        }
-      } finally {
-        inputStream.close();
-      }
-    } else {
-      // TODO: in UTF-16 some symbols took 4 bytes or 2 chars (HIGH and LOW surrogates)
-      // Ideally we need to take care of this but it's way more complex task as it involves handling
-      // of Byte Order Mark and little/big endian of UTF-16. Let's keep it in sync with iOS for now.
-      Reader reader = responseBody.charStream();
-      try {
-        char[] buffer = new char[MAX_CHUNK_SIZE_BETWEEN_FLUSHES];
-        int read;
-        while ((read = reader.read(buffer)) != -1) {
-          ResponseUtil.onIncrementalDataReceived(
-            eventEmitter,
-            requestId,
-            new String(buffer, 0, read),
-            totalBytesRead,
-            contentLength);
-        }
-      } finally {
-        reader.close();
+    ProgressiveStringDecoder streamDecoder = new ProgressiveStringDecoder(charset);
+    InputStream inputStream = responseBody.byteStream();
+    try {
+      byte[] buffer = new byte[MAX_CHUNK_SIZE_BETWEEN_FLUSHES];
+      int read;
+      while ((read = inputStream.read(buffer)) != -1) {
+        ResponseUtil.onIncrementalDataReceived(
+          eventEmitter,
+          requestId,
+          streamDecoder.decodeNext(buffer, read),
+          totalBytesRead,
+          contentLength);
       }
+    } finally {
+      inputStream.close();
     }
   }
 

diff --git a/ReactAndroid/src/main/java/com/facebook/react/modules/network/ProgressiveStringDecoder.java b/ReactAndroid/src/main/java/com/facebook/react/modules/network/ProgressiveStringDecoder.java
@@ -0,0 +1,91 @@
+/**
+* Copyright (c) 2017-present, Facebook, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD-style license found in the
+* LICENSE file in the root directory of this source tree. An additional grant
+* of patent rights can be found in the PATENTS file in the same directory.
+*/
+package com.facebook.react.modules.network;
+
+import com.facebook.common.logging.FLog;
+import com.facebook.react.common.ReactConstants;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Incrementally decodes a byte stream that arrives in arbitrary chunks into strings.
+ *
+ * <p>Depending on the charset a single character can occupy up to 4 bytes, so a chunk boundary
+ * may fall in the middle of a character. The bytes of such an incomplete trailing character are
+ * held back and decoded together with the next chunk.
+ *
+ * <p>Decoding goes through the streaming {@code CharsetDecoder.decode(in, out, endOfInput)} call
+ * with {@code endOfInput == false}, so decoder state (for example the byte order learned from a
+ * byte order mark) carries over from one chunk to the next instead of being reset per chunk.
+ *
+ * <p>This class is designed on the assumption that the original byte stream is a correctly
+ * encoded string in the given charset. Otherwise some parts of the data won't be decoded.
+ *
+ * <p>Instances keep per-stream state; use a separate instance for every stream.
+ */
+public class ProgressiveStringDecoder {
+
+  private static final String EMPTY_STRING = "";
+
+  private final CharsetDecoder mDecoder;
+
+  /** Undecoded bytes of a partial character left over from the previous chunk, or null. */
+  private byte[] remainder = null;
+
+  /**
+   * @param charset expected charset of the data
+   */
+  public ProgressiveStringDecoder(Charset charset) {
+    mDecoder = charset.newDecoder();
+  }
+
+  /**
+   * Decodes the next chunk of the stream to a String.
+   *
+   * <p>If the chunk ends with a partial multi-byte symbol, those bytes are saved as the
+   * remainder and prepended to the data passed to the following call.
+   *
+   * @param data buffer holding the next chunk of encoded bytes
+   * @param length number of valid bytes in {@code data}, counted from index 0
+   * @return the characters decoded from the remainder plus this chunk (possibly empty); an empty
+   *     string if the input is malformed or unmappable, in which case the chunk is dropped
+   */
+  public String decodeNext(byte[] data, int length) {
+    ByteBuffer input;
+    if (remainder != null) {
+      // Prepend the bytes held back from the previous chunk.
+      input = ByteBuffer.allocate(remainder.length + length);
+      input.put(remainder);
+      input.put(data, 0, length);
+      input.flip();
+    } else {
+      input = ByteBuffer.wrap(data, 0, length);
+    }
+
+    // maxCharsPerByte() bounds the output size; the overflow loop below covers decoders that
+    // buffer input internally and may therefore emit slightly more than the estimate.
+    int estimatedChars =
+        (int) Math.ceil(input.remaining() * (double) mDecoder.maxCharsPerByte());
+    CharBuffer output = CharBuffer.allocate(estimatedChars + 2);
+
+    // endOfInput == false: an incomplete trailing sequence is reported as underflow and its
+    // bytes stay in the input buffer instead of being treated as malformed.
+    CoderResult status = mDecoder.decode(input, output, false);
+    while (status.isOverflow()) {
+      CharBuffer larger = CharBuffer.allocate(output.capacity() * 2);
+      output.flip();
+      larger.put(output);
+      output = larger;
+      status = mDecoder.decode(input, output, false);
+    }
+
+    if (status.isError()) {
+      // Malformed or unmappable input: drop this chunk and start the next one from a clean state.
+      mDecoder.reset();
+      remainder = null;
+      FLog.w(ReactConstants.TAG, "failed to decode string from byte array");
+      return EMPTY_STRING;
+    }
+
+    if (input.hasRemaining()) {
+      // Bytes the decoder could not consume yet belong to a character split across chunks.
+      remainder = new byte[input.remaining()];
+      input.get(remainder);
+    } else {
+      remainder = null;
+    }
+
+    output.flip();
+    return output.toString();
+  }
+}
diff --git a/...ndroid/src/main/java/com/facebook/react/modules/network/ProgressiveUTF8StreamDecoder.java b/...ndroid/src/main/java/com/facebook/react/modules/network/ProgressiveUTF8StreamDecoder.java
diff --git a/...ork/ProgressiveUTF8StreamDecoderTest.java → ...network/ProgressiveStringDecoderTest.java b/...ork/ProgressiveUTF8StreamDecoderTest.java → ...network/ProgressiveStringDecoderTest.java
@@ -8,6 +8,8 @@
  */
 package com.facebook.react.modules.network;
 
+import com.facebook.react.common.StandardCharsets;
+
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -17,7 +19,7 @@
 
 
 @RunWith(RobolectricTestRunner.class)
-public class ProgressiveUTF8StreamDecoderTest {
+public class ProgressiveStringDecoderTest {
 
   private static String TEST_DATA_1_BYTE = "Lorem ipsum dolor sit amet, ea ius viris laoreet gloriatur, ea enim illud mel. Ea eligendi erroribus inciderint sea, id nemore sensibus contentiones qui. Eos et nulla abhorreant, noluisse adipiscing reprehendunt an sit. Harum iriure meliore ne nec, clita semper voluptaria at sea. Ius civibus vituperata reprehendunt ut.\n" +
     "\n" +
@@ -71,30 +73,58 @@ public class ProgressiveUTF8StreamDecoderTest {
     "\uD800\uDE80\uD800\uDE80\uD800\uDE80";
 
   @Test
-  public void testUnicode1Byte() {
-    chunkString(TEST_DATA_1_BYTE, 64);
+  public void testUTF8SingleByteSymbols() {
+    chunkString(TEST_DATA_1_BYTE, StandardCharsets.UTF_8, 64);
+  }
+
+  @Test
+  public void testUTF8twoBytesSymbols() {
+    chunkString(TEST_DATA_2_BYTES, StandardCharsets.UTF_8, 63);
+  }
+
+  @Test
+  public void testUTF8ThreeBytesSymbols() throws Exception {
+    chunkString(TEST_DATA_3_BYTES, StandardCharsets.UTF_8, 64);
+  }
+
+  @Test
+  public void testUTF8FourBytesSymbols() throws Exception {
+    chunkString(TEST_DATA_4_BYTES, StandardCharsets.UTF_8, 111);
+  }
+
+  @Test
+  public  void testUTF16LEStandard() throws Exception {
+    chunkString(TEST_DATA_3_BYTES, StandardCharsets.UTF_16LE, 47);
+  }
+
+  @Test
+  public  void testUTF16LESurrogates() throws Exception {
+    // 4-byte UTF-8 symbols are encoded as a pair of 2-byte surrogates in UTF-16
+    chunkString(TEST_DATA_4_BYTES, StandardCharsets.UTF_16LE, 47);
   }
 
   @Test
-  public void testUnicode2Bytes() {
-    chunkString(TEST_DATA_2_BYTES, 63);
+  public  void testUTF16BEStandard() throws Exception {
+    chunkString(TEST_DATA_3_BYTES, StandardCharsets.UTF_16BE, 47);
   }
 
   @Test
-  public void testUnicode3Bytes() throws Exception {
-    chunkString(TEST_DATA_3_BYTES, 64);
+  public  void testUTF16BESurrogates() throws Exception {
+    // 4-byte UTF-8 symbols are encoded as a pair of 2-byte surrogates in UTF-16
+    chunkString(TEST_DATA_4_BYTES, StandardCharsets.UTF_16BE, 47);
   }
 
   @Test
-  public void testUnicode4Bytes() throws Exception {
-    chunkString(TEST_DATA_4_BYTES, 111);
+  public void testUTF32() throws Exception {
+    // UTF-32 symbols are always 4 bytes long
+    chunkString(TEST_DATA_4_BYTES, Charset.forName("UTF-32"), 65);
   }
 
-  private void chunkString(String originalString, int chunkSize) {
-    byte data [] = originalString.getBytes(Charset.forName("UTF-8"));
+  private void chunkString(String originalString, Charset charset, int chunkSize) {
+    byte data [] = originalString.getBytes(charset);
 
     StringBuilder builder = new StringBuilder();
-    ProgressiveUTF8StreamDecoder collector = new ProgressiveUTF8StreamDecoder();
+    ProgressiveStringDecoder collector = new ProgressiveStringDecoder(charset);
     byte[] buffer = new byte[chunkSize];
     for (int i = 0; i < data.length; i+= chunkSize) {
       int bytesRead = Math.min(chunkSize, data.length - i);