Pass missing length into SubtitleParser from SubtitleExtractor

If the length of the `ExtractorInput` is not known, then the `subtitleData` field is resized by 1kB each time (`SubtitleExtractor.DEFAULT_BUFFER_SIZE`), so the end of the array is often not populated. This change ensures that `length` is propagated to `SubtitleParser`, so that implementations don't try to parse the garbage/zero bytes at the end of the array.

Discovered while investigating Issue: #1516

#cherrypick

PiperOrigin-RevId: 661195634
androidx · Aug 9, 2024 · f37f969 · f37f969
1 parent 8b33ad5
commit f37f969
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 0 deletions.
diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleExtractor.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleExtractor.java
@@ -237,6 +237,8 @@ private void parseAndWriteToOutput() throws IOException {
               : SubtitleParser.OutputOptions.allCues();
       subtitleParser.parse(
           subtitleData,
+          /* offset= */ 0,
+          /* length= */ bytesRead,
           outputOptions,
           cuesWithTiming -> {
             Sample sample =

diff --git a/libraries/extractor/src/test/java/androidx/media3/extractor/text/SubtitleExtractorTest.java b/libraries/extractor/src/test/java/androidx/media3/extractor/text/SubtitleExtractorTest.java
@@ -20,6 +20,7 @@
 
 import androidx.media3.common.Format;
 import androidx.media3.common.MimeTypes;
+import androidx.media3.common.util.Consumer;
 import androidx.media3.common.util.Util;
 import androidx.media3.extractor.Extractor;
 import androidx.media3.extractor.text.webvtt.WebvttParser;
@@ -28,6 +29,7 @@
 import androidx.media3.test.utils.FakeTrackOutput;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
 import com.google.common.primitives.Ints;
+import java.util.concurrent.atomic.AtomicInteger;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -188,6 +190,45 @@ public void extractor_seekBetweenReads_outputsCues() throws Exception {
     assertThat(cues2.cues.get(0).text.toString()).isEqualTo("This is the third subtitle.");
   }
 
+  @Test
+  public void extractor_unknownLengthInput_passesNumberOfBytesReadToSubtitleParser()
+      throws Exception {
+    FakeExtractorOutput output = new FakeExtractorOutput();
+    byte[] inputData = Util.getUtf8Bytes(TEST_DATA);
+    FakeExtractorInput input =
+        new FakeExtractorInput.Builder()
+            .setData(inputData)
+            .setSimulatePartialReads(true)
+            .setSimulateUnknownLength(true)
+            .build();
+    AtomicInteger lengthFromParse = new AtomicInteger();
+    SubtitleParser fakeSubtitleParser =
+        new SubtitleParser() {
+          @Override
+          public void parse(
+              byte[] data,
+              int offset,
+              int length,
+              OutputOptions outputOptions,
+              Consumer<CuesWithTiming> output) {
+            lengthFromParse.set(length);
+          }
+
+          @Override
+          public @Format.CueReplacementBehavior int getCueReplacementBehavior() {
+            return Format.CUE_REPLACEMENT_BEHAVIOR_MERGE;
+          }
+        };
+    SubtitleExtractor extractor =
+        new SubtitleExtractor(
+            fakeSubtitleParser, new Format.Builder().setSampleMimeType(MimeTypes.TEXT_VTT).build());
+
+    extractor.init(output);
+    while (extractor.read(input, null) != Extractor.RESULT_END_OF_INPUT) {}
+
+    assertThat(lengthFromParse.get()).isEqualTo(inputData.length);
+  }
+
   @Test
   public void read_withoutInit_fails() {
     FakeExtractorInput input = new FakeExtractorInput.Builder().setData(new byte[0]).build();