From 66fa5919590789b384506a4e604fe02a5a5e0877 Mon Sep 17 00:00:00 2001 From: ibaker Date: Tue, 10 Oct 2023 08:51:07 -0700 Subject: [PATCH] Add experimental opt-in to parse DASH subtitles during extraction This currently only applies to subtitles muxed into mp4 segments, and not standalone text files linked directly from the manifest. Issue: androidx/media#288 #minor-release PiperOrigin-RevId: 572263764 --- RELEASENOTES.md | 7 ++ .../source/chunk/BundledChunkExtractor.java | 94 +++++++++++++------ .../exoplayer/dash/DashMediaPeriod.java | 30 +++++- .../exoplayer/dash/DashMediaSource.java | 45 ++++++++- .../dash/DefaultDashChunkSource.java | 17 ++++ .../exoplayer/dash/DashMediaPeriodTest.java | 3 +- .../dash/e2etest/DashPlaybackTest.java | 7 +- .../text/SubtitleTranscodingTrackOutput.java | 5 +- .../playbackdumps/dash/webvtt-in-mp4.dump | 3 + 9 files changed, 170 insertions(+), 41 deletions(-) diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 6c377bd1a62..b14ccd1aa13 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -56,6 +56,13 @@ This release includes the following changes since the Android Auto. * DASH Extension: * Allow multiple of the same DASH identifier in segment template url. + * Add experimental support for parsing subtitles during extraction. This + has better support for merging overlapping subtitles, including + resolving flickering when transitioning between subtitle segments. You + can enable this using + `DashMediaSource.Factory.experimentalParseSubtitlesDuringExtraction()` + ([#288](https://github.com/androidx/media/issues/288)). 
+* Smooth Streaming Extension: * RTSP Extension: * Use RTSP Setup Response timeout value in time interval of sending keep-alive RTSP Options requests diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/source/chunk/BundledChunkExtractor.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/source/chunk/BundledChunkExtractor.java index 99c15aa65f2..f05a5d63e4c 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/source/chunk/BundledChunkExtractor.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/source/chunk/BundledChunkExtractor.java @@ -26,6 +26,7 @@ import androidx.media3.common.util.Assertions; import androidx.media3.common.util.ParsableByteArray; import androidx.media3.common.util.UnstableApi; +import androidx.media3.exoplayer.analytics.PlayerId; import androidx.media3.extractor.ChunkIndex; import androidx.media3.extractor.DummyTrackOutput; import androidx.media3.extractor.Extractor; @@ -36,7 +37,10 @@ import androidx.media3.extractor.TrackOutput; import androidx.media3.extractor.mkv.MatroskaExtractor; import androidx.media3.extractor.mp4.FragmentedMp4Extractor; +import androidx.media3.extractor.text.SubtitleParser; +import androidx.media3.extractor.text.SubtitleTranscodingExtractor; import java.io.IOException; +import java.util.List; import org.checkerframework.checker.nullness.qual.MonotonicNonNull; /** @@ -46,36 +50,68 @@ @UnstableApi public final class BundledChunkExtractor implements ExtractorOutput, ChunkExtractor { - /** {@link ChunkExtractor.Factory} for instances of this class. */ - public static final ChunkExtractor.Factory FACTORY = - (primaryTrackType, - format, - enableEventMessageTrack, - closedCaptionFormats, - playerEmsgTrackOutput, - playerId) -> { - @Nullable String containerMimeType = format.containerMimeType; - Extractor extractor; - if (MimeTypes.isText(containerMimeType)) { - // Text types do not need an extractor. 
- return null; - } else if (MimeTypes.isMatroska(containerMimeType)) { - extractor = new MatroskaExtractor(MatroskaExtractor.FLAG_DISABLE_SEEK_FOR_CUES); - } else { - int flags = 0; - if (enableEventMessageTrack) { - flags |= FragmentedMp4Extractor.FLAG_ENABLE_EMSG_TRACK; - } - extractor = - new FragmentedMp4Extractor( - flags, - /* timestampAdjuster= */ null, - /* sideloadedTrack= */ null, - closedCaptionFormats, - playerEmsgTrackOutput); + /** {@link ChunkExtractor.Factory} for {@link BundledChunkExtractor}. */ + public static final class Factory implements ChunkExtractor.Factory { + + /** Non-null if subtitles should be parsed during extraction, null otherwise. */ + @Nullable private SubtitleParser.Factory subtitleParserFactory; + + /** + * Sets the {@link SubtitleParser.Factory} to use for parsing subtitles during extraction, or + * null to parse subtitles during decoding. The default is null (subtitles parsed during + * decoding). + * + *

<p>This method is experimental. Its default value may change, or it may be renamed or removed + * in a future release. + * + * @param subtitleParserFactory The {@link SubtitleParser.Factory} for parsing subtitles during + * extraction. + * @return This factory, for convenience. + */ + public Factory experimentalSetSubtitleParserFactory( + @Nullable SubtitleParser.Factory subtitleParserFactory) { + this.subtitleParserFactory = subtitleParserFactory; + return this; + } + + @Nullable + @Override + public ChunkExtractor createProgressiveMediaExtractor( + @C.TrackType int primaryTrackType, + Format representationFormat, + boolean enableEventMessageTrack, + List closedCaptionFormats, + @Nullable TrackOutput playerEmsgTrackOutput, + PlayerId playerId) { + @Nullable String containerMimeType = representationFormat.containerMimeType; + Extractor extractor; + if (MimeTypes.isText(containerMimeType)) { + // Text types do not need an extractor. + return null; + } else if (MimeTypes.isMatroska(containerMimeType)) { + extractor = new MatroskaExtractor(MatroskaExtractor.FLAG_DISABLE_SEEK_FOR_CUES); + } else { + int flags = 0; + if (enableEventMessageTrack) { + flags |= FragmentedMp4Extractor.FLAG_ENABLE_EMSG_TRACK; } - return new BundledChunkExtractor(extractor, primaryTrackType, format); - }; + extractor = + new FragmentedMp4Extractor( + flags, + /* timestampAdjuster= */ null, + /* sideloadedTrack= */ null, + closedCaptionFormats, + playerEmsgTrackOutput); + } + if (subtitleParserFactory != null) { + extractor = new SubtitleTranscodingExtractor(extractor, subtitleParserFactory); + } + return new BundledChunkExtractor(extractor, primaryTrackType, representationFormat); + } + } + + /** {@link Factory} for {@link BundledChunkExtractor}. 
*/ + public static final Factory FACTORY = new Factory(); private static final PositionHolder POSITION_HOLDER = new PositionHolder(); diff --git a/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaPeriod.java b/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaPeriod.java index 04ba72a6ad4..c4bfbb085f4 100644 --- a/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaPeriod.java +++ b/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaPeriod.java @@ -58,6 +58,7 @@ import androidx.media3.exoplayer.upstream.CmcdConfiguration; import androidx.media3.exoplayer.upstream.LoadErrorHandlingPolicy; import androidx.media3.exoplayer.upstream.LoaderErrorThrower; +import androidx.media3.extractor.text.SubtitleParser; import com.google.common.collect.Maps; import com.google.common.primitives.Ints; import java.io.IOException; @@ -130,7 +131,8 @@ public DashMediaPeriod( Allocator allocator, CompositeSequenceableLoaderFactory compositeSequenceableLoaderFactory, PlayerEmsgCallback playerEmsgCallback, - PlayerId playerId) { + PlayerId playerId, + @Nullable SubtitleParser.Factory subtitleParserFactory) { this.id = id; this.manifest = manifest; this.baseUrlExclusionList = baseUrlExclusionList; @@ -156,7 +158,8 @@ public DashMediaPeriod( Period period = manifest.getPeriod(periodIndex); eventStreams = period.eventStreams; Pair result = - buildTrackGroups(drmSessionManager, period.adaptationSets, eventStreams); + buildTrackGroups( + drmSessionManager, subtitleParserFactory, period.adaptationSets, eventStreams); trackGroups = result.first; trackGroupInfos = result.second; } @@ -501,6 +504,7 @@ private int getPrimaryStreamIndex(int embeddedStreamIndex, int[] streamIndexToTr private static Pair buildTrackGroups( DrmSessionManager drmSessionManager, + @Nullable SubtitleParser.Factory subtitleParserFactory, List adaptationSets, List eventStreams) { int[][] groupedAdaptationSetIndices 
= getGroupedAdaptationSetIndices(adaptationSets); @@ -523,6 +527,7 @@ private static Pair buildTrackGroups( int trackGroupCount = buildPrimaryAndEmbeddedTrackGroupInfos( drmSessionManager, + subtitleParserFactory, adaptationSets, groupedAdaptationSetIndices, primaryGroupCount, @@ -662,6 +667,7 @@ private static int identifyEmbeddedTracks( private static int buildPrimaryAndEmbeddedTrackGroupInfos( DrmSessionManager drmSessionManager, + @Nullable SubtitleParser.Factory subtitleParserFactory, List adaptationSets, int[][] groupedAdaptationSetIndices, int primaryGroupCount, @@ -678,8 +684,24 @@ private static int buildPrimaryAndEmbeddedTrackGroupInfos( } Format[] formats = new Format[representations.size()]; for (int j = 0; j < formats.length; j++) { - Format format = representations.get(j).format; - formats[j] = format.copyWithCryptoType(drmSessionManager.getCryptoType(format)); + Format originalFormat = representations.get(j).format; + Format.Builder updatedFormat = + originalFormat + .buildUpon() + .setCryptoType(drmSessionManager.getCryptoType(originalFormat)); + if (subtitleParserFactory != null && subtitleParserFactory.supportsFormat(originalFormat)) { + updatedFormat + .setSampleMimeType(MimeTypes.APPLICATION_MEDIA3_CUES) + .setCueReplacementBehavior( + subtitleParserFactory.getCueReplacementBehavior(originalFormat)) + .setCodecs( + originalFormat.sampleMimeType + + (originalFormat.codecs != null ? " " + originalFormat.codecs : "")) + // Reset this value to the default. All non-default timestamp adjustments are done + // by SubtitleTranscodingExtractor and there are no 'subsamples' after transcoding. 
+ .setSubsampleOffsetUs(Format.OFFSET_SAMPLE_RELATIVE); + } + formats[j] = updatedFormat.build(); } AdaptationSet firstAdaptationSet = adaptationSets.get(adaptationSetIndices[0]); diff --git a/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaSource.java b/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaSource.java index 344fcd6f532..36fe6171db1 100644 --- a/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaSource.java +++ b/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DashMediaSource.java @@ -77,6 +77,8 @@ import androidx.media3.exoplayer.upstream.LoaderErrorThrower; import androidx.media3.exoplayer.upstream.ParsingLoadable; import androidx.media3.exoplayer.util.SntpClient; +import androidx.media3.extractor.text.DefaultSubtitleParserFactory; +import androidx.media3.extractor.text.SubtitleParser; import com.google.common.base.Charsets; import com.google.common.math.LongMath; import com.google.errorprone.annotations.CanIgnoreReturnValue; @@ -112,6 +114,7 @@ public static final class Factory implements MediaSourceFactory { private DrmSessionManagerProvider drmSessionManagerProvider; private CompositeSequenceableLoaderFactory compositeSequenceableLoaderFactory; private LoadErrorHandlingPolicy loadErrorHandlingPolicy; + @Nullable private SubtitleParser.Factory subtitleParserFactory; private long fallbackTargetLiveOffsetMs; private long minLiveStartPositionUs; @Nullable private ParsingLoadable.Parser manifestParser; @@ -196,6 +199,40 @@ public Factory setLoadErrorHandlingPolicy(LoadErrorHandlingPolicy loadErrorHandl return this; } + /** + * Sets whether subtitles should be parsed as part of extraction (before the sample queue) or as + * part of rendering (after the sample queue). Defaults to false (i.e. subtitles will be parsed + * as part of rendering). + * + *

<p>This method is experimental. Its default value may change, or it may be renamed or removed + * in a future release. + * + *

<p>This method may only be used with {@link DefaultDashChunkSource.Factory}; an {@link IllegalStateException} is thrown if any other chunk source factory is in use. + * + * @param parseSubtitlesDuringExtraction Whether to parse subtitles during extraction or + * rendering. + * @return This factory, for convenience. + */ + // TODO: b/289916598 - Flip the default of this to true (probably wired up to a single method on + // DefaultMediaSourceFactory via the MediaSource.Factory interface). + public Factory experimentalParseSubtitlesDuringExtraction( + boolean parseSubtitlesDuringExtraction) { + if (parseSubtitlesDuringExtraction) { + if (subtitleParserFactory == null) { + this.subtitleParserFactory = new DefaultSubtitleParserFactory(); + } + } else { + this.subtitleParserFactory = null; + } + if (chunkSourceFactory instanceof DefaultDashChunkSource.Factory) { + ((DefaultDashChunkSource.Factory) chunkSourceFactory) + .setSubtitleParserFactory(subtitleParserFactory); + } else { + throw new IllegalStateException(); + } + return this; + } + /** * Sets the target {@link Player#getCurrentLiveOffset() offset for live streams} that is used if * no value is defined in the {@link MediaItem} or the manifest. 
@@ -315,6 +352,7 @@ public DashMediaSource createMediaSource(DashManifest manifest, MediaItem mediaI cmcdConfiguration, drmSessionManagerProvider.get(mediaItem), loadErrorHandlingPolicy, + subtitleParserFactory, fallbackTargetLiveOffsetMs, minLiveStartPositionUs); } @@ -353,6 +391,7 @@ public DashMediaSource createMediaSource(MediaItem mediaItem) { cmcdConfiguration, drmSessionManagerProvider.get(mediaItem), loadErrorHandlingPolicy, + subtitleParserFactory, fallbackTargetLiveOffsetMs, minLiveStartPositionUs); } @@ -411,6 +450,7 @@ public DashMediaSource createMediaSource(MediaItem mediaItem) { private final Runnable simulateManifestRefreshRunnable; private final PlayerEmsgCallback playerEmsgCallback; private final LoaderErrorThrower manifestLoadErrorThrower; + @Nullable private final SubtitleParser.Factory subtitleParserFactory; private DataSource dataSource; private Loader loader; @@ -446,6 +486,7 @@ private DashMediaSource( @Nullable CmcdConfiguration cmcdConfiguration, DrmSessionManager drmSessionManager, LoadErrorHandlingPolicy loadErrorHandlingPolicy, + @Nullable SubtitleParser.Factory subtitleParserFactory, long fallbackTargetLiveOffsetMs, long minLiveStartPositionUs) { this.mediaItem = mediaItem; @@ -459,6 +500,7 @@ private DashMediaSource( this.cmcdConfiguration = cmcdConfiguration; this.drmSessionManager = drmSessionManager; this.loadErrorHandlingPolicy = loadErrorHandlingPolicy; + this.subtitleParserFactory = subtitleParserFactory; this.fallbackTargetLiveOffsetMs = fallbackTargetLiveOffsetMs; this.minLiveStartPositionUs = minLiveStartPositionUs; this.compositeSequenceableLoaderFactory = compositeSequenceableLoaderFactory; @@ -564,7 +606,8 @@ public MediaPeriod createPeriod(MediaPeriodId id, Allocator allocator, long star allocator, compositeSequenceableLoaderFactory, playerEmsgCallback, - getPlayerId()); + getPlayerId(), + subtitleParserFactory); periodsById.put(mediaPeriod.id, mediaPeriod); return mediaPeriod; } diff --git 
a/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DefaultDashChunkSource.java b/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DefaultDashChunkSource.java index 7a0cd41f67c..2873be50936 100644 --- a/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DefaultDashChunkSource.java +++ b/libraries/exoplayer_dash/src/main/java/androidx/media3/exoplayer/dash/DefaultDashChunkSource.java @@ -60,6 +60,7 @@ import androidx.media3.exoplayer.upstream.LoadErrorHandlingPolicy; import androidx.media3.exoplayer.upstream.LoaderErrorThrower; import androidx.media3.extractor.ChunkIndex; +import androidx.media3.extractor.text.SubtitleParser; import com.google.common.collect.ImmutableMap; import java.io.IOException; import java.util.ArrayList; @@ -71,6 +72,7 @@ @UnstableApi public class DefaultDashChunkSource implements DashChunkSource { + /** {@link DashChunkSource.Factory} for {@link DefaultDashChunkSource} instances. */ public static final class Factory implements DashChunkSource.Factory { private final DataSource.Factory dataSourceFactory; @@ -110,6 +112,21 @@ public Factory( this.maxSegmentsPerLoad = maxSegmentsPerLoad; } + /** + * Sets the {@link SubtitleParser.Factory} to be used for parsing subtitles during extraction, + * or null to parse subtitles during decoding. + * + *

<p>This only takes effect when the chunk extractor factory is a {@link BundledChunkExtractor.Factory}; for any other factory the call is silently ignored. + */ + /* package */ Factory setSubtitleParserFactory( + @Nullable SubtitleParser.Factory subtitleParserFactory) { + if (chunkExtractorFactory instanceof BundledChunkExtractor.Factory) { + ((BundledChunkExtractor.Factory) chunkExtractorFactory) + .experimentalSetSubtitleParserFactory(subtitleParserFactory); + } + return this; + } + @Override public DashChunkSource createDashChunkSource( LoaderErrorThrower manifestLoaderErrorThrower, diff --git a/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/DashMediaPeriodTest.java b/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/DashMediaPeriodTest.java index 59d898be6b9..388b48912c5 100644 --- a/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/DashMediaPeriodTest.java +++ b/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/DashMediaPeriodTest.java @@ -224,7 +224,8 @@ private static DashMediaPeriod createDashMediaPeriod(DashManifest manifest, int mock(Allocator.class), mock(CompositeSequenceableLoaderFactory.class), mock(PlayerEmsgCallback.class), - PlayerId.UNSET); + PlayerId.UNSET, + /* subtitleParserFactory= */ null); } private static DashManifest parseManifest(String fileName) throws IOException { diff --git a/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/e2etest/DashPlaybackTest.java b/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/e2etest/DashPlaybackTest.java index 882a3e2594f..78930c7b5af 100644 --- a/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/e2etest/DashPlaybackTest.java +++ b/libraries/exoplayer_dash/src/test/java/androidx/media3/exoplayer/dash/e2etest/DashPlaybackTest.java @@ -22,9 +22,11 @@ import android.view.Surface; import androidx.media3.common.MediaItem; import androidx.media3.common.Player; +import androidx.media3.datasource.DefaultDataSource; import androidx.media3.exoplayer.ExoPlayer; 
import androidx.media3.exoplayer.Renderer; import androidx.media3.exoplayer.RenderersFactory; +import androidx.media3.exoplayer.dash.DashMediaSource; import androidx.media3.exoplayer.metadata.MetadataDecoderFactory; import androidx.media3.exoplayer.metadata.MetadataRenderer; import androidx.media3.exoplayer.trackselection.DefaultTrackSelector; @@ -79,14 +81,15 @@ public void ttmlStandaloneXmlFile() throws Exception { // https://github.com/google/ExoPlayer/issues/7985 @Test - @Ignore( - "Disabled until subtitles are reliably asserted in robolectric tests [internal b/174661563].") public void webvttInMp4() throws Exception { Context applicationContext = ApplicationProvider.getApplicationContext(); CapturingRenderersFactory capturingRenderersFactory = new CapturingRenderersFactory(applicationContext); ExoPlayer player = new ExoPlayer.Builder(applicationContext, capturingRenderersFactory) + .setMediaSourceFactory( + new DashMediaSource.Factory(new DefaultDataSource.Factory(applicationContext)) + .experimentalParseSubtitlesDuringExtraction(true)) .setClock(new FakeClock(/* isAutoAdvancing= */ true)) .build(); player.setVideoSurface(new Surface(new SurfaceTexture(/* texName= */ 1))); diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java b/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java index d593d1eb498..eb797e8f902 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/text/SubtitleTranscodingTrackOutput.java @@ -26,7 +26,6 @@ import androidx.media3.common.C; import androidx.media3.common.DataReader; import androidx.media3.common.Format; -import androidx.media3.common.Format.CueReplacementBehavior; import androidx.media3.common.MimeTypes; import androidx.media3.common.util.ParsableByteArray; import androidx.media3.common.util.Util; @@ 
-86,8 +85,6 @@ public void format(Format format) { if (currentSubtitleParser == null) { delegate.format(format); } else { - @CueReplacementBehavior - int nextCuesBehavior = currentSubtitleParser.getCueReplacementBehavior(); delegate.format( format .buildUpon() @@ -96,7 +93,7 @@ public void format(Format format) { // Reset this value to the default. All non-default timestamp adjustments are done // below in sampleMetadata() and there are no 'subsamples' after transcoding. .setSubsampleOffsetUs(Format.OFFSET_SAMPLE_RELATIVE) - .setCueReplacementBehavior(nextCuesBehavior) + .setCueReplacementBehavior(subtitleParserFactory.getCueReplacementBehavior(format)) .build()); } } diff --git a/libraries/test_data/src/test/assets/playbackdumps/dash/webvtt-in-mp4.dump b/libraries/test_data/src/test/assets/playbackdumps/dash/webvtt-in-mp4.dump index b1fbea6c595..fd7f0a624c2 100644 --- a/libraries/test_data/src/test/assets/playbackdumps/dash/webvtt-in-mp4.dump +++ b/libraries/test_data/src/test/assets/playbackdumps/dash/webvtt-in-mp4.dump @@ -244,3 +244,6 @@ TextOutput: position = 0.5 positionAnchor = 1 size = 1.0 + Subtitle[4]: + presentationTimeUs = 456000 + Cues = []