Skip to content

Commit

Permalink
Improve frame rate calculation by using media duration from mdhd box
Browse files Browse the repository at this point in the history
- Added logic to parse media duration from the `mdhd` box for accurate frame rate calculation.
- Falls back to the track duration from `tkhd` when `mdhd` contains invalid or missing data.
- Avoids incorrect frame rate calculations in MP4 files with an edit list (`elst`) box.
- Adds frame rate calculations for partially fragmented MP4 files.
- Verified accuracy with tools like `mediainfo` and `ffprobe`.

Issue: #1531

**Note**: In dump files for MP4s that have neither an edit list nor fragments, the slight change in frame rate values is not caused by a difference between the `tkhd` and `mdhd` duration values (which should be identical for non-edited, non-fragmented files). Rather, it arises because the two durations are expressed in different timescales. The `mvhd` box defines a global movie timescale, which is used for the track's `tkhd` duration. Meanwhile, each track's `mdhd` box defines its own timescale specific to its content type, which we now use for more accurate frame rate calculation.

PiperOrigin-RevId: 676046744
  • Loading branch information
rohitjoins authored and copybara-github committed Sep 18, 2024
1 parent 8799bf4 commit ecb0024
Show file tree
Hide file tree
Showing 54 changed files with 98 additions and 54 deletions.
3 changes: 3 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
* Fix preroll sample handling for non-keyframe media start positions when
processing edit lists in MP4 files
([#1659](https://github.com/google/ExoPlayer/issues/1659)).
* Improved frame rate calculation by using media duration from the `mdhd`
box in `Mp4Extractor` and `FragmentedMp4Extractor`
([#1531](https://github.com/androidx/media/issues/1531)).
* DataSource:
* Audio:
* Video:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ public DefaultSsChunkSource(
streamElement.timescale,
C.TIME_UNSET,
manifest.durationUs,
/* mediaDurationUs= */ manifest.durationUs,
format,
Track.TRANSFORMATION_NONE,
trackEncryptionBoxes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,7 @@ public static Track parseTrak(
checkNotNull(mdia.getContainerBoxOfType(Mp4Box.TYPE_minf))
.getContainerBoxOfType(Mp4Box.TYPE_stbl));

Pair<Long, String> mdhdData =
parseMdhd(checkNotNull(mdia.getLeafBoxOfType(Mp4Box.TYPE_mdhd)).data);
MdhdData mdhdData = parseMdhd(checkNotNull(mdia.getLeafBoxOfType(Mp4Box.TYPE_mdhd)).data);
LeafBox stsd = stbl.getLeafBoxOfType(Mp4Box.TYPE_stsd);
if (stsd == null) {
throw ParserException.createForMalformedContainer(
Expand All @@ -363,7 +362,7 @@ public static Track parseTrak(
stsd.data,
tkhdData.id,
tkhdData.rotationDegrees,
mdhdData.second,
mdhdData.language,
drmInitData,
isQuickTime);
@Nullable long[] editListDurations = null;
Expand All @@ -383,9 +382,10 @@ public static Track parseTrak(
: new Track(
tkhdData.id,
trackType,
mdhdData.first,
mdhdData.timescale,
movieTimescale,
durationUs,
mdhdData.mediaDurationUs,
stsdData.format,
stsdData.requiredSampleTransformation,
stsdData.trackEncryptionBoxes,
Expand Down Expand Up @@ -431,6 +431,12 @@ public static TrackSampleTable parseStbl(
/* durationUs= */ 0);
}

if (track.type == C.TRACK_TYPE_VIDEO && track.mediaDurationUs > 0) {
float frameRate = sampleCount / (track.mediaDurationUs / 1000000f);
Format format = track.format.buildUpon().setFrameRate(frameRate).build();
track = track.copyWithFormat(format);
}

// Entries are byte offsets of chunks.
boolean chunkOffsetsAreLongs = false;
@Nullable LeafBox chunkOffsetsAtom = stblBox.getLeafBoxOfType(Mp4Box.TYPE_stco);
Expand Down Expand Up @@ -927,23 +933,30 @@ private static int parseHdlr(ParsableByteArray hdlr) {
* Parses an mdhd atom (defined in ISO/IEC 14496-12).
*
* @param mdhd The mdhd atom to decode.
* @return A pair consisting of the media timescale defined as the number of time units that pass
* in one second, and the language code.
* @return An {@link MdhdData} object containing the parsed data.
*/
private static Pair<Long, String> parseMdhd(ParsableByteArray mdhd) {
private static MdhdData parseMdhd(ParsableByteArray mdhd) {
mdhd.setPosition(Mp4Box.HEADER_SIZE);
int fullAtom = mdhd.readInt();
int version = parseFullBoxVersion(fullAtom);
mdhd.skipBytes(version == 0 ? 8 : 16);
long timescale = mdhd.readUnsignedInt();
mdhd.skipBytes(version == 0 ? 4 : 8);
long mediaDuration = version == 0 ? mdhd.readUnsignedInt() : mdhd.readUnsignedLongToLong();
long mediaDurationUs;
if (mediaDuration == 0) {
// 0 duration normally indicates that the file is fully fragmented (i.e. all of the media
// samples are in fragments). Treat as unknown.
mediaDurationUs = C.TIME_UNSET;
} else {
mediaDurationUs = Util.scaleLargeTimestamp(mediaDuration, C.MICROS_PER_SECOND, timescale);
}
int languageCode = mdhd.readUnsignedShort();
String language =
""
+ (char) (((languageCode >> 10) & 0x1F) + 0x60)
+ (char) (((languageCode >> 5) & 0x1F) + 0x60)
+ (char) ((languageCode & 0x1F) + 0x60);
return Pair.create(timescale, language);
return new MdhdData(timescale, mediaDurationUs, language);
}

/**
Expand Down Expand Up @@ -2408,6 +2421,19 @@ public EyesData(StriData striData) {
}
}

/**
* Data parsed from mdhd box.
*
* <p>Immutable holder for the media timescale, the media duration and the language code.
*/
private static final class MdhdData {
// Media timescale: the number of time units that pass in one second.
private final long timescale;
// Media duration in microseconds, or C.TIME_UNSET if unknown (the box declared a duration of 0).
private final long mediaDurationUs;
// Language code: three characters decoded from the packed 5-bit values stored in the box.
private final String language;

public MdhdData(long timescale, long mediaDurationUs, String language) {
this.timescale = timescale;
this.mediaDurationUs = mediaDurationUs;
this.language = language;
}
}

/** Data parsed from vexu box. */
/* package */ static final class VexuData {
@Nullable private final EyesData eyesData;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,9 @@ private void processMoovAtom(ContainerBox moov) throws ParserException {
roleFlags |=
firstVideoTrackIndex == C.INDEX_UNSET ? C.ROLE_FLAG_MAIN : C.ROLE_FLAG_ALTERNATE;
}
if (trackDurationUs > 0 && trackSampleTable.sampleCount > 0) {
if (track.format.frameRate == Format.NO_VALUE
&& trackDurationUs > 0
&& trackSampleTable.sampleCount > 0) {
float frameRate = trackSampleTable.sampleCount / (trackDurationUs / 1000000f);
formatBuilder.setFrameRate(frameRate);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ public final class Track {
/** The duration of the track in microseconds, or {@link C#TIME_UNSET} if unknown. */
public final long durationUs;

/** The duration of the media in microseconds, or {@link C#TIME_UNSET} if unknown. */
public final long mediaDurationUs;

/** The format. */
public final Format format;

Expand Down Expand Up @@ -93,6 +96,7 @@ public Track(
long timescale,
long movieTimescale,
long durationUs,
long mediaDurationUs,
Format format,
@Transformation int sampleTransformation,
@Nullable TrackEncryptionBox[] sampleDescriptionEncryptionBoxes,
Expand All @@ -104,6 +108,7 @@ public Track(
this.timescale = timescale;
this.movieTimescale = movieTimescale;
this.durationUs = durationUs;
this.mediaDurationUs = mediaDurationUs;
this.format = format;
this.sampleTransformation = sampleTransformation;
this.sampleDescriptionEncryptionBoxes = sampleDescriptionEncryptionBoxes;
Expand Down Expand Up @@ -133,6 +138,7 @@ public Track copyWithFormat(Format format) {
timescale,
movieTimescale,
durationUs,
mediaDurationUs,
format,
sampleTransformation,
sampleDescriptionEncryptionBoxes,
Expand All @@ -148,6 +154,7 @@ public Track copyWithoutEditLists() {
timescale,
movieTimescale,
durationUs,
mediaDurationUs,
format,
sampleTransformation,
sampleDescriptionEncryptionBoxes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public class FragmentedMp4ExtractorNoSniffingTest {
/* timescale= */ 30_000,
/* movieTimescale= */ 1000,
/* durationUs= */ C.TIME_UNSET,
/* mediaDurationUs= */ C.TIME_UNSET,
new Format.Builder().setSampleMimeType(MimeTypes.VIDEO_H264).build(),
/* sampleTransformation= */ Track.TRANSFORMATION_NONE,
/* sampleDescriptionEncryptionBoxes= */ null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6
Expand Down
Loading

0 comments on commit ecb0024

Please sign in to comment.