From a5c669a31e2bf76b99043ce827794ff7a64d7f74 Mon Sep 17 00:00:00 2001 From: Rob Walch Date: Tue, 12 Oct 2021 15:18:39 -0700 Subject: [PATCH] Add support for emsg ID3 metadata in fmp4 segments - Closes #2360 --- README.md | 3 +- src/demux/mp4demuxer.ts | 28 ++++++- src/remux/mp4-remuxer.ts | 125 +++++++++++++++++-------------- src/remux/passthrough-remuxer.ts | 10 ++- src/utils/mp4-tools.ts | 96 ++++++++++++++++++++++++ 5 files changed, 198 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index b318c708e27..c22eb06696e 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ HLS.js is written in [ECMAScript6] (`*.js`) and [TypeScript] (`*.ts`) (strongly - Packetized metadata (ID3v2.3.0) Elementary Stream - AAC container (audio only streams) - MPEG Audio container (MPEG-1/2 Audio Layer III audio only streams) -- Timed Metadata for HTTP Live Streaming (in ID3 format, carried in MPEG-2 TS) +- Timed Metadata for HTTP Live Streaming (in ID3 format, carried in MPEG-2 TS and FMP4 Emsg) - AES-128 decryption - SAMPLE-AES decryption (only supported if using MPEG-2 TS container) - Encrypted media extensions (EME) support for DRM (digital rights management) @@ -114,7 +114,6 @@ The following tags are added to their respective fragment's attribute list but a For a complete list of issues, see ["Top priorities" in the Release Planning and Backlog project tab](https://github.com/video-dev/hls.js/projects/6). Codec support is dependent on the runtime environment (for example, not all browsers on the same OS support HEVC). 
- CMAF CC support [#2623](https://github.com/video-dev/hls.js/issues/2623) -- `Emsg` Inband Timed Metadata for FMP4 (ID3 within Emsgv1) in "metadata" TextTracks [#2360](https://github.com/video-dev/hls.js/issues/2360) - `#EXT-X-DATERANGE` in "metadata" TextTracks [#2218](https://github.com/video-dev/hls.js/issues/2218) - `#EXT-X-GAP` filling [#2940](https://github.com/video-dev/hls.js/issues/2940) - `#EXT-X-I-FRAME-STREAM-INF` I-frame Media Playlist files diff --git a/src/demux/mp4demuxer.ts b/src/demux/mp4demuxer.ts index 5666ab6d830..c0946c1fd35 100644 --- a/src/demux/mp4demuxer.ts +++ b/src/demux/mp4demuxer.ts @@ -14,11 +14,14 @@ import { findBox, segmentValidRange, appendUint8Array, + parseEmsg, } from '../utils/mp4-tools'; import { dummyTrack } from './dummy-demuxed-track'; import type { HlsEventEmitter } from '../events'; import type { HlsConfig } from '../config'; +const emsgSchemePattern = /\/emsg[-/]ID3/i; + class MP4Demuxer implements Demuxer { static readonly minProbeByteLength = 1024; private remainderData: Uint8Array | null = null; @@ -43,7 +46,7 @@ class MP4Demuxer implements Demuxer { ); } - demux(data): DemuxerResult { + demux(data: Uint8Array, timeOffset: number): DemuxerResult { // Load all data into the avc track. The CMAF remuxer will look for the data in the samples object; the rest of the fields do not matter let avcSamples = data; const avcTrack = dummyTrack() as PassthroughVideoTrack; @@ -61,10 +64,31 @@ class MP4Demuxer implements Demuxer { avcTrack.samples = avcSamples; } + const id3Track = dummyTrack() as DemuxedMetadataTrack; + const emsgs = findBox(avcTrack.samples, ['emsg']); + if (emsgs) { + id3Track.inputTimeScale = 1; + emsgs.forEach(({ data, start, end }) => { + const emsgInfo = parseEmsg(data.subarray(start, end)); + if (emsgSchemePattern.test(emsgInfo.schemeIdUri)) { + const pts = Number.isFinite(emsgInfo.presentationTime) + ? emsgInfo.presentationTime! / emsgInfo.timeScale + : timeOffset + emsgInfo.presentationTimeDelta! 
/ emsgInfo.timeScale; + const payload = emsgInfo.payload; + id3Track.samples.push({ + data: payload, + len: payload.byteLength, + dts: pts, + pts: pts, + }); + } + }); + } + return { audioTrack: dummyTrack() as DemuxedAudioTrack, avcTrack, - id3Track: dummyTrack() as DemuxedMetadataTrack, + id3Track, textTrack: dummyTrack() as DemuxedUserdataTrack, }; } diff --git a/src/remux/mp4-remuxer.ts b/src/remux/mp4-remuxer.ts index 22fceb82cc1..8204f3fa0b0 100644 --- a/src/remux/mp4-remuxer.ts +++ b/src/remux/mp4-remuxer.ts @@ -244,11 +244,20 @@ export default class MP4Remuxer implements Remuxer { // Allow ID3 and text to remux, even if more audio/video samples are required if (this.ISGenerated) { if (id3Track.samples.length) { - id3 = this.remuxID3(id3Track, timeOffset); + id3 = flushTextTrackMetadataCueSamples( + id3Track, + timeOffset, + this._initPTS, + this._initDTS + ); } if (textTrack.samples.length) { - text = this.remuxText(textTrack, timeOffset); + text = flushTextTrackUserdataCueSamples( + textTrack, + timeOffset, + this._initPTS + ); } } @@ -971,62 +980,6 @@ export default class MP4Remuxer implements Remuxer { return this.remuxAudio(track, timeOffset, contiguous, false); } - - remuxID3( - track: DemuxedMetadataTrack, - timeOffset: number - ): RemuxedMetadata | undefined { - const length = track.samples.length; - if (!length) { - return; - } - const inputTimeScale = track.inputTimeScale; - const initPTS = this._initPTS; - const initDTS = this._initDTS; - for (let index = 0; index < length; index++) { - const sample = track.samples[index]; - // setting id3 pts, dts to relative time - // using this._initPTS and this._initDTS to calculate relative time - sample.pts = - normalizePts(sample.pts - initPTS, timeOffset * inputTimeScale) / - inputTimeScale; - sample.dts = - normalizePts(sample.dts - initDTS, timeOffset * inputTimeScale) / - inputTimeScale; - } - const samples = track.samples; - track.samples = []; - return { - samples, - }; - } - - remuxText( - track: 
/**
 * Drains the queued ID3/metadata samples from `track`, rewriting each
 * sample's `pts` and `dts` in place before handing them back.
 *
 * Each timestamp has the matching init value subtracted, is passed through
 * `normalizePts` with `timeOffset * track.inputTimeScale` as the reference,
 * and is then divided by `track.inputTimeScale`.
 *
 * @param track - metadata track whose `samples` array is consumed; it is
 *   replaced with a fresh empty array once the samples have been taken.
 * @param timeOffset - offset multiplied by the track's input timescale to
 *   build the normalization reference.
 * @param initPTS - value subtracted from every sample `pts`.
 * @param initDTS - value subtracted from every sample `dts`.
 * @returns `{ samples }` holding the adjusted samples, or `undefined` when
 *   the track had nothing queued.
 */
export function flushTextTrackMetadataCueSamples(
  track: DemuxedMetadataTrack,
  timeOffset: number,
  initPTS: number,
  initDTS: number
): RemuxedMetadata | undefined {
  const samples = track.samples;
  if (samples.length === 0) {
    return;
  }
  const timescale = track.inputTimeScale;
  const reference = timeOffset * timescale;
  samples.forEach((cue) => {
    cue.pts = normalizePts(cue.pts - initPTS, reference) / timescale;
    cue.dts = normalizePts(cue.dts - initDTS, reference) / timescale;
  });
  // Hand ownership of the array to the caller and leave the track empty.
  track.samples = [];
  return { samples };
}

/**
 * Drains the queued user-data (text) samples from `track`, rewriting each
 * sample's `pts` in place and returning them ordered by ascending `pts`.
 *
 * Each `pts` has `initPTS` subtracted, is passed through `normalizePts` with
 * `timeOffset * track.inputTimeScale` as the reference, and is then divided
 * by `track.inputTimeScale`.
 *
 * @param track - user-data track whose `samples` array is consumed; it is
 *   sorted in place and then replaced with a fresh empty array.
 * @param timeOffset - offset multiplied by the track's input timescale to
 *   build the normalization reference.
 * @param initPTS - value subtracted from every sample `pts`.
 * @returns `{ samples }` holding the adjusted, sorted samples, or
 *   `undefined` when the track had nothing queued.
 */
export function flushTextTrackUserdataCueSamples(
  track: DemuxedUserdataTrack,
  timeOffset: number,
  initPTS: number
): RemuxedUserdata | undefined {
  const samples = track.samples;
  if (samples.length === 0) {
    return;
  }
  const timescale = track.inputTimeScale;
  const reference = timeOffset * timescale;
  samples.forEach((cue) => {
    cue.pts = normalizePts(cue.pts - initPTS, reference) / timescale;
  });
  samples.sort((first, second) => first.pts - second.pts);
  // Hand ownership of the array to the caller and leave the track empty.
  track.samples = [];
  return { samples };
}
/**
 * Fields extracted from an `emsg` (event message) box.
 *
 * `presentationTimeDelta` is only populated for version 0 boxes and
 * `presentationTime` only for version 1 boxes; the other one is left
 * `undefined` so callers can tell which timing model applies.
 */
export interface IEmsgParsingData {
  schemeIdUri: string;
  value: string;
  timeScale: number;
  presentationTimeDelta?: number;
  presentationTime?: number;
  eventDuration: number;
  id: number;
  payload: Uint8Array;
}

/**
 * Parses the body of an `emsg` box.
 *
 * `data` must start at the box's version byte, i.e. immediately after the
 * 8-byte size/type box header. The layouts handled are:
 *
 * - version 0: version/flags (4 bytes), scheme_id_uri, value, timescale,
 *   presentation_time_delta, event_duration, id, message data
 * - version 1: version/flags (4 bytes), timescale, 64-bit presentation_time,
 *   event_duration, id, scheme_id_uri, value, message data
 *
 * The two strings are NUL-terminated in the box; the terminator is kept at
 * the end of the returned `schemeIdUri` / `value` strings. Parsing is
 * best-effort on truncated input: integers that would lie beyond the end of
 * `data` are reported as 0 and an unterminated string simply ends at the end
 * of `data` instead of being scanned forever.
 *
 * For any other version no fields are decoded and `payload` is all of `data`.
 *
 * @param data - emsg box contents beginning with the version byte.
 * @returns the decoded fields plus `payload`, a view (not a copy) onto the
 *   message data inside `data`.
 */
export function parseEmsg(data: Uint8Array): IEmsgParsingData {
  const version = data[0];
  const byteLength = data.byteLength;
  // Respect byteOffset so subarray views of a larger buffer parse correctly.
  const view = new DataView(data.buffer, data.byteOffset, byteLength);
  let offset = 0;

  // Big-endian uint32 read that yields 0 rather than throwing when truncated.
  const readU32 = (pos: number): number =>
    pos + 4 <= byteLength ? view.getUint32(pos) : 0;

  // Reads a NUL-terminated string at `offset` (terminator included in the
  // result) and advances `offset` past it; never reads beyond `data`.
  const readTerminatedString = (): string => {
    if (offset >= byteLength) {
      return '';
    }
    let end = offset;
    while (end < byteLength && data[end] !== 0) {
      end++;
    }
    const stop = Math.min(end + 1, byteLength);
    let text = '';
    for (let i = offset; i < stop; i++) {
      text += String.fromCharCode(data[i]);
    }
    offset = stop;
    return text;
  };

  let schemeIdUri = '';
  let value = '';
  let timeScale = 0;
  let presentationTimeDelta: number | undefined;
  let presentationTime: number | undefined;
  let eventDuration = 0;
  let id = 0;

  if (version === 0) {
    // Skip version (1 byte) + flags (3 bytes); the strings come first and the
    // numeric fields follow wherever the strings end.
    offset = 4;
    schemeIdUri = readTerminatedString();
    value = readTerminatedString();
    timeScale = readU32(offset);
    presentationTimeDelta = readU32(offset + 4);
    eventDuration = readU32(offset + 8);
    id = readU32(offset + 12);
    offset += 16;
  } else if (version === 1) {
    timeScale = readU32(4);
    // 64-bit presentation time is split into two 32-bit halves.
    const upper = readU32(8);
    const lower = readU32(12);
    presentationTime = 2 ** 32 * upper + lower;
    if (!Number.isSafeInteger(presentationTime)) {
      presentationTime = Number.MAX_SAFE_INTEGER;
      // eslint-disable-next-line no-console
      console.warn(
        'Presentation time exceeds safe integer limit and wrapped to max safe integer in parsing emsg box'
      );
    }
    eventDuration = readU32(16);
    id = readU32(20);
    offset = 24;
    schemeIdUri = readTerminatedString();
    value = readTerminatedString();
  }

  const payload = data.subarray(offset, byteLength);

  return {
    schemeIdUri,
    value,
    timeScale,
    presentationTime,
    presentationTimeDelta,
    eventDuration,
    id,
    payload,
  };
}