diff --git a/CONTRIBUTORS b/CONTRIBUTORS index db6bd504..7920bae9 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -16,6 +16,7 @@ Thom Chiovoloni # Please keep this section sorted in ascending order. BlackHoleFox [https://github.com/blackholefox] +darksv [https://github.com/darksv] djugei [https://github.com/djugei] FelixMcFelix [https://github.com/FelixMcFelix] Herohtar [https://github.com/herohtar] diff --git a/symphonia-format-mkv/Cargo.toml b/symphonia-format-mkv/Cargo.toml index e6d8334f..2bce9d69 100644 --- a/symphonia-format-mkv/Cargo.toml +++ b/symphonia-format-mkv/Cargo.toml @@ -1,17 +1,19 @@ [package] name = "symphonia-format-mkv" -version = "0.0.1" +version = "0.4.0" description = "Pure Rust MKV/WebM demuxer (a part of project Symphonia)." homepage = "https://github.com/pdeljanov/Symphonia" repository = "https://github.com/pdeljanov/Symphonia" -authors = ["Philip Deljanov "] +authors = ["Dariusz Niedoba "] license = "MPL-2.0" readme = "README.md" categories = ["multimedia", "multimedia::audio", "multimedia::encoding"] -keywords = ["audio", "media", "demuxer", "mp4", "iso"] +keywords = ["audio", "media", "demuxer", "mkv", "matroska", "webm"] edition = "2018" [dependencies] log = "0.4" +lazy_static = "1.4.0" symphonia-core = { version = "0.4", path = "../symphonia-core" } -symphonia-metadata = { version = "0.4", path = "../symphonia-metadata" } \ No newline at end of file +symphonia-metadata = { version = "0.4", path = "../symphonia-metadata" } +symphonia-utils-xiph = { version = "0.4", path = "../symphonia-utils-xiph" } \ No newline at end of file diff --git a/symphonia-format-mkv/README.md b/symphonia-format-mkv/README.md index 075ca06c..72ca5c50 100644 --- a/symphonia-format-mkv/README.md +++ b/symphonia-format-mkv/README.md @@ -1,8 +1,6 @@ # Symphonia MKV/WebM Demuxer -This is a placeholder crate for Project Symphonia's MKV/WebM Demuxer. - -Please consider contributing! +MKV/WebM demuxer for Project Symphonia. **Note:** This crate is part of Symphonia. 
Please use the [`symphonia`](https://crates.io/crates/symphonia) crate instead of this one directly. diff --git a/symphonia-format-mkv/src/codecs.rs b/symphonia-format-mkv/src/codecs.rs new file mode 100644 index 00000000..6724c822 --- /dev/null +++ b/symphonia-format-mkv/src/codecs.rs @@ -0,0 +1,48 @@ +// Symphonia +// Copyright (c) 2019-2022 The Project Symphonia Developers. +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use symphonia_core::codecs; +use symphonia_core::codecs::CodecType; + +use crate::segment::TrackElement; + +pub(crate) fn codec_id_to_type(track: &TrackElement) -> Option { + let bit_depth = track.audio.as_ref().and_then(|a| a.bit_depth); + + match track.codec_id.as_str() { + "A_MPEG/L1" => Some(codecs::CODEC_TYPE_MP1), + "A_MPEG/L2" => Some(codecs::CODEC_TYPE_MP2), + "A_MPEG/L3" => Some(codecs::CODEC_TYPE_MP3), + "A_FLAC" => Some(codecs::CODEC_TYPE_FLAC), + "A_OPUS" => Some(codecs::CODEC_TYPE_OPUS), + "A_VORBIS" => Some(codecs::CODEC_TYPE_VORBIS), + "A_AAC/MPEG2/MAIN" | "A_AAC/MPEG2/LC" | "A_AAC/MPEG2/LC/SBR" | "A_AAC/MPEG2/SSR" + | "A_AAC/MPEG4/MAIN" | "A_AAC/MPEG4/LC" | "A_AAC/MPEG4/LC/SBR" | "A_AAC/MPEG4/SSR" + | "A_AAC/MPEG4/LTP" | "A_AAC" => Some(codecs::CODEC_TYPE_AAC), + "A_PCM/INT/BIG" => match bit_depth? { + 16 => Some(codecs::CODEC_TYPE_PCM_S16BE), + 24 => Some(codecs::CODEC_TYPE_PCM_S24BE), + 32 => Some(codecs::CODEC_TYPE_PCM_S32BE), + _ => None, + }, + "A_PCM/INT/LIT" => match bit_depth? { + 16 => Some(codecs::CODEC_TYPE_PCM_S16LE), + 24 => Some(codecs::CODEC_TYPE_PCM_S24LE), + 32 => Some(codecs::CODEC_TYPE_PCM_S32LE), + _ => None, + }, + "A_PCM/FLOAT/IEEE" => match bit_depth? 
{ + 32 => Some(codecs::CODEC_TYPE_PCM_F32LE), + 64 => Some(codecs::CODEC_TYPE_PCM_F64LE), + _ => None, + }, + _ => { + log::warn!("unknown codec: {}", &track.codec_id); + None + } + } +} diff --git a/symphonia-format-mkv/src/demuxer.rs b/symphonia-format-mkv/src/demuxer.rs new file mode 100644 index 00000000..eae00a7e --- /dev/null +++ b/symphonia-format-mkv/src/demuxer.rs @@ -0,0 +1,598 @@ +// Symphonia +// Copyright (c) 2019-2022 The Project Symphonia Developers. +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::{HashMap, VecDeque}; +use std::convert::TryFrom; +use std::io::{Seek, SeekFrom}; + +use symphonia_core::audio::Layout; +use symphonia_core::codecs::{CodecParameters, CODEC_TYPE_FLAC, CODEC_TYPE_VORBIS}; +use symphonia_core::errors::{ + decode_error, end_of_stream_error, seek_error, unsupported_error, Error, Result, SeekErrorKind, +}; +use symphonia_core::formats::{ + Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track, +}; +use symphonia_core::io::{BufReader, MediaSource, MediaSourceStream, ReadBytes}; +use symphonia_core::meta::{Metadata, MetadataLog}; +use symphonia_core::probe::Instantiate; +use symphonia_core::probe::{Descriptor, QueryDescriptor}; +use symphonia_core::sample::SampleFormat; +use symphonia_core::support_format; +use symphonia_core::units::TimeBase; +use symphonia_utils_xiph::flac::metadata::{MetadataBlockHeader, MetadataBlockType}; + +use crate::codecs::codec_id_to_type; +use crate::ebml::{EbmlElement, ElementHeader, ElementIterator}; +use crate::element_ids::{ElementType, ELEMENTS}; +use crate::lacing::{extract_frames, read_xiph_sizes, Frame}; +use crate::segment::{ + BlockGroupElement, ClusterElement, CuesElement, InfoElement, SeekHeadElement, TagsElement, + TracksElement, +}; + +#[allow(dead_code)] +pub struct TrackState { + /// 
Codec parameters. + pub(crate) codec_params: CodecParameters, + /// The track number. + track_num: u32, + /// Default frame duration in nanoseconds. + pub(crate) default_frame_duration: Option, +} + +pub struct MkvReader { + /// Iterator over EBML element headers + iter: ElementIterator, + tracks: Vec, + track_states: HashMap, + current_cluster: Option, + metadata: MetadataLog, + cues: Vec, + frames: VecDeque, + timestamp_scale: u64, + clusters: Vec, +} + +#[derive(Debug)] +struct ClusterState { + timestamp: Option, + end: Option, +} + +fn vorbis_extra_data_from_codec_private(extra: &[u8]) -> Result> { + const VORBIS_PACKET_TYPE_IDENTIFICATION: u8 = 1; + const VORBIS_PACKET_TYPE_SETUP: u8 = 5; + + // Private Data for this codec has the following layout: + // - 1 byte that represents number of packets minus one; + // - Xiph coded lengths of packets, length of the last packet must be deduced (as in Xiph lacing) + // - packets in order: + // - The Vorbis identification header + // - Vorbis comment header + // - codec setup header + + let mut reader = BufReader::new(extra); + let packet_count = reader.read_byte()? as usize; + let packet_lengths = read_xiph_sizes(&mut reader, packet_count)?; + + let mut packets = Vec::new(); + for length in packet_lengths { + packets.push(reader.read_boxed_slice_exact(length as usize)?); + } + + let last_packet_length = extra.len() - reader.pos() as usize; + packets.push(reader.read_boxed_slice_exact(last_packet_length)?); + + let mut ident_header = None; + let mut setup_header = None; + + for packet in packets { + match packet.get(0).copied() { + Some(VORBIS_PACKET_TYPE_IDENTIFICATION) => { + ident_header = Some(packet); + } + Some(VORBIS_PACKET_TYPE_SETUP) => { + setup_header = Some(packet); + } + _ => { + log::debug!("unsupported vorbis packet type"); + } + } + } + + // This is layout expected currently by Vorbis codec. 
+ Ok([ + ident_header.ok_or(Error::DecodeError("mkv: missing vorbis identification packet"))?, + setup_header.ok_or(Error::DecodeError("mkv: missing vorbis setup packet"))?, + ] + .concat() + .into_boxed_slice()) +} + +/// Extracts the payload of the STREAMINFO metadata block from FLAC codec private data. +/// +/// The data must start with the `fLaC` stream marker. Metadata blocks found before STREAMINFO +/// are skipped; if no STREAMINFO block is found, the error raised by the reader is returned. +fn flac_extra_data_from_codec_private(codec_private: &[u8]) -> Result> { + let mut reader = BufReader::new(codec_private); + + let marker = reader.read_quad_bytes()?; + if marker != *b"fLaC" { + return decode_error("mkv (flac): missing flac stream marker"); + } + + loop { + // Read the header of every block: a header read only once before the loop would make + // each iteration skip by the first block's length and never reach STREAMINFO. + let header = MetadataBlockHeader::read(&mut reader)?; + + match header.block_type { + MetadataBlockType::StreamInfo => { + break Ok(reader.read_boxed_slice_exact(header.block_len as usize)?); + } + _ => reader.ignore_bytes(u64::from(header.block_len))?, + } + } +} + +impl MkvReader { + fn seek_track_by_ts_forward(&mut self, track_id: u32, ts: u64) -> Result { + let actual_ts = 'out: loop { + // Skip frames from the buffer until the given timestamp + while let Some(frame) = self.frames.front() { + if frame.timestamp + frame.duration >= ts && frame.track == track_id { + break 'out frame.timestamp; + } + else { + self.frames.pop_front(); + } + } + self.next_element()? 
+ }; + + Ok(SeekedTo { track_id, required_ts: ts, actual_ts }) + } + + fn seek_track_by_ts(&mut self, track_id: u32, ts: u64) -> Result { + if self.clusters.is_empty() { + self.seek_track_by_ts_forward(track_id, ts) + } + else { + let mut target_cluster = None; + for cluster in &self.clusters { + if cluster.timestamp > ts { + break; + } + target_cluster = Some(cluster); + } + let cluster = target_cluster.ok_or(Error::SeekError(SeekErrorKind::OutOfRange))?; + + let mut target_block = None; + for block in cluster.blocks.iter() { + if block.track as u32 != track_id { + continue; + } + if block.timestamp > ts { + break; + } + target_block = Some(block); + } + + let pos = match target_block { + Some(block) => block.pos, + None => cluster.pos, + }; + self.iter.seek(pos)?; + + // Restore cluster's metadata + self.current_cluster = + Some(ClusterState { timestamp: Some(cluster.timestamp), end: cluster.end }); + + // Seek to a specified block inside the cluster. + self.seek_track_by_ts_forward(track_id, ts) + } + } + + fn next_element(&mut self) -> Result<()> { + if let Some(ClusterState { end: Some(end), .. }) = &self.current_cluster { + // Make sure we don't read past the current cluster if its size is known. + if self.iter.pos() >= *end { + log::debug!("ended cluster"); + self.current_cluster = None; + } + } + + // Each Cluster is being read incrementally so we need to keep track of + // which cluster we are currently in. + + let header = match self.iter.read_child_header()? { + Some(header) => header, + None => { + // If we reached here, it must be an end of stream. 
+ return end_of_stream_error(); + } + }; + + match header.etype { + ElementType::Cluster => { + self.current_cluster = Some(ClusterState { timestamp: None, end: header.end() }); + } + ElementType::Timestamp => match self.current_cluster.as_mut() { + Some(cluster) => { + cluster.timestamp = Some(self.iter.read_u64()?); + } + None => { + self.iter.ignore_data()?; + log::warn!("timestamp element outside of a cluster"); + return Ok(()); + } + }, + ElementType::SimpleBlock => { + let cluster_ts = match self.current_cluster.as_ref() { + Some(ClusterState { timestamp: Some(ts), .. }) => *ts, + Some(_) => { + self.iter.ignore_data()?; + log::warn!("missing cluster timestamp"); + return Ok(()); + } + None => { + self.iter.ignore_data()?; + log::warn!("simple block element outside of a cluster"); + return Ok(()); + } + }; + + let data = self.iter.read_boxed_slice()?; + extract_frames( + &data, + None, + &self.track_states, + cluster_ts, + self.timestamp_scale, + &mut self.frames, + )?; + } + ElementType::BlockGroup => { + let cluster_ts = match self.current_cluster.as_ref() { + Some(ClusterState { timestamp: Some(ts), .. 
}) => *ts, + Some(_) => { + self.iter.ignore_data()?; + log::warn!("missing cluster timestamp"); + return Ok(()); + } + None => { + self.iter.ignore_data()?; + log::warn!("block group element outside of a cluster"); + return Ok(()); + } + }; + + let group = self.iter.read_element_data::()?; + extract_frames( + &group.data, + group.duration, + &self.track_states, + cluster_ts, + self.timestamp_scale, + &mut self.frames, + )?; + } + ElementType::Tags => { + let tags = self.iter.read_element_data::()?; + self.metadata.push(tags.to_metadata()); + self.current_cluster = None; + } + _ if header.etype.is_top_level() => { + self.current_cluster = None; + } + other => { + log::debug!("ignored element {:?}", other); + self.iter.ignore_data()?; + } + } + + Ok(()) + } +} + +impl FormatReader for MkvReader { + fn try_new(mut reader: MediaSourceStream, _options: &FormatOptions) -> Result + where + Self: Sized, + { + let is_seekable = reader.is_seekable(); + + // Get the total length of the stream, if possible. + let total_len = if is_seekable { + let pos = reader.pos(); + let len = reader.seek(SeekFrom::End(0))?; + reader.seek(SeekFrom::Start(pos))?; + log::info!("stream is seekable with len={} bytes.", len); + Some(len) + } + else { + None + }; + + let mut it = ElementIterator::new(reader, total_len); + let ebml = it.read_element::()?; + log::warn!("ebml header: {:#?}", ebml.header); + + if !matches!(ebml.header.doc_type.as_str(), "matroska" | "webm") { + return unsupported_error("mkv: not a matroska / webm file"); + } + + let segment_pos = match it.read_child_header()? { + Some(ElementHeader { etype: ElementType::Segment, data_pos, .. 
}) => data_pos, + _ => return unsupported_error("mkv: missing segment element"), + }; + + let mut segment_tracks = None; + let mut info = None; + let mut clusters = Vec::new(); + let mut metadata = MetadataLog::default(); + let mut current_cluster = None; + + let mut seek_positions = Vec::new(); + while let Ok(Some(header)) = it.read_child_header() { + match header.etype { + ElementType::SeekHead => { + let seek_head = it.read_element_data::()?; + for element in seek_head.seeks.into_vec() { + let tag = element.id as u32; + let etype = match ELEMENTS.get(&tag) { + Some((_, etype)) => *etype, + None => continue, + }; + seek_positions.push((etype, segment_pos + element.position)); + } + } + ElementType::Tracks => { + segment_tracks = Some(it.read_element_data::()?); + } + ElementType::Info => { + info = Some(it.read_element_data::()?); + } + ElementType::Cues => { + let cues = it.read_element_data::()?; + for cue in cues.points.into_vec() { + clusters.push(ClusterElement { + timestamp: cue.time, + pos: segment_pos + cue.positions.cluster_position, + end: None, + blocks: Box::new([]), + }); + } + } + ElementType::Tags => { + let tags = it.read_element_data::()?; + metadata.push(tags.to_metadata()); + } + ElementType::Cluster => { + // Set state for current cluster for the first call of `next_element`. + current_cluster = Some(ClusterState { timestamp: None, end: header.end() }); + + // Don't look forward into the stream since + // we can't be sure that we'll find anything useful. + break; + } + other => { + it.ignore_data()?; + log::debug!("ignored element {:?}", other); + } + } + } + + if is_seekable { + // Make sure we don't jump backwards unnecessarily. 
+ seek_positions.sort_by_key(|sp| sp.1); + + for (etype, pos) in seek_positions { + it.seek(pos)?; + match etype { + ElementType::Tracks => { + segment_tracks = Some(it.read_element::()?); + } + ElementType::Info => { + info = Some(it.read_element::()?); + } + ElementType::Tags => { + let tags = it.read_element::()?; + metadata.push(tags.to_metadata()); + } + ElementType::Cues => { + let cues = it.read_element::()?; + for cue in cues.points.into_vec() { + clusters.push(ClusterElement { + timestamp: cue.time, + pos: segment_pos + cue.positions.cluster_position, + end: None, + blocks: Box::new([]), + }); + } + } + _ => (), + } + } + } + + let segment_tracks = + segment_tracks.ok_or(Error::DecodeError("mkv: missing Tracks element"))?; + + if is_seekable { + let mut reader = it.into_inner(); + reader.seek(SeekFrom::Start(segment_pos))?; + it = ElementIterator::new(reader, total_len); + } + + let info = info.ok_or(Error::DecodeError("mkv: missing Info element"))?; + + // The timestamp scale is read from the file, so a value that is zero or does not fit + // into `u32` is reported as a decode error instead of causing a panic. 
+ let timestamp_scale = match u32::try_from(info.timestamp_scale) { + Ok(scale) if scale != 0 => scale, + _ => return decode_error("mkv: invalid timestamp scale"), + }; + let time_base = TimeBase::new(timestamp_scale, 1_000_000_000); + + let mut tracks = Vec::new(); + let mut states = HashMap::new(); + for track in segment_tracks.tracks.into_vec() { + let codec_type = codec_id_to_type(&track); + + let mut codec_params = CodecParameters::new(); + codec_params.with_time_base(time_base); + + if let Some(duration) = info.duration { + codec_params.with_n_frames(duration as u64); + } + + if let Some(audio) = track.audio { + codec_params.with_sample_rate(audio.sampling_frequency.round() as u32); + + let format = audio.bit_depth.and_then(|bits| match bits { + 8 => Some(SampleFormat::S8), + 16 => Some(SampleFormat::S16), + 24 => Some(SampleFormat::S24), + 32 => Some(SampleFormat::S32), + _ => None, + }); + + if let Some(format) = format { + codec_params.with_sample_format(format); + } + + if let Some(bits) = audio.bit_depth { + codec_params.with_bits_per_sample(bits as u32); + } + + let layout = match audio.channels { + 1 => Some(Layout::Mono), + 2 => Some(Layout::Stereo), + 3 => Some(Layout::TwoPointOne), + 6 => Some(Layout::FivePointOne), + other => { + log::warn!( + "track #{} has custom number of channels: {}", + track.number, + other + ); + None + } + }; + + if let Some(layout) = layout { + codec_params.with_channel_layout(layout); + } + + if let Some(codec_type) = codec_type { + codec_params.for_codec(codec_type); + if let Some(codec_private) = track.codec_private { + let extra_data = match codec_type { + CODEC_TYPE_VORBIS => { + vorbis_extra_data_from_codec_private(&codec_private)? 
+ } + CODEC_TYPE_FLAC => flac_extra_data_from_codec_private(&codec_private)?, + _ => codec_private, + }; + codec_params.with_extra_data(extra_data); + } + } + } + + let track_id = track.number as u32; + tracks.push(Track { + id: track_id, + codec_params: codec_params.clone(), + language: track.language, + }); + + states.insert( + track_id, + TrackState { + codec_params, + track_num: track_id, + default_frame_duration: track.default_duration, + }, + ); + } + + Ok(Self { + iter: it, + tracks, + track_states: states, + current_cluster, + metadata, + cues: Vec::new(), + frames: VecDeque::new(), + timestamp_scale: info.timestamp_scale, + clusters, + }) + } + + fn cues(&self) -> &[Cue] { + &self.cues + } + + fn metadata(&mut self) -> Metadata<'_> { + self.metadata.metadata() + } + + fn seek(&mut self, _mode: SeekMode, to: SeekTo) -> Result { + if self.tracks.is_empty() { + return seek_error(SeekErrorKind::Unseekable); + } + + match to { + SeekTo::Time { time, track_id } => { + let track = match track_id { + Some(id) => self.tracks.iter().find(|track| track.id == id), + None => self.tracks.first(), + }; + let track = track.ok_or(Error::SeekError(SeekErrorKind::InvalidTrack))?; + let tb = track.codec_params.time_base.unwrap(); + let ts = tb.calc_timestamp(time); + let track_id = track.id; + self.seek_track_by_ts(track_id, ts) + } + SeekTo::TimeStamp { ts, track_id } => { + match self.tracks.iter().find(|t| t.id == track_id) { + Some(_) => self.seek_track_by_ts(track_id, ts), + None => seek_error(SeekErrorKind::InvalidTrack), + } + } + } + } + + fn tracks(&self) -> &[Track] { + &self.tracks + } + + fn next_packet(&mut self) -> Result { + loop { + if let Some(frame) = self.frames.pop_front() { + return Ok(Packet::new_from_boxed_slice( + frame.track as u32, + frame.timestamp, + frame.duration, + frame.data, + )); + } + self.next_element()?; + } + } + + fn into_inner(self: Box) -> MediaSourceStream { + self.iter.into_inner() + } +} + +impl QueryDescriptor for MkvReader { + fn 
query() -> &'static [Descriptor] { + &[support_format!( + "matroska", + "Matroska / WebM", + &["webm", "mkv"], + &["video/webm", "video/x-matroska"], + &[b"\x1A\x45\xDF\xA3"] // Top-level element Ebml element + )] + } + + fn score(_context: &[u8]) -> u8 { + 255 + } +} diff --git a/symphonia-format-mkv/src/ebml.rs b/symphonia-format-mkv/src/ebml.rs new file mode 100644 index 00000000..a848dd97 --- /dev/null +++ b/symphonia-format-mkv/src/ebml.rs @@ -0,0 +1,518 @@ +// Symphonia +// Copyright (c) 2019-2022 The Project Symphonia Developers. +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::io::{Seek, SeekFrom}; + +use symphonia_core::errors::{decode_error, Error, Result}; +use symphonia_core::io::ReadBytes; +use symphonia_core::util::bits::sign_extend_leq64_to_i64; + +use crate::element_ids::{ElementType, Type, ELEMENTS}; +use crate::segment::EbmlHeaderElement; + +/// Reads a single EBML element ID (as in RFC8794) from the stream +/// and returns its value, length in bytes (1-4 bytes) +/// and a flag indicating whether any data was ignored, or an error. +#[allow(clippy::never_loop)] +pub(crate) fn read_tag(mut reader: R) -> Result<(u32, u32, bool)> { + // Try to read a tag at current reader position. + loop { + let byte = reader.read_byte()?; + let remaining_octets = byte.leading_zeros(); + if remaining_octets > 3 { + // First byte should be ignored since we know it could not start a tag. + // We immediately proceed to seek a first valid tag. 
+ break; + } + + // Read remaining octets + let mut vint = u32::from(byte); + for _ in 0..remaining_octets { + let byte = reader.read_byte()?; + vint = (vint << 8) | u32::from(byte); + } + + log::debug!("element with tag: {:X}", vint); + return Ok((vint, remaining_octets + 1, false)); + } + + // Seek to next supported tag of a top level element (`Cluster`, `Info`, etc.) + let mut tag = 0u32; + loop { + let ty = ELEMENTS.get(&tag).map(|(_, ty)| ty).filter(|ty| ty.is_top_level()); + + if let Some(ty) = ty { + log::info!("found next supported tag {:08X} ({:?})", tag, ty); + return Ok((tag, 4, true)); + } + tag = (tag << 8) | u32::from(reader.read_u8()?); + } +} + +pub(crate) fn read_size(reader: R) -> Result> { + let (size, len) = read_vint(reader)?; + if size == u64::MAX && len == 1 { + return Ok(None); + } + Ok(Some(size)) +} + +/// Reads a single unsigned variable size integer (as in RFC8794) from the stream +/// and returns it or an error. +pub(crate) fn read_unsigned_vint(reader: R) -> Result { + Ok(read_vint(reader)?.0) +} + +/// Reads a single signed variable size integer (as in RFC8794) from the stream +/// and returns it or an error. +pub(crate) fn read_signed_vint(mut reader: R) -> Result { + let (value, len) = read_vint(&mut reader)?; + // Convert to a signed integer by range shifting. + let half_range = i64::pow(2, (len * 7) as u32 - 1) - 1; + Ok(value as i64 - half_range) +} + +/// Reads a single unsigned variable size integer (as in RFC8794) from the stream +/// and returns both its value and length in octects, or an error. +fn read_vint(mut reader: R) -> Result<(u64, u32)> { + let byte = reader.read_byte()?; + if byte == 0xFF { + // Special case: unknown size elements. 
+ return Ok((u64::MAX, 1)); + } + + // A zero first octet has no VINT_MARKER bit, so the width computed below would be 8 and + // `7 - vint_width` would underflow. Reject it as malformed data instead. + if byte == 0 { + return decode_error("mkv: invalid variable size integer"); + } + + let vint_width = byte.leading_zeros(); + let mut vint = u64::from(byte); + // Clear VINT_MARKER bit + vint ^= 1 << (7 - vint_width); + + // Read remaining octets + for _ in 0..vint_width { + let byte = reader.read_byte()?; + vint = (vint << 8) | u64::from(byte); + } + + Ok((vint, vint_width + 1)) +} + +#[cfg(test)] +mod tests { + use symphonia_core::io::BufReader; + + use super::{read_signed_vint, read_tag, read_unsigned_vint}; + + #[test] + fn element_tag_parsing() { + assert_eq!(read_tag(BufReader::new(&[0x82])).unwrap(), (0x82, 1, false)); + assert_eq!(read_tag(BufReader::new(&[0x40, 0x02])).unwrap(), (0x4002, 2, false)); + assert_eq!(read_tag(BufReader::new(&[0x20, 0x00, 0x02])).unwrap(), (0x200002, 3, false)); + assert_eq!( + read_tag(BufReader::new(&[0x10, 0x00, 0x00, 0x02])).unwrap(), + (0x10000002, 4, false) + ); + } + + #[test] + fn variable_unsigned_integer_parsing() { + assert_eq!(read_unsigned_vint(BufReader::new(&[0x82])).unwrap(), 2); + assert_eq!(read_unsigned_vint(BufReader::new(&[0x40, 0x02])).unwrap(), 2); + assert_eq!(read_unsigned_vint(BufReader::new(&[0x20, 0x00, 0x02])).unwrap(), 2); + assert_eq!(read_unsigned_vint(BufReader::new(&[0x10, 0x00, 0x00, 0x02])).unwrap(), 2); + assert_eq!(read_unsigned_vint(BufReader::new(&[0x08, 0x00, 0x00, 0x00, 0x02])).unwrap(), 2); + assert_eq!( + read_unsigned_vint(BufReader::new(&[0x04, 0x00, 0x00, 0x00, 0x00, 0x02])).unwrap(), + 2 + ); + assert_eq!( + read_unsigned_vint(BufReader::new(&[0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02])) + .unwrap(), + 2 + ); + assert_eq!( + read_unsigned_vint(BufReader::new(&[0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02])) + .unwrap(), + 2 + ); + } + + #[test] + fn variable_signed_integer_parsing() { + assert_eq!(read_signed_vint(BufReader::new(&[0x80])).unwrap(), -63); + assert_eq!(read_signed_vint(BufReader::new(&[0x40, 0x00])).unwrap(), -8191); + } + + #[test] + fn zero_first_octet_is_rejected() { + assert!(read_unsigned_vint(BufReader::new(&[0x00])).is_err()); + assert!(read_signed_vint(BufReader::new(&[0x00, 0x01])).is_err()); + } +} + +#[derive(Copy, Clone, Debug)] +pub struct ElementHeader { + /// The element tag. 
+ pub tag: u32, + /// The element type. + pub etype: ElementType, + /// The element's offset in the stream. + pub pos: u64, + /// The total size of the element including the header. + pub len: u64, + /// The element's data offset in the stream. + pub data_pos: u64, + /// The size of the payload data. + pub data_len: u64, +} + +impl ElementHeader { + /// Returns an iterator over child elements of the current element. + pub(crate) fn children(&self, reader: R) -> ElementIterator { + assert_eq!(reader.pos(), self.data_pos, "unexpected position"); + ElementIterator::new_of(reader, *self) + } + + pub(crate) fn end(&self) -> Option { + if self.data_len == 0 { + None + } + else { + Some(self.data_pos + self.data_len) + } + } +} + +pub trait Element: Sized { + const ID: ElementType; + fn read(reader: &mut B, header: ElementHeader) -> Result; +} + +impl ElementHeader { + /// Reads a single EBML element header from the stream. + pub(crate) fn read(mut reader: &mut R) -> Result<(ElementHeader, bool)> { + let (tag, tag_len, reset) = read_tag(&mut reader)?; + let header_start = reader.pos() - u64::from(tag_len); + + // According to spec, elements like Segment and Cluster can have unknown size. + // Currently, these cases are represented as `data_len` equal to 0, + // but it might be worth changing it to an Option at some point. + let size = read_size(&mut reader)?.unwrap_or(0); + Ok(( + ElementHeader { + tag, + etype: ELEMENTS.get(&tag).map_or(ElementType::Unknown, |(_, etype)| *etype), + pos: header_start, + len: reader.pos() - header_start + size, + data_len: size, + data_pos: reader.pos(), + }, + reset, + )) + } +} + +#[derive(Debug)] +pub(crate) struct EbmlElement { + pub(crate) header: EbmlHeaderElement, +} + +impl Element for EbmlElement { + const ID: ElementType = ElementType::Ebml; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut it = header.children(reader); + Ok(Self { header: it.read_element_data::()? 
}) + } +} + +pub(crate) struct ElementIterator { + /// Reader of the stream containing this element. + reader: R, + /// Store current element header (for sanity check purposes). + current: Option, + /// Position of the next element header that would be read. + next_pos: u64, + /// Position immediately past last byte of this element. + end: Option, +} + +impl ElementIterator { + /// Creates a new iterator over elements starting from the current stream position. + pub(crate) fn new(reader: R, end: Option) -> Self { + let pos = reader.pos(); + Self::new_at(reader, pos, end) + } + + /// Creates a new iterator over elements starting from the given stream position. + fn new_at(reader: R, start: u64, end: Option) -> Self { + Self { reader, current: None, next_pos: start, end } + } + + /// Creates a new iterator over children of the given parent element. + fn new_of(reader: R, parent: ElementHeader) -> Self { + Self { reader, current: Some(parent), next_pos: parent.data_pos, end: parent.end() } + } + + /// Seek to a specified offset inside of the stream. + pub(crate) fn seek(&mut self, pos: u64) -> Result<()> + where + R: Seek, + { + self.current = None; + self.reader.seek(SeekFrom::Start(pos))?; + self.next_pos = pos; + Ok(()) + } + + /// Consumes this iterator and return the original stream. + pub(crate) fn into_inner(self) -> R { + self.reader + } + + /// Reads a single element header and moves to its next sibling by ignoring all the children. + pub(crate) fn read_header(&mut self) -> Result> { + let header = self.read_header_no_consume()?; + if let Some(header) = &header { + // Move to next sibling. + self.next_pos += header.len; + } + Ok(header) + } + + /// Reads a single element header and shifts the stream to element's child + /// if it'a a master element or to next sibling otherwise. 
+ pub(crate) fn read_child_header(&mut self) -> Result> { + let header = self.read_header_no_consume()?; + if let Some(header) = &header { + match ELEMENTS.get(&header.tag).map(|it| it.0) { + Some(Type::Master) => { + // Move to start of a child element. + self.next_pos = header.data_pos; + } + _ => { + // Move to next sibling. + self.next_pos += header.len; + } + } + } + Ok(header) + } + + /// Reads element header at the current stream position + /// without moving to the end of the parent element. + /// Returns [None] if the current element has no more children or reached end of the stream. + fn read_header_no_consume(&mut self) -> Result> { + let pos = self.reader.pos(); + if pos < self.next_pos { + // Ignore bytes that were not read + self.reader.ignore_bytes(self.next_pos - pos)?; + } + + assert_eq!(self.next_pos, self.reader.pos(), "invalid position"); + + if self.reader.pos() < self.end.unwrap_or(u64::MAX) { + let (header, reset) = ElementHeader::read(&mut self.reader)?; + if reset { + // After finding a new top-level element in a broken stream + // it is necessary to update `next_pos` so it refers to a position + // of a child header. + self.next_pos = self.reader.pos(); + } + self.current = Some(header); + return Ok(Some(header)); + } + + Ok(None) + } + + /// Reads a single element with its data. + pub(crate) fn read_element(&mut self) -> Result { + let _header = self.read_header()?; + self.read_element_data() + } + + /// Reads data of current element. Must be used after + /// [Self::read_header] or [Self::read_child_header]. 
+    pub(crate) fn read_element_data<E: Element>(&mut self) -> Result<E> {
+        // Both checks guard against misuse by the caller, not against malformed input.
+        let header = self.current.expect("EBML header must be read before calling this function");
+        assert_eq!(
+            header.etype,
+            E::ID,
+            "EBML element type must be checked before calling this function"
+        );
+
+        let element = E::read(&mut self.reader, header)?;
+        // Update position to match the position element reader finished at
+        self.next_pos = self.reader.pos();
+        Ok(element)
+    }
+
+    /// Reads a collection of elements with the given type.
+    ///
+    /// CRC-32 elements are skipped, and elements of any other type are logged and ignored.
+    pub(crate) fn read_elements<E: Element>(&mut self) -> Result<Box<[E]>> {
+        let mut elements = vec![];
+        while let Some(header) = self.read_header()? {
+            if header.etype == ElementType::Crc32 {
+                // TODO: ignore crc for now
+                continue;
+            }
+
+            if header.etype != E::ID {
+                log::warn!("found element with invalid type {:?}", header);
+                self.ignore_data()?;
+                continue;
+            }
+
+            elements.push(E::read(&mut self.reader, header)?);
+        }
+        Ok(elements.into_boxed_slice())
+    }
+
+    /// Reads any primitive data inside of the current element.
+    ///
+    /// Returns a decode error if [Self::try_read_data] yields no primitive data.
+    /// Panics if no element header has been read yet.
+    pub(crate) fn read_data(&mut self) -> Result<ElementData> {
+        let hdr = self.current.expect("not in an element");
+        let value = self
+            .try_read_data(hdr)?
+            .ok_or(Error::DecodeError("mkv: element has no primitive data"))?;
+        Ok(value)
+    }
+
+    /// Reads data of the current element as an unsigned integer.
+    pub(crate) fn read_u64(&mut self) -> Result<u64> {
+        match self.read_data()? {
+            ElementData::UnsignedInt(s) => Ok(s),
+            _ => Err(Error::DecodeError("mkv: expected an unsigned int")),
+        }
+    }
+
+    /// Reads data of the current element as a floating-point number.
+    pub(crate) fn read_f64(&mut self) -> Result<f64> {
+        match self.read_data()? {
+            ElementData::Float(s) => Ok(s),
+            _ => Err(Error::DecodeError("mkv: expected a float")),
+        }
+    }
+
+    /// Reads data of the current element as a string.
+    pub(crate) fn read_string(&mut self) -> Result<String> {
+        match self.read_data()? {
+            ElementData::String(s) => Ok(s),
+            _ => Err(Error::DecodeError("mkv: expected a string")),
+        }
+    }
+
+    /// Reads binary data of the current element as boxed slice.
+    pub(crate) fn read_boxed_slice(&mut self) -> Result<Box<[u8]>> {
+        match self.read_data()? {
+            ElementData::Binary(b) => Ok(b),
+            _ => Err(Error::DecodeError("mkv: expected binary data")),
+        }
+    }
+
+    /// Reads any primitive data of the current element. It returns [None]
+    /// if it is a master element or if its tag is not present in `ELEMENTS`.
+    ///
+    /// Integers and floats whose stored length is not supported are skipped and
+    /// reported as decode errors.
+    pub(crate) fn try_read_data(&mut self, header: ElementHeader) -> Result<Option<ElementData>> {
+        Ok(match ELEMENTS.get(&header.tag) {
+            Some((ty, _)) => {
+                // Position must always be valid, because this function is called
+                // after reading the element header.
+                assert_eq!(header.data_pos, self.reader.pos(), "invalid stream position");
+                if let (Some(cur), Some(end)) = (self.current, self.end) {
+                    // Saturate so that an absurd length from a damaged file is rejected
+                    // below instead of overflowing the addition.
+                    if cur.pos.saturating_add(cur.len) > end {
+                        log::debug!("reading element data {:?}; parent end={}", cur, end);
+                        return decode_error(
+                            "mkv: attempt to read element data past master element ",
+                        );
+                    }
+                }
+                Some(match ty {
+                    Type::Master => {
+                        return Ok(None);
+                    }
+                    Type::Unsigned => {
+                        if header.data_len > 8 {
+                            self.ignore_data()?;
+                            return decode_error("mkv: invalid unsigned integer length");
+                        }
+
+                        // The value is stored big-endian in `data_len` bytes; right-align
+                        // it in an 8-byte buffer so the leading bytes stay zero.
+                        let mut buff = [0u8; 8];
+                        let offset = 8 - header.data_len as usize;
+                        self.reader.read_buf_exact(&mut buff[offset..])?;
+                        let value = u64::from_be_bytes(buff);
+                        ElementData::UnsignedInt(value)
+                    }
+                    Type::Signed | Type::Date => {
+                        if header.data_len > 8 {
+                            self.ignore_data()?;
+                            return decode_error("mkv: invalid signed integer length");
+                        }
+
+                        let len = header.data_len as usize;
+                        let mut buff = [0u8; 8];
+                        self.reader.read_buf_exact(&mut buff[8 - len..])?;
+                        let value = u64::from_be_bytes(buff);
+                        // Extend the sign from the stored width (`len` bytes) to 64 bits.
+                        let value = sign_extend_leq64_to_i64(value, (len as u32) * 8);
+
+                        match ty {
+                            Type::Signed => ElementData::SignedInt(value),
+                            Type::Date => ElementData::Date(value),
+                            _ => unreachable!(),
+                        }
+                    }
+                    Type::Float => {
+                        // An empty float element is read as 0.0.
+                        let value = match header.data_len {
+                            0 => 0.0,
+                            4 => self.reader.read_be_f32()? as f64,
+                            8 => self.reader.read_be_f64()?,
+                            _ => {
+                                self.ignore_data()?;
+                                return decode_error("mkv: invalid float length");
+                            }
+                        };
+                        ElementData::Float(value)
+                    }
+                    Type::String => {
+                        let data = self.reader.read_boxed_slice_exact(header.data_len as usize)?;
+                        // Keep only the bytes before the first NUL; invalid UTF-8 is
+                        // replaced rather than rejected.
+                        let bytes = data.split(|b| *b == 0).next().unwrap_or(&data);
+                        ElementData::String(String::from_utf8_lossy(bytes).into_owned())
+                    }
+                    Type::Binary => ElementData::Binary(
+                        self.reader.read_boxed_slice_exact(header.data_len as usize)?,
+                    ),
+                })
+            }
+            None => None,
+        })
+    }
+
+    /// Ignores content of the current element. It can be used after calling
+    /// [Self::read_child_header] to ignore children of a master element.
+    ///
+    /// Does nothing if no element header has been read yet.
+    pub(crate) fn ignore_data(&mut self) -> Result<()> {
+        if let Some(header) = self.current {
+            log::debug!("ignoring data of {:?} element", header.etype);
+            self.reader.ignore_bytes(header.data_len)?;
+            self.next_pos = header.data_pos + header.data_len;
+        }
+        Ok(())
+    }
+
+    /// Gets the position of the underlying stream.
+    pub(crate) fn pos(&self) -> u64 {
+        self.reader.pos()
+    }
+}
+
+/// An EBML element data.
+#[derive(Clone, Debug)]
+pub(crate) enum ElementData {
+    /// A binary buffer.
+    Binary(Box<[u8]>),
+    /// A floating point number.
+    Float(f64),
+    /// A signed integer.
+    SignedInt(i64),
+    /// A string.
+    String(String),
+    /// An unsigned integer.
+    UnsignedInt(u64),
+    /// A point in time referenced in nanoseconds from the precise beginning
+    /// of the third millennium of the Gregorian Calendar in Coordinated Universal Time
+    /// (also known as 2001-01-01T00:00:00.000000000 UTC).
+    Date(i64),
+}
diff --git a/symphonia-format-mkv/src/element_ids.rs b/symphonia-format-mkv/src/element_ids.rs
new file mode 100644
index 00000000..9b290384
--- /dev/null
+++ b/symphonia-format-mkv/src/element_ids.rs
@@ -0,0 +1,334 @@
+// Symphonia
+// Copyright (c) 2019-2022 The Project Symphonia Developers.
+// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use lazy_static::lazy_static; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub(crate) enum Type { + Master, + Unsigned, + Signed, + Binary, + String, + Float, + Date, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ElementType { + Ebml, + EbmlVersion, + EbmlReadVersion, + EbmlMaxIdLength, + EbmlMaxSizeLength, + DocType, + DocTypeVersion, + DocTypeReadVersion, + Crc32, + Void, + Segment, + SeekHead, + Seek, + SeekId, + SeekPosition, + Info, + TimestampScale, + Duration, + DateUtc, + Title, + MuxingApp, + WritingApp, + Cluster, + Timestamp, + PrevSize, + SimpleBlock, + BlockGroup, + Block, + BlockAdditions, + BlockMore, + BlockAddId, + BlockAdditional, + BlockDuration, + ReferenceBlock, + DiscardPadding, + Tracks, + TrackEntry, + TrackNumber, + TrackUid, + TrackType, + FlagEnabled, + FlagDefault, + FlagForced, + FlagHearingImpaired, + FlagVisualImpaired, + FlagTextDescriptions, + FlagOriginal, + FlagCommentary, + FlagLacing, + DefaultDuration, + Name, + Language, + CodecId, + CodecPrivate, + CodecName, + CodecDelay, + SeekPreRoll, + Video, + FlagInterlaced, + StereoMode, + AlphaMode, + PixelWidth, + PixelHeight, + PixelCropBottom, + PixelCropTop, + PixelCropLeft, + PixelCropRight, + DisplayWidth, + DisplayHeight, + DisplayUnit, + AspectRatioType, + Audio, + SamplingFrequency, + OutputSamplingFrequency, + Channels, + BitDepth, + ContentEncodings, + ContentEncoding, + ContentEncodingOrder, + ContentEncodingScope, + ContentEncodingType, + ContentEncryption, + ContentEncAlgo, + ContentEncKeyId, + ContentEncAesSettings, + AesSettingsCipherMode, + Colour, + MatrixCoefficients, + BitsPerChannel, + ChromaSubsamplingHorz, + ChromaSubsamplingVert, + CbSubsamplingHorz, + CbSubsamplingVert, + ChromaSitingHorz, + 
ChromaSitingVert, + Range, + TransferCharacteristics, + Primaries, + MaxCll, + MaxFall, + MasteringMetadata, + PrimaryRChromaticityX, + PrimaryRChromaticityY, + PrimaryGChromaticityX, + PrimaryGChromaticityY, + PrimaryBChromaticityX, + PrimaryBChromaticityY, + WhitePointChromaticityX, + WhitePointChromaticityY, + LuminanceMax, + LuminanceMin, + Cues, + CuePoint, + CueTime, + CueTrackPositions, + CueTrack, + CueClusterPosition, + CueRelativePosition, + CueDuration, + CueBlockNumber, + Chapters, + EditionEntry, + ChapterAtom, + ChapterUid, + ChapterStringUid, + ChapterTimeStart, + ChapterTimeEnd, + ChapterDisplay, + ChapString, + ChapLanguage, + ChapLanguageIetf, + ChapCountry, + Tags, + Tag, + Targets, + TargetTypeValue, + TargetType, + TagTrackUid, + SimpleTag, + TagName, + TagLanguage, + TagDefault, + TagString, + TagBinary, + /// Special type for unknown tags. + Unknown, +} + +impl ElementType { + pub(crate) fn is_top_level(&self) -> bool { + matches!( + self, + ElementType::Cluster + | ElementType::Cues + | ElementType::Info + | ElementType::SeekHead + | ElementType::Tags + | ElementType::Tracks + ) + } +} + +lazy_static! 
{ + pub(crate) static ref ELEMENTS: HashMap = HashMap::from([ + (0x1A45DFA3, (Type::Master, ElementType::Ebml)), + (0x4286, (Type::Unsigned, ElementType::EbmlVersion)), + (0x42F7, (Type::Unsigned, ElementType::EbmlReadVersion)), + (0x42F2, (Type::Unsigned, ElementType::EbmlMaxIdLength)), + (0x42F3, (Type::Unsigned, ElementType::EbmlMaxSizeLength)), + (0x4282, (Type::String, ElementType::DocType)), + (0x4287, (Type::Unsigned, ElementType::DocTypeVersion)), + (0x4285, (Type::Unsigned, ElementType::DocTypeReadVersion)), + (0xBF, (Type::Binary, ElementType::Crc32)), + (0xEC, (Type::Binary, ElementType::Void)), + (0x18538067, (Type::Master, ElementType::Segment)), + (0x114D9B74, (Type::Master, ElementType::SeekHead)), + (0x4DBB, (Type::Master, ElementType::Seek)), + (0x53AB, (Type::Unsigned, ElementType::SeekId)), + (0x53AC, (Type::Unsigned, ElementType::SeekPosition)), + (0x1549A966, (Type::Master, ElementType::Info)), + (0x2AD7B1, (Type::Unsigned, ElementType::TimestampScale)), + (0x4489, (Type::Float, ElementType::Duration)), + (0x4461, (Type::Date, ElementType::DateUtc)), + (0x7BA9, (Type::String, ElementType::Title)), + (0x4D80, (Type::String, ElementType::MuxingApp)), + (0x5741, (Type::String, ElementType::WritingApp)), + (0x1F43B675, (Type::Master, ElementType::Cluster)), + (0xE7, (Type::Unsigned, ElementType::Timestamp)), + (0xAB, (Type::Unsigned, ElementType::PrevSize)), + (0xA3, (Type::Binary, ElementType::SimpleBlock)), + (0xA0, (Type::Master, ElementType::BlockGroup)), + (0xA1, (Type::Binary, ElementType::Block)), + (0x75A1, (Type::Master, ElementType::BlockAdditions)), + (0xA6, (Type::Master, ElementType::BlockMore)), + (0xEE, (Type::Unsigned, ElementType::BlockAddId)), + (0xA5, (Type::Binary, ElementType::BlockAdditional)), + (0x9B, (Type::Unsigned, ElementType::BlockDuration)), + (0xFB, (Type::Signed, ElementType::ReferenceBlock)), + (0x75A2, (Type::Signed, ElementType::DiscardPadding)), + (0x1654AE6B, (Type::Master, ElementType::Tracks)), + (0xAE, 
(Type::Master, ElementType::TrackEntry)), + (0xD7, (Type::Unsigned, ElementType::TrackNumber)), + (0x73C5, (Type::Unsigned, ElementType::TrackUid)), + (0x83, (Type::Unsigned, ElementType::TrackType)), + (0xB9, (Type::Unsigned, ElementType::FlagEnabled)), + (0x88, (Type::Unsigned, ElementType::FlagDefault)), + (0x55AA, (Type::Unsigned, ElementType::FlagForced)), + (0x55AB, (Type::Unsigned, ElementType::FlagHearingImpaired)), + (0x55AC, (Type::Unsigned, ElementType::FlagVisualImpaired)), + (0x55AD, (Type::Unsigned, ElementType::FlagTextDescriptions)), + (0x55AE, (Type::Unsigned, ElementType::FlagOriginal)), + (0x55AF, (Type::Unsigned, ElementType::FlagCommentary)), + (0x9C, (Type::Unsigned, ElementType::FlagLacing)), + (0x23E383, (Type::Unsigned, ElementType::DefaultDuration)), + (0x536E, (Type::String, ElementType::Name)), + (0x22B59C, (Type::String, ElementType::Language)), + (0x86, (Type::String, ElementType::CodecId)), + (0x63A2, (Type::Binary, ElementType::CodecPrivate)), + (0x258688, (Type::String, ElementType::CodecName)), + (0x56AA, (Type::Unsigned, ElementType::CodecDelay)), + (0x56BB, (Type::Unsigned, ElementType::SeekPreRoll)), + (0xE0, (Type::Master, ElementType::Video)), + (0x9A, (Type::Unsigned, ElementType::FlagInterlaced)), + (0x53B8, (Type::Unsigned, ElementType::StereoMode)), + (0x53C0, (Type::Unsigned, ElementType::AlphaMode)), + (0xB0, (Type::Unsigned, ElementType::PixelWidth)), + (0xBA, (Type::Unsigned, ElementType::PixelHeight)), + (0x54AA, (Type::Unsigned, ElementType::PixelCropBottom)), + (0x54BB, (Type::Unsigned, ElementType::PixelCropTop)), + (0x54CC, (Type::Unsigned, ElementType::PixelCropLeft)), + (0x54DD, (Type::Unsigned, ElementType::PixelCropRight)), + (0x54B0, (Type::Unsigned, ElementType::DisplayWidth)), + (0x54BA, (Type::Unsigned, ElementType::DisplayHeight)), + (0x54B2, (Type::Unsigned, ElementType::DisplayUnit)), + (0x54B3, (Type::Unsigned, ElementType::AspectRatioType)), + (0xE1, (Type::Master, ElementType::Audio)), + (0xB5, 
(Type::Float, ElementType::SamplingFrequency)), + (0x78B5, (Type::Float, ElementType::OutputSamplingFrequency)), + (0x9F, (Type::Unsigned, ElementType::Channels)), + (0x6264, (Type::Unsigned, ElementType::BitDepth)), + (0x6D80, (Type::Master, ElementType::ContentEncodings)), + (0x6240, (Type::Master, ElementType::ContentEncoding)), + (0x5031, (Type::Unsigned, ElementType::ContentEncodingOrder)), + (0x5032, (Type::Unsigned, ElementType::ContentEncodingScope)), + (0x5033, (Type::Unsigned, ElementType::ContentEncodingType)), + (0x5035, (Type::Master, ElementType::ContentEncryption)), + (0x47E1, (Type::Unsigned, ElementType::ContentEncAlgo)), + (0x47E2, (Type::Unsigned, ElementType::ContentEncKeyId)), + (0x47E7, (Type::Master, ElementType::ContentEncAesSettings)), + (0x47E8, (Type::Unsigned, ElementType::AesSettingsCipherMode)), + (0x55B0, (Type::Master, ElementType::Colour)), + (0x55B1, (Type::Unsigned, ElementType::MatrixCoefficients)), + (0x55B2, (Type::Unsigned, ElementType::BitsPerChannel)), + (0x55B3, (Type::Unsigned, ElementType::ChromaSubsamplingHorz)), + (0x55B4, (Type::Unsigned, ElementType::ChromaSubsamplingVert)), + (0x55B5, (Type::Unsigned, ElementType::CbSubsamplingHorz)), + (0x55B6, (Type::Unsigned, ElementType::CbSubsamplingVert)), + (0x55B7, (Type::Unsigned, ElementType::ChromaSitingHorz)), + (0x55B8, (Type::Unsigned, ElementType::ChromaSitingVert)), + (0x55B9, (Type::Unsigned, ElementType::Range)), + (0x55BA, (Type::Unsigned, ElementType::TransferCharacteristics)), + (0x55BB, (Type::Unsigned, ElementType::Primaries)), + (0x55BC, (Type::Unsigned, ElementType::MaxCll)), + (0x55BD, (Type::Unsigned, ElementType::MaxFall)), + (0x55D0, (Type::Master, ElementType::MasteringMetadata)), + (0x55D1, (Type::Float, ElementType::PrimaryRChromaticityX)), + (0x55D2, (Type::Float, ElementType::PrimaryRChromaticityY)), + (0x55D3, (Type::Float, ElementType::PrimaryGChromaticityX)), + (0x55D4, (Type::Float, ElementType::PrimaryGChromaticityY)), + (0x55D5, (Type::Float, 
ElementType::PrimaryBChromaticityX)), + (0x55D6, (Type::Float, ElementType::PrimaryBChromaticityY)), + (0x55D7, (Type::Float, ElementType::WhitePointChromaticityX)), + (0x55D8, (Type::Float, ElementType::WhitePointChromaticityY)), + (0x55D9, (Type::Float, ElementType::LuminanceMax)), + (0x55DA, (Type::Float, ElementType::LuminanceMin)), + (0x1C53BB6B, (Type::Master, ElementType::Cues)), + (0xBB, (Type::Master, ElementType::CuePoint)), + (0xB3, (Type::Unsigned, ElementType::CueTime)), + (0xB7, (Type::Master, ElementType::CueTrackPositions)), + (0xF7, (Type::Unsigned, ElementType::CueTrack)), + (0xF1, (Type::Unsigned, ElementType::CueClusterPosition)), + (0xF0, (Type::Unsigned, ElementType::CueRelativePosition)), + (0xB2, (Type::Unsigned, ElementType::CueDuration)), + (0x5378, (Type::Unsigned, ElementType::CueBlockNumber)), + (0x1043A770, (Type::Master, ElementType::Chapters)), + (0x45B9, (Type::Master, ElementType::EditionEntry)), + (0xB6, (Type::Master, ElementType::ChapterAtom)), + (0x73C4, (Type::Unsigned, ElementType::ChapterUid)), + (0x5654, (Type::String, ElementType::ChapterStringUid)), + (0x91, (Type::Unsigned, ElementType::ChapterTimeStart)), + (0x92, (Type::Unsigned, ElementType::ChapterTimeEnd)), + (0x80, (Type::Master, ElementType::ChapterDisplay)), + (0x85, (Type::String, ElementType::ChapString)), + (0x437C, (Type::String, ElementType::ChapLanguage)), + (0x437D, (Type::String, ElementType::ChapLanguageIetf)), + (0x437E, (Type::String, ElementType::ChapCountry)), + (0x1254C367, (Type::Master, ElementType::Tags)), + (0x7373, (Type::Master, ElementType::Tag)), + (0x63C0, (Type::Master, ElementType::Targets)), + (0x68CA, (Type::Unsigned, ElementType::TargetTypeValue)), + (0x63CA, (Type::String, ElementType::TargetType)), + (0x63C5, (Type::Unsigned, ElementType::TagTrackUid)), + (0x67C8, (Type::Master, ElementType::SimpleTag)), + (0x45A3, (Type::String, ElementType::TagName)), + (0x447A, (Type::String, ElementType::TagLanguage)), + (0x4484, (Type::Unsigned, 
ElementType::TagDefault)), + (0x4487, (Type::String, ElementType::TagString)), + (0x4485, (Type::Binary, ElementType::TagBinary)), + ]); +} diff --git a/symphonia-format-mkv/src/lacing.rs b/symphonia-format-mkv/src/lacing.rs new file mode 100644 index 00000000..c62c716b --- /dev/null +++ b/symphonia-format-mkv/src/lacing.rs @@ -0,0 +1,155 @@ +// Symphonia +// Copyright (c) 2019-2022 The Project Symphonia Developers. +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::{HashMap, VecDeque}; + +use symphonia_core::errors::{decode_error, Result}; +use symphonia_core::io::{BufReader, ReadBytes}; + +use crate::demuxer::TrackState; +use crate::ebml::{read_signed_vint, read_unsigned_vint}; + +enum Lacing { + None, + Xiph, + FixedSize, + Ebml, +} + +fn parse_flags(flags: u8) -> Result { + match (flags >> 1) & 0b11 { + 0b00 => Ok(Lacing::None), + 0b01 => Ok(Lacing::Xiph), + 0b10 => Ok(Lacing::FixedSize), + 0b11 => Ok(Lacing::Ebml), + _ => unreachable!(), + } +} + +fn read_ebml_sizes(mut reader: R, frames: usize) -> Result> { + let mut sizes = Vec::new(); + for _ in 0..frames { + if let Some(last_size) = sizes.last().copied() { + let delta = read_signed_vint(&mut reader)?; + sizes.push((last_size as i64 + delta) as u64) + } + else { + let size = read_unsigned_vint(&mut reader)?; + sizes.push(size); + } + } + + Ok(sizes) +} + +pub(crate) fn read_xiph_sizes(mut reader: R, frames: usize) -> Result> { + let mut prefixes = 0; + let mut sizes = Vec::new(); + while sizes.len() < frames as usize { + let byte = reader.read_byte()? as u64; + if byte == 255 { + prefixes += 1; + } + else { + let size = prefixes * 255 + byte; + prefixes = 0; + sizes.push(size); + } + } + + Ok(sizes) +} + +pub(crate) struct Frame { + pub(crate) track: u32, + /// Absolute frame timestamp. 
+ pub(crate) timestamp: u64, + pub(crate) duration: u64, + pub(crate) data: Box<[u8]>, +} + +pub(crate) fn calc_abs_block_timestamp(cluster_ts: u64, rel_block_ts: i16) -> u64 { + if rel_block_ts < 0 { + cluster_ts - (-rel_block_ts) as u64 + } + else { + cluster_ts + rel_block_ts as u64 + } +} + +pub(crate) fn extract_frames( + block: &[u8], + block_duration: Option, + tracks: &HashMap, + cluster_timestamp: u64, + timestamp_scale: u64, + buffer: &mut VecDeque, +) -> Result<()> { + let mut reader = BufReader::new(block); + let track = read_unsigned_vint(&mut reader)? as u32; + let rel_ts = reader.read_be_u16()? as i16; + let flags = reader.read_byte()?; + let lacing = parse_flags(flags)?; + + let default_frame_duration = + tracks.get(&track).and_then(|it| it.default_frame_duration).map(|it| it / timestamp_scale); + + let mut timestamp = calc_abs_block_timestamp(cluster_timestamp, rel_ts); + + match lacing { + Lacing::None => { + let data = reader.read_boxed_slice_exact(block.len() - reader.pos() as usize)?; + let duration = block_duration.or(default_frame_duration).unwrap_or(0); + buffer.push_back(Frame { track, timestamp, data, duration }); + } + Lacing::Xiph | Lacing::Ebml => { + // Read number of stored sizes which is actually `number of frames` - 1 + // since size of the last frame is deduced from block size. + let frames = reader.read_byte()? as usize; + let sizes = match lacing { + Lacing::Xiph => read_xiph_sizes(&mut reader, frames)?, + Lacing::Ebml => read_ebml_sizes(&mut reader, frames)?, + _ => unreachable!(), + }; + + let frame_duration = block_duration + .map(|it| it / (frames + 1) as u64) + .or(default_frame_duration) + .unwrap_or(0); + + for frame_size in sizes { + let data = reader.read_boxed_slice_exact(frame_size as usize)?; + buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); + timestamp += frame_duration; + } + + // Size of last frame is not provided so we read to the end of the block. 
+ let size = block.len() - reader.pos() as usize; + let data = reader.read_boxed_slice_exact(size)?; + buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); + } + Lacing::FixedSize => { + let frames = reader.read_byte()? as usize + 1; + let total_size = block.len() - reader.pos() as usize; + if total_size % frames != 0 { + return decode_error("mkv: invalid block size"); + } + + let frame_duration = + block_duration.map(|it| it / frames as u64).or(default_frame_duration).unwrap_or(0); + + let frame_size = total_size / frames; + for _ in 0..frames { + let data = reader.read_boxed_slice_exact(frame_size)?; + buffer.push_back(Frame { track, timestamp, data, duration: frame_duration }); + timestamp += frame_duration; + } + } + } + + Ok(()) +} diff --git a/symphonia-format-mkv/src/lib.rs b/symphonia-format-mkv/src/lib.rs index 8b137891..8b21037d 100644 --- a/symphonia-format-mkv/src/lib.rs +++ b/symphonia-format-mkv/src/lib.rs @@ -1 +1,24 @@ +// Symphonia +// Copyright (c) 2019-2022 The Project Symphonia Developers. +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +#![warn(rust_2018_idioms)] +#![forbid(unsafe_code)] +// The following lints are allowed in all Symphonia crates. Please see clippy.toml for their +// justification. +#![allow(clippy::comparison_chain)] +#![allow(clippy::excessive_precision)] +#![allow(clippy::identity_op)] +#![allow(clippy::manual_range_contains)] + +mod codecs; +mod demuxer; +mod ebml; +mod element_ids; +mod lacing; +mod segment; + +pub use crate::demuxer::MkvReader; diff --git a/symphonia-format-mkv/src/segment.rs b/symphonia-format-mkv/src/segment.rs new file mode 100644 index 00000000..bda51ad9 --- /dev/null +++ b/symphonia-format-mkv/src/segment.rs @@ -0,0 +1,622 @@ +// Symphonia +// Copyright (c) 2019-2022 The Project Symphonia Developers. 
+// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use symphonia_core::errors::{Error, Result}; +use symphonia_core::io::{BufReader, ReadBytes}; +use symphonia_core::meta::{MetadataBuilder, MetadataRevision, Tag, Value}; + +use crate::ebml::{read_unsigned_vint, Element, ElementData, ElementHeader}; +use crate::element_ids::ElementType; +use crate::lacing::calc_abs_block_timestamp; + +#[allow(dead_code)] +#[derive(Debug)] +pub(crate) struct TrackElement { + pub(crate) number: u64, + pub(crate) uid: u64, + pub(crate) language: Option, + pub(crate) codec_id: String, + pub(crate) codec_private: Option>, + pub(crate) audio: Option, + pub(crate) default_duration: Option, +} + +impl Element for TrackElement { + const ID: ElementType = ElementType::TrackEntry; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut number = None; + let mut uid = None; + let mut language = None; + let mut audio = None; + let mut codec_private = None; + let mut codec_id = None; + let mut default_duration = None; + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::TrackNumber => { + number = Some(it.read_u64()?); + } + ElementType::TrackUid => { + uid = Some(it.read_u64()?); + } + ElementType::Language => { + language = Some(it.read_string()?); + } + ElementType::CodecId => { + codec_id = Some(it.read_string()?); + } + ElementType::CodecPrivate => { + codec_private = Some(it.read_boxed_slice()?); + } + ElementType::Audio => { + audio = Some(it.read_element_data()?); + } + ElementType::DefaultDuration => { + default_duration = Some(it.read_u64()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { + number: number.ok_or(Error::DecodeError("mkv: missing track number"))?, + uid: uid.ok_or(Error::DecodeError("mkv: missing track UID"))?, + language, + codec_id: codec_id.ok_or(Error::DecodeError("mkv: missing codec id"))?, + codec_private, + audio, + default_duration, + }) + } +} + +#[allow(dead_code)] +#[derive(Debug)] +pub(crate) struct AudioElement { + pub(crate) sampling_frequency: f64, + pub(crate) output_sampling_frequency: Option, + pub(crate) channels: u64, + pub(crate) bit_depth: Option, +} + +impl Element for AudioElement { + const ID: ElementType = ElementType::Audio; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut sampling_frequency = None; + let mut output_sampling_frequency = None; + let mut channels = None; + let mut bit_depth = None; + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::SamplingFrequency => { + sampling_frequency = Some(it.read_f64()?); + } + ElementType::OutputSamplingFrequency => { + output_sampling_frequency = Some(it.read_f64()?); + } + ElementType::Channels => { + channels = Some(it.read_u64()?); + } + ElementType::BitDepth => { + bit_depth = Some(it.read_u64()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { + sampling_frequency: sampling_frequency.unwrap_or(8000.0), + output_sampling_frequency, + channels: channels.unwrap_or(1), + bit_depth, + }) + } +} + +#[derive(Debug)] +pub(crate) struct SeekHeadElement { + pub(crate) seeks: Box<[SeekElement]>, +} + +impl Element for SeekHeadElement { + const ID: ElementType = ElementType::SeekHead; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut seeks = Vec::new(); + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? { + match header.etype { + ElementType::Seek => { + seeks.push(it.read_element_data()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { seeks: seeks.into_boxed_slice() }) + } +} + +#[derive(Debug)] +pub(crate) struct SeekElement { + pub(crate) id: u64, + pub(crate) position: u64, +} + +impl Element for SeekElement { + const ID: ElementType = ElementType::Seek; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut seek_id = None; + let mut seek_position = None; + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::SeekId => { + seek_id = Some(it.read_u64()?); + } + ElementType::SeekPosition => { + seek_position = Some(it.read_u64()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { + id: seek_id.ok_or(Error::DecodeError("mkv: missing seek track id"))?, + position: seek_position.ok_or(Error::DecodeError("mkv: missing seek track pos"))?, + }) + } +} + +#[derive(Debug)] +pub(crate) struct TracksElement { + pub(crate) tracks: Box<[TrackElement]>, +} + +impl Element for TracksElement { + const ID: ElementType = ElementType::Tracks; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut it = header.children(reader); + Ok(Self { tracks: it.read_elements()? }) + } +} + +#[allow(dead_code)] +#[derive(Debug)] +pub(crate) struct EbmlHeaderElement { + pub(crate) version: u64, + pub(crate) read_version: u64, + pub(crate) max_id_length: u64, + pub(crate) max_size_length: u64, + pub(crate) doc_type: String, + pub(crate) doc_type_version: u64, + pub(crate) doc_type_read_version: u64, +} + +impl Element for EbmlHeaderElement { + const ID: ElementType = ElementType::Ebml; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut version = None; + let mut read_version = None; + let mut max_id_length = None; + let mut max_size_length = None; + let mut doc_type = None; + let mut doc_type_version = None; + let mut doc_type_read_version = None; + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::EbmlVersion => { + version = Some(it.read_u64()?); + } + ElementType::EbmlReadVersion => { + read_version = Some(it.read_u64()?); + } + ElementType::EbmlMaxIdLength => { + max_id_length = Some(it.read_u64()?); + } + ElementType::EbmlMaxSizeLength => { + max_size_length = Some(it.read_u64()?); + } + ElementType::DocType => { + doc_type = Some(it.read_string()?); + } + ElementType::DocTypeVersion => { + doc_type_version = Some(it.read_u64()?); + } + ElementType::DocTypeReadVersion => { + doc_type_read_version = Some(it.read_u64()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { + version: version.unwrap_or(1), + read_version: read_version.unwrap_or(1), + max_id_length: max_id_length.unwrap_or(4), + max_size_length: max_size_length.unwrap_or(8), + doc_type: doc_type.ok_or(Error::Unsupported("mkv: invalid ebml file"))?, + doc_type_version: doc_type_version.unwrap_or(1), + doc_type_read_version: doc_type_read_version.unwrap_or(1), + }) + } +} + +#[allow(dead_code)] +#[derive(Debug)] +pub(crate) struct InfoElement { + pub(crate) timestamp_scale: u64, + pub(crate) duration: Option, + title: Option>, + muxing_app: Box, + writing_app: Box, +} + +impl Element for InfoElement { + const ID: ElementType = ElementType::Info; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut duration = None; + let mut timestamp_scale = None; + let mut title = None; + let mut muxing_app = None; + let mut writing_app = None; + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::TimestampScale => { + timestamp_scale = Some(it.read_u64()?); + } + ElementType::Duration => { + duration = Some(it.read_f64()?); + } + ElementType::Title => { + title = Some(it.read_string()?); + } + ElementType::MuxingApp => { + muxing_app = Some(it.read_string()?); + } + ElementType::WritingApp => { + writing_app = Some(it.read_string()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { + timestamp_scale: timestamp_scale.unwrap_or(1_000_000), + duration, + title: title.map(|it| it.into_boxed_str()), + muxing_app: muxing_app.unwrap_or_default().into_boxed_str(), + writing_app: writing_app.unwrap_or_default().into_boxed_str(), + }) + } +} + +#[allow(dead_code)] +#[derive(Debug)] +pub(crate) struct CuesElement { + pub(crate) points: Box<[CuePointElement]>, +} + +impl Element for CuesElement { + const ID: ElementType = ElementType::Cues; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut it = header.children(reader); + Ok(Self { points: it.read_elements()? }) + } +} + +#[allow(dead_code)] +#[derive(Debug)] +pub(crate) struct CuePointElement { + pub(crate) time: u64, + pub(crate) positions: CueTrackPositionsElement, +} + +impl Element for CuePointElement { + const ID: ElementType = ElementType::CuePoint; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut it = header.children(reader); + + let mut time = None; + let mut pos = None; + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::CueTime => time = Some(it.read_u64()?), + ElementType::CueTrackPositions => { + pos = Some(it.read_element_data()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { + time: time.ok_or(Error::DecodeError("mkv: missing time in cue"))?, + positions: pos.ok_or(Error::DecodeError("mkv: missing positions in cue"))?, + }) + } +} + +#[allow(dead_code)] +#[derive(Debug)] +pub(crate) struct CueTrackPositionsElement { + pub(crate) track: u64, + pub(crate) cluster_position: u64, +} + +impl Element for CueTrackPositionsElement { + const ID: ElementType = ElementType::CueTrackPositions; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut it = header.children(reader); + + let mut track = None; + let mut pos = None; + while let Some(header) = it.read_header()? { + match header.etype { + ElementType::CueTrack => { + track = Some(it.read_u64()?); + } + ElementType::CueClusterPosition => { + pos = Some(it.read_u64()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + Ok(Self { + track: track.ok_or(Error::DecodeError("mkv: missing track in cue track positions"))?, + cluster_position: pos + .ok_or(Error::DecodeError("mkv: missing position in cue track positions"))?, + }) + } +} + +#[derive(Debug)] +pub(crate) struct BlockGroupElement { + pub(crate) data: Box<[u8]>, + pub(crate) duration: Option, +} + +impl Element for BlockGroupElement { + const ID: ElementType = ElementType::BlockGroup; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut it = header.children(reader); + + let mut data = None; + let mut block_duration = None; + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::DiscardPadding => { + let _nanos = it.read_data()?; + } + ElementType::Block => { + data = Some(it.read_boxed_slice()?); + } + ElementType::BlockDuration => { + block_duration = Some(it.read_u64()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + Ok(Self { + data: data.ok_or(Error::DecodeError("mkv: missing block inside block group"))?, + duration: block_duration, + }) + } +} + +#[derive(Debug)] +pub(crate) struct BlockElement { + pub(crate) track: u64, + pub(crate) timestamp: u64, + pub(crate) pos: u64, +} + +#[derive(Debug)] +pub(crate) struct ClusterElement { + pub(crate) timestamp: u64, + pub(crate) pos: u64, + pub(crate) end: Option, + pub(crate) blocks: Box<[BlockElement]>, +} + +impl Element for ClusterElement { + const ID: ElementType = ElementType::Cluster; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let pos = reader.pos(); + let mut timestamp = None; + let mut blocks = Vec::new(); + let has_size = header.end().is_some(); + + fn read_block(data: &[u8], timestamp: u64, offset: u64) -> Result { + let mut reader = BufReader::new(data); + let track = read_unsigned_vint(&mut reader)?; + let rel_ts = reader.read_be_u16()? as i16; + let timestamp = calc_abs_block_timestamp(timestamp, rel_ts); + Ok(BlockElement { track, timestamp, pos: offset }) + } + + fn get_timestamp(timestamp: Option) -> Result { + timestamp.ok_or(Error::DecodeError("mkv: missing timestamp for a cluster")) + } + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::Timestamp => { + timestamp = Some(it.read_u64()?); + } + ElementType::BlockGroup => { + /* get_timestamp runs once per block, so a block that appears before any Timestamp child fails with the missing-timestamp DecodeError. NOTE(review): the turbofish below is empty in this text; its type argument looks stripped. */ let group = it.read_element_data::()?; + blocks.push(read_block(&group.data, get_timestamp(timestamp)?, header.pos)?); + } + ElementType::SimpleBlock => { + let data = it.read_boxed_slice()?; + blocks.push(read_block(&data, get_timestamp(timestamp)?, header.pos)?); + } + /* has_size was computed from header.end().is_some() before the loop; when it is false, a child whose etype.is_top_level() is true stops the loop. */ _ if header.etype.is_top_level() && !has_size => break, + other => { + log::debug!("ignored element {:?}", other); + } + } + } + /* A cluster without any Timestamp child is rejected here, even when it held no blocks. */ + Ok(ClusterElement { + timestamp: get_timestamp(timestamp)?, + blocks: blocks.into_boxed_slice(), + pos, + end: header.end(), + }) + } +} + /* TagsElement collects every Tag child of a Tags element, in the order read. */ +#[derive(Debug)] +pub(crate) struct TagsElement { + pub(crate) tags: Box<[TagElement]>, +} + +impl Element for TagsElement { + const ID: ElementType = ElementType::Tags; + /* NOTE(review): B is never declared in this signature and Result has no type argument in this text; the generic parameter lists look stripped, confirm against the upstream patch. */ + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut tags = Vec::new(); + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::Tag => { + tags.push(it.read_element_data::()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { tags: tags.into_boxed_slice() }) + } +} + /* to_metadata flattens every SimpleTag of every Tag into one MetadataRevision. Each tag is created with None as its first argument (see the std_key TODO) and a clone of the stored name and value. */ +impl TagsElement { + pub(crate) fn to_metadata(&self) -> MetadataRevision { + let mut metadata = MetadataBuilder::new(); + for tag in self.tags.iter() { + for simple_tag in tag.simple_tags.iter() { + // TODO: support std_key + metadata.add_tag(Tag::new( + None, + &simple_tag.name, + match &simple_tag.value { + ElementData::Binary(b) => Value::Binary(b.clone()), + ElementData::String(s) => Value::String(s.clone()), + /* Any other ElementData variant panics. SimpleTagElement::read only stores what read_data() returns for TagString or TagBinary children. NOTE(review): confirm read_data() can only yield String or Binary for those two element types. */ _ => unreachable!(), + }, + )); + } + } + metadata.metadata() + } +} + /* TagElement collects the SimpleTag children of one Tag element, in the order read; other children are logged and ignored. */ +#[derive(Debug)] +pub(crate) struct TagElement { + pub(crate) simple_tags: Box<[SimpleTagElement]>, +} + +impl Element for TagElement { + const ID: ElementType = ElementType::Tag; + /* NOTE(review): B is never declared in this signature, Result has no type argument and the turbofish on read_element_data is empty in this text; the generic parameter lists look stripped, confirm against the upstream patch. */ + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut simple_tags = Vec::new(); + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? { + match header.etype { + ElementType::SimpleTag => { + simple_tags.push(it.read_element_data::()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + + Ok(Self { simple_tags: simple_tags.into_boxed_slice() }) + } +} + /* SimpleTagElement pairs the TagName string with the payload read from a TagString or TagBinary child. */ +#[derive(Debug)] +pub(crate) struct SimpleTagElement { + pub(crate) name: Box, + pub(crate) value: ElementData, +} + +impl Element for SimpleTagElement { + const ID: ElementType = ElementType::SimpleTag; + + fn read(reader: &mut B, header: ElementHeader) -> Result { + let mut name = None; + let mut value = None; + + let mut it = header.children(reader); + while let Some(header) = it.read_header()? 
{ + match header.etype { + ElementType::TagName => { + name = Some(it.read_string()?); + } + ElementType::TagString | ElementType::TagBinary => { + /* Both payload kinds are stored in the same variable, so when several such children occur the last one read wins. */ value = Some(it.read_data()?); + } + other => { + log::debug!("ignored element {:?}", other); + } + } + } + /* A SimpleTag lacking a TagName child or a TagString / TagBinary child is rejected with a DecodeError. */ + Ok(Self { + name: name.ok_or(Error::DecodeError("mkv: missing tag name"))?.into_boxed_str(), + value: value.ok_or(Error::DecodeError("mkv: missing tag value"))?, + }) + } +} diff --git a/symphonia-play/Cargo.toml b/symphonia-play/Cargo.toml index b201c6d2..916646aa 100644 --- a/symphonia-play/Cargo.toml +++ b/symphonia-play/Cargo.toml @@ -15,7 +15,7 @@ clap = "3.0.0" lazy_static = "1.4.0" log = { version = "0.4", features = ["release_max_level_info"] } pretty_env_logger = "0.4" -symphonia = { version = "0.4", path = "../symphonia", features = ["aac", "alac", "mp3", "isomp4"] } +symphonia = { version = "0.4", path = "../symphonia", features = [ "aac", "alac", "mp3", "isomp4", "mkv" ] } [target.'cfg(target_os = "linux")'.dependencies] libpulse-binding = "2.5.0" diff --git a/symphonia/Cargo.toml b/symphonia/Cargo.toml index a3c274dd..1d4e888d 100644 --- a/symphonia/Cargo.toml +++ b/symphonia/Cargo.toml @@ -17,6 +17,7 @@ aac = ["symphonia-codec-aac"] alac = ["symphonia-codec-alac"] flac = ["symphonia-bundle-flac"] isomp4 = ["symphonia-format-isomp4"] +mkv = ["symphonia-format-mkv"] mp3 = ["symphonia-bundle-mp3"] ogg = ["symphonia-format-ogg"] pcm = ["symphonia-codec-pcm"] @@ -36,6 +37,7 @@ symphonia-codec-vorbis = { version = "0.4", path = "../symphonia-codec-vorbis", symphonia-format-wav = { version = "0.4", path = "../symphonia-format-wav", optional = true } symphonia-format-ogg = { version = "0.4", path = "../symphonia-format-ogg", optional = true } symphonia-format-isomp4 = { version = "0.4", path = "../symphonia-format-isomp4", optional = true } +symphonia-format-mkv = { version = "0.4", path = "../symphonia-format-mkv", optional = true } # Show documentation with all features enabled on docs.rs 
[package.metadata.docs.rs] diff --git a/symphonia/src/lib.rs b/symphonia/src/lib.rs index 88f10e87..48156399 100644 --- a/symphonia/src/lib.rs +++ b/symphonia/src/lib.rs @@ -142,6 +142,8 @@ pub mod default { pub use symphonia_format_ogg::OggReader; #[cfg(feature = "wav")] pub use symphonia_format_wav::WavReader; + #[cfg(feature = "mkv")] + pub use symphonia_format_mkv::MkvReader; } use lazy_static::lazy_static; @@ -237,6 +239,9 @@ pub mod default { #[cfg(feature = "ogg")] probe.register_all::(); + #[cfg(feature = "mkv")] + probe.register_all::(); + // Metadata probe.register_all::(); }