From db953db10a7f250542ea7500ae4eb84b8e66e066 Mon Sep 17 00:00:00 2001 From: Dimiden Date: Wed, 13 Nov 2024 22:13:11 +0900 Subject: [PATCH] Refactored AAC parser --- .../base/info/decoder_configuration_record.h | 10 +- src/projects/base/ovlibrary/bit_reader.h | 201 ++++---- src/projects/base/ovlibrary/data.cpp | 10 + src/projects/base/ovlibrary/data.h | 6 +- .../mediarouter/mediarouter_stream.cpp | 6 +- .../modules/bitstream/aac/aac_adts.cpp | 45 +- src/projects/modules/bitstream/aac/aac_adts.h | 6 +- .../modules/bitstream/aac/aac_converter.cpp | 33 +- .../modules/bitstream/aac/aac_converter.h | 7 +- .../modules/bitstream/aac/aac_defines.cpp | 36 ++ .../modules/bitstream/aac/aac_defines.h | 103 ++++ .../bitstream/aac/audio_specific_config.cpp | 447 +++++++++++++++--- .../bitstream/aac/audio_specific_config.h | 112 ++--- .../h264_decoder_configuration_record.cpp | 4 +- .../h264/h264_decoder_configuration_record.h | 4 +- .../h265_decoder_configuration_record.cpp | 4 +- .../h265/h265_decoder_configuration_record.h | 4 +- .../bitstream/opus/opus_specific_config.h | 4 +- .../rtp_depacketizer_mpeg4_generic_audio.cpp | 2 +- .../modules/segment_writer/writer.cpp | 4 +- .../codec/decoder/decoder_avc_xma.cpp | 4 +- .../codec/decoder/decoder_hevc_xma.cpp | 4 +- 22 files changed, 726 insertions(+), 330 deletions(-) create mode 100644 src/projects/modules/bitstream/aac/aac_defines.cpp create mode 100644 src/projects/modules/bitstream/aac/aac_defines.h diff --git a/src/projects/base/info/decoder_configuration_record.h b/src/projects/base/info/decoder_configuration_record.h index 027a4bc05..8a929c344 100644 --- a/src/projects/base/info/decoder_configuration_record.h +++ b/src/projects/base/info/decoder_configuration_record.h @@ -5,7 +5,7 @@ class DecoderConfigurationRecord { public: - std::shared_ptr GetData() + std::shared_ptr GetData() { if (_updated) { @@ -16,7 +16,7 @@ class DecoderConfigurationRecord return _data; } - virtual bool Parse(const std::shared_ptr &data) = 0; + 
virtual bool Parse(const std::shared_ptr &data) = 0; virtual bool IsValid() const = 0; virtual bool Equals(const std::shared_ptr &other) = 0; @@ -24,10 +24,10 @@ class DecoderConfigurationRecord virtual ov::String GetCodecsParameter() const = 0; protected: - virtual std::shared_ptr Serialize() = 0; + virtual std::shared_ptr Serialize() = 0; // Set serialized data - void SetData(const std::shared_ptr &data) + void SetData(const std::shared_ptr &data) { _data = data; _updated = false; @@ -39,6 +39,6 @@ class DecoderConfigurationRecord } private: - std::shared_ptr _data = nullptr; + std::shared_ptr _data = nullptr; bool _updated = true; }; \ No newline at end of file diff --git a/src/projects/base/ovlibrary/bit_reader.h b/src/projects/base/ovlibrary/bit_reader.h index 7495d3228..382d73d02 100644 --- a/src/projects/base/ovlibrary/bit_reader.h +++ b/src/projects/base/ovlibrary/bit_reader.h @@ -1,30 +1,30 @@ #pragma once -#include +#include #include -#include + #include -#include +#include +#include #include "byte_io.h" class BitReader { - public: - BitReader(const uint8_t *buffer, size_t capacity) : - _buffer(buffer), - _position(buffer), - _capacity(capacity) - { - } - - template + BitReader(const uint8_t *buffer, size_t capacity) + : _buffer(buffer), + _position(buffer), + _capacity(capacity) + { + } + + template T ReadBytes(bool big_endian = true) { T value; bool result = ReadBytes(value, big_endian); - if(result == false) + if (result == false) { return 0; } @@ -39,7 +39,7 @@ class BitReader bool SkipBytes(size_t length) { - if (length > static_cast(_capacity - (_position - _buffer))) + if (length > static_cast(_capacity - (_position - _buffer))) { return false; } @@ -61,15 +61,15 @@ class BitReader } // Note: ReadBytes() API obtains the bits without considering _bit_offset - template - bool ReadBytes(T& value, bool big_endian = true) + template + bool ReadBytes(T &value, bool big_endian = true) { - if (sizeof(value) > static_cast(_capacity - (_position - _buffer))) 
+ if (sizeof(value) > static_cast(_capacity - (_position - _buffer))) { return false; } - if(big_endian == true) + if (big_endian == true) { value = ByteReader::ReadBigEndian(_position); } @@ -83,66 +83,49 @@ class BitReader return true; } - template - T ReadBits(uint8_t bits) + template + T ReadBits(uint8_t bits) { T value; - bool result = ReadBits(bits, value); - if(result == false) + + if (ReadBits(bits, value)) { - return 0; + return value; } - return value; + return static_cast(0); } - template - bool ReadBits(uint8_t bits, T& value) - { - if (bits > sizeof(value) * 8) - { - OV_ASSERT2(false); - return false; - } - value = 0; - if (bits == 0) + template + bool ReadBits(uint8_t bits, T &value) + { + if constexpr (std::is_enum_v) { - return 0; - } + auto underlying_value = static_cast>(value); + if (ReadBitsInternal(bits, underlying_value)) + { + value = static_cast(underlying_value); + return true; + } - if (static_cast((bits + 7) / 8) > static_cast(_capacity - (_position - _buffer))) - { return false; } - - while (bits) - { - const uint8_t bits_from_this_byte = std::min(bits >= 8 ? 8 : bits % 8, 8 - _bit_offset); - const uint8_t mask_offset = 8 - bits_from_this_byte - _bit_offset; - const uint8_t mask = ((1 << bits_from_this_byte ) - 1) << mask_offset; - value <<= bits_from_this_byte; - value |= (*_position & mask) >> mask_offset; - bits -= bits_from_this_byte; - _bit_offset += bits_from_this_byte; - if (_bit_offset == 8) - { - NextPosition(); - _bit_offset = 0; - } - } - return true; - } + else + { + return ReadBitsInternal(bits, value); + } + } bool ReadBit(uint8_t &value) - { - return ReadBits(1, value); - } + { + return ReadBits(1, value); + } - bool ReadBoolBit() + bool ReadBoolBit() { bool value; bool result = ReadBit(value); - if(result == false) + if (result == false) { return false; } @@ -150,22 +133,22 @@ class BitReader return value; } - bool ReadBit(bool &value) - { - uint8_t bit; - if (ReadBit(bit)) - { - value = bit == 1 ? 
true : false; - return true; - } - return false; - } + bool ReadBit(bool &value) + { + uint8_t bit; + if (ReadBit(bit)) + { + value = bit == 1 ? true : false; + return true; + } + return false; + } uint8_t ReadBit() { uint8_t value; bool result = ReadBit(value); - if(result == false) + if (result == false) { return 0; } @@ -180,7 +163,7 @@ class BitReader size_t BytesSetionConsumed() { - if(_lap_position == nullptr) + if (_lap_position == nullptr) { return 0; } @@ -189,7 +172,7 @@ class BitReader return bytes; } - const uint8_t* CurrentPosition() + const uint8_t *CurrentPosition() { return _position; } @@ -204,26 +187,66 @@ class BitReader return (_capacity * 8) - BitsConsumed(); } - size_t BytesConsumed() const - { - return _position - _buffer; - } + size_t BytesConsumed() const + { + return _position - _buffer; + } - size_t BitsConsumed() const - { - return (BytesConsumed() * 8) + _bit_offset; - } + size_t BitsConsumed() const + { + return (BytesConsumed() * 8) + _bit_offset; + } protected: + template + bool ReadBitsInternal(uint8_t bits, T &value) + { + if (bits > sizeof(value) * 8) + { + OV_ASSERT2(false); + return false; + } + + value = 0; + + if (bits == 0) + { + return true; + } + + if (static_cast((bits + 7) / 8) > static_cast(_capacity - (_position - _buffer))) + { + return false; + } + + while (bits) + { + const uint8_t bits_from_this_byte = std::min(bits >= 8 ? 
8 : bits % 8, 8 - _bit_offset); + const uint8_t mask_offset = 8 - bits_from_this_byte - _bit_offset; + const uint8_t mask = ((1 << bits_from_this_byte) - 1) << mask_offset; + value <<= bits_from_this_byte; + value |= (*_position & mask) >> mask_offset; + bits -= bits_from_this_byte; + _bit_offset += bits_from_this_byte; + if (_bit_offset == 8) + { + NextPosition(); + _bit_offset = 0; + } + } + + return true; + } + virtual void NextPosition() { - _position ++; + _position++; } - const uint8_t* _buffer; - const uint8_t* _position; - const uint8_t* _lap_position; - size_t _capacity; - int _bit_offset = 0; - uint8_t _mask = 0x80; + const uint8_t *_buffer; + const uint8_t *_position; + const uint8_t *_lap_position; + size_t _capacity; + int _bit_offset = 0; + uint8_t _mask = 0x80; }; diff --git a/src/projects/base/ovlibrary/data.cpp b/src/projects/base/ovlibrary/data.cpp index a3ff8855d..80a8fafe7 100644 --- a/src/projects/base/ovlibrary/data.cpp +++ b/src/projects/base/ovlibrary/data.cpp @@ -210,6 +210,16 @@ namespace ov return IsEqual(data->GetData(), data->GetLength()); } + bool Data::IsEqual(const std::shared_ptr &data) const + { + return IsEqual(data->GetData(), data->GetLength()); + } + + bool Data::IsEqual(const std::shared_ptr &data) const + { + return IsEqual(data->GetData(), data->GetLength()); + } + bool Data::IsEmpty() const { return (GetLength() == 0); diff --git a/src/projects/base/ovlibrary/data.h b/src/projects/base/ovlibrary/data.h index f7a56d692..9b1d6181e 100644 --- a/src/projects/base/ovlibrary/data.h +++ b/src/projects/base/ovlibrary/data.h @@ -193,10 +193,8 @@ namespace ov bool IsEqual(const void *data, size_t length) const; bool IsEqual(const Data &data) const; bool IsEqual(const Data *data) const; - bool IsEqual(const std::shared_ptr &data) const - { - return IsEqual(data->GetData(), data->GetLength()); - } + bool IsEqual(const std::shared_ptr &data) const; + bool IsEqual(const std::shared_ptr &data) const; bool IsEmpty() const; diff --git 
a/src/projects/mediarouter/mediarouter_stream.cpp b/src/projects/mediarouter/mediarouter_stream.cpp index 39b2a9269..782e165e5 100644 --- a/src/projects/mediarouter/mediarouter_stream.cpp +++ b/src/projects/mediarouter/mediarouter_stream.cpp @@ -549,7 +549,7 @@ bool MediaRouteStream::ProcessAACRawStream(std::shared_ptr &media_tr return false; } - media_track->SetSampleRate(audio_config->SamplerateNum()); + media_track->SetSampleRate(audio_config->Samplerate()); media_track->GetChannel().SetLayout(audio_config->Channel() == 1 ? AudioChannel::Layout::LayoutMono : AudioChannel::Layout::LayoutStereo); media_track->SetDecoderConfigurationRecord(audio_config); @@ -611,10 +611,10 @@ bool MediaRouteStream::ProcessAACAdtsStream(std::shared_ptr &media_t auto audio_config = std::make_shared(); audio_config->SetObjectType(adts.ObjectType()); - audio_config->SetSamplingFrequency(adts.Samplerate()); + audio_config->SetSamplingFrequencyIndex(adts.SamplingFrequencyIndex()); audio_config->SetChannel(adts.ChannelConfiguration()); - media_track->SetSampleRate(audio_config->SamplerateNum()); + media_track->SetSampleRate(audio_config->Samplerate()); media_track->GetChannel().SetLayout(audio_config->Channel() == 1 ? 
AudioChannel::Layout::LayoutMono : AudioChannel::Layout::LayoutStereo); media_track->SetDecoderConfigurationRecord(audio_config); diff --git a/src/projects/modules/bitstream/aac/aac_adts.cpp b/src/projects/modules/bitstream/aac/aac_adts.cpp index 92488821e..686421132 100755 --- a/src/projects/modules/bitstream/aac/aac_adts.cpp +++ b/src/projects/modules/bitstream/aac/aac_adts.cpp @@ -48,7 +48,7 @@ bool AACAdts::Parse(const uint8_t *data, size_t data_length, AACAdts &adts) adts._layer = parser.ReadBits(2); adts._protection_absent = parser.ReadBoolBit(); adts._profile = parser.ReadBits(2); - adts._sampling_frequency_index = parser.ReadBits(4); + adts._sampling_frequency_index = parser.ReadBits(4); adts._private_bit = parser.ReadBit(); adts._channel_configuration = parser.ReadBits(3); adts._original_copy = parser.ReadBoolBit(); @@ -155,47 +155,14 @@ ov::String AACAdts::ObjectTypeString() return "Unknown"; } -AacSamplingFrequencies AACAdts::Samplerate() +AacSamplingFrequencies AACAdts::SamplingFrequencyIndex() { - return static_cast(_sampling_frequency_index); + return _sampling_frequency_index; } -uint32_t AACAdts::SamplerateNum() +uint32_t AACAdts::Samplerate() { - switch (Samplerate()) - { - case RATES_96000HZ: - return 96000; - case RATES_88200HZ: - return 88200; - case RATES_64000HZ: - return 64000; - case RATES_48000HZ: - return 48000; - case RATES_44100HZ: - return 44100; - case RATES_32000HZ: - return 32000; - case RATES_24000HZ: - return 24000; - case RATES_22050HZ: - return 22050; - case RATES_16000HZ: - return 16000; - case RATES_12000HZ: - return 12000; - case RATES_11025HZ: - return 11025; - case RATES_8000HZ: - return 8000; - case RATES_7350HZ: - return 7350; - case RATES_RESERVED: - case EXPLICIT_RATE: - return 0; - } - - return 0; + return GetAacSamplingFrequency(_sampling_frequency_index); } uint8_t AACAdts::ChannelConfiguration() @@ -226,7 +193,7 @@ ov::String AACAdts::GetInfoString() out_str.AppendFormat("\tLayer(%d)\n", Layer()); 
out_str.AppendFormat("\tProtectionAbsent(%s)\n", ProtectionAbsent() ? "true" : "false"); out_str.AppendFormat("\tObjectType(%d/%s)\n", ObjectType(), ObjectTypeString().CStr()); - out_str.AppendFormat("\tSamplerate(%d/%d)\n", Samplerate(), SamplerateNum()); + out_str.AppendFormat("\tSamplerate(%d/%d)\n", SamplingFrequencyIndex(), Samplerate()); out_str.AppendFormat("\tChannelConfiguration(%d)\n", ChannelConfiguration()); out_str.AppendFormat("\tHome(%s)\n", Home() ? "true" : "false"); out_str.AppendFormat("\tAacFrameLength(%d)\n", AacFrameLength()); diff --git a/src/projects/modules/bitstream/aac/aac_adts.h b/src/projects/modules/bitstream/aac/aac_adts.h index 49f593bec..d7d92c685 100755 --- a/src/projects/modules/bitstream/aac/aac_adts.h +++ b/src/projects/modules/bitstream/aac/aac_adts.h @@ -20,8 +20,8 @@ class AACAdts bool ProtectionAbsent(); AudioObjectType ObjectType(); ov::String ObjectTypeString(); - AacSamplingFrequencies Samplerate(); - uint32_t SamplerateNum(); + AacSamplingFrequencies SamplingFrequencyIndex(); + uint32_t Samplerate(); uint8_t ChannelConfiguration(); bool Originality(); bool Home(); @@ -35,7 +35,7 @@ class AACAdts uint8_t _layer = 0; // 2 bits (always 0) bool _protection_absent; // 1 bit (1: no CRC | 0: CRC) uint8_t _profile; // 2 bits (AacObjectType - 1) - uint8_t _sampling_frequency_index; // 4 bits (15 is forbidden) + AacSamplingFrequencies _sampling_frequency_index; // 4 bits (15 is forbidden) uint8_t _private_bit; // 1 bit (never to be used by MPEG, set 0: encoding ignore when decoding) uint8_t _channel_configuration; // 3 bits (0 : sent via an inband PCE) bool _original_copy; // 1 bit (set 0: encoding, ignore when decoding) diff --git a/src/projects/modules/bitstream/aac/aac_converter.cpp b/src/projects/modules/bitstream/aac/aac_converter.cpp index 6d09511e5..b8b0ba0d4 100755 --- a/src/projects/modules/bitstream/aac/aac_converter.cpp +++ b/src/projects/modules/bitstream/aac/aac_converter.cpp @@ -9,7 +9,7 @@ #define OV_LOG_TAG 
"AACConverter" -std::shared_ptr AacConverter::MakeAdtsHeader(uint8_t aac_profile, uint8_t aac_sample_rate, uint8_t aac_channels, int16_t data_length) +std::shared_ptr AacConverter::MakeAdtsHeader(uint8_t aac_profile, uint8_t aac_sample_rate, uint8_t aac_channels, int16_t data_length, const std::shared_ptr &data) { uint8_t ADTS_HEADER_LENGTH = 7; int16_t aac_frame_length = data_length + 7; @@ -32,8 +32,12 @@ std::shared_ptr AacConverter::MakeAdtsHeader(uint8_t aac_profile, uint bits.WriteBits(11, 0x3F); // adts_buffer_fullness[11b] bits.WriteBits(2, 0); // no_raw_data_blocks_inframe[2b] - std::shared_ptr data = std::make_shared(bits.GetData(), bits.GetDataSize()); + if (data == nullptr) + { + return std::make_shared(bits.GetData(), bits.GetDataSize()); + } + data->Append(bits.GetData(), bits.GetDataSize()); return data; } @@ -73,19 +77,18 @@ std::shared_ptr AacConverter::MakeAdtsHeader(uint8_t aac_profile, uint 11 (3) (reserved) AAC LTP */ -// Raw audio data msut be 1 frame +// Raw audio data must be 1 frame std::shared_ptr AacConverter::ConvertRawToAdts(const uint8_t *data, size_t data_len, const AudioSpecificConfig &aac_config) { auto adts_data = std::make_shared(data_len + 16); - //Get the AudioSpecificConfig value from extradata; - uint8_t aac_profile = (uint8_t)aac_config.GetAacProfile(); - uint8_t aac_sample_rate = (uint8_t)aac_config.SamplingFrequency(); - uint8_t aac_channels = (uint8_t)aac_config.Channel(); - - auto adts_header = MakeAdtsHeader(aac_profile, aac_sample_rate, aac_channels, data_len); + MakeAdtsHeader( + static_cast(aac_config.GetAacProfile()), + static_cast(aac_config.ProbeAacSamplingFrequencyIndex()), + aac_config.Channel(), + data_len, + adts_data); - adts_data->Append(adts_header); adts_data->Append(data, data_len); return adts_data; @@ -93,17 +96,17 @@ std::shared_ptr AacConverter::ConvertRawToAdts(const uint8_t *data, si std::shared_ptr AacConverter::ConvertRawToAdts(const std::shared_ptr &data, const std::shared_ptr &aac_config) { - 
if(aac_config == nullptr) + if (aac_config == nullptr) { return nullptr; } - + return ConvertRawToAdts(data->GetDataAs(), data->GetLength(), *aac_config); } std::shared_ptr AacConverter::ConvertRawToAdts(const std::shared_ptr &data, const std::shared_ptr &aac_config_data) { - if(aac_config_data == nullptr) + if (aac_config_data == nullptr) { return nullptr; } @@ -211,12 +214,12 @@ std::shared_ptr AacConverter::ConvertAdtsToRaw(const std::shared_ptr &aac_config) { - if(aac_config == nullptr) + if (aac_config == nullptr) { return ""; } - return ov::String::FormatString("%d", static_cast(aac_config->ObjectType())); + return ov::String::FormatString("%d", static_cast(aac_config->ObjectType())); } ov::String AacConverter::GetProfileString(const std::shared_ptr &aac_config_data) diff --git a/src/projects/modules/bitstream/aac/aac_converter.h b/src/projects/modules/bitstream/aac/aac_converter.h index fac27fc28..92c11a078 100755 --- a/src/projects/modules/bitstream/aac/aac_converter.h +++ b/src/projects/modules/bitstream/aac/aac_converter.h @@ -1,10 +1,11 @@ #pragma once #include -#include "base/mediarouter/media_buffer.h" -#include "audio_specific_config.h" #include +#include "audio_specific_config.h" +#include "base/mediarouter/media_buffer.h" + // Default = AacObjectTypeAacLC #define AAC_CONVERTER_DEFAULT_PROFILE "2" @@ -19,5 +20,5 @@ class AacConverter static ov::String GetProfileString(const std::shared_ptr &aac_config); static ov::String GetProfileString(const std::shared_ptr &aac_config_data); - static std::shared_ptr MakeAdtsHeader(uint8_t aac_profile, uint8_t aac_sample_rate, uint8_t aac_channels, int16_t data_length); + static std::shared_ptr MakeAdtsHeader(uint8_t aac_profile, uint8_t aac_sample_rate, uint8_t aac_channels, int16_t data_length, const std::shared_ptr &data = nullptr); }; diff --git a/src/projects/modules/bitstream/aac/aac_defines.cpp b/src/projects/modules/bitstream/aac/aac_defines.cpp new file mode 100644 index 000000000..a7a3cbfd5 --- /dev/null 
+++ b/src/projects/modules/bitstream/aac/aac_defines.cpp @@ -0,0 +1,36 @@ +#include "aac_defines.h" + +#define _SAMPLE_RATE_CASE(rate, value) \ + case AacSamplingFrequencies::rate: \ + return value; + +uint32_t GetAacSamplingFrequency(AacSamplingFrequencies sampling_frequency_index) +{ + switch (sampling_frequency_index) + { + _SAMPLE_RATE_CASE(_96000, 96000); + _SAMPLE_RATE_CASE(_88200, 88200); + _SAMPLE_RATE_CASE(_64000, 64000); + _SAMPLE_RATE_CASE(_48000, 48000); + _SAMPLE_RATE_CASE(_44100, 44100); + _SAMPLE_RATE_CASE(_32000, 32000); + _SAMPLE_RATE_CASE(_24000, 24000); + _SAMPLE_RATE_CASE(_22050, 22050); + _SAMPLE_RATE_CASE(_16000, 16000); + _SAMPLE_RATE_CASE(_12000, 12000); + _SAMPLE_RATE_CASE(_11025, 11025); + _SAMPLE_RATE_CASE(_8000, 8000); + _SAMPLE_RATE_CASE(_7350, 7350); + case AacSamplingFrequencies::RESERVED1: + [[fallthrough]]; + case AacSamplingFrequencies::RESERVED2: + break; + + case AacSamplingFrequencies::ESCAPE_VALUE: + // ESCAPE_VALUE has no table entry; the caller must handle it before calling this function + OV_ASSERT2(false); + break; + } + + return 0; +} diff --git a/src/projects/modules/bitstream/aac/aac_defines.h b/src/projects/modules/bitstream/aac/aac_defines.h new file mode 100644 index 000000000..f84369cc9 --- /dev/null +++ b/src/projects/modules/bitstream/aac/aac_defines.h @@ -0,0 +1,103 @@ +#pragma once + +#include + +// Table 1.3 – Audio Profiles definition +// @see ISO/IEC 14496-3 (2009), Information technology - Coding of audio-visual objects - Part 3: Audio +enum class AudioObjectType : uint8_t +{ + Null = 0, // Null + AacMain = 1, // Main + AacLc = 2, // Low Complexity + AacSsr = 3, // Scalable Sample Rate + AacLtp = 4, // Long Term Predictor + Sbr = 5, // SBR Spectral Band Replication + AacScalable = 6, // AAC Scalable + Twinvq = 7, // Twin VQ Vector Quantizer + Celp = 8, // Code Excited Linear Prediction + Hvxc = 9, // Harmonic Vector eXcitation Coding + Reserved10 = 10, // (reserved) + Reserved11 = 11, // (reserved) + Ttsi = 12, // Text to Speech Interface + 
MainSynthetic = 13, // Main Synthetic + WavetableSynthesis = 14, // Wavetable Synthesis + GeneralMidi = 15, // General MIDI + AlgorithmicSynthesisAndAudioFx = 16, // Algorithmic Synthesis and Audio FX + ErAacLc = 17, // Error Resilient (ER) AAC Low Complexity (LC) + Reserved18 = 18, // (reserved) + ErAacLtp = 19, // Error Resilient (ER) AAC Long Term Predictor (LTP) + ErAacScalable = 20, // Error Resilient (ER) AAC scalable + ErTwinvq = 21, // Error Resilient (ER) TwinVQ + ErBsac = 22, // Error Resilient (ER) Bit Sliced Arithmetic Coding + ErAacLd = 23, // Error Resilient (ER) AAC Low Delay + ErCelp = 24, // Error Resilient (ER) Code Excited Linear Prediction + ErHvxc = 25, // Error Resilient (ER) Harmonic Vector eXcitation Coding + ErHiln = 26, // Error Resilient (ER) Harmonic and Individual Lines plus Noise + ErParametric = 27, // Error Resilient (ER) Parametric + Ssc = 28, // SinuSoidal Coding + Ps = 29, // Parametric Stereo + MpegSurround = 30, // MPEG Surround + Escape = 31, // (escape) + Layer1 = 32, // Layer-1 Audio + Layer2 = 33, // Layer-2 Audio + Layer3 = 34, // Layer-3 Audio + Dst = 35, // Direct Stream Transfer + Als = 36, // Audio Lossless Coding + Sls = 37, // Scalable Lossless Coding + SlsNonCore = 38, // Scalable Lossless Non-Core Audio + ErAacEld = 39, // Error Resilient (ER) AAC Enhanced Low Delay + SmrSimple = 40, // Symbolic Music Representation Simple + SmrMain = 41, // Symbolic Music Representation Main +}; + +enum class AacProfile : uint8_t +{ + Reserved = 3, + + // @see 7.1 Profiles, aac-iso-13818-7.pdf, page 40 + Main = 0, + LC = 1, + SSR = 2 +}; + +// Table 1.18 – Sampling Frequency Index +// +// | samplingFrequencyIndex | Value | +// +------------------------+--------------| +// | 0x0 | 96000 | +// | 0x1 | 88200 | +// | 0x2 | 64000 | +// | 0x3 | 48000 | +// | 0x4 | 44100 | +// | 0x5 | 32000 | +// | 0x6 | 24000 | +// | 0x7 | 22050 | +// | 0x8 | 16000 | +// | 0x9 | 12000 | +// | 0xa | 11025 | +// | 0xb | 8000 | +// | 0xc | 7350 | +// | 0xd 
| reserved | +// | 0xe | reserved | +// | 0xf | escape value | +// +enum class AacSamplingFrequencies : uint8_t +{ + _96000 = 0, + _88200 = 1, + _64000 = 2, + _48000 = 3, + _44100 = 4, + _32000 = 5, + _24000 = 6, + _22050 = 7, + _16000 = 8, + _12000 = 9, + _11025 = 10, + _8000 = 11, + _7350 = 12, + RESERVED1 = 13, + RESERVED2 = 14, + ESCAPE_VALUE = 15 +}; +uint32_t GetAacSamplingFrequency(AacSamplingFrequencies sampling_frequency_index); diff --git a/src/projects/modules/bitstream/aac/audio_specific_config.cpp b/src/projects/modules/bitstream/aac/audio_specific_config.cpp index 776c6bba5..89ba6aec1 100644 --- a/src/projects/modules/bitstream/aac/audio_specific_config.cpp +++ b/src/projects/modules/bitstream/aac/audio_specific_config.cpp @@ -7,16 +7,24 @@ #define OV_LOG_TAG "AACSpecificConfig" -bool AudioSpecificConfig::IsValid() const -{ - if (_object_type == AudioObjectType::Null || - _sampling_frequency_index == AacSamplingFrequencies::RATES_RESERVED || - _channel == 15) - { - return false; +#define _SET_PROBED_FREQUENCY_INDEX(frequency, max, min, index) \ + if ((max > frequency) && (frequency >= min)) \ + { \ + _probed_sampling_frequency_index = AacSamplingFrequencies::index; \ + break; \ } - return true; +#define _RETURN_IF_FAIL(expression) \ + if (expression == false) \ + { \ + return false; \ + } + +bool AudioSpecificConfig::IsValid() const +{ + return (_audio_object_type != AudioObjectType::Null) && + ((_sampling_frequency_index < AacSamplingFrequencies::RESERVED1) || (_probed_sampling_frequency_index < AacSamplingFrequencies::RESERVED1)) && + (_channel_configuration < 15); } ov::String AudioSpecificConfig::GetCodecsParameter() const @@ -30,10 +38,177 @@ ov::String AudioSpecificConfig::GetCodecsParameter() const // // OTN == profile_number // https://developer.mozilla.org/en-US/docs/Web/Media/Formats/codecs_parameter#MPEG-4_audio - return ov::String::FormatString("mp4a.40.%d", static_cast(_object_type)); + return ov::String::FormatString("mp4a.40.%d", 
static_cast(_audio_object_type)); +} + +bool AudioSpecificConfig::GetAudioObjectType(BitReader &reader, AudioObjectType &audio_object_type) const +{ + // Table 1.16 - Syntax of GetAudioObjectType() + // GetAudioObjectType() + // { + // audioObjectType; 5 uimsbf + // if (audioObjectType == 31) { + // audioObjectType = 32 + audioObjectTypeExt; 6 uimsbf + // } + // return audioObjectType; + // } + _RETURN_IF_FAIL(reader.ReadBits(5, audio_object_type)); + if (audio_object_type == AudioObjectType::Escape) + { + uint8_t audio_object_type_ext; + _RETURN_IF_FAIL(reader.ReadBits(6, audio_object_type_ext)); + + audio_object_type = static_cast(32 + audio_object_type_ext); + } + + return true; } -bool AudioSpecificConfig::Parse(const std::shared_ptr &data) +uint32_t AudioSpecificConfig::CalculateFrameLength(AudioObjectType audio_object_type, bool frame_length_flag) const +{ + // frameLengthFlag + // + // Length of the frame, number of spectral lines, respective. + // For all General Audio Object Types except AAC SSR and ER AAC LD: + // If set to "0" a 1024/128 lines IMDCT is used and frameLength is set to + // 1024, if set to "1" a 960/120 line IMDCT is used and frameLength is set + // to 960. + // For ER AAC LD: If set to "0" a 512 lines IMDCT is used and + // frameLength is set to 512, if set to "1" a 480 line IMDCT is used and + // frameLength is set to 480. + // For AAC SSR: Must be set to "0". A 256/32 lines IMDCT (first or second value) + // is distinguished by the value of window_sequence. + switch (audio_object_type) + { + // For all General Audio Object Types except AAC SSR and ER AAC LD + default: + return frame_length_flag ? 960 : 1024; + + // For ER AAC LD + case AudioObjectType::ErAacLd: + return frame_length_flag ? 
480 : 512; + + // For AAC SSR + case AudioObjectType::AacSsr: + OV_ASSERT(frame_length_flag == 0, "AAC SSR must be set to 0"); + + // TODO: need to parse window_sequence +#ifdef DEBUG + logtw("AAC SSR is not supported yet"); +#endif // DEBUG + + return 256; + } +} + +bool AudioSpecificConfig::GASpecificConfig(BitReader &reader) +{ + // Table 4.1 - Syntax of GASpecificConfig() + // + // GASpecificConfig (samplingFrequencyIndex, + // channelConfiguration, + // audioObjectType) + // + // { + // frameLengthFlag; 1 bslbf + // dependsOnCoreCoder; 1 bslbf + // if (dependsOnCoreCoder) { + // coreCoderDelay; 14 uimsbf + // } + // extensionFlag; 1 bslbf + // if (! channelConfiguration) { + // program_config_element (); + // } + // if ((audioObjectType == 6) || (audioObjectType == 20)) { + // layerNr; 3 uimsbf + // } + // if (extensionFlag) { + // if (audioObjectType == 22) { + // numOfSubFrame; 5 bslbf + // layer_length; 11 bslbf + // } + // if (audioObjectType == 17 || audioObjectType == 19 || + // audioObjectType == 20 || audioObjectType == 23) { + // aacSectionDataResilienceFlag; 1 bslbf + // aacScalefactorDataResilienceFlag; 1 bslbf + // aacSpectralDataResilienceFlag; 1 bslbf + // } + // extensionFlag3; 1 bslbf + // if (extensionFlag3) { + // /* tbd in version 3 */ + // } + // } + // } + + bool frame_length_flag; + _RETURN_IF_FAIL(reader.ReadBit(frame_length_flag)); + + _frame_length = CalculateFrameLength(_audio_object_type, frame_length_flag); + + uint8_t depends_on_core_coder; + _RETURN_IF_FAIL(reader.ReadBits(1, depends_on_core_coder)); + + if (depends_on_core_coder) + { + [[maybe_unused]] uint16_t core_coder_delay; + _RETURN_IF_FAIL(reader.ReadBits(14, core_coder_delay)); + } + + uint8_t extension_flag; + _RETURN_IF_FAIL(reader.ReadBits(1, extension_flag)); + + if (_channel_configuration == 0) + { + // Not implemented + // program_config_element(); + } + + if ( + (_audio_object_type == AudioObjectType::AacScalable) || + (_audio_object_type == 
AudioObjectType::ErAacScalable)) + { + [[maybe_unused]] uint8_t layer_nr; + _RETURN_IF_FAIL(reader.ReadBits(3, layer_nr)); + } + + if (extension_flag) + { + if (_audio_object_type == AudioObjectType::ErBsac) + { + [[maybe_unused]] uint8_t num_of_sub_frame; + _RETURN_IF_FAIL(reader.ReadBits(5, num_of_sub_frame)); + + [[maybe_unused]] uint16_t layer_length; + _RETURN_IF_FAIL(reader.ReadBits(11, layer_length)); + } + else if ((_audio_object_type == AudioObjectType::ErAacLc) || + (_audio_object_type == AudioObjectType::ErAacLtp) || + (_audio_object_type == AudioObjectType::ErAacScalable) || + (_audio_object_type == AudioObjectType::ErAacLd)) + { + uint8_t aac_section_data_resilience_flag; + _RETURN_IF_FAIL(reader.ReadBits(1, aac_section_data_resilience_flag)); + + uint8_t aac_scalefactor_data_resilience_flag; + _RETURN_IF_FAIL(reader.ReadBits(1, aac_scalefactor_data_resilience_flag)); + + uint8_t aac_spectral_data_resilience_flag; + _RETURN_IF_FAIL(reader.ReadBits(1, aac_spectral_data_resilience_flag)); + } + + uint8_t extension_flag3; + _RETURN_IF_FAIL(reader.ReadBits(1, extension_flag3)); + + if (extension_flag3) + { + // /* tbd in version 3 */ + } + } + + return true; +} + +bool AudioSpecificConfig::Parse(const std::shared_ptr &data) { if (data->GetLength() < MIN_AAC_SPECIFIC_CONFIG_SIZE) { @@ -41,11 +216,163 @@ bool AudioSpecificConfig::Parse(const std::shared_ptr &data) return false; } - BitReader parser(data->GetDataAs(), data->GetLength()); + BitReader reader(data->GetDataAs(), data->GetLength()); + + // audioObjectType = GetAudioObjectType(); + if (GetAudioObjectType(reader, _audio_object_type) == false) + { + return false; + } + + // samplingFrequencyIndex; 4 bslbf + // if ( samplingFrequencyIndex == 0xf ) { + // samplingFrequency; 24 uimsbf + // } + _RETURN_IF_FAIL(reader.ReadBits(4, _sampling_frequency_index)); + + if (_sampling_frequency_index == AacSamplingFrequencies::ESCAPE_VALUE) + { + _RETURN_IF_FAIL(reader.ReadBits(24, _sampling_frequency)); + + do + 
{ + // Table 4.82 - Sampling frequency mapping + // + // +-------------------------+-------------------------------------------+ + // | Frequency range (in Hz) | Use tables for sampling frequency (in Hz) | + // +-------------------------+-------------------------------------------+ + // | f >= 92017 | 96000 | + // | 92017 > f >= 75132 | 88200 | + // | 75132 > f >= 55426 | 64000 | + // | 55426 > f >= 46009 | 48000 | + // | 46009 > f >= 37566 | 44100 | + // | 37566 > f >= 27713 | 32000 | + // | 27713 > f >= 23004 | 24000 | + // | 23004 > f >= 18783 | 22050 | + // | 18783 > f >= 13856 | 16000 | + // | 13856 > f >= 11502 | 12000 | + // | 11502 > f >= 9391 | 11025 | + // | 9391 > f | 8000 | + // +-------------------------+-------------------------------------------+ + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, UINT32_MAX, 92017, _96000); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 92016, 75132, _88200); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 75131, 55426, _64000); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 55425, 46009, _48000); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 46008, 37566, _44100); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 37565, 27713, _32000); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 27712, 23004, _24000); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 23003, 18783, _22050); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 18782, 13856, _16000); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 13855, 11502, _12000); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 11501, 9391, _11025); + _SET_PROBED_FREQUENCY_INDEX(_sampling_frequency, 9390, 0, _8000); + } while (false); + } + else + { + SetSamplingFrequencyIndex(_sampling_frequency_index); + } + + // channelConfiguration; 4 bslbf + _RETURN_IF_FAIL(reader.ReadBits(4, _channel_configuration)); + // sbrPresentFlag = -1; + // psPresentFlag = -1; + // if ( audioObjectType == 5 || + // audioObjectType == 29 ) { + // extensionAudioObjectType = 
5; + // sbrPresentFlag = 1; + // if ( audioObjectType == 29 ) { + // psPresentFlag = 1; + // } + // extensionSamplingFrequencyIndex; 4 uimsbf + // if ( extensionSamplingFrequencyIndex == 0xf ) + // extensionSamplingFrequency; 24 uimsbf + // audioObjectType = GetAudioObjectType(); + // if ( audioObjectType == 22 ) + // extensionChannelConfiguration; 4 uimsbf + // } + // else { + // extensionAudioObjectType = 0; + // } + // int32_t sbrPresentFlag = -1; + // int32_t psPresentFlag = -1; + // AudioObjectType extensionAudioObjectType; + + if ((_audio_object_type == AudioObjectType::Sbr) || (_audio_object_type == AudioObjectType::Ps)) + { + // extensionAudioObjectType = AudioObjectType::Sbr; + // sbrPresentFlag = 1; + // if (audioObjectType == AudioObjectType::Ps) { + // psPresentFlag = 1; + // } + + AacSamplingFrequencies extensionSamplingFrequencyIndex; + _RETURN_IF_FAIL(reader.ReadBits(4, extensionSamplingFrequencyIndex)); + if (extensionSamplingFrequencyIndex == AacSamplingFrequencies::ESCAPE_VALUE) + { + [[maybe_unused]] uint32_t extensionSamplingFrequency; + _RETURN_IF_FAIL(reader.ReadBits(24, extensionSamplingFrequency)); // parsed and discarded; _sampling_frequency keeps the core samplingFrequency + } + + _RETURN_IF_FAIL(GetAudioObjectType(reader, _audio_object_type)); + if (_audio_object_type == AudioObjectType::ErBsac) + { + [[maybe_unused]] uint8_t extensionChannelConfiguration; + _RETURN_IF_FAIL(reader.ReadBits(4, extensionChannelConfiguration)); + } + } + else + { + // extensionAudioObjectType = AudioObjectType::Null; + } + + // switch (audioObjectType) { + // case 1: + // case 2: + // case 3: + // case 4: + // case 6: + // case 7: + // case 17: + // case 19: + // case 20: + // case 21: + // case 22: + // case 23: + // + // GASpecificConfig(); + // break; + switch (_audio_object_type) + { + case AudioObjectType::AacMain: + [[fallthrough]]; + case AudioObjectType::AacLc: + [[fallthrough]]; + case AudioObjectType::AacSsr: + [[fallthrough]]; + case AudioObjectType::AacLtp: + [[fallthrough]]; + case AudioObjectType::AacScalable: + 
[[fallthrough]]; + case AudioObjectType::Twinvq: + [[fallthrough]]; + case AudioObjectType::ErAacLc: + [[fallthrough]]; + case AudioObjectType::ErAacLtp: + [[fallthrough]]; + case AudioObjectType::ErAacScalable: + [[fallthrough]]; + case AudioObjectType::ErTwinvq: + [[fallthrough]]; + case AudioObjectType::ErBsac: + [[fallthrough]]; + case AudioObjectType::ErAacLd: + GASpecificConfig(reader); + break; - _object_type = static_cast(parser.ReadBits(5)); - _sampling_frequency_index = static_cast(parser.ReadBits(4)); - _channel = parser.ReadBits(4); + default: + break; + } return true; } @@ -68,7 +395,7 @@ bool AudioSpecificConfig::Equals(const std::shared_ptrSamplingFrequency()) + if (SamplingFrequencyIndex() != other_config->SamplingFrequencyIndex()) { return false; } @@ -81,88 +408,74 @@ bool AudioSpecificConfig::Equals(const std::shared_ptr AudioSpecificConfig::Serialize() +std::shared_ptr AudioSpecificConfig::Serialize() { ov::BitWriter bits(2); - bits.WriteBits(5, ov::ToUnderlyingType(_object_type)); - bits.WriteBits(4, _sampling_frequency_index); - bits.WriteBits(4, _channel); + bits.WriteBits(5, ov::ToUnderlyingType(_audio_object_type)); + bits.WriteBits(4, ov::ToUnderlyingType(_sampling_frequency_index)); + bits.WriteBits(4, _channel_configuration); return std::make_shared(bits.GetData(), bits.GetDataSize()); } AudioObjectType AudioSpecificConfig::ObjectType() const { - return _object_type; + return _audio_object_type; } void AudioSpecificConfig::SetObjectType(AudioObjectType object_type) { - _object_type = object_type; -} - -uint32_t AudioSpecificConfig::SamplerateNum() const -{ - switch (SamplingFrequency()) - { - case RATES_96000HZ: - return 96000; - case RATES_88200HZ: - return 88200; - case RATES_64000HZ: - return 64000; - case RATES_48000HZ: - return 48000; - case RATES_44100HZ: - return 44100; - case RATES_32000HZ: - return 32000; - case RATES_24000HZ: - return 24000; - case RATES_22050HZ: - return 22050; - case RATES_16000HZ: - return 16000; - case 
RATES_12000HZ: - return 12000; - case RATES_11025HZ: - return 11025; - case RATES_8000HZ: - return 8000; - case RATES_7350HZ: - return 7350; - case RATES_RESERVED: - case EXPLICIT_RATE: - return 0; - } - - return 0; -} - -AacSamplingFrequencies AudioSpecificConfig::SamplingFrequency() const + _audio_object_type = object_type; +} + +uint32_t AudioSpecificConfig::Samplerate() const +{ + return _sampling_frequency; +} + +AacSamplingFrequencies AudioSpecificConfig::SamplingFrequencyIndex() const { return _sampling_frequency_index; } -void AudioSpecificConfig::SetSamplingFrequency(AacSamplingFrequencies sampling_frequency_index) +AacSamplingFrequencies AudioSpecificConfig::ProbeAacSamplingFrequencyIndex() const +{ + // If _sampling_frequency_index is valid, return it. + if (_sampling_frequency_index < AacSamplingFrequencies::RESERVED1) + { + return _sampling_frequency_index; + } + + // Return the probed frequency index + return _probed_sampling_frequency_index; +} + +void AudioSpecificConfig::SetSamplingFrequencyIndex(AacSamplingFrequencies sampling_frequency_index) { _sampling_frequency_index = sampling_frequency_index; + _probed_sampling_frequency_index = AacSamplingFrequencies::ESCAPE_VALUE; + _sampling_frequency = GetAacSamplingFrequency(_sampling_frequency_index); } uint8_t AudioSpecificConfig::Channel() const { - return _channel; + return _channel_configuration; } void AudioSpecificConfig::SetChannel(uint8_t channel) { - _channel = channel; + _channel_configuration = channel; +} + +uint32_t AudioSpecificConfig::FrameLength() const +{ + return _frame_length; } AacProfile AudioSpecificConfig::GetAacProfile() const { - switch (_object_type) + switch (_audio_object_type) { case AudioObjectType::AacMain: return AacProfile::Main; @@ -185,7 +498,7 @@ ov::String AudioSpecificConfig::GetInfoString() const ov::String out_str = ov::String::FormatString("\n[AudioSpecificConfig]\n"); out_str.AppendFormat("\tObjectType(%d)\n", ObjectType()); - 
out_str.AppendFormat("\tSamplingFrequency(%d)\n", SamplingFrequency()); + out_str.AppendFormat("\tSamplingFrequency(%d)\n", SamplingFrequencyIndex()); out_str.AppendFormat("\tChannel(%d)\n", Channel()); return out_str; diff --git a/src/projects/modules/bitstream/aac/audio_specific_config.h b/src/projects/modules/bitstream/aac/audio_specific_config.h index 0ccb830ff..4ce04fe6b 100644 --- a/src/projects/modules/bitstream/aac/audio_specific_config.h +++ b/src/projects/modules/bitstream/aac/audio_specific_config.h @@ -87,113 +87,53 @@ #include #include -// Table 1.3 – Audio Profiles definition -// @see ISO/IEC 14496-3 (2009), Information technology - Coding of audio-visual objects - Part 3: Audio -enum class AudioObjectType : uint8_t -{ - Null = 0, // Null - AacMain = 1, // Main - AacLc = 2, // Low Complexity - AacSsr = 3, // Scalable Sample Rate - AacLtp = 4, // Long Term Predictor - Sbr = 5, // SBR Spectral Band Replication - AacScalable = 6, // AAC Scalable - Twinvq = 7, // Twin VQ Vector Quantizer - Celp = 8, // Code Excited Linear Prediction - Hvxc = 9, // Harmonic Vector eXcitation Coding - Reserved10 = 10, // (reserved) - Reserved11 = 11, // (reserved) - Ttsi = 12, // Text to Speech Interface - MainSynthetic = 13, // Main Synthetic - WavetableSynthesis = 14, // Wavetable Synthesis - GeneralMidi = 15, // General MIDI - AlgorithmicSynthesisAndAudioFx = 16, // Algorithmic Synthesis and Audio FX - ErAacLc = 17, // Error Resilient (ER) AAC Low Complexity (LC) - Reserved18 = 18, // (reserved) - ErAacLtp = 19, // Error Resilient (ER) AAC Long Term Predictor (LTP) - ErAacScalable = 20, // Error Resilient (ER) AAC scalable - ErTwinvq = 21, // Error Resilient (ER) TwinVQ - ErBsac = 22, // Error Resilient (ER) Bit Sliced Arithmetic Coding - ErAacLd = 23, // Error Resilient (ER) AAC Low Delay - ErCelp = 24, // Error Resilient (ER) Code Excited Linear Prediction - ErHvxc = 25, // Error Resilient (ER) Harmonic Vector eXcitation Coding - ErHiln = 26, // Error Resilient (ER) 
Harmonic and Individual Lines plus Noise - ErParametric = 27, // Error Resilient (ER) Parametric - Ssc = 28, // SinuSoidal Coding - Ps = 29, // Parametric Stereo - MpegSurround = 30, // MPEG Surround - Escape = 31, // (escape) - Layer1 = 32, // Layer-1 Audio - Layer2 = 33, // Layer-2 Audio - Layer3 = 34, // Layer-3 Audio - Dst = 35, // Direct Stream Transfer - Als = 36, // Audio Lossless Coding - Sls = 37, // Scalable Lossless Coding - SlsNonCore = 38, // Scalable Lossless Non-Core Audio - ErAacEld = 39, // Error Resilient (ER) AAC Enhanced Low Delay - SmrSimple = 40, // Symbolic Music Representation Simple - SmrMain = 41, // Symbolic Music Representation Main -}; - -enum class AacProfile : uint8_t -{ - Reserved = 3, - - // @see 7.1 Profiles, aac-iso-13818-7.pdf, page 40 - Main = 0, - LC = 1, - SSR = 2 -}; - -enum AacSamplingFrequencies : uint8_t -{ - RATES_96000HZ = 0, - RATES_88200HZ = 1, - RATES_64000HZ = 2, - RATES_48000HZ = 3, - RATES_44100HZ = 4, - RATES_32000HZ = 5, - RATES_24000HZ = 6, - RATES_22050HZ = 7, - RATES_16000HZ = 8, - RATES_12000HZ = 9, - RATES_11025HZ = 10, - RATES_8000HZ = 11, - RATES_7350HZ = 12, - RATES_RESERVED = 13, - EXPLICIT_RATE = 15 -}; +#include "aac_defines.h" #define MIN_AAC_SPECIFIC_CONFIG_SIZE 2 +// Based on ISO14496-3 class AudioSpecificConfig : public DecoderConfigurationRecord { public: bool IsValid() const override; // Instance can be initialized by putting raw data in AudioSpecificConfig. 
- bool Parse(const std::shared_ptr &data) override; + bool Parse(const std::shared_ptr &data) override; bool Equals(const std::shared_ptr &other) override; - std::shared_ptr Serialize() override; + std::shared_ptr Serialize() override; AudioObjectType ObjectType() const; - AacSamplingFrequencies SamplingFrequency() const; - uint32_t SamplerateNum() const; + AacSamplingFrequencies SamplingFrequencyIndex() const; + AacSamplingFrequencies ProbeAacSamplingFrequencyIndex() const; + uint32_t Samplerate() const; uint8_t Channel() const; + uint32_t FrameLength() const; + AacProfile GetAacProfile() const; ov::String GetInfoString() const; void SetObjectType(AudioObjectType object_type); - void SetSamplingFrequency(AacSamplingFrequencies sampling_frequency_index); + void SetSamplingFrequencyIndex(AacSamplingFrequencies sampling_frequency_index); void SetChannel(uint8_t channel); // Helpers ov::String GetCodecsParameter() const; +protected: + // Table 1.16 - Syntax of GetAudioObjectType() + bool GetAudioObjectType(BitReader &reader, AudioObjectType &audio_object_type) const; + + uint32_t CalculateFrameLength(AudioObjectType audio_object_type, bool frame_length_flag) const; + + // Table 4.1 - Syntax of GASpecificConfig() + bool GASpecificConfig(BitReader &reader); + private: - AudioObjectType _object_type = AudioObjectType::Null; // 5 bits - AacSamplingFrequencies _sampling_frequency_index = AacSamplingFrequencies::RATES_RESERVED; // 4 bits + AudioObjectType _audio_object_type = AudioObjectType::Null; // 5 bits + AacSamplingFrequencies _sampling_frequency_index = AacSamplingFrequencies::ESCAPE_VALUE; // 4 bits + AacSamplingFrequencies _probed_sampling_frequency_index = AacSamplingFrequencies::ESCAPE_VALUE; + uint32_t _sampling_frequency = 0; // 24 bits // 0 : Defined in AOT Specifc Config // 1 : 1 channel: front-center @@ -202,5 +142,7 @@ class AudioSpecificConfig : public DecoderConfigurationRecord // 4 : 4 channels: front-center, front-left, front-right, back-center // ... 
// 8-15 : Reserved - uint8_t _channel = 15; // 4 bits -}; \ No newline at end of file + uint8_t _channel_configuration = 15; // 4 bits + + uint32_t _frame_length = 0; +}; diff --git a/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.cpp b/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.cpp index e54a562ba..d86eda235 100644 --- a/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.cpp +++ b/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.cpp @@ -22,7 +22,7 @@ ov::String AVCDecoderConfigurationRecord::GetCodecsParameter() const return ov::String::FormatString("avc1.%02x%02x%02x", ProfileIndication(), Compatibility(), LevelIndication()); } -bool AVCDecoderConfigurationRecord::Parse(const std::shared_ptr &data) +bool AVCDecoderConfigurationRecord::Parse(const std::shared_ptr &data) { if (data == nullptr) { @@ -165,7 +165,7 @@ bool AVCDecoderConfigurationRecord::Equals(const std::shared_ptr AVCDecoderConfigurationRecord::Serialize() +std::shared_ptr AVCDecoderConfigurationRecord::Serialize() { ov::BitWriter bits(512); diff --git a/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.h b/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.h index 7c34f50c9..1fa171b7b 100644 --- a/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.h +++ b/src/projects/modules/bitstream/h264/h264_decoder_configuration_record.h @@ -50,7 +50,7 @@ class AVCDecoderConfigurationRecord : public DecoderConfigurationRecord ov::String GetCodecsParameter() const override; // Instance can be initialized by putting raw data in AVCDecoderConfigurationRecord. - bool Parse(const std::shared_ptr &data) override; + bool Parse(const std::shared_ptr &data) override; bool Equals(const std::shared_ptr &other) override; // Instance can be initialized by putting SPS/PPS in AVCDecoderConfigurationRecord. 
@@ -58,7 +58,7 @@ class AVCDecoderConfigurationRecord : public DecoderConfigurationRecord bool AddPPS(const std::shared_ptr &pps); bool AddSPSExt(const std::shared_ptr &sps_ext); - std::shared_ptr Serialize() override; + std::shared_ptr Serialize() override; uint8_t Version() const; uint8_t ProfileIndication() const; diff --git a/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.cpp b/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.cpp index 9ac877547..d7fe9c2d2 100644 --- a/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.cpp +++ b/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.cpp @@ -101,7 +101,7 @@ ov::String HEVCDecoderConfigurationRecord::GetCodecsParameter() const return codecs_parameter; } -bool HEVCDecoderConfigurationRecord::Parse(const std::shared_ptr &data) +bool HEVCDecoderConfigurationRecord::Parse(const std::shared_ptr &data) { if (data == nullptr) { @@ -279,7 +279,7 @@ bool HEVCDecoderConfigurationRecord::Equals(const std::shared_ptr HEVCDecoderConfigurationRecord::Serialize() +std::shared_ptr HEVCDecoderConfigurationRecord::Serialize() { if (IsValid() == false) { diff --git a/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.h b/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.h index c625be8a5..1c72899d1 100644 --- a/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.h +++ b/src/projects/modules/bitstream/h265/h265_decoder_configuration_record.h @@ -50,10 +50,10 @@ class HEVCDecoderConfigurationRecord : public DecoderConfigurationRecord public: bool IsValid() const override; ov::String GetCodecsParameter() const override; - bool Parse(const std::shared_ptr &data) override; + bool Parse(const std::shared_ptr &data) override; bool Equals(const std::shared_ptr &other) override; - std::shared_ptr Serialize() override; + std::shared_ptr Serialize() override; void AddNalUnit(H265NALUnitType nal_type, const 
std::shared_ptr &nal_unit); // SPS, PPS, VPS, etc. diff --git a/src/projects/modules/bitstream/opus/opus_specific_config.h b/src/projects/modules/bitstream/opus/opus_specific_config.h index d94c9f36a..9fa30d1a1 100644 --- a/src/projects/modules/bitstream/opus/opus_specific_config.h +++ b/src/projects/modules/bitstream/opus/opus_specific_config.h @@ -55,7 +55,7 @@ class OpusSpecificConfig : public DecoderConfigurationRecord return (_header == _header_const) && (_version == 1) && (_channels > 0) && (_sample_rate == 48000); } - bool Parse(const std::shared_ptr &data) override + bool Parse(const std::shared_ptr &data) override { if (data->GetLength() < MIN_OPUS_SPECIFIC_CONFIG_SIZE) { @@ -81,7 +81,7 @@ class OpusSpecificConfig : public DecoderConfigurationRecord return other->GetData()->IsEqual(GetData()); } - std::shared_ptr Serialize() override + std::shared_ptr Serialize() override { ov::BitWriter bits(MIN_OPUS_SPECIFIC_CONFIG_SIZE); diff --git a/src/projects/modules/rtp_rtcp/rtp_depacketizer_mpeg4_generic_audio.cpp b/src/projects/modules/rtp_rtcp/rtp_depacketizer_mpeg4_generic_audio.cpp index 6f62e6ea0..b808be4d9 100644 --- a/src/projects/modules/rtp_rtcp/rtp_depacketizer_mpeg4_generic_audio.cpp +++ b/src/projects/modules/rtp_rtcp/rtp_depacketizer_mpeg4_generic_audio.cpp @@ -55,7 +55,7 @@ std::shared_ptr RtpDepacketizerMpeg4GenericAudio::ParseAndAssembleFram //Get the AACSecificConfig value from extradata; uint8_t aac_profile = static_cast(_aac_config.GetAacProfile()); - uint8_t aac_sample_rate = static_cast(_aac_config.SamplingFrequency()); + uint8_t aac_sample_rate = static_cast(_aac_config.SamplingFrequencyIndex()); uint8_t aac_channels = static_cast(_aac_config.Channel()); bitstream->Append(AacConverter::MakeAdtsHeader(aac_profile, aac_sample_rate, aac_channels, raw_aac_data_length)); diff --git a/src/projects/modules/segment_writer/writer.cpp b/src/projects/modules/segment_writer/writer.cpp index 1fb8fd94d..c26ec6085 100644 --- 
a/src/projects/modules/segment_writer/writer.cpp +++ b/src/projects/modules/segment_writer/writer.cpp @@ -223,7 +223,7 @@ bool Writer::FillCodecParameters(const std::shared_ptr &track, AVCo codec_parameters->height = media_track->GetHeight(); codec_parameters->format = media_track->GetColorspace(); - std::shared_ptr extra_data = nullptr; + std::shared_ptr extra_data = nullptr; if (media_track->GetCodecId() == cmn::MediaCodecId::H265) { codec_parameters->codec_tag = MKTAG('h', 'v', 'c', '1'); @@ -263,7 +263,7 @@ bool Writer::FillCodecParameters(const std::shared_ptr &track, AVCo codec_parameters->format = static_cast(media_track->GetSample().GetFormat()); codec_parameters->codec_tag = 0; - std::shared_ptr extra_data = nullptr; + std::shared_ptr extra_data = nullptr; if (media_track->GetCodecId() == cmn::MediaCodecId::Aac) { codec_parameters->codec_tag = MKTAG('a', 'a', 'c', 'p'); diff --git a/src/projects/transcoder/codec/decoder/decoder_avc_xma.cpp b/src/projects/transcoder/codec/decoder/decoder_avc_xma.cpp index 5a0f7087f..1d6372d64 100644 --- a/src/projects/transcoder/codec/decoder/decoder_avc_xma.cpp +++ b/src/projects/transcoder/codec/decoder/decoder_avc_xma.cpp @@ -46,14 +46,14 @@ bool DecoderAVCxXMA::InitCodec() } // Set the SPS/PPS to extradata - std::shared_ptr extra_data = nullptr; + std::shared_ptr extra_data = nullptr; extra_data = decoder_config != nullptr ? 
decoder_config->GetData() : nullptr; if (extra_data != nullptr) { _context->extradata_size = extra_data->GetLength(); _context->extradata = (uint8_t *)::av_malloc(_context->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); ::memset(_context->extradata, 0, _context->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); - ::memcpy(_context->extradata, reinterpret_cast(extra_data->GetData()), _context->extradata_size); + ::memcpy(_context->extradata, extra_data->GetData(), _context->extradata_size); } ::av_opt_set_int(_context->priv_data, "lxlnx_hwdev", _track->GetCodecDeviceId(), 0); diff --git a/src/projects/transcoder/codec/decoder/decoder_hevc_xma.cpp b/src/projects/transcoder/codec/decoder/decoder_hevc_xma.cpp index fe714e579..0f2de2846 100644 --- a/src/projects/transcoder/codec/decoder/decoder_hevc_xma.cpp +++ b/src/projects/transcoder/codec/decoder/decoder_hevc_xma.cpp @@ -47,14 +47,14 @@ bool DecoderHEVCxXMA::InitCodec() } // Set the SPS/PPS to extradata - std::shared_ptr extra_data = nullptr; + std::shared_ptr extra_data = nullptr; extra_data = decoder_config != nullptr ? decoder_config->GetData() : nullptr; if (extra_data != nullptr) { _context->extradata_size = extra_data->GetLength(); _context->extradata = (uint8_t *)::av_malloc(_context->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); ::memset(_context->extradata, 0, _context->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); - ::memcpy(_context->extradata, reinterpret_cast(extra_data->GetData()), _context->extradata_size); + ::memcpy(_context->extradata, extra_data->GetData(), _context->extradata_size); } ::av_opt_set_int(_context->priv_data, "lxlnx_hwdev", _track->GetCodecDeviceId(), 0);