Skip to content

Commit

Permalink
AudioContentBlockSpec Tune-up
Browse files Browse the repository at this point in the history
Summary:
This change improves AudioContentBlockSpec in the following ways:
- update the terminology to use "audio frame" as used in the audio industry, to designate a group of audio samples recorded at the same time, one sample by audio channel. With stereo audio, each audio frame has two audio samples, one per channel. We previously used "audio block", which is non-standard and easy to confuse with the audio content block.
- update constructors to require an AudioFormat, which allows use to...
- reorder parameters in AudioContentBlockSpec, so that all the parameters defining the format of the audio format comes first, followed by the frame spec (sample format, channel count, frame stride), then the sample frame rate, then the sample frame count, which is the most variable part of the definition, since in the same file you could easily have audio content blocks with different audio frame counts.

Reviewed By: perdoch

Differential Revision: D53338818

fbshipit-source-id: 6da17f9c3f91fee474b1404b78063bb273f2880b
  • Loading branch information
Georges Berenger authored and facebook-github-bot committed Feb 2, 2024
1 parent 3c7e7cc commit c9c8403
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 78 deletions.
2 changes: 1 addition & 1 deletion sample_apps/SampleRecordingApp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class AudioStream : public Recordable {
Record::Type::DATA,
kDataRecordFormatVersion,
// the following describe data records' format: a block of audio samples
ContentBlock(AudioSampleFormat::S16_LE, kNumChannels, kSampleRate),
ContentBlock(AudioFormat::PCM, AudioSampleFormat::S16_LE, kNumChannels, 0, kSampleRate),
{});
}
const Record* createConfigurationRecord() override {
Expand Down
26 changes: 14 additions & 12 deletions vrs/ContentBlockReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,25 +157,26 @@ bool AudioBlockReader::readAudioContentBlock(
if (remainingBlockSize > 0 && audioContent.getAudioFormat() == AudioFormat::PCM) {
// The sample count is undefined, but we can to do the math,
// using the remaining bytes in the record.
uint8_t sampleBlockStride = audioContent.getSampleBlockStride();
if (sampleBlockStride > 0 && (remainingBlockSize % sampleBlockStride) == 0) {
uint8_t sampleFrameStride = audioContent.getSampleFrameStride();
if (sampleFrameStride > 0 && (remainingBlockSize % sampleFrameStride) == 0) {
// update contentBlock with the actual sample count
return player.onAudioRead(
record,
blockIndex_,
ContentBlock(
AudioFormat::PCM,
audioContent.getSampleFormat(),
audioContent.getChannelCount(),
audioContent.getSampleFrameStride(),
audioContent.getSampleRate(),
static_cast<uint32_t>(remainingBlockSize / sampleBlockStride),
audioContent.getSampleBlockStride()));
static_cast<uint32_t>(remainingBlockSize / sampleFrameStride)));
}
}
return player.onAudioRead(
record, blockIndex_, ContentBlock(contentBlock, remainingBlockSize));
}
} else {
size_t expectedSize = sampleCount * audioContent.getSampleBlockStride();
size_t expectedSize = sampleCount * audioContent.getSampleFrameStride();
// if we have redundant size information, validate our values
if (remainingBlockSize == ContentBlock::kSizeUnknown || remainingBlockSize == expectedSize) {
return player.onAudioRead(record, blockIndex_, contentBlock);
Expand Down Expand Up @@ -250,21 +251,22 @@ bool AudioBlockReader::audioContentFromAudioSpec(
(audioSpec.channelCount.get(numChannels) && numChannels > 0) &&
(audioSpec.sampleRate.get(sampleRate) && sampleRate > 0)) {
// everything looks fine, check optional fields
uint8_t blockAlign = 0;
uint8_t sampleFrameStride = 0;
uint32_t sampleCount = 0;
uint32_t sampleSizeInBytes =
(AudioContentBlockSpec::getBitsPerSample(sampleFormat) >> 3) * numChannels;
// If blockAlign field is set, perform a sanity check based on the format. Assume that any
AudioContentBlockSpec::getBytesPerSample(sampleFormat) * numChannels;
// If sampleStride field is set, perform a sanity check based on the format. Assume that any
// meaningful alignment of a sample won't be longer than additional 2 bytes per channel e.g. if
// uint16_t samples is stored in uint32_t for some reason
if (audioSpec.sampleStride.get(blockAlign) &&
(blockAlign < sampleSizeInBytes || blockAlign > sampleSizeInBytes + numChannels * 2)) {
if (audioSpec.sampleStride.get(sampleFrameStride) &&
(sampleFrameStride < sampleSizeInBytes ||
sampleFrameStride > sampleSizeInBytes + numChannels * 2)) {
// has invalid block align
return false;
}
audioSpec.sampleCount.get(sampleCount);
audioContentBlock =
ContentBlock(sampleFormat, numChannels, sampleRate, sampleCount, blockAlign);
audioContentBlock = ContentBlock(
AudioFormat::PCM, sampleFormat, numChannels, sampleFrameStride, sampleRate, sampleCount);
return true;
}
return false;
Expand Down
82 changes: 48 additions & 34 deletions vrs/RecordFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#define DEFAULT_LOG_CHANNEL "RecordFormat"
#include <logging/Log.h>
#include <logging/Verify.h>

#include <vrs/DataLayout.h>
#include <vrs/helpers/EnumStringConverter.h>
Expand Down Expand Up @@ -665,19 +666,21 @@ AudioContentBlockSpec::AudioContentBlockSpec(AudioFormat audioFormat, uint8_t ch
}

AudioContentBlockSpec::AudioContentBlockSpec(
AudioFormat audioFormat,
AudioSampleFormat sampleFormat,
uint8_t channelCount,
uint32_t sampleRate,
uint32_t sampleCount,
uint8_t sampleBlockStride)
: audioFormat_{AudioFormat::PCM},
uint8_t sampleFrameStride,
uint32_t sampleFrameRate,
uint32_t sampleFrameCount)
: audioFormat_{audioFormat},
sampleFormat_{sampleFormat},
sampleBlockStride_{sampleBlockStride},
sampleFrameStride_{sampleFrameStride},
channelCount_{channelCount},
sampleRate_{sampleRate},
sampleCount_{sampleCount} {
assert(sampleFormat != AudioSampleFormat::UNDEFINED);
assert(sampleBlockStride_ == 0 || sampleBlockStride_ * 8 >= getBitsPerSample() * channelCount);
sampleFrameRate_{sampleFrameRate},
sampleFrameCount_{sampleFrameCount} {
XR_VERIFY(audioFormat != AudioFormat::UNDEFINED);
XR_VERIFY(sampleFormat != AudioSampleFormat::UNDEFINED);
XR_VERIFY(sampleFrameStride_ == 0 || sampleFrameStride_ * 8 >= getBitsPerSample() * channelCount);
}

AudioContentBlockSpec::AudioContentBlockSpec(const string& formatStr) {
Expand All @@ -689,9 +692,9 @@ void AudioContentBlockSpec::clear() {
audioFormat_ = AudioFormat::UNDEFINED;
sampleFormat_ = AudioSampleFormat::UNDEFINED;
channelCount_ = 0;
sampleBlockStride_ = 0;
sampleRate_ = 0;
sampleCount_ = 0;
sampleFrameStride_ = 0;
sampleFrameRate_ = 0;
sampleFrameCount_ = 0;
}

void AudioContentBlockSpec::set(ContentParser& parser) {
Expand Down Expand Up @@ -724,18 +727,18 @@ void AudioContentBlockSpec::set(ContentParser& parser) {
}
break;
case 'r':
if (sampleRate_ == 0 && sscanf(parser.str.c_str(), "rate=%u", &tmp) == 1) {
sampleRate_ = static_cast<uint32_t>(tmp);
if (sampleFrameRate_ == 0 && sscanf(parser.str.c_str(), "rate=%u", &tmp) == 1) {
sampleFrameRate_ = static_cast<uint32_t>(tmp);
continue;
}
break;
case 's':
if (sampleCount_ == 0 && sscanf(parser.str.c_str(), "samples=%u", &tmp) == 1) {
sampleCount_ = static_cast<uint32_t>(tmp);
if (sampleFrameCount_ == 0 && sscanf(parser.str.c_str(), "samples=%u", &tmp) == 1) {
sampleFrameCount_ = static_cast<uint32_t>(tmp);
continue;
} else if (
sampleBlockStride_ == 0 && sscanf(parser.str.c_str(), "stride=%u", &tmp) == 1) {
sampleBlockStride_ = static_cast<uint8_t>(tmp);
sampleFrameStride_ == 0 && sscanf(parser.str.c_str(), "stride=%u", &tmp) == 1) {
sampleFrameStride_ = static_cast<uint8_t>(tmp);
continue;
}
break;
Expand All @@ -758,21 +761,21 @@ string AudioContentBlockSpec::asString() const {
if (channelCount_ != 0) {
s.append("/channels=").append(to_string(channelCount_));
}
if (sampleRate_ != 0) {
s.append("/rate=").append(to_string(sampleRate_));
if (sampleFrameRate_ != 0) {
s.append("/rate=").append(to_string(sampleFrameRate_));
}
if (sampleCount_ != 0) {
s.append("/samples=").append(to_string(sampleCount_));
if (sampleFrameCount_ != 0) {
s.append("/samples=").append(to_string(sampleFrameCount_));
}
if (getSampleBlockStride() * 8 != getBitsPerSample() * channelCount_) {
s.append("/stride=").append(to_string(sampleBlockStride_));
if (getSampleFrameStride() * 8 != getBitsPerSample() * channelCount_) {
s.append("/stride=").append(to_string(sampleFrameStride_));
}
return s;
}

size_t AudioContentBlockSpec::getBlockSize() const {
if (sampleFormat_ != AudioSampleFormat::UNDEFINED && channelCount_ > 0 && sampleCount_ > 0) {
return getSampleBlockStride() * sampleCount_;
if (sampleFormat_ != AudioSampleFormat::UNDEFINED && channelCount_ > 0 && sampleFrameCount_ > 0) {
return getSampleFrameStride() * sampleFrameCount_;
}
return ContentBlock::kSizeUnknown;
}
Expand Down Expand Up @@ -860,9 +863,13 @@ uint8_t AudioContentBlockSpec::getBitsPerSample(AudioSampleFormat sampleFormat)
return 0; // required by some compilers
}

uint8_t AudioContentBlockSpec::getSampleBlockStride() const {
if (sampleBlockStride_ != 0) {
return sampleBlockStride_;
uint8_t AudioContentBlockSpec::getBytesPerSample(AudioSampleFormat sampleFormat) {
return (getBitsPerSample(sampleFormat) + 7) >> 3;
}

uint8_t AudioContentBlockSpec::getSampleFrameStride() const {
if (sampleFrameStride_ != 0) {
return sampleFrameStride_;
}
return getBytesPerSample() * channelCount_;
}
Expand Down Expand Up @@ -904,13 +911,20 @@ ContentBlock::ContentBlock(AudioFormat audioFormat, uint8_t channelCount)
: contentType_{ContentType::AUDIO}, audioSpec_{audioFormat, channelCount} {}

ContentBlock::ContentBlock(
AudioFormat audioFormat,
AudioSampleFormat sampleFormat,
uint8_t numChannels,
uint8_t sampleFrameStride,
uint32_t sampleRate,
uint32_t sampleCount,
uint8_t sampleBlockStride)
uint32_t sampleCount)
: contentType_(ContentType::AUDIO),
audioSpec_(sampleFormat, numChannels, sampleRate, sampleCount, sampleBlockStride) {}
audioSpec_(
audioFormat,
sampleFormat,
numChannels,
sampleFrameStride,
sampleRate,
sampleCount) {}

ContentBlock::ContentBlock(ContentType type, size_t size) : contentType_(type), size_(size) {
switch (contentType_) {
Expand Down Expand Up @@ -1001,12 +1015,12 @@ RecordFormat ContentBlock::operator+(const ContentBlock& other) const {
}

const ImageContentBlockSpec& ContentBlock::image() const {
assert(contentType_ == ContentType::IMAGE);
XR_VERIFY(contentType_ == ContentType::IMAGE);
return imageSpec_;
}

const AudioContentBlockSpec& ContentBlock::audio() const {
assert(contentType_ == ContentType::AUDIO);
XR_VERIFY(contentType_ == ContentType::AUDIO);
return audioSpec_;
}

Expand Down
47 changes: 26 additions & 21 deletions vrs/RecordFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,14 +342,13 @@ class AudioContentBlockSpec {
AudioContentBlockSpec(const AudioContentBlockSpec&) = default;
/// For audio formats with encoding (mp3, flac, etc).
explicit AudioContentBlockSpec(AudioFormat audioFormat, uint8_t channelCount = 0);
/// For PCM audio formats.
// NOLINTNEXTLINE(hicpp-explicit-conversions)
AudioContentBlockSpec(
AudioFormat audioFormat,
AudioSampleFormat sampleFormat,
uint8_t channelCount = 0,
uint32_t sampleRate = 0,
uint32_t sampleCount = 0,
uint8_t sampleBlockStride = 0);
uint8_t sampleFrameStride = 0,
uint32_t sampleFrameRate = 0,
uint32_t sampleFrameCount = 0);

/// Constructor used for factory construction.
/// @internal
Expand All @@ -375,7 +374,11 @@ class AudioContentBlockSpec {
bool operator==(const AudioContentBlockSpec& rhs) const {
auto tie = [](const AudioContentBlockSpec& s) {
return std::tie(
s.sampleFormat_, s.channelCount_, s.sampleBlockStride_, s.sampleCount_, s.sampleRate_);
s.sampleFormat_,
s.channelCount_,
s.sampleFrameStride_,
s.sampleFrameCount_,
s.sampleFrameRate_);
};
return tie(*this) == tie(rhs);
}
Expand All @@ -385,7 +388,7 @@ class AudioContentBlockSpec {
/// Tell if two audio block have identical audio formats.
bool isCompatibleWith(const AudioContentBlockSpec& rhs) const {
auto tie = [](const AudioContentBlockSpec& s) {
return std::tie(s.sampleFormat_, s.channelCount_, s.sampleRate_);
return std::tie(s.sampleFormat_, s.channelCount_, s.sampleFrameRate_);
};
return tie(*this) == tie(rhs);
}
Expand Down Expand Up @@ -415,19 +418,19 @@ class AudioContentBlockSpec {
uint8_t getBytesPerSample() const {
return (getBitsPerSample(sampleFormat_) + 7) / 8; // round up
}
/// Number of bytes used by group of synchronous audio samples, including padding
uint8_t getSampleBlockStride() const;
/// Get the number of audio channels in a sample block (not per content block).
/// Number of bytes used by a group of synchronous audio samples, including padding
uint8_t getSampleFrameStride() const;
/// Get the number of audio channels in each sample frame (not in the content block).
uint8_t getChannelCount() const {
return channelCount_;
}
/// Get the audio sample rate.
/// Get the audio frame sample rate.
uint32_t getSampleRate() const {
return sampleRate_;
return sampleFrameRate_;
}
/// Get the number of audio sample blocks in the block.
/// Get the number of audio sample frames in the content block.
uint32_t getSampleCount() const {
return sampleCount_;
return sampleFrameCount_;
}
/// Tell if the audio sample format is fully defined.
/// For instance, PCM audio data when we have enough details: sample format & channel count.
Expand All @@ -448,6 +451,8 @@ class AudioContentBlockSpec {
static bool isIEEEFloat(AudioSampleFormat sampleFormat);
/// Get the number of bits per audio sample for a specific audio sample format.
static uint8_t getBitsPerSample(AudioSampleFormat sampleFormat);
/// Get the number of bytes per audio sample for a specific audio sample format.
static uint8_t getBytesPerSample(AudioSampleFormat sampleFormat);
/// Get an audio sample format as a string.
static string getSampleFormatAsString(AudioSampleFormat sampleFormat) {
return toString(sampleFormat);
Expand All @@ -456,10 +461,10 @@ class AudioContentBlockSpec {
private:
AudioFormat audioFormat_{};
AudioSampleFormat sampleFormat_{};
uint8_t sampleBlockStride_{};
uint8_t sampleFrameStride_{};
uint8_t channelCount_{};
uint32_t sampleRate_{};
uint32_t sampleCount_{};
uint32_t sampleFrameRate_{};
uint32_t sampleFrameCount_{};
};

/// \brief Specification of a VRS record content block.
Expand Down Expand Up @@ -519,14 +524,14 @@ class ContentBlock {
// NOLINTNEXTLINE(hicpp-explicit-conversions)
ContentBlock(AudioFormat audioFormat, uint8_t channelCount = 0);

/// PCM audio block description.
// NOLINTNEXTLINE(hicpp-explicit-conversions)
/// Audio block description.
ContentBlock(
AudioFormat audioFormat,
AudioSampleFormat sampleFormat,
uint8_t numChannels = 0,
uint8_t sampleFrameStride = 0,
uint32_t sampleRate = 0,
uint32_t sampleCount = 0,
uint8_t sampleBlockStride = 0);
uint32_t sampleCount = 0);

/// Default copy constructor
ContentBlock(const ContentBlock&) = default;
Expand Down
Loading

0 comments on commit c9c8403

Please sign in to comment.