Files
ladybird/Libraries/LibMedia/FFmpeg/FFmpegAudioDecoder.cpp
Jonathan Gamble 0dea87110e LibMedia: Fix multi-channel decode for dumb containers (like WAV)
For web audio, I reckon an occasional misjudged channel layout is
better than more frequent exceptions.

Signed PCM is normalized with unsigned max divided by 2, not
signed max. If you divide by the signed max (32767), you get headroom
that can exceed the threshold below -1.0. It's not audible, this mostly
matters for tests that assume correct normalization. But it turns out
there's no shortage of "golden ears" jackholes out there who swear they
can hear the difference.
2026-02-13 17:57:19 -06:00

257 lines
11 KiB
C++

/*
* Copyright (c) 2025, Gregory Bertilson <gregory@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibCore/System.h>
#include <LibMedia/AudioBlock.h>
#include <LibMedia/FFmpeg/FFmpegHelpers.h>
#include "FFmpegAudioDecoder.h"
namespace Media::FFmpeg {
// Creates an FFmpeg-backed audio decoder for `codec_id`.
//
// - `sample_specification` supplies the sample rate and, when its channel map is valid,
//   the channel layout handed to the codec context before it is opened.
// - `codec_initialization_data`, when non-empty, is copied into the codec context's extradata.
//
// Returns the decoder on success, or a DecoderError describing which step failed.
DecoderErrorOr<NonnullOwnPtr<FFmpegAudioDecoder>> FFmpegAudioDecoder::try_create(CodecID codec_id, Audio::SampleSpecification const& sample_specification, ReadonlyBytes codec_initialization_data)
{
    AVCodecContext* codec_context = nullptr;
    AVPacket* packet = nullptr;
    AVFrame* frame = nullptr;

    // Frees whatever has been allocated so far on every early return. It is only disarmed
    // once a decoder object has actually been constructed around these pointers.
    ArmedScopeGuard memory_guard {
        [&] {
            avcodec_free_context(&codec_context);
            av_packet_free(&packet);
            av_frame_free(&frame);
        }
    };

    auto ff_codec_id = ffmpeg_codec_id_from_media_codec_id(codec_id);
    auto const* codec = avcodec_find_decoder(ff_codec_id);
    if (!codec)
        return DecoderError::format(DecoderErrorCategory::NotImplemented, "Could not find FFmpeg decoder for codec {}", codec_id);

    codec_context = avcodec_alloc_context3(codec);
    if (!codec_context)
        return DecoderError::format(DecoderErrorCategory::Memory, "Failed to allocate FFmpeg codec context for codec {}", codec_id);

    // Packet timestamps are passed in microseconds (see receive_coded_data()).
    codec_context->time_base = { 1, 1'000'000 };
    codec_context->thread_count = static_cast<int>(min(Core::System::hardware_concurrency(), 4));

    if (sample_specification.sample_rate() > NumericLimits<int>::max())
        return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Sample rate is too large"sv);
    codec_context->sample_rate = static_cast<int>(sample_specification.sample_rate());

    if (sample_specification.channel_map().is_valid()) {
        auto channel_layout_result = channel_map_to_av_channel_layout(sample_specification.channel_map());
        // The error text is a message, not a format string, so pass it through verbatim
        // instead of letting the formatter interpret it.
        if (channel_layout_result.is_error())
            return DecoderError::with_description(DecoderErrorCategory::Invalid, channel_layout_result.error().string_literal());
        codec_context->ch_layout = channel_layout_result.release_value();
    }

    if (!codec_initialization_data.is_empty()) {
        if (codec_initialization_data.size() > NumericLimits<int>::max())
            return DecoderError::corrupted("Codec initialization data is too large"sv);

        // FFmpeg requires AV_INPUT_BUFFER_PADDING_SIZE extra bytes after the extradata, and
        // bitstream readers may read into them. av_mallocz() zero-fills the buffer so the
        // padding never contains uninitialized memory.
        codec_context->extradata = static_cast<u8*>(av_mallocz(codec_initialization_data.size() + AV_INPUT_BUFFER_PADDING_SIZE));
        if (!codec_context->extradata)
            return DecoderError::with_description(DecoderErrorCategory::Memory, "Failed to allocate codec initialization data buffer for FFmpeg codec"sv);
        memcpy(codec_context->extradata, codec_initialization_data.data(), codec_initialization_data.size());
        codec_context->extradata_size = static_cast<int>(codec_initialization_data.size());
    }

    if (avcodec_open2(codec_context, codec, nullptr) < 0)
        return DecoderError::format(DecoderErrorCategory::Unknown, "Unknown error occurred when opening FFmpeg codec {}", codec_id);

    packet = av_packet_alloc();
    if (!packet)
        return DecoderError::with_description(DecoderErrorCategory::Memory, "Failed to allocate FFmpeg packet"sv);

    frame = av_frame_alloc();
    if (!frame)
        return DecoderError::with_description(DecoderErrorCategory::Memory, "Failed to allocate FFmpeg frame"sv);

    // Construct the decoder first: if this allocation fails, the guard is still armed and
    // releases the FFmpeg objects instead of leaking them.
    auto decoder = DECODER_TRY_ALLOC(try_make<FFmpegAudioDecoder>(codec_context, packet, frame));
    memory_guard.disarm();
    return decoder;
}
// Stores the already-allocated FFmpeg codec context, packet and frame in the decoder.
// The destructor frees all three, so the decoder takes over their lifetime from the caller.
FFmpegAudioDecoder::FFmpegAudioDecoder(AVCodecContext* codec_context, AVPacket* packet, AVFrame* frame)
: m_codec_context(codec_context)
, m_packet(packet)
, m_frame(frame)
{
}
// Frees the FFmpeg packet, frame and codec context held by this decoder.
FFmpegAudioDecoder::~FFmpegAudioDecoder()
{
av_packet_free(&m_packet);
av_frame_free(&m_frame);
avcodec_free_context(&m_codec_context);
}
// Submits one chunk of coded audio to the FFmpeg decoder.
//
// - `timestamp` is written to the packet as both pts and dts, in microseconds.
// - `coded_data` is referenced by the packet for the duration of the avcodec_send_packet() call.
//
// Returns an empty value on success; otherwise the FFmpeg status is translated into a
// DecoderError category (NeedsMoreInput, EndOfStream, Invalid, Memory or Corrupted).
DecoderErrorOr<void> FFmpegAudioDecoder::receive_coded_data(AK::Duration timestamp, ReadonlyBytes coded_data)
{
    // AVPacket::size is an int. Report input that cannot be represented as an error rather
    // than aborting the process, matching how try_create() treats oversized extradata.
    if (coded_data.size() >= NumericLimits<int>::max())
        return DecoderError::corrupted("Coded data is too large"sv);

    // FFmpeg's packet API takes a mutable pointer even though the data is only read here.
    m_packet->data = const_cast<u8*>(coded_data.data());
    m_packet->size = static_cast<int>(coded_data.size());
    m_packet->pts = timestamp.to_microseconds();
    m_packet->dts = m_packet->pts;

    auto result = avcodec_send_packet(m_codec_context, m_packet);
    switch (result) {
    case 0:
        return {};
    case AVERROR(EAGAIN):
        return DecoderError::with_description(DecoderErrorCategory::NeedsMoreInput, "FFmpeg decoder cannot decode any more data until frames have been retrieved"sv);
    case AVERROR_EOF:
        return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "FFmpeg decoder has been flushed"sv);
    case AVERROR(EINVAL):
        return DecoderError::with_description(DecoderErrorCategory::Invalid, "FFmpeg codec has not been opened"sv);
    case AVERROR(ENOMEM):
        return DecoderError::with_description(DecoderErrorCategory::Memory, "FFmpeg codec ran out of internal memory"sv);
    default:
        // Every other status is treated as a problem with the input data itself.
        return DecoderError::with_description(DecoderErrorCategory::Corrupted, "FFmpeg codec reports that the data is corrupted"sv);
    }
}
// Tells the decoder that no further input will arrive by sending it a packet with no
// data. The send is required to either succeed or report that the end of stream was
// already signalled; anything else is treated as a programming error.
void FFmpegAudioDecoder::signal_end_of_stream()
{
    // Reset the shared packet to an empty one: zero timestamps, no payload.
    m_packet->pts = 0;
    m_packet->dts = 0;
    m_packet->size = 0;
    m_packet->data = nullptr;

    auto const send_result = avcodec_send_packet(m_codec_context, m_packet);
    VERIFY(send_result == 0 || send_result == AVERROR_EOF);
}
// Reads the sample at `index` within plane `plane` of a frame's data planes, interpreting
// it as a value of type T, and returns it as a float.
// This is only the declaration; the definitions that follow cover u8, signed and
// floating-point sample types.
template<typename T>
static float float_sample_from_frame_data(u8** data, size_t plane, size_t index);
// Unsigned 8-bit samples: re-center the value around zero by subtracting 128, then scale
// by 128 to produce a float.
template<>
float float_sample_from_frame_data<u8>(u8** data, size_t plane, size_t index)
{
    // The subtraction happens in int after integer promotion, so it cannot wrap.
    int const centered_sample = data[plane][index] - 128;
    return static_cast<float>(centered_sample) / 128;
}
// Signed integer samples: reinterprets the plane as an array of T and normalizes the
// sample at `index` so that the most negative value of T maps to exactly -1.0.
template<typename T>
requires(IsSigned<T>)
static float float_sample_from_frame_data(u8** data, size_t plane, size_t index)
{
    auto* pointer = reinterpret_cast<T*>(data[plane]);
    // Full scale is 2^(bits - 1), the magnitude of the most negative sample. Halving the
    // unsigned maximum alone is integer division and produces the signed maximum
    // (32767 for 16-bit samples), which would push the most negative sample below -1.0.
    // Adding one in the unsigned domain gives the exact power of two for every width.
    constexpr auto full_scale = static_cast<float>(NumericLimits<MakeUnsigned<T>>::max() / 2 + 1);
    return static_cast<float>(pointer[index]) / full_scale;
}
// Floating-point samples: reinterprets the plane as an array of T and returns the sample
// at `index` converted to float, with no rescaling.
template<typename T>
requires(IsFloatingPoint<T>)
static float float_sample_from_frame_data(u8** data, size_t plane, size_t index)
{
    auto const* samples = reinterpret_cast<T const*>(data[plane]);
    return static_cast<float>(samples[index]);
}
// Retrieves the next decoded frame from FFmpeg and writes it into `block` as interleaved
// float samples, together with the frame's sample rate, channel map and timestamp.
//
// Returns an empty value when a block was written. Otherwise returns a DecoderError:
// NeedsMoreInput when no frame is available yet, EndOfStream once the decoder is drained,
// Invalid/Unknown for other FFmpeg failures, Corrupted for a non-positive sample rate and
// NotImplemented when the frame's channel layout cannot be converted.
DecoderErrorOr<void> FFmpegAudioDecoder::write_next_block(AudioBlock& block)
{
    auto const receive_result = avcodec_receive_frame(m_codec_context, m_frame);

    // Translate every non-success status up front so the rest deals only with a valid frame.
    if (receive_result == AVERROR(EAGAIN))
        return DecoderError::with_description(DecoderErrorCategory::NeedsMoreInput, "FFmpeg decoder has no frames available, send more input"sv);
    if (receive_result == AVERROR_EOF)
        return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "FFmpeg decoder has been flushed"sv);
    if (receive_result == AVERROR(EINVAL))
        return DecoderError::with_description(DecoderErrorCategory::Invalid, "FFmpeg codec has not been opened"sv);
    if (receive_result != 0)
        return DecoderError::format(DecoderErrorCategory::Unknown, "FFmpeg codec encountered an unexpected error retrieving frames with code {:x}", receive_result);

    if (m_frame->sample_rate <= 0)
        return DecoderError::corrupted("FFmpeg decoder created a packet with an invalid sample rate"sv);

    auto timestamp = AK::Duration::from_microseconds(m_frame->pts);

    auto channel_map_result = av_channel_layout_to_channel_map(m_frame->ch_layout);
    if (channel_map_result.is_error())
        return DecoderError::with_description(DecoderErrorCategory::NotImplemented, channel_map_result.error().string_literal());
    auto channel_map = channel_map_result.release_value();

    auto sample_specification = Audio::SampleSpecification(m_frame->sample_rate, channel_map);

    block.emplace(sample_specification, timestamp, [&](AudioBlock::Data& output) {
        auto const frame_format = static_cast<AVSampleFormat>(m_frame->format);
        // Work in terms of the planar variant of the format so packed and planar frames
        // share one set of cases below.
        auto const planar_format = av_get_planar_sample_fmt(frame_format);
        bool const is_planar = av_sample_fmt_is_planar(frame_format) != 0;

        VERIFY(m_frame->nb_samples >= 0);
        auto const sample_count = static_cast<size_t>(m_frame->nb_samples);
        auto const channel_count = static_cast<size_t>(m_frame->ch_layout.nb_channels);
        auto const total_samples = sample_count * channel_count;
        output = MUST(AudioBlock::Data::create(total_samples));

        auto const bytes_per_sample = [&] {
            switch (planar_format) {
            case AV_SAMPLE_FMT_U8P:
                return sizeof(u8);
            case AV_SAMPLE_FMT_S16P:
                return sizeof(i16);
            case AV_SAMPLE_FMT_S32P:
                return sizeof(i32);
            case AV_SAMPLE_FMT_FLTP:
                return sizeof(float);
            case AV_SAMPLE_FMT_DBLP:
                return sizeof(double);
            case AV_SAMPLE_FMT_S64P:
                return sizeof(i64);
            default:
                VERIFY_NOT_REACHED();
            }
        }();

        // Check that the first plane is large enough for what is about to be read: one
        // channel's worth of samples when planar, all channels when interleaved.
        VERIFY(m_frame->linesize[0] > 0);
        auto const first_plane_size = static_cast<size_t>(m_frame->linesize[0]);
        if (is_planar)
            VERIFY(first_plane_size >= sample_count * bytes_per_sample);
        else
            VERIFY(first_plane_size >= total_samples * bytes_per_sample);

        for (size_t output_index = 0; output_index < total_samples; ++output_index) {
            // The output is interleaved. For planar input, the channel selects the plane and
            // the frame position selects the index; for packed input, everything is in plane 0.
            auto const plane = is_planar ? output_index % channel_count : size_t { 0 };
            auto const index_in_plane = is_planar ? output_index / channel_count : output_index;

            switch (planar_format) {
            case AV_SAMPLE_FMT_U8P:
                output[output_index] = float_sample_from_frame_data<u8>(m_frame->extended_data, plane, index_in_plane);
                break;
            case AV_SAMPLE_FMT_S16P:
                output[output_index] = float_sample_from_frame_data<i16>(m_frame->extended_data, plane, index_in_plane);
                break;
            case AV_SAMPLE_FMT_S32P:
                output[output_index] = float_sample_from_frame_data<i32>(m_frame->extended_data, plane, index_in_plane);
                break;
            case AV_SAMPLE_FMT_FLTP:
                output[output_index] = float_sample_from_frame_data<float>(m_frame->extended_data, plane, index_in_plane);
                break;
            case AV_SAMPLE_FMT_DBLP:
                output[output_index] = float_sample_from_frame_data<double>(m_frame->extended_data, plane, index_in_plane);
                break;
            case AV_SAMPLE_FMT_S64P:
                output[output_index] = float_sample_from_frame_data<i64>(m_frame->extended_data, plane, index_in_plane);
                break;
            default:
                VERIFY_NOT_REACHED();
            }
        }
    });

    return {};
}
// Resets the FFmpeg codec's internal buffers via avcodec_flush_buffers().
// NOTE(review): presumably called when playback position changes (e.g. seeking) — confirm against callers.
void FFmpegAudioDecoder::flush()
{
avcodec_flush_buffers(m_codec_context);
}
}