LibMedia: Implement audio conversion in AudioDataProvider

This commit is contained in:
Zaggy1024
2025-12-10 19:22:05 -06:00
committed by Alexander Kalenik
parent c187315a20
commit 65c0be66e4
Notes: github-actions[bot] 2025-12-13 07:59:19 +00:00
14 changed files with 404 additions and 7 deletions

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2025, Gregory Bertilson <gregory@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Span.h>
#include <LibMedia/Audio/SampleSpecification.h>
#include <LibMedia/AudioBlock.h>
namespace Audio {
// Abstract interface for objects that convert audio blocks to a requested output sample
// specification. Concrete converters are owned and destroyed through this base type, hence the
// virtual destructor.
class AudioConverter {
public:
// Selects the sample specification that subsequently converted blocks should be produced in.
// Returns an error if the implementation cannot be configured for it.
virtual ErrorOr<void> set_output_sample_specification(SampleSpecification) = 0;
// Converts `input`. The block is taken by mutable reference, so the result is delivered by
// rewriting the block itself rather than through a return value.
virtual ErrorOr<void> convert(Media::AudioBlock& input) = 0;
virtual ~AudioConverter() = default;
};
}

View File

@@ -8,6 +8,7 @@
namespace Audio {
class AudioConverter;
class Loader;
class PlaybackStream;
struct Sample;

View File

@@ -25,6 +25,7 @@ target_link_libraries(LibMedia PRIVATE LibCore LibCrypto LibIPC LibGfx LibThread
target_sources(LibMedia PRIVATE
Audio/FFmpegLoader.cpp
FFmpeg/FFmpegAudioConverter.cpp
FFmpeg/FFmpegAudioDecoder.cpp
FFmpeg/FFmpegDemuxer.cpp
FFmpeg/FFmpegHelpers.cpp
@@ -33,7 +34,7 @@ target_sources(LibMedia PRIVATE
)
if (NOT ANDROID)
target_link_libraries(LibMedia PRIVATE PkgConfig::AVCODEC PkgConfig::AVFORMAT PkgConfig::AVUTIL)
target_link_libraries(LibMedia PRIVATE PkgConfig::AVCODEC PkgConfig::AVFORMAT PkgConfig::AVUTIL PkgConfig::LIBSWRESAMPLE)
else()
target_include_directories(LibMedia PRIVATE ${FFMPEG_INCLUDE_DIRS})
target_link_directories(LibMedia PRIVATE ${FFMPEG_LIBRARY_DIRS})

View File

@@ -0,0 +1,154 @@
/*
* Copyright (c) 2025, Gregory Bertilson <gregory@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/NonnullOwnPtr.h>
#include <AK/ScopeGuard.h>
#include <LibMedia/Audio/SampleSpecification.h>
#include <LibMedia/FFmpeg/FFmpegAudioConverter.h>
#include <LibMedia/FFmpeg/FFmpegHelpers.h>
extern "C" {
#include <libswresample/swresample.h>
}
namespace Media::FFmpeg {
// Nothing to set up eagerly: the resampling context and output buffer are created on demand by
// set_sample_specifications() and convert().
FFmpegAudioConverter::FFmpegAudioConverter() = default;
// Heap-allocates a converter without throwing; a failed allocation is surfaced as an error
// through adopt_nonnull_own_or_enomem().
ErrorOr<NonnullOwnPtr<FFmpegAudioConverter>> FFmpegAudioConverter::try_create()
{
    auto* raw_converter = new (nothrow) FFmpegAudioConverter();
    return adopt_nonnull_own_or_enomem(raw_converter);
}
// Replaces the output side of the conversion while keeping whatever input specification is
// currently stored.
ErrorOr<void> FFmpegAudioConverter::set_output_sample_specification(Audio::SampleSpecification specification)
{
    auto const current_input = m_input_sample_specification;
    return set_sample_specifications(current_input, specification);
}
// Replaces the input side of the conversion while keeping whatever output specification is
// currently stored.
ErrorOr<void> FFmpegAudioConverter::set_input_sample_specification(Audio::SampleSpecification specification)
{
    auto const current_output = m_output_sample_specification;
    return set_sample_specifications(specification, current_output);
}
// Stores the given input/output sample specifications and (re)builds the libswresample context
// needed to convert between them.
//
// - If neither specification changed, the existing state is kept untouched.
// - If either specification is invalid, or both are equal, no conversion is needed: the context is
//   freed and convert() becomes a pass-through.
// - Otherwise a context converting float samples between the two channel layouts and sample rates
//   is allocated and initialized.
//
// Returns an error if a sample rate does not fit in an int, a channel map cannot be expressed as
// an AVChannelLayout, or libswresample fails to allocate or initialize the context. On any error
// the context is freed.
ErrorOr<void> FFmpegAudioConverter::set_sample_specifications(Audio::SampleSpecification input, Audio::SampleSpecification output)
{
    if (m_input_sample_specification == input && m_output_sample_specification == output)
        return {};

    // Every exit below except full success tears the context down, leaving m_context null.
    ArmedScopeGuard free_context = { [&] {
        swr_free(&m_context);
        VERIFY(m_context == nullptr);
    } };

    m_input_sample_specification = input;
    m_output_sample_specification = output;

    if (!m_input_sample_specification.is_valid() || !m_output_sample_specification.is_valid())
        return {};
    if (m_input_sample_specification == m_output_sample_specification)
        return {};

    // libswresample takes sample rates as int, so reject anything that would be truncated.
    if (input.sample_rate() > NumericLimits<int>::max())
        return Error::from_string_literal("Input sample rate is too high");
    if (output.sample_rate() > NumericLimits<int>::max())
        return Error::from_string_literal("Output sample rate is too high");

    // NOTE(review): if channel_map_to_av_channel_layout() can return layouts that own heap data,
    // they would need av_channel_layout_uninit() after use - confirm against the helper.
    auto input_channel_layout = TRY(channel_map_to_av_channel_layout(input.channel_map()));
    auto input_sample_rate = static_cast<int>(input.sample_rate());
    auto output_channel_layout = TRY(channel_map_to_av_channel_layout(output.channel_map()));
    auto output_sample_rate = static_cast<int>(output.sample_rate());

    auto allocation_result = swr_alloc_set_opts2(&m_context,
        &output_channel_layout, AVSampleFormat::AV_SAMPLE_FMT_FLT, output_sample_rate,
        &input_channel_layout, AVSampleFormat::AV_SAMPLE_FMT_FLT, input_sample_rate,
        0, nullptr);
    if (allocation_result < 0)
        return Error::from_string_view(av_error_code_to_string(allocation_result));

    auto init_result = swr_init(m_context);
    if (init_result < 0) {
        // Report the code from swr_init() itself; allocation_result is non-negative here and would
        // yield a misleading message.
        return Error::from_string_view(av_error_code_to_string(init_result));
    }

    free_context.disarm();
    return {};
}
// Releases the conversion output buffer, if there is one, and resets its recorded capacity.
void FFmpegAudioConverter::free_output_buffer()
{
    if (m_output_buffer != nullptr) {
        // av_freep() frees the allocation and nulls the pointer it is given the address of.
        av_freep(static_cast<void*>(&m_output_buffer));
        VERIFY(m_output_buffer == nullptr);
        m_output_buffer_sample_count = 0;
        return;
    }

    // Without a buffer the recorded capacity must already be zero.
    VERIFY(m_output_buffer_sample_count == 0);
}
// Computes an upper bound on the number of output samples (per channel) that converting
// `input_size` interleaved input values can produce, including samples libswresample reports as
// delayed. Fails if any intermediate or final count does not fit in an int.
ErrorOr<int> FFmpegAudioConverter::get_maximum_output_samples(size_t input_size) const
{
    // Interleaved value count -> per-channel sample count.
    Checked<size_t> pending_input_samples = input_size;
    pending_input_samples /= m_input_sample_specification.channel_count();

    // Delay is requested in units of the input sample rate, i.e. in input samples.
    auto buffered_input_samples = swr_get_delay(m_context, m_input_sample_specification.sample_rate());
    VERIFY(buffered_input_samples >= 0);
    pending_input_samples += buffered_input_samples;

    if (pending_input_samples.has_overflow() || pending_input_samples.value_unchecked() > NumericLimits<int>::max())
        return Error::from_string_literal("Input is too large");

    // Scale from the input rate to the output rate, rounding up so the bound is never too small.
    auto output_samples = av_rescale_rnd(
        static_cast<i64>(pending_input_samples.value_unchecked()),
        m_output_sample_specification.sample_rate(),
        m_input_sample_specification.sample_rate(),
        AV_ROUND_UP);
    VERIFY(output_samples > 0);
    if (output_samples > NumericLimits<int>::max())
        return Error::from_string_literal("Input is too large");

    return static_cast<int>(output_samples);
}
// Converts `input` in place to the configured output sample specification.
//
// The block's own sample specification is adopted as the input side first. If that leaves no
// resampling context (no conversion required or possible), the block is returned untouched.
// Otherwise the samples are run through libswresample and the block is re-emplaced with the
// converted data, keeping its timestamp.
ErrorOr<void> FFmpegAudioConverter::convert(AudioBlock& input)
{
    TRY(set_input_sample_specification(input.sample_specification()));
    if (m_context == nullptr)
        return {};

    VERIFY(m_input_sample_specification.is_valid());
    VERIFY(m_output_sample_specification.is_valid());

    auto source_values = input.data().span();
    auto channels_out = m_output_sample_specification.channel_count();
    auto required_capacity = TRY(get_maximum_output_samples(source_values.size()));

    // Grow the reusable output buffer only when the current one is too small.
    if (required_capacity > m_output_buffer_sample_count) {
        free_output_buffer();
        auto allocation_status = av_samples_alloc(&m_output_buffer, nullptr, channels_out, required_capacity, AVSampleFormat::AV_SAMPLE_FMT_FLT, 0);
        if (allocation_status < 0)
            return Error::from_string_view(av_error_code_to_string(allocation_status));
        VERIFY(m_output_buffer != nullptr);
        m_output_buffer_sample_count = required_capacity;
    }

    auto const* source_bytes = source_values.reinterpret<u8 const>().data();
    // The input buffer size should already be safe to cast to int here.
    auto samples_in = static_cast<int>(source_values.size() / m_input_sample_specification.channel_count());
    VERIFY(samples_in >= 0);

    auto conversion_status = swr_convert(m_context, &m_output_buffer, m_output_buffer_sample_count, &source_bytes, samples_in);
    if (conversion_status < 0)
        return Error::from_string_view(av_error_code_to_string(conversion_status));
    VERIFY(conversion_status <= m_output_buffer_sample_count);

    auto samples_written = static_cast<size_t>(conversion_status);

    // Replace the block's contents with the interleaved float output.
    input.emplace(m_output_sample_specification, input.timestamp(), [&](FixedArray<float>& data) {
        data = MUST(AudioBlock::Data::create(samples_written * channels_out));
        auto const* converted_values = reinterpret_cast<float*>(m_output_buffer);
        AK::TypedTransfer<float>::copy(data.data(), converted_values, data.size());
    });
    return {};
}
// Releases everything the converter owns: the conversion output buffer and the resampling context.
FFmpegAudioConverter::~FFmpegAudioConverter()
{
    // convert() allocates m_output_buffer with av_samples_alloc(); nothing else frees it once the
    // converter goes away, so it has to be released here to avoid leaking it.
    free_output_buffer();
    // swr_free() is given the address of the pointer and is also used on a possibly-null context
    // elsewhere in this class.
    swr_free(&m_context);
}
}

View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2025, Gregory Bertilson <gregory@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibMedia/Audio/AudioConverter.h>
#include <LibMedia/Audio/SampleSpecification.h>
#include <LibMedia/Export.h>
#include <LibMedia/FFmpeg/FFmpegForward.h>
namespace Media::FFmpeg {
// Audio::AudioConverter implementation backed by a libswresample SwrContext.
// Instances hold raw FFmpeg resources, so they are neither copyable nor movable and are created
// only through try_create().
class MEDIA_API FFmpegAudioConverter final : public Audio::AudioConverter {
AK_MAKE_NONCOPYABLE(FFmpegAudioConverter);
AK_MAKE_NONMOVABLE(FFmpegAudioConverter);
public:
// Allocates a converter on the heap; allocation failure is reported as an error.
static ErrorOr<NonnullOwnPtr<FFmpegAudioConverter>> try_create();
// Sets the specification converted blocks should have, keeping the current input specification.
virtual ErrorOr<void> set_output_sample_specification(Audio::SampleSpecification) override;
// Converts `input` in place; the block's own specification is taken as the input specification.
virtual ErrorOr<void> convert(AudioBlock& input) override;
virtual ~FFmpegAudioConverter() override;
private:
FFmpegAudioConverter();
// Sets the input specification, keeping the current output specification.
ErrorOr<void> set_input_sample_specification(Audio::SampleSpecification);
// Stores both specifications and rebuilds or frees m_context accordingly.
ErrorOr<void> set_sample_specifications(Audio::SampleSpecification input, Audio::SampleSpecification output);
// Frees m_output_buffer (if any) and resets m_output_buffer_sample_count to 0.
void free_output_buffer();
// Upper bound on per-channel output samples for `input_size` interleaved input values.
ErrorOr<int> get_maximum_output_samples(size_t input_size) const;
Audio::SampleSpecification m_input_sample_specification;
Audio::SampleSpecification m_output_sample_specification;
// Null while no conversion is configured; convert() returns the block untouched in that case.
SwrContext* m_context { nullptr };
// Reusable output buffer for swr_convert(), allocated with av_samples_alloc() in convert().
u8* m_output_buffer { nullptr };
// Capacity of m_output_buffer, in samples per channel; 0 when there is no buffer.
int m_output_buffer_sample_count { 0 };
};
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Gregory Bertilson <zaggy1024@gmail.com>
* Copyright (c) 2024-2025, Gregory Bertilson <gregory@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@@ -12,4 +12,5 @@ struct AVFormatContext;
struct AVIOContext;
struct AVPacket;
struct AVFrame;
struct SwrContext;
}

View File

@@ -128,4 +128,144 @@ static inline TrackType track_type_from_ffmpeg_media_type(AVMediaType media_type
ErrorOr<Audio::ChannelMap> av_channel_layout_to_channel_map(AVChannelLayout const&);
ErrorOr<AVChannelLayout> channel_map_to_av_channel_layout(Audio::ChannelMap const&);
// Maps an FFmpeg error code to a fixed, human-readable description.
// Handles the FFmpeg-specific AVERROR_* constants as well as AVERROR(errno) wrapped POSIX error
// numbers. Any code without a dedicated case yields "Unknown error".
constexpr StringView av_error_code_to_string(int error)
{
switch (error) {
// FFmpeg-specific error codes.
case AVERROR_BSF_NOT_FOUND:
return "Bitstream filter not found"sv;
case AVERROR_BUG:
case AVERROR_BUG2:
return "Internal bug, should not have happened"sv;
case AVERROR_BUFFER_TOO_SMALL:
return "Buffer too small"sv;
case AVERROR_DECODER_NOT_FOUND:
return "Decoder not found"sv;
case AVERROR_DEMUXER_NOT_FOUND:
return "Demuxer not found"sv;
case AVERROR_ENCODER_NOT_FOUND:
return "Encoder not found"sv;
case AVERROR_EOF:
return "End of file"sv;
case AVERROR_EXIT:
return "Immediate exit requested"sv;
case AVERROR_EXTERNAL:
return "Generic error in an external library"sv;
case AVERROR_FILTER_NOT_FOUND:
return "Filter not found"sv;
case AVERROR_INPUT_CHANGED:
return "Input changed"sv;
case AVERROR_INVALIDDATA:
return "Invalid data found when processing input"sv;
case AVERROR_MUXER_NOT_FOUND:
return "Muxer not found"sv;
case AVERROR_OPTION_NOT_FOUND:
return "Option not found"sv;
case AVERROR_OUTPUT_CHANGED:
return "Output changed"sv;
case AVERROR_PATCHWELCOME:
return "Not yet implemented in FFmpeg, patches welcome"sv;
case AVERROR_PROTOCOL_NOT_FOUND:
return "Protocol not found"sv;
case AVERROR_STREAM_NOT_FOUND:
return "Stream not found"sv;
case AVERROR_UNKNOWN:
return "Unknown error occurred"sv;
case AVERROR_EXPERIMENTAL:
return "Experimental feature"sv;
// HTTP status related error codes.
case AVERROR_HTTP_BAD_REQUEST:
return "Server returned 400 Bad Request"sv;
case AVERROR_HTTP_UNAUTHORIZED:
return "Server returned 401 Unauthorized (authorization failed)"sv;
case AVERROR_HTTP_FORBIDDEN:
return "Server returned 403 Forbidden (access denied)"sv;
case AVERROR_HTTP_NOT_FOUND:
return "Server returned 404 Not Found"sv;
case AVERROR_HTTP_TOO_MANY_REQUESTS:
return "Server returned 429 Too Many Requests"sv;
case AVERROR_HTTP_OTHER_4XX:
return "Server returned 4XX Client Error, but not one of 40{0,1,3,4}"sv;
case AVERROR_HTTP_SERVER_ERROR:
return "Server returned 5XX Server Error reply"sv;
// POSIX errno values wrapped with AVERROR().
case AVERROR(E2BIG):
return "Argument list too long"sv;
case AVERROR(EACCES):
return "Permission denied"sv;
case AVERROR(EAGAIN):
return "Resource temporarily unavailable"sv;
case AVERROR(EBADF):
return "Bad file descriptor"sv;
case AVERROR(EBUSY):
return "Device or resource busy"sv;
case AVERROR(ECHILD):
return "No child processes"sv;
case AVERROR(EDEADLK):
return "Resource deadlock avoided"sv;
case AVERROR(EDOM):
return "Numerical argument out of domain"sv;
case AVERROR(EEXIST):
return "File exists"sv;
case AVERROR(EFAULT):
return "Bad address"sv;
case AVERROR(EFBIG):
return "File too large"sv;
case AVERROR(EILSEQ):
return "Illegal byte sequence"sv;
case AVERROR(EINTR):
return "Interrupted system call"sv;
case AVERROR(EINVAL):
return "Invalid argument"sv;
case AVERROR(EIO):
return "I/O error"sv;
case AVERROR(EISDIR):
return "Is a directory"sv;
case AVERROR(EMFILE):
return "Too many open files"sv;
case AVERROR(EMLINK):
return "Too many links"sv;
case AVERROR(ENAMETOOLONG):
return "File name too long"sv;
case AVERROR(ENFILE):
return "Too many open files in system"sv;
case AVERROR(ENODEV):
return "No such device"sv;
case AVERROR(ENOENT):
return "No such file or directory"sv;
case AVERROR(ENOEXEC):
return "Exec format error"sv;
case AVERROR(ENOLCK):
return "No locks available"sv;
case AVERROR(ENOMEM):
return "Cannot allocate memory"sv;
case AVERROR(ENOSPC):
return "No space left on device"sv;
case AVERROR(ENOSYS):
return "Function not implemented"sv;
case AVERROR(ENOTDIR):
return "Not a directory"sv;
case AVERROR(ENOTEMPTY):
return "Directory not empty"sv;
case AVERROR(ENOTTY):
return "Inappropriate I/O control operation"sv;
case AVERROR(ENXIO):
return "No such device or address"sv;
case AVERROR(EPERM):
return "Operation not permitted"sv;
case AVERROR(EPIPE):
return "Broken pipe"sv;
case AVERROR(ERANGE):
return "Result too large"sv;
case AVERROR(EROFS):
return "Read-only file system"sv;
case AVERROR(ESPIPE):
return "Illegal seek"sv;
case AVERROR(ESRCH):
return "No such process"sv;
case AVERROR(EXDEV):
return "Cross-device link"sv;
// Anything not listed above.
default:
return "Unknown error"sv;
}
// Every path through the switch returns, so control never gets here.
VERIFY_NOT_REACHED();
}
}

View File

@@ -6,6 +6,8 @@
#include <AK/Debug.h>
#include <LibCore/EventLoop.h>
#include <LibMedia/Audio/SampleSpecification.h>
#include <LibMedia/FFmpeg/FFmpegAudioConverter.h>
#include <LibMedia/FFmpeg/FFmpegAudioDecoder.h>
#include <LibMedia/MutexedDemuxer.h>
#include <LibMedia/Sinks/AudioSink.h>
@@ -21,8 +23,9 @@ DecoderErrorOr<NonnullRefPtr<AudioDataProvider>> AudioDataProvider::try_create(N
auto codec_id = TRY(demuxer->get_codec_id_for_track(track));
auto codec_initialization_data = TRY(demuxer->get_codec_initialization_data_for_track(track));
auto decoder = DECODER_TRY_ALLOC(FFmpeg::FFmpegAudioDecoder::try_create(codec_id, codec_initialization_data));
auto converter = DECODER_TRY_ALLOC(FFmpeg::FFmpegAudioConverter::try_create());
auto thread_data = DECODER_TRY_ALLOC(try_make_ref_counted<AudioDataProvider::ThreadData>(main_thread_event_loop, demuxer, track, move(decoder)));
auto thread_data = DECODER_TRY_ALLOC(try_make_ref_counted<AudioDataProvider::ThreadData>(main_thread_event_loop, demuxer, track, move(decoder), move(converter)));
auto provider = DECODER_TRY_ALLOC(try_make_ref_counted<AudioDataProvider>(thread_data));
auto thread = DECODER_TRY_ALLOC(Threading::Thread::try_create([thread_data]() -> int {
@@ -59,6 +62,11 @@ void AudioDataProvider::set_block_end_time_handler(BlockEndTimeHandler&& handler
m_thread_data->set_block_end_time_handler(move(handler));
}
// Forwards the desired output sample specification to the provider's thread data, which hands it
// to the audio converter.
void AudioDataProvider::set_output_sample_specification(Audio::SampleSpecification sample_specification)
{
m_thread_data->set_output_sample_specification(sample_specification);
}
void AudioDataProvider::start()
{
m_thread_data->start();
@@ -69,11 +77,12 @@ void AudioDataProvider::seek(AK::Duration timestamp, SeekCompletionHandler&& com
m_thread_data->seek(timestamp, move(completion_handler));
}
AudioDataProvider::ThreadData::ThreadData(NonnullRefPtr<Core::WeakEventLoopReference> const& main_thread_event_loop, NonnullRefPtr<MutexedDemuxer> const& demuxer, Track const& track, NonnullOwnPtr<AudioDecoder>&& decoder)
// Stores the event loop reference, demuxer and track, and takes ownership of the decoder and the
// converter that will be applied to each decoded block.
AudioDataProvider::ThreadData::ThreadData(NonnullRefPtr<Core::WeakEventLoopReference> const& main_thread_event_loop, NonnullRefPtr<MutexedDemuxer> const& demuxer, Track const& track, NonnullOwnPtr<AudioDecoder>&& decoder, NonnullOwnPtr<Audio::AudioConverter>&& converter)
: m_main_thread_event_loop(main_thread_event_loop)
, m_demuxer(demuxer)
, m_track(track)
, m_decoder(move(decoder))
, m_converter(move(converter))
{
}
@@ -89,6 +98,11 @@ void AudioDataProvider::ThreadData::set_block_end_time_handler(BlockEndTimeHandl
m_frame_end_time_handler = move(handler);
}
// Configures the converter's output sample specification.
// The converter's error is not propagated: the result is unwrapped with the FIXME helper below, so
// callers cannot observe a configuration failure through this void interface.
// NOTE(review): start() acquires the lock via take_lock() but this method does not - confirm the
// converter cannot be in use by the decoding thread when this is called.
void AudioDataProvider::ThreadData::set_output_sample_specification(Audio::SampleSpecification sample_specification)
{
m_converter->set_output_sample_specification(sample_specification).release_value_but_fixme_should_propagate_errors();
}
void AudioDataProvider::ThreadData::start()
{
auto locker = take_lock();
@@ -185,6 +199,11 @@ void AudioDataProvider::ThreadData::flush_decoder()
DecoderErrorOr<void> AudioDataProvider::ThreadData::retrieve_next_block(AudioBlock& block)
{
TRY(m_decoder->write_next_block(block));
auto convert_result = m_converter->convert(block);
if (convert_result.is_error())
return DecoderError::format(DecoderErrorCategory::NotImplemented, "Sample specification conversion failed: {}", convert_result.error().string_literal());
if (block.timestamp_in_samples() < m_last_sample)
block.set_timestamp_in_samples(m_last_sample);
m_last_sample = block.timestamp_in_samples() + static_cast<i64>(block.sample_count());

View File

@@ -12,6 +12,7 @@
#include <AK/Queue.h>
#include <AK/Time.h>
#include <LibCore/Forward.h>
#include <LibMedia/Audio/AudioConverter.h>
#include <LibMedia/AudioBlock.h>
#include <LibMedia/DecoderError.h>
#include <LibMedia/Export.h>
@@ -41,6 +42,7 @@ public:
void set_error_handler(ErrorHandler&&);
void set_block_end_time_handler(BlockEndTimeHandler&&);
void set_output_sample_specification(Audio::SampleSpecification);
void start();
@@ -51,11 +53,12 @@ public:
private:
class ThreadData final : public AtomicRefCounted<ThreadData> {
public:
ThreadData(NonnullRefPtr<Core::WeakEventLoopReference> const& main_thread_event_loop, NonnullRefPtr<MutexedDemuxer> const&, Track const&, NonnullOwnPtr<AudioDecoder>&&);
ThreadData(NonnullRefPtr<Core::WeakEventLoopReference> const& main_thread_event_loop, NonnullRefPtr<MutexedDemuxer> const&, Track const&, NonnullOwnPtr<AudioDecoder>&&, NonnullOwnPtr<Audio::AudioConverter>&&);
~ThreadData();
void set_error_handler(ErrorHandler&&);
void set_block_end_time_handler(BlockEndTimeHandler&&);
void set_output_sample_specification(Audio::SampleSpecification);
void start();
void exit();
@@ -100,6 +103,7 @@ private:
NonnullRefPtr<MutexedDemuxer> m_demuxer;
Track m_track;
NonnullOwnPtr<AudioDecoder> m_decoder;
NonnullOwnPtr<Audio::AudioConverter> m_converter;
i64 m_last_sample { NumericLimits<i64>::min() };
size_t m_queue_max_size { 8 };

View File

@@ -39,8 +39,14 @@ void AudioMixingSink::set_provider(Track const& track, RefPtr<AudioDataProvider>
return;
create_playback_stream();
// The provider must have its output sample specification set before it starts decoding, or
// we'll drop some samples due to a mismatch.
m_track_mixing_datas.set(track, TrackMixingData(*provider));
provider->start();
if (m_sample_specification.is_valid()) {
provider->set_output_sample_specification(m_sample_specification);
provider->start();
}
}
RefPtr<AudioDataProvider> AudioMixingSink::provider(Track const& track) const
@@ -64,8 +70,10 @@ void AudioMixingSink::create_playback_stream()
Threading::MutexLocker locker { self->m_mutex };
self->m_sample_specification = sample_specification;
for (auto& [track, track_data] : self->m_track_mixing_datas)
for (auto& [track, track_data] : self->m_track_mixing_datas) {
track_data.provider->set_output_sample_specification(sample_specification);
track_data.provider->start();
}
if (self->m_playing)
self->resume();

View File

@@ -5,6 +5,7 @@ find_package(PkgConfig REQUIRED)
pkg_check_modules(AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(LIBSWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)
else()
find_package(FFMPEG REQUIRED)
endif()

View File

@@ -26,6 +26,7 @@ config("ffmpeg_config") {
"avcodec",
"avformat",
"avutil",
"swresample",
]
}

View File

@@ -30,6 +30,7 @@ shared_library("LibMedia") {
if (enable_ffmpeg) {
sources += [
"Audio/FFmpegLoader.cpp",
"FFmpeg/FFmpegAudioConverter.cpp",
"FFmpeg/FFmpegAudioDecoder.cpp",
"FFmpeg/FFmpegHelpers.cpp",
"FFmpeg/FFmpegVideoDecoder.cpp",

View File

@@ -44,6 +44,7 @@
"features": [
"avcodec",
"avformat",
"swresample",
"dav1d",
"openh264",
"opus",