diff --git a/src/core/dumping/ffmpeg_backend.cpp b/src/core/dumping/ffmpeg_backend.cpp index ef7843ffc..3c34c8440 100644 --- a/src/core/dumping/ffmpeg_backend.cpp +++ b/src/core/dumping/ffmpeg_backend.cpp @@ -211,7 +211,7 @@ bool FFmpegAudioStream::Init(AVFormatContext* format_context) { if (!FFmpegStream::Init(format_context)) return false; - sample_count = 0; + frame_count = 0; // Initialize audio codec const AVCodec* codec = avcodec_find_encoder_by_name(Settings::values.audio_encoder.c_str()); @@ -243,7 +243,20 @@ bool FFmpegAudioStream::Init(AVFormatContext* format_context) { codec_context->sample_fmt = AV_SAMPLE_FMT_S16P; } - codec_context->sample_rate = AudioCore::native_sample_rate; + if (codec->supported_samplerates) { + codec_context->sample_rate = codec->supported_samplerates[0]; + // Prefer native sample rate if supported + const int* ptr = codec->supported_samplerates; + while ((*ptr)) { + if ((*ptr) == AudioCore::native_sample_rate) { + codec_context->sample_rate = AudioCore::native_sample_rate; + break; + } + ptr++; + } + } else { + codec_context->sample_rate = AudioCore::native_sample_rate; + } codec_context->channel_layout = AV_CH_LAYOUT_STEREO; codec_context->channels = 2; @@ -259,6 +272,12 @@ bool FFmpegAudioStream::Init(AVFormatContext* format_context) { LOG_WARNING(Render, "Audio encoder options not found: {}", buf); } + if (codec_context->frame_size) { + frame_size = static_cast(codec_context->frame_size); + } else { // variable frame size support + frame_size = std::tuple_size::value; + } + // Create audio stream stream = avformat_new_stream(format_context, codec); if (!stream || avcodec_parameters_from_context(stream->codecpar, codec_context.get()) < 0) { @@ -291,7 +310,7 @@ bool FFmpegAudioStream::Init(AVFormatContext* format_context) { // Allocate resampled data int error = av_samples_alloc_array_and_samples(&resampled_data, nullptr, codec_context->channels, - codec_context->frame_size, codec_context->sample_fmt, 0); + frame_size, codec_context->sample_fmt, 0); if (error < 0) { LOG_ERROR(Render, "Could not allocate samples storage"); return false; @@ -312,31 +331,62 @@ void FFmpegAudioStream::Free() { av_freep(&resampled_data); } -void FFmpegAudioStream::ProcessFrame(VariableAudioFrame& channel0, VariableAudioFrame& channel1) { +void FFmpegAudioStream::ProcessFrame(const VariableAudioFrame& channel0, + const VariableAudioFrame& channel1) { ASSERT_MSG(channel0.size() == channel1.size(), "Frames of the two channels must have the same number of samples"); - std::array src_data = {reinterpret_cast(channel0.data()), - reinterpret_cast(channel1.data())}; - if (swr_convert(swr_context.get(), resampled_data, channel0.size(), src_data.data(), - channel0.size()) < 0) { + const auto sample_size = av_get_bytes_per_sample(codec_context->sample_fmt); + std::array src_data = {reinterpret_cast(channel0.data()), + reinterpret_cast(channel1.data())}; + std::array dst_data = {resampled_data[0] + sample_size * offset, + resampled_data[1] + sample_size * offset}; + + auto resampled_count = swr_convert(swr_context.get(), dst_data.data(), frame_size - offset, + src_data.data(), channel0.size()); + if (resampled_count < 0) { LOG_ERROR(Render, "Audio frame dropped: Could not resample data"); return; } - // Prepare frame - audio_frame->nb_samples = channel0.size(); - audio_frame->data[0] = resampled_data[0]; - audio_frame->data[1] = resampled_data[1]; - audio_frame->pts = sample_count; - sample_count += channel0.size(); + offset += resampled_count; + if (offset < frame_size) { // Still not enough to form a frame + return; + } - SendFrame(audio_frame.get()); + while (true) { + // Prepare frame + audio_frame->nb_samples = frame_size; + audio_frame->data[0] = resampled_data[0]; + audio_frame->data[1] = resampled_data[1]; + audio_frame->pts = frame_count * frame_size; + frame_count++; + + SendFrame(audio_frame.get()); + + // swr_convert buffers input internally. Try to get more resampled data + resampled_count = swr_convert(swr_context.get(), resampled_data, frame_size, nullptr, 0); + if (resampled_count < 0) { + LOG_ERROR(Render, "Audio frame dropped: Could not resample data"); + return; + } + if (static_cast(resampled_count) < frame_size) { + offset = resampled_count; + break; + } + } } -std::size_t FFmpegAudioStream::GetAudioFrameSize() const { - ASSERT_MSG(codec_context, "Codec context is not initialized yet!"); - return codec_context->frame_size; +void FFmpegAudioStream::Flush() { + // Send the last samples + audio_frame->nb_samples = offset; + audio_frame->data[0] = resampled_data[0]; + audio_frame->data[1] = resampled_data[1]; + audio_frame->pts = frame_count * frame_size; + + SendFrame(audio_frame.get()); + + FFmpegStream::Flush(); } FFmpegMuxer::~FFmpegMuxer() { @@ -402,7 +452,8 @@ void FFmpegMuxer::ProcessVideoFrame(VideoFrame& frame) { video_stream.ProcessFrame(frame); } -void FFmpegMuxer::ProcessAudioFrame(VariableAudioFrame& channel0, VariableAudioFrame& channel1) { +void FFmpegMuxer::ProcessAudioFrame(const VariableAudioFrame& channel0, + const VariableAudioFrame& channel1) { audio_stream.ProcessFrame(channel0, channel1); } @@ -414,10 +465,6 @@ void FFmpegMuxer::FlushAudio() { audio_stream.Flush(); } -std::size_t FFmpegMuxer::GetAudioFrameSize() const { - return audio_stream.GetAudioFrameSize(); -} - void FFmpegMuxer::WriteTrailer() { av_write_trailer(format_context.get()); } @@ -498,24 +545,20 @@ void FFmpegBackend::AddVideoFrame(VideoFrame frame) { } void FFmpegBackend::AddAudioFrame(AudioCore::StereoFrame16 frame) { - std::array, 2> refactored_frame; + std::array refactored_frame; + for (auto& channel : refactored_frame) { + channel.resize(frame.size()); + } for (std::size_t i = 0; i < frame.size(); i++) { refactored_frame[0][i] = frame[i][0]; refactored_frame[1][i] = frame[i][1]; } - for (auto i : {0, 1}) { - audio_buffers[i].insert(audio_buffers[i].end(), refactored_frame[i].begin(), - refactored_frame[i].end()); - } - CheckAudioBuffer(); + ffmpeg.ProcessAudioFrame(refactored_frame[0], refactored_frame[1]); } void FFmpegBackend::AddAudioSample(const std::array& sample) { - for (auto i : {0, 1}) { - audio_buffers[i].push_back(sample[i]); - } - CheckAudioBuffer(); + ffmpeg.ProcessAudioFrame({sample[0]}, {sample[1]}); } void FFmpegBackend::StopDumping() { @@ -525,12 +568,6 @@ void FFmpegBackend::StopDumping() { // Flush the video processing queue AddVideoFrame(VideoFrame()); for (auto i : {0, 1}) { - // Add remaining data to audio queue - if (audio_buffers[i].size() >= 0) { - VariableAudioFrame buffer(audio_buffers[i].begin(), audio_buffers[i].end()); - audio_frame_queues[i].Push(std::move(buffer)); - audio_buffers[i].clear(); - } // Flush the audio processing queue audio_frame_queues[i].Push(VariableAudioFrame()); } @@ -554,18 +591,4 @@ void FFmpegBackend::EndDumping() { processing_ended.Set(); } -void FFmpegBackend::CheckAudioBuffer() { - for (auto i : {0, 1}) { - const std::size_t frame_size = ffmpeg.GetAudioFrameSize(); - // Add audio data to the queue when there is enough to form a frame - while (audio_buffers[i].size() >= frame_size) { - VariableAudioFrame buffer(audio_buffers[i].begin(), - audio_buffers[i].begin() + frame_size); - audio_frame_queues[i].Push(std::move(buffer)); - - audio_buffers[i].erase(audio_buffers[i].begin(), audio_buffers[i].begin() + frame_size); - } - } -} - } // namespace VideoDumper diff --git a/src/core/dumping/ffmpeg_backend.h b/src/core/dumping/ffmpeg_backend.h index f08f31d3d..f0962189e 100644 --- a/src/core/dumping/ffmpeg_backend.h +++ b/src/core/dumping/ffmpeg_backend.h @@ -96,6 +96,7 @@ private: /** * A FFmpegStream used for audio data. * Resamples (converts), encodes and writes a frame. + * This also temporarily stores resampled audio data before there are enough to form a frame. */ class FFmpegAudioStream : public FFmpegStream { public: @@ -103,8 +104,8 @@ public: bool Init(AVFormatContext* format_context); void Free(); - void ProcessFrame(VariableAudioFrame& channel0, VariableAudioFrame& channel1); - std::size_t GetAudioFrameSize() const; + void ProcessFrame(const VariableAudioFrame& channel0, const VariableAudioFrame& channel1); + void Flush(); private: struct SwrContextDeleter { @@ -113,12 +114,14 @@ private: } }; - u64 sample_count{}; + u64 frame_size{}; + u64 frame_count{}; std::unique_ptr audio_frame{}; std::unique_ptr swr_context{}; u8** resampled_data{}; + u64 offset{}; // Number of output samples that are currently in resampled_data. }; /** @@ -132,10 +135,9 @@ public: bool Init(const std::string& path, const Layout::FramebufferLayout& layout); void Free(); void ProcessVideoFrame(VideoFrame& frame); - void ProcessAudioFrame(VariableAudioFrame& channel0, VariableAudioFrame& channel1); + void ProcessAudioFrame(const VariableAudioFrame& channel0, const VariableAudioFrame& channel1); void FlushVideo(); void FlushAudio(); - std::size_t GetAudioFrameSize() const; void WriteTrailer(); private: @@ -153,8 +155,7 @@ private: /** * FFmpeg video dumping backend. - * This class implements a double buffer, and an audio queue to keep audio data - * before enough data is received to form a frame. + * This class implements a double buffer. */ class FFmpegBackend : public Backend { public: @@ -169,7 +170,6 @@ public: Layout::FramebufferLayout GetLayout() const override; private: - void CheckAudioBuffer(); void EndDumping(); std::atomic_bool is_dumping = false; ///< Whether the backend is currently dumping @@ -182,9 +182,6 @@ private: Common::Event event1, event2; std::thread video_processing_thread; - /// An audio buffer used to temporarily hold audio data, before the size is big enough - /// to be sent to the encoder as a frame - std::array audio_buffers; std::array, 2> audio_frame_queues; std::thread audio_processing_thread;