Audio concatenation using the FFmpeg libraries


I am working on implementing audio concatenation using the FFmpeg libraries. However, I have run into a problem: the output audio consists of the first 3 seconds of audio one plus the last 2 seconds of audio two, instead of the expected total of 3 + 5 seconds.

void AudioConcat::concatenateAudio(const char* input1Path, const char* input2Path, const char* outputPath) {
    std::cerr << count++ << std::endl;

    // Open input files
    AVPacket* avPacket = nullptr;
    AVFormatContext* avInputFormatContext1 = NULL;
    AVFormatContext* avInputFormatContext2 = NULL;
    AVFormatContext* avOutputFormatContext;

    avPacket = av_packet_alloc();
    if (!avPacket) {
        std::cerr << "Failed to allocate AVPacket." << std::endl;
        qCritical("Failed to allocate AVPacket.");
        return;
    }

    try {

        if (avformat_open_input(&avInputFormatContext1, input1Path, 0, 0) < 0 ||
            avformat_open_input(&avInputFormatContext2, input2Path, 0, 0) < 0) {
            std::cerr << "Error opening input files." << std::endl;
            return;
        }

        if (avformat_find_stream_info(avInputFormatContext1, 0) < 0 ||
            avformat_find_stream_info(avInputFormatContext2, 0) < 0) {
            qCritical("%s", QString("Failed to retrieve the input stream information.").toStdString().c_str());
            return;
        }

        // Open output file
        if (avformat_alloc_output_context2(&avOutputFormatContext, nullptr, nullptr, outputPath) < 0) {
            std::cerr << "Error creating output format context." << std::endl;
            return;
        }

        // Find audio streams in input files
        AVStream* input1AudioStream = nullptr;
        AVStream* input2AudioStream = nullptr;

        for (unsigned int i = 0; i < avInputFormatContext1->nb_streams; ++i) {
            if (avInputFormatContext1->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
                input1AudioStream = avInputFormatContext1->streams[i];
                break;
            }
        }

        for (unsigned int i = 0; i < avInputFormatContext2->nb_streams; ++i) {
            if (avInputFormatContext2->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
                input2AudioStream = avInputFormatContext2->streams[i];
                break;
            }
        }

        if (!input1AudioStream || !input2AudioStream) {
            std::cerr << "Error finding audio streams in input files." << std::endl;
            return;
        }

        // Create new audio stream in the output file
        AVStream* outputAudioStream = avformat_new_stream(avOutputFormatContext, nullptr);
        if (!outputAudioStream) {
            std::cerr << "Error creating new audio stream in the output file." << std::endl;
            return;
        }

        // Copy codec parameters from input streams to output stream
        avcodec_parameters_copy(outputAudioStream->codecpar, input1AudioStream->codecpar);

        // Write the output file header
        if (!(avOutputFormatContext->oformat->flags & AVFMT_NOFILE)) {
            int operationResult = avio_open(&avOutputFormatContext->pb, outputPath, AVIO_FLAG_WRITE);
            if (operationResult < 0) {
                qCritical(
                    "%s", QString("Failed to open the output file '%1'.").arg(outputPath).toStdString().c_str());
            }
        }
        if (avformat_write_header(avOutputFormatContext, NULL) < 0) {
            std::cerr << "Error writing output file header." << std::endl;
            return;
        }

        while (av_read_frame(avInputFormatContext1, avPacket) == 0) {
            if (avPacket->stream_index == input1AudioStream->index) {
                avPacket->stream_index = outputAudioStream->index;
                avPacket->pts = av_rescale_q(avPacket->pts, input1AudioStream->time_base, outputAudioStream->time_base);
                avPacket->dts = av_rescale_q(avPacket->dts, input1AudioStream->time_base, outputAudioStream->time_base);
                avPacket->duration = av_rescale_q(avPacket->duration, input1AudioStream->time_base, outputAudioStream->time_base);
                av_interleaved_write_frame(avOutputFormatContext, avPacket);
            }

            av_packet_unref(avPacket);
        }
        while (av_read_frame(avInputFormatContext2, avPacket) == 0) {
            if (avPacket->stream_index == input2AudioStream->index) {
                avPacket->stream_index = outputAudioStream->index;
                avPacket->pts = av_rescale_q(avPacket->pts, input2AudioStream->time_base, outputAudioStream->time_base);
                avPacket->dts = av_rescale_q(avPacket->dts, input2AudioStream->time_base, outputAudioStream->time_base);
                avPacket->duration = av_rescale_q(avPacket->duration, input2AudioStream->time_base, outputAudioStream->time_base);
                av_interleaved_write_frame(avOutputFormatContext, avPacket);
            }

            av_packet_unref(avPacket);
        }

        // Write the output file trailer
        if (av_write_trailer(avOutputFormatContext) < 0) {
            std::cerr << "Error writing output file trailer." << std::endl;
            return;
        }

        av_packet_unref(avPacket);
        avformat_close_input(&avInputFormatContext1);
        avformat_close_input(&avInputFormatContext2);
        avformat_free_context(avOutputFormatContext);
    }
    catch (...) {
        std::exception_ptr p = std::current_exception();
        std::cerr <<(p ? p.__cxa_exception_type()->name() : "null") << std::endl;
    }

}

I suspect the issue might be related to how the packet timestamps on the output stream are set when writing frames from the second input. Any insights?
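For reference, here is a minimal standalone sketch (not part of the code above) that dumps every audio packet's pts/dts/duration in seconds; running it on both inputs and on the produced output should show whether the second input's packets restart at 0 and therefore land on top of the first input's timeline. The helper name dumpAudioPacketTimes is made up for illustration.

#include <cstdio>
extern "C" {
#include <libavformat/avformat.h>
}

static void dumpAudioPacketTimes(const char* path) {
    AVFormatContext* fmt = nullptr;
    if (avformat_open_input(&fmt, path, nullptr, nullptr) < 0)
        return;
    if (avformat_find_stream_info(fmt, nullptr) < 0) {
        avformat_close_input(&fmt);
        return;
    }

    AVPacket* pkt = av_packet_alloc();
    while (av_read_frame(fmt, pkt) == 0) {
        const AVStream* st = fmt->streams[pkt->stream_index];
        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            // av_q2d() converts the stream time_base to seconds.
            // (A pts of AV_NOPTS_VALUE will show up as a huge negative number.)
            std::printf("%s  pts=%.3f  dts=%.3f  dur=%.3f\n", path,
                        pkt->pts * av_q2d(st->time_base),
                        pkt->dts * av_q2d(st->time_base),
                        pkt->duration * av_q2d(st->time_base));
        }
        av_packet_unref(pkt);
    }
    av_packet_free(&pkt);
    avformat_close_input(&fmt);
}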

c++ qt ffmpeg
1 Answer

The pts field stands for "presentation timestamp". The idea is that each packet encodes the sound from pts to pts + duration. If you want to concatenate the audio, you have to shift every packet of the second sound by the pts + duration of the last packet of the first sound. Concretely:

int64_t nextPts = 0;
while (av_read_frame(avInputFormatContext1, avPacket) == 0) {
        if (avPacket->stream_index == input1AudioStream->index) {
                avPacket->stream_index = outputAudioStream->index;
                avPacket->pts = av_rescale_q(avPacket->pts, input1AudioStream->time_base, outputAudioStream->time_base);
                avPacket->dts = av_rescale_q(avPacket->dts, input1AudioStream->time_base, outputAudioStream->time_base);
                avPacket->duration = av_rescale_q(avPacket->duration, input1AudioStream->time_base, outputAudioStream->time_base);
                nextPts = avPacket->pts + avPacket->duration;
                av_interleaved_write_frame(avOutputFormatContext, avPacket);
        }

        av_packet_unref(avPacket);
}
while (av_read_frame(avInputFormatContext2, avPacket) == 0) {
        if (avPacket->stream_index == input2AudioStream->index) {
                avPacket->stream_index = outputAudioStream->index;
                // Offset by nextPts, which the first loop above set from the last packet of input one
                avPacket->pts = nextPts + av_rescale_q(avPacket->pts, input2AudioStream->time_base, outputAudioStream->time_base);
                avPacket->dts = nextPts + av_rescale_q(avPacket->dts, input2AudioStream->time_base, outputAudioStream->time_base);
                avPacket->duration = av_rescale_q(avPacket->duration, input2AudioStream->time_base, outputAudioStream->time_base);
                av_interleaved_write_frame(avOutputFormatContext, avPacket);
        }

        av_packet_unref(avPacket);
}

Note that pts and dts may hold a special value called AV_NOPTS_VALUE, in which case you have to pass that AV_NOPTS_VALUE through unchanged. That is left to you as an exercise :)
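A minimal sketch of that exercise, assuming the only requirement is to keep AV_NOPTS_VALUE untouched instead of rescaling it; the helper name shiftTimestamp is hypothetical:

extern "C" {
#include <libavutil/avutil.h>        // AV_NOPTS_VALUE
#include <libavutil/mathematics.h>   // av_rescale_q
}

// Rescales a timestamp into the output time base and adds an offset,
// but leaves the "no timestamp" marker untouched so the muxer can still
// recognise it.
static int64_t shiftTimestamp(int64_t ts, int64_t offset,
                              AVRational from, AVRational to) {
    if (ts == AV_NOPTS_VALUE)
        return AV_NOPTS_VALUE;
    return offset + av_rescale_q(ts, from, to);
}

// Possible usage inside the second loop, replacing the direct assignments
// (the offset would be 0 for the first input and nextPts for the second):
//   avPacket->pts = shiftTimestamp(avPacket->pts, nextPts,
//                                  input2AudioStream->time_base,
//                                  outputAudioStream->time_base);
//   avPacket->dts = shiftTimestamp(avPacket->dts, nextPts,
//                                  input2AudioStream->time_base,
//                                  outputAudioStream->time_base);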
