使用 onnxruntime c++ 时，会出现错误“可用内存 0 小于请求的字节 256”

Question

我使用以下代码

#include <iostream>
#include <onnxruntime_cxx_api.h>
#include "filesystem"
#include <opencv2/opencv.hpp>
#include <opencv2/dnn/dnn.hpp>

namespace fs = std::filesystem;


// Entry point. Creates an ONNX Runtime session (with the CUDA execution provider
// appended) for the model file "RealESRGAN_x4plus_anime_6B.onnx", feeds it the
// image "small.jpg" read with OpenCV, and writes the post-processed result to
// "out.jpg". All three files are resolved relative to the directory of this
// source file. Returns 0 after the output image has been written.
int main() {
    // Load the ONNX model
    // `d` is the absolute directory containing this source file (from __FILE__).
    fs::path d = fs::absolute(fs::path(__FILE__).parent_path());
    std::string onnx_model_path = (d / "RealESRGAN_x4plus_anime_6B.onnx").string();

    // Set up the ONNX Runtime session
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "onnxruntime");
    Ort::SessionOptions session_options;
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
    session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
    OrtCUDAProviderOptions cuda_options;
    // NOTE(review): per the answer further down in this document, gpu_mem_limit
    // is expressed in bytes, so 1 * 1024 * 1024 is 1 MiB rather than the 1 GB the
    // author intended. The BFCArena "Available memory of 0" error quoted below is
    // attributed to this value.
    cuda_options.gpu_mem_limit = 1 * 1024 * 1024;

    session_options.AppendExecutionProvider_CUDA(cuda_options);
    Ort::Session ort_session(env, onnx_model_path.c_str(), session_options);

    // Load and preprocess input image
    std::string input_image_path = (d / "small.jpg").string();
    cv::Mat input_image = cv::imread(input_image_path);
    // Reorder channels from BGR to RGB.
    cv::cvtColor(input_image, input_image, cv::COLOR_BGR2RGB);
    // Convert to 32-bit float, scaling every value by 1/255.
    input_image.convertTo(input_image, CV_32FC3, 1.0 / 255.0);
    // NOTE(review): cv::transpose swaps rows and columns of the image; confirm
    // this is intended before the blob conversion on the next line.
    cv::transpose(input_image, input_image);
    // From here on input_image holds the blob returned by blobFromImage, not the
    // original image.
    input_image = cv::dnn::blobFromImage(input_image);

    // Query the names of the first input and first output of the model. The
    // names are copied into std::string objects owned by this function.
    Ort::AllocatorWithDefaultOptions allocator;
    std::string input_name = ort_session.GetInputNameAllocated(0, allocator).get();
    std::string out_name = ort_session.GetOutputNameAllocated(0, allocator).get();

    // Perform inference
    // The vectors below hold pointers into input_name / out_name.
    std::vector<const char *> input_names = {input_name.c_str()};
    std::vector<const char *> out_names = {out_name.c_str()};

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator,
                                                            OrtMemType::OrtMemTypeDefault);

    // Get the shape of the input tensor
    // NOTE(review): input_image is the blob at this point; verify that
    // channels(), rows and cols still describe C, H and W for that Mat — TODO confirm.
    std::vector<int64_t> input_shape = {1, input_image.channels(), input_image.rows, input_image.cols};

    // Allocate memory for the tensor data
    // NOTE(review): tensor_data is allocated with new[] and is never released
    // with delete[] anywhere in this function.
    size_t tensor_size = input_image.total() * input_image.channels();
    float *tensor_data = new float[tensor_size];

    // Copy the data from the cv::Mat to the tensor data buffer
    std::memcpy(tensor_data, input_image.data, tensor_size * sizeof(float));


    // Create the input tensor
    // The element count passed here is the same expression as tensor_size above.
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memoryInfo, tensor_data,
                                                              input_image.total() * input_image.channels(),
                                                              input_shape.data(), input_shape.size());

    // Create a vector of input tensors
    std::vector<Ort::Value> input_tensors = {};
    input_tensors.push_back(std::move(input_tensor)); // Move input_tensor into input_tensors
    // Run the session with one input and one output.
    std::vector<Ort::Value> out_tensors = ort_session.Run(Ort::RunOptions{nullptr}, input_names.data(),
                                                          input_tensors.data(), 1, out_names.data(), 1);

    // Post-process the output
    // Build a CV_32FC3 cv::Mat of out_shape[2] rows and out_shape[3] columns over
    // the output tensor's float data.
    // NOTE(review): assumes the output has at least four dimensions and that its
    // memory layout matches a 3-channel interleaved Mat — TODO confirm.
    const std::vector<int64_t> out_shape = out_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    float *data_ptr = const_cast<float *>(out_tensors[0].GetTensorData<float>());
    cv::Mat out_img(out_shape[2], out_shape[3], CV_32FC3, data_ptr);

    // Transpose the image
    cv::transpose(out_img, out_img);

    // Scale the image to the range [0, 255]
    out_img = out_img * 255;

    // Clamp the values to the range [0, 255]
    cv::max(0, out_img, out_img);
    cv::min(255, out_img, out_img);

    // Convert the image to 8-bit unsigned integer type
    out_img.convertTo(out_img, CV_8UC3);

    // Save the output image
    // Convert back from RGB to BGR before writing the file.
    std::string out_path = (d / "out.jpg").string();
    cv::Mat out_bgr;
    cv::cvtColor(out_img, out_bgr, cv::COLOR_RGB2BGR);
    cv::imwrite(out_path, out_bgr);
    std::cout << "Output saved to: " << out_path << std::endl;

    return 0;
}

错误：

2024-04-21 10:24:38.313352771 [E:onnxruntime:, inference_session.cc:1798 operator()] Exception during initialization: /onnxruntime_src/onnxruntime/core/framework/bfc_arena.cc:376 void* onnxruntime::BFCArena::AllocateRawInternal(size_t, bool, onnxruntime::Stream*, bool, onnxruntime::WaitNotificationFn) Available memory of 0 is smaller than requested bytes of 256

我以为自己已经把 GPU 显存限制设置为 1 GB，而我的 GPU 还有接近 2 GB 的可用显存；并且使用 onnxruntime 的 Python 版本可以正确运行该模型的推理。请问该如何修复这个错误？

cuda_options.gpu_mem_limit = 1 * 1024 * 1024

Answer 1

我找到了解决方案：

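cuda_options.gpu_mem_limit 的单位是字节，所以 2 GB 应该写成 2 * 1024 * 1024 * 1024，而不是 2 * 1024 * 1024。注意：在 C++ 中，2 * 1024 * 1024 * 1024 会按 int 计算并发生整数溢出，应写成 2ULL * 1024 * 1024 * 1024（或先转换为 size_t 再相乘）。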

使用 onnxruntime c++ 时，会出现错误“可用内存 0 小于请求的字节 256”

问题描述投票：0回答：1

1个回答

最新问题

使用 onnxruntime c++ 时，会出现错误“可用内存 0 小于请求的字节 256”

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1