我使用以下代码
#include <array>
#include <cstddef>
#include <cstdint>
#include <exception>
#include "filesystem"
#include <iostream>
#include <string>
#include <vector>
#include <onnxruntime_cxx_api.h>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/opencv.hpp>
namespace fs = std::filesystem;
int main() {
// Load the ONNX model
fs::path d = fs::absolute(fs::path(__FILE__).parent_path());
std::string onnx_model_path = (d / "RealESRGAN_x4plus_anime_6B.onnx").string();
// Set up the ONNX Runtime session
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "onnxruntime");
Ort::SessionOptions session_options;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
OrtCUDAProviderOptions cuda_options;
cuda_options.gpu_mem_limit = 1 * 1024 * 1024;
session_options.AppendExecutionProvider_CUDA(cuda_options);
Ort::Session ort_session(env, onnx_model_path.c_str(), session_options);
// Load and preprocess input image
std::string input_image_path = (d / "small.jpg").string();
cv::Mat input_image = cv::imread(input_image_path);
cv::cvtColor(input_image, input_image, cv::COLOR_BGR2RGB);
input_image.convertTo(input_image, CV_32FC3, 1.0 / 255.0);
cv::transpose(input_image, input_image);
input_image = cv::dnn::blobFromImage(input_image);
Ort::AllocatorWithDefaultOptions allocator;
std::string input_name = ort_session.GetInputNameAllocated(0, allocator).get();
std::string out_name = ort_session.GetOutputNameAllocated(0, allocator).get();
// Perform inference
std::vector<const char *> input_names = {input_name.c_str()};
std::vector<const char *> out_names = {out_name.c_str()};
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator,
OrtMemType::OrtMemTypeDefault);
// Get the shape of the input tensor
std::vector<int64_t> input_shape = {1, input_image.channels(), input_image.rows, input_image.cols};
// Allocate memory for the tensor data
size_t tensor_size = input_image.total() * input_image.channels();
float *tensor_data = new float[tensor_size];
// Copy the data from the cv::Mat to the tensor data buffer
std::memcpy(tensor_data, input_image.data, tensor_size * sizeof(float));
// Create the input tensor
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memoryInfo, tensor_data,
input_image.total() * input_image.channels(),
input_shape.data(), input_shape.size());
// Create a vector of input tensors
// Create a vector of input tensors
std::vector<Ort::Value> input_tensors = {};
input_tensors.push_back(std::move(input_tensor)); // Move input_tensor into input_tensors
std::vector<Ort::Value> out_tensors = ort_session.Run(Ort::RunOptions{nullptr}, input_names.data(),
input_tensors.data(), 1, out_names.data(), 1);
// Post-process the output
const std::vector<int64_t> out_shape = out_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
float *data_ptr = const_cast<float *>(out_tensors[0].GetTensorData<float>());
cv::Mat out_img(out_shape[2], out_shape[3], CV_32FC3, data_ptr);
// Transpose the image
cv::transpose(out_img, out_img);
// Scale the image to the range [0, 255]
out_img = out_img * 255;
// Clamp the values to the range [0, 255]
cv::max(0, out_img, out_img);
cv::min(255, out_img, out_img);
// Convert the image to 8-bit unsigned integer type
out_img.convertTo(out_img, CV_8UC3);
// Save the output image
std::string out_path = (d / "out.jpg").string();
cv::Mat out_bgr;
cv::cvtColor(out_img, out_bgr, cv::COLOR_RGB2BGR);
cv::imwrite(out_path, out_bgr);
std::cout << "Output saved to: " << out_path << std::endl;
return 0;
}
错误:
2024-04-21 10:24:38.313352771 [E:onnxruntime:, inference_session.cc:1798 operator()] Exception during initialization: /onnxruntime_src/onnxruntime/core/framework/bfc_arena.cc:376 void* onnxruntime::BFCArena::AllocateRawInternal(size_t, bool, onnxruntime::Stream*, bool, onnxruntime::WaitNotificationFn) Available memory of 0 is smaller than requested bytes of 256
我以为自己已经把 GPU 显存上限设置为 1 GB,而我的 GPU 有接近 2 GB 的可用显存;并且我使用 Python 版的 onnxruntime 可以正确完成模型推理。应该如何修复这个错误?
cuda_options.gpu_mem_limit = 1 * 1024 * 1024
我找到了解决方案:
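cuda_options.gpu_mem_limit 的单位是字节,所以 2 GB 应该写成 2 * 1024 * 1024 * 1024,而不是 2 * 1024 * 1024(后者只有 2 MB)。注意在 C++ 中应使用 size_t 进行运算(例如 static_cast<size_t>(2) * 1024 * 1024 * 1024),以避免 int 溢出。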