C/C++ Inference API
The ONNX Runtime C++ API provides high-performance inference with RAII-based resource management and exception safety. This guide covers the complete C++ API with real examples from the codebase.
Installation
Using NuGet (Windows)
Copy
Ask AI
nuget install Microsoft.ML.OnnxRuntime
Using vcpkg
Copy
Ask AI
vcpkg install onnxruntime
Manual Installation
Download pre-built binaries from the ONNX Runtime releases page.
Quick Start
Here’s a minimal C++ example:Copy
Ask AI
#include <onnxruntime_cxx_api.h>
#include <vector>
#include <iostream>
int main() {
// Initialize environment
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
// Create session
Ort::SessionOptions session_options;
Ort::Session session(env, "model.onnx", session_options);
// Prepare input
std::vector<float> input_data(1 * 3 * 224 * 224, 0.5f);
std::vector<int64_t> input_shape = {1, 3, 224, 224};
auto memory_info = Ort::MemoryInfo::CreateCpu(
OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
memory_info, input_data.data(), input_data.size(),
input_shape.data(), input_shape.size());
// Get input/output names
Ort::AllocatorWithDefaultOptions allocator;
const char* input_name = session.GetInputNameAllocated(0, allocator).get();
const char* output_name = session.GetOutputNameAllocated(0, allocator).get();
// Run inference
std::vector<const char*> input_names = {input_name};
std::vector<const char*> output_names = {output_name};
auto output_tensors = session.Run(
Ort::RunOptions{nullptr},
input_names.data(), &input_tensor, 1,
output_names.data(), 1);
// Get output
float* output_data = output_tensors[0].GetTensorMutableData<float>();
std::cout << "First output value: " << output_data[0] << std::endl;
return 0;
}
Core Classes
Ort::Env
The environment manages global state. Create one per application.
Ask AI
// Basic initialization
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "MyApp");
// With custom thread pool
Ort::ThreadingOptions threading_options;
threading_options.SetGlobalIntraOpNumThreads(4);
threading_options.SetGlobalInterOpNumThreads(2);
Ort::Env env(threading_options, ORT_LOGGING_LEVEL_WARNING, "MyApp");
Available logging levels: ORT_LOGGING_LEVEL_VERBOSE (0), ORT_LOGGING_LEVEL_INFO (1), ORT_LOGGING_LEVEL_WARNING (2), ORT_LOGGING_LEVEL_ERROR (3), ORT_LOGGING_LEVEL_FATAL (4).
Ort::SessionOptions
Configure session creation and optimization.Copy
Ask AI
Ort::SessionOptions session_options;
// Graph optimization
session_options.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
// Threading
session_options.SetIntraOpNumThreads(4);
session_options.SetInterOpNumThreads(2);
// Execution mode
session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
// Memory optimization
session_options.EnableCpuMemArena();
session_options.EnableMemPattern();
// Profiling
session_options.EnableProfiling("profile.json");
// Log settings
session_options.SetLogId("MySession");
session_options.SetLogSeverityLevel(2);
// Save optimized model
session_options.SetOptimizedModelFilePath("optimized_model.onnx");
Copy
Ask AI
GraphOptimizationLevel::ORT_DISABLE_ALL // No optimizations
GraphOptimizationLevel::ORT_ENABLE_BASIC // Constant folding, redundant node elimination
GraphOptimizationLevel::ORT_ENABLE_EXTENDED // Operator fusion, node reordering
GraphOptimizationLevel::ORT_ENABLE_ALL // Layout transformations, NCHWc format
Ort::Session
The main inference session class. Create from file:Copy
Ask AI
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
Ort::SessionOptions session_options;
Ort::Session session(env, "model.onnx", session_options);
// On Windows, use wide strings
Ort::Session session(env, L"model.onnx", session_options);
Copy
Ask AI
// Load model into memory
std::vector<uint8_t> model_data = load_model_file("model.onnx");
Ort::Session session(env, model_data.data(), model_data.size(), session_options);
Copy
Ask AI
Ort::AllocatorWithDefaultOptions allocator;
// Get input/output counts
size_t num_inputs = session.GetInputCount();
size_t num_outputs = session.GetOutputCount();
// Get input names and types
for (size_t i = 0; i < num_inputs; i++) {
auto input_name = session.GetInputNameAllocated(i, allocator);
std::cout << "Input " << i << ": " << input_name.get() << std::endl;
auto type_info = session.GetInputTypeInfo(i);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
std::cout << " Type: " << tensor_info.GetElementType() << std::endl;
std::cout << " Shape: [";
auto shape = tensor_info.GetShape();
for (size_t j = 0; j < shape.size(); j++) {
std::cout << (j > 0 ? ", " : "") << shape[j];
}
std::cout << "]" << std::endl;
}
// Get output names and types
for (size_t i = 0; i < num_outputs; i++) {
auto output_name = session.GetOutputNameAllocated(i, allocator);
std::cout << "Output " << i << ": " << output_name.get() << std::endl;
}
Copy
Ask AI
Ort::ModelMetadata metadata = session.GetModelMetadata();
Ort::AllocatorWithDefaultOptions allocator;
auto producer_name = metadata.GetProducerNameAllocated(allocator);
auto graph_name = metadata.GetGraphNameAllocated(allocator);
auto version = metadata.GetVersion();
std::cout << "Producer: " << producer_name.get() << std::endl;
std::cout << "Graph: " << graph_name.get() << std::endl;
std::cout << "Version: " << version << std::endl;
Running Inference
Basic inference:Copy
Ask AI
// Prepare input tensor
std::vector<float> input_data(batch_size * channels * height * width);
std::vector<int64_t> input_shape = {batch_size, channels, height, width};
auto memory_info = Ort::MemoryInfo::CreateCpu(
OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
memory_info, input_data.data(), input_data.size(),
input_shape.data(), input_shape.size());
// Get input/output names
const char* input_names[] = {"input"};
const char* output_names[] = {"output"};
// Run inference
Ort::RunOptions run_options;
auto output_tensors = session.Run(
run_options,
input_names, &input_tensor, 1,
output_names, 1);
// Access output
float* output_data = output_tensors[0].GetTensorMutableData<float>();
auto output_shape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
Ort::Value
Represents tensors and other values. Create tensor from existing data:Copy
Ask AI
std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f};
std::vector<int64_t> shape = {2, 2};
auto memory_info = Ort::MemoryInfo::CreateCpu(
OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value tensor = Ort::Value::CreateTensor<float>(
memory_info, data.data(), data.size(),
shape.data(), shape.size());
Copy
Ask AI
Ort::AllocatorWithDefaultOptions allocator;
std::vector<int64_t> shape = {1, 3, 224, 224};
Ort::Value tensor = Ort::Value::CreateTensor<float>(
allocator, shape.data(), shape.size());
// Get mutable data pointer
float* data = tensor.GetTensorMutableData<float>();
// Fill data...
Copy
Ask AI
bool is_tensor = tensor.IsTensor();
auto type_info = tensor.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType element_type = type_info.GetElementType();
size_t element_count = type_info.GetElementCount();
std::vector<int64_t> shape = type_info.GetShape();
const float* data = tensor.GetTensorData<float>();
Execution Providers
Add Execution Providers
CUDA:Copy
Ask AI
Ort::SessionOptions session_options;

OrtCUDAProviderOptions cuda_options;
cuda_options.device_id = 0;
cuda_options.arena_extend_strategy = OrtArenaExtendStrategy::kNextPowerOfTwo;
cuda_options.gpu_mem_limit = 2ULL * 1024 * 1024 * 1024; // 2GB
// OrtCudnnConvAlgoSearch is an unscoped C enum whose enumerators are
// OrtCudnnConvAlgoSearchExhaustive / Heuristic / Default; there is no
// EXHAUSTIVE member, so OrtCudnnConvAlgoSearch::EXHAUSTIVE does not compile.
cuda_options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;

session_options.AppendExecutionProvider_CUDA(cuda_options);
Copy
Ask AI
OrtTensorRTProviderOptions trt_options;
trt_options.device_id = 0;
trt_options.trt_max_workspace_size = 1ULL << 30; // 1GB
trt_options.trt_fp16_enable = 1;
session_options.AppendExecutionProvider_TensorRT(trt_options);
Copy
Ask AI
session_options.AppendExecutionProvider_DML(0); // Device ID
Copy
Ask AI
uint32_t coreml_flags = 0;
session_options.AppendExecutionProvider_CoreML(coreml_flags);
Complete Example: Image Classification
Copy
Ask AI
#include <onnxruntime_cxx_api.h>

#include <algorithm>
#include <array>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
// Loads an ONNX image-classification model and runs inference on
// preprocessed image tensors. Not thread-safe: serialize calls to classify()
// or use one instance per thread.
class ImageClassifier {
private:
  Ort::Env env_;
  Ort::Session session_;
  Ort::AllocatorWithDefaultOptions allocator_;
  std::vector<std::string> input_names_;   // owned copies of model input names
  std::vector<std::string> output_names_;  // owned copies of model output names
  std::vector<int64_t> input_shape_;       // resolved shape of the model input

public:
  // model_path: path to the .onnx file.
  // Throws Ort::Exception on failure (missing file, invalid model,
  // provider initialization error).
  ImageClassifier(const char* model_path)
      : env_(ORT_LOGGING_LEVEL_WARNING, "ImageClassifier"),
        session_(nullptr) {
    // Configure session options.
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(4);
    session_options.SetGraphOptimizationLevel(
        GraphOptimizationLevel::ORT_ENABLE_ALL);

    // Add CUDA provider; unsupported ops fall back to the CPU provider.
    OrtCUDAProviderOptions cuda_options;
    cuda_options.device_id = 0;
    session_options.AppendExecutionProvider_CUDA(cuda_options);

    // Create session.
    session_ = Ort::Session(env_, model_path, session_options);

    // Report model metadata.
    size_t num_inputs = session_.GetInputCount();
    size_t num_outputs = session_.GetOutputCount();
    std::cout << "Model has " << num_inputs << " inputs and "
              << num_outputs << " outputs" << std::endl;

    // Get input names and shapes. Names are copied into std::string so they
    // outlive the AllocatedStringPtr temporaries.
    for (size_t i = 0; i < num_inputs; i++) {
      auto name = session_.GetInputNameAllocated(i, allocator_);
      input_names_.push_back(name.get());

      auto type_info = session_.GetInputTypeInfo(i);
      auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
      input_shape_ = tensor_info.GetShape();
      // Dynamic dimensions are reported as -1; resolve them to 1 (e.g. a
      // batch size of one) so a concrete tensor can be built in classify().
      // Without this, multiplying -1 into a size_t wraps around and the size
      // check below can never pass.
      for (auto& dim : input_shape_) {
        if (dim <= 0) dim = 1;
      }

      std::cout << "Input " << i << ": " << input_names_[i]
                << " [";
      for (size_t j = 0; j < input_shape_.size(); j++) {
        std::cout << (j > 0 ? ", " : "") << input_shape_[j];
      }
      std::cout << "]" << std::endl;
    }

    // Get output names.
    for (size_t i = 0; i < num_outputs; i++) {
      auto name = session_.GetOutputNameAllocated(i, allocator_);
      output_names_.push_back(name.get());
    }
  }

  // Runs one inference. image_data must already be preprocessed and contain
  // exactly as many floats as the resolved input shape requires.
  // Returns the raw values of the first output tensor.
  // Throws std::runtime_error on size mismatch, Ort::Exception on
  // inference failure.
  std::vector<float> classify(const std::vector<float>& image_data) {
    // Validate input size against the (resolved, all-positive) input shape.
    size_t expected_size = 1;
    for (auto dim : input_shape_) {
      expected_size *= static_cast<size_t>(dim);
    }
    if (image_data.size() != expected_size) {
      throw std::runtime_error("Input size mismatch");
    }

    // Create input tensor. CreateTensor does not copy: image_data must stay
    // alive until Run() returns. The const_cast is required by the API
    // signature; the buffer is only read.
    auto memory_info = Ort::MemoryInfo::CreateCpu(
        OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
        memory_info,
        const_cast<float*>(image_data.data()),
        image_data.size(),
        input_shape_.data(),
        input_shape_.size());

    // Prepare input/output names as C strings for the Run() API.
    std::vector<const char*> input_names_cstr;
    std::vector<const char*> output_names_cstr;
    for (auto& name : input_names_) {
      input_names_cstr.push_back(name.c_str());
    }
    for (auto& name : output_names_) {
      output_names_cstr.push_back(name.c_str());
    }

    // Run inference.
    Ort::RunOptions run_options;
    auto output_tensors = session_.Run(
        run_options,
        input_names_cstr.data(), &input_tensor, 1,
        output_names_cstr.data(), output_names_cstr.size());

    // Copy the first output tensor into a vector and return it.
    float* output_data = output_tensors[0].GetTensorMutableData<float>();
    auto output_shape =
        output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    size_t output_size = 1;
    for (auto dim : output_shape) {
      output_size *= static_cast<size_t>(dim);
    }
    return std::vector<float>(output_data, output_data + output_size);
  }
};
int main() {
try {
ImageClassifier classifier("resnet50.onnx");
// Create dummy input (1, 3, 224, 224)
std::vector<float> image_data(1 * 3 * 224 * 224, 0.5f);
// Run inference
auto predictions = classifier.classify(image_data);
// Find top prediction
auto max_it = std::max_element(predictions.begin(), predictions.end());
int max_idx = std::distance(predictions.begin(), max_it);
std::cout << "Top prediction: class " << max_idx
<< " with score " << *max_it << std::endl;
} catch (const Ort::Exception& e) {
std::cerr << "ONNX Runtime error: " << e.what() << std::endl;
return 1;
}
return 0;
}
Memory Management
The C++ API uses RAII (Resource Acquisition Is Initialization) for automatic resource management:Copy
Ask AI
{
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
Ort::SessionOptions options;
Ort::Session session(env, "model.onnx", options);
// Resources automatically cleaned up when going out of scope
}
Error Handling
Copy
Ask AI
try {
Ort::Session session(env, "model.onnx", session_options);
auto outputs = session.Run(/*...*/);
} catch (const Ort::Exception& e) {
std::cerr << "ONNX Runtime error: " << e.what() << std::endl;
std::cerr << "Error code: " << e.GetOrtErrorCode() << std::endl;
}
Next Steps
Model Optimization
Optimize models for production deployment
Execution Providers
Configure hardware acceleration