Skip to main content

Execution Providers in C/C++

Execution Providers (EPs) enable ONNX Runtime to execute models on different hardware accelerators like GPUs, NPUs, and other specialized devices.

Available Providers

GetAvailableProviders

OrtStatus* (*GetAvailableProviders)(char*** out_ptr, int* provider_length);
Get the list of all available execution providers. Parameters:
  • out_ptr: Array of provider name strings (must be freed with ReleaseAvailableProviders)
  • provider_length: Number of providers
Returns: NULL on success.

Example:
char** providers;
int num_providers;
api->GetAvailableProviders(&providers, &num_providers);

printf("Available providers:\n");
for (int i = 0; i < num_providers; i++) {
    printf("  %s\n", providers[i]);
}

api->ReleaseAvailableProviders(providers, num_providers);
Note: A provider being “available” doesn’t guarantee it’s usable. It may fail if system dependencies are missing.

CUDA Execution Provider

OrtCUDAProviderOptions

typedef struct OrtCUDAProviderOptions {
    int device_id;                        // CUDA device ID (default: 0)
    OrtCudnnConvAlgoSearch cudnn_conv_algo_search;  // cuDNN algorithm search
    size_t gpu_mem_limit;                 // GPU memory limit (SIZE_MAX = unlimited)
    int arena_extend_strategy;            // 0=kNextPowerOfTwo, 1=kSameAsRequested
    int do_copy_in_default_stream;        // Use same stream for copy and compute
    int has_user_compute_stream;          // Has user-provided compute stream
    void* user_compute_stream;            // User compute stream (if has_user_compute_stream=1)
    OrtArenaCfg* default_memory_arena_cfg;
    int tunable_op_enable;                // Enable TunableOp
    int tunable_op_tuning_enable;         // Enable TunableOp tuning
    int tunable_op_max_tuning_duration_ms;
} OrtCUDAProviderOptions;
cuDNN Convolution Algorithm Search:
typedef enum OrtCudnnConvAlgoSearch {
    OrtCudnnConvAlgoSearchExhaustive,  // Exhaustive benchmarking
    OrtCudnnConvAlgoSearchHeuristic,   // Heuristic-based search
    OrtCudnnConvAlgoSearchDefault,     // Default IMPLICIT_PRECOMP_GEMM
} OrtCudnnConvAlgoSearch;

SessionOptionsAppendExecutionProvider_CUDA

OrtStatus* (*SessionOptionsAppendExecutionProvider_CUDA)(
    OrtSessionOptions* options,
    const OrtCUDAProviderOptions* cuda_options);
Append CUDA execution provider to session options. Parameters:
  • options: Session options
  • cuda_options: CUDA provider configuration
Returns: Error status if CUDA is not available.

Example:
OrtCUDAProviderOptions cuda_options = {
    .device_id = 0,
    .cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive,
    .gpu_mem_limit = SIZE_MAX,
    .arena_extend_strategy = 0,
    .do_copy_in_default_stream = 1,
    .has_user_compute_stream = 0,
    .user_compute_stream = NULL,
    .default_memory_arena_cfg = NULL,
    .tunable_op_enable = 0,
    .tunable_op_tuning_enable = 0,
    .tunable_op_max_tuning_duration_ms = 0
};

OrtSessionOptions* session_options;
api->CreateSessionOptions(&session_options);
api->SessionOptionsAppendExecutionProvider_CUDA(session_options, &cuda_options);

CUDA Provider V2 (Advanced)

OrtStatus* (*CreateCUDAProviderOptions)(OrtCUDAProviderOptionsV2** out);

OrtStatus* (*UpdateCUDAProviderOptions)(
    OrtCUDAProviderOptionsV2* cuda_options,
    const char* const* provider_options_keys,
    const char* const* provider_options_values,
    size_t num_keys);

OrtStatus* (*SessionOptionsAppendExecutionProvider_CUDA_V2)(
    OrtSessionOptions* options,
    const OrtCUDAProviderOptionsV2* cuda_options);

void (*ReleaseCUDAProviderOptions)(OrtCUDAProviderOptionsV2* options);
Example:
OrtCUDAProviderOptionsV2* cuda_options;
api->CreateCUDAProviderOptions(&cuda_options);

const char* keys[] = {"device_id", "gpu_mem_limit", "arena_extend_strategy"};
const char* values[] = {"0", "2147483648", "kSameAsRequested"};
api->UpdateCUDAProviderOptions(cuda_options, keys, values, 3);

api->SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options);
api->ReleaseCUDAProviderOptions(cuda_options);

ROCm Execution Provider

OrtROCMProviderOptions

typedef struct OrtROCMProviderOptions {
    int device_id;                        // ROCm device ID (default: 0)
    int miopen_conv_exhaustive_search;    // MIOpen exhaustive search (default: 0)
    size_t gpu_mem_limit;                 // GPU memory limit
    int arena_extend_strategy;
    int do_copy_in_default_stream;
    int has_user_compute_stream;
    void* user_compute_stream;
    OrtArenaCfg* default_memory_arena_cfg;
    int enable_hip_graph;
    int tunable_op_enable;
    int tunable_op_tuning_enable;
    int tunable_op_max_tuning_duration_ms;
} OrtROCMProviderOptions;

SessionOptionsAppendExecutionProvider_ROCM

OrtStatus* (*SessionOptionsAppendExecutionProvider_ROCM)(
    OrtSessionOptions* options,
    const OrtROCMProviderOptions* rocm_options);
Append ROCm execution provider. Example:
OrtROCMProviderOptions rocm_options = {
    .device_id = 0,
    .miopen_conv_exhaustive_search = 0,
    .gpu_mem_limit = SIZE_MAX,
    .arena_extend_strategy = 0,
    .do_copy_in_default_stream = 1,
    .enable_hip_graph = 0
};

api->SessionOptionsAppendExecutionProvider_ROCM(session_options, &rocm_options);

TensorRT Execution Provider

OrtTensorRTProviderOptions

typedef struct OrtTensorRTProviderOptions {
    int device_id;                                // CUDA device ID
    int has_user_compute_stream;
    void* user_compute_stream;
    int trt_max_partition_iterations;             // Max iterations for partitioning
    int trt_min_subgraph_size;                    // Min subgraph size for TRT
    size_t trt_max_workspace_size;                // Max workspace size
    int trt_fp16_enable;                          // Enable FP16 precision
    int trt_int8_enable;                          // Enable INT8 precision
    const char* trt_int8_calibration_table_name;  // INT8 calibration table
    int trt_int8_use_native_calibration_table;
    int trt_dla_enable;                           // Enable DLA
    int trt_dla_core;                             // DLA core number
    int trt_dump_subgraphs;                       // Dump TRT subgraphs
    int trt_engine_cache_enable;                  // Enable engine caching
    const char* trt_engine_cache_path;            // Engine cache path
    int trt_engine_decryption_enable;
    const char* trt_engine_decryption_lib_path;
    int trt_force_sequential_engine_build;
} OrtTensorRTProviderOptions;

SessionOptionsAppendExecutionProvider_TensorRT

OrtStatus* (*SessionOptionsAppendExecutionProvider_TensorRT)(
    OrtSessionOptions* options,
    const OrtTensorRTProviderOptions* tensorrt_options);
Example:
OrtTensorRTProviderOptions trt_options = {
    .device_id = 0,
    .has_user_compute_stream = 0,
    .trt_max_partition_iterations = 1000,
    .trt_min_subgraph_size = 1,
    .trt_max_workspace_size = 1 << 30,  // 1GB
    .trt_fp16_enable = 1,
    .trt_int8_enable = 0,
    .trt_int8_calibration_table_name = "",
    .trt_engine_cache_enable = 1,
    .trt_engine_cache_path = "./trt_cache"
};

api->SessionOptionsAppendExecutionProvider_TensorRT(session_options, &trt_options);

TensorRT Provider V2

OrtStatus* (*CreateTensorRTProviderOptions)(OrtTensorRTProviderOptionsV2** out);

OrtStatus* (*UpdateTensorRTProviderOptions)(
    OrtTensorRTProviderOptionsV2* tensorrt_options,
    const char* const* provider_options_keys,
    const char* const* provider_options_values,
    size_t num_keys);

OrtStatus* (*GetTensorRTProviderOptionsAsString)(
    const OrtTensorRTProviderOptionsV2* tensorrt_options,
    OrtAllocator* allocator,
    char** ptr);

void (*ReleaseTensorRTProviderOptions)(OrtTensorRTProviderOptionsV2* input);

OrtStatus* (*SessionOptionsAppendExecutionProvider_TensorRT_V2)(
    OrtSessionOptions* options,
    const OrtTensorRTProviderOptionsV2* tensorrt_options);
Example:
OrtTensorRTProviderOptionsV2* trt_options;
api->CreateTensorRTProviderOptions(&trt_options);

const char* keys[] = {
    "device_id",
    "trt_fp16_enable",
    "trt_max_workspace_size",
    "trt_engine_cache_enable",
    "trt_engine_cache_path"
};
const char* values[] = {"0", "1", "1073741824", "1", "./trt_cache"};

api->UpdateTensorRTProviderOptions(trt_options, keys, values, 5);
api->SessionOptionsAppendExecutionProvider_TensorRT_V2(session_options, trt_options);
api->ReleaseTensorRTProviderOptions(trt_options);

OpenVINO Execution Provider

OrtOpenVINOProviderOptions

typedef struct OrtOpenVINOProviderOptions {
    const char* device_type;              // "CPU_FP32", "GPU_FP32", etc.
    unsigned char enable_npu_fast_compile;
    const char* device_id;
    size_t num_of_threads;                // 0 = use default
    const char* cache_dir;
    void* context;
    unsigned char enable_opencl_throttling;
    unsigned char enable_dynamic_shapes;
} OrtOpenVINOProviderOptions;

SessionOptionsAppendExecutionProvider_OpenVINO

OrtStatus* (*SessionOptionsAppendExecutionProvider_OpenVINO)(
    OrtSessionOptions* options,
    const OrtOpenVINOProviderOptions* provider_options);
Example:
OrtOpenVINOProviderOptions openvino_options = {
    .device_type = "CPU_FP32",
    .enable_npu_fast_compile = 0,
    .device_id = "",
    .num_of_threads = 0,
    .cache_dir = "",
    .context = NULL,
    .enable_opencl_throttling = 0,
    .enable_dynamic_shapes = 0
};

api->SessionOptionsAppendExecutionProvider_OpenVINO(
    session_options, &openvino_options);

MIGraphX Execution Provider

OrtMIGraphXProviderOptions

typedef struct OrtMIGraphXProviderOptions {
    int device_id;
    int migraphx_fp16_enable;
    int migraphx_fp8_enable;
    int migraphx_int8_enable;
    int migraphx_use_native_calibration_table;
    const char* migraphx_int8_calibration_table_name;
    int migraphx_save_compiled_model;
    const char* migraphx_save_model_path;
    int migraphx_load_compiled_model;
    const char* migraphx_load_model_path;
    bool migraphx_exhaustive_tune;
    size_t migraphx_mem_limit;
    int migraphx_arena_extend_strategy;
} OrtMIGraphXProviderOptions;

SessionOptionsAppendExecutionProvider_MIGraphX

OrtStatus* (*SessionOptionsAppendExecutionProvider_MIGraphX)(
    OrtSessionOptions* options,
    const OrtMIGraphXProviderOptions* migraphx_options);

Generic Provider Configuration

SessionOptionsAppendExecutionProvider

OrtStatus* (*SessionOptionsAppendExecutionProvider)(
    OrtSessionOptions* options,
    const char* provider_name,
    const char* const* provider_options_keys,
    const char* const* provider_options_values,
    size_t num_keys);
Append any execution provider using key-value configuration. Parameters:
  • options: Session options
  • provider_name: Name of the provider (e.g., “CUDAExecutionProvider”)
  • provider_options_keys: Array of configuration keys
  • provider_options_values: Array of configuration values
  • num_keys: Number of key-value pairs
Example:
const char* keys[] = {"device_id", "gpu_mem_limit"};
const char* values[] = {"0", "2147483648"};

api->SessionOptionsAppendExecutionProvider(
    session_options,
    "CUDAExecutionProvider",
    keys,
    values,
    2
);

Device Management

SetCurrentGpuDeviceId

OrtStatus* (*SetCurrentGpuDeviceId)(int device_id);
Set the current GPU device ID for CUDA/TensorRT/ROCm providers. Parameters:
  • device_id: Device ID (must be less than total device count)
Example:
// Use GPU 1
api->SetCurrentGpuDeviceId(1);

GetCurrentGpuDeviceId

OrtStatus* (*GetCurrentGpuDeviceId)(int* device_id);
Get the current GPU device ID.

Memory Arena Configuration

CreateArenaCfg

OrtStatus* (*CreateArenaCfg)(
    size_t max_mem,
    int arena_extend_strategy,
    int initial_chunk_size_bytes,
    int max_dead_bytes_per_chunk,
    OrtArenaCfg** out);
Deprecated: Use CreateArenaCfgV2 instead.

CreateArenaCfgV2

OrtStatus* (*CreateArenaCfgV2)(
    const char* const* arena_config_keys,
    const size_t* arena_config_values,
    size_t num_keys,
    OrtArenaCfg** out);
Create arena configuration for memory management. Configuration Keys:
  • "max_mem": Maximum memory (0 = let ORT decide)
  • "arena_extend_strategy": 0=kNextPowerOfTwo, 1=kSameAsRequested (-1=default)
  • "initial_chunk_size_bytes": First allocation size (-1=default)
  • "max_dead_bytes_per_chunk": Threshold for chunk splitting (-1=default)
  • "initial_growth_chunk_size_bytes": Second allocation size (-1=default)
  • "max_power_of_two_extend_bytes": Max extension size for kNextPowerOfTwo (-1=default 1GB)
Example:
const char* keys[] = {
    "max_mem",
    "arena_extend_strategy",
    "initial_chunk_size_bytes"
};
size_t values[] = {
    1024 * 1024 * 1024,  // 1GB max
    0,                    // kNextPowerOfTwo
    1024 * 1024           // 1MB initial
};

OrtArenaCfg* arena_cfg;
api->CreateArenaCfgV2(keys, values, 3, &arena_cfg);

// Use with CUDA options
cuda_options.default_memory_arena_cfg = arena_cfg;

ReleaseArenaCfg

void (*ReleaseArenaCfg)(OrtArenaCfg* arena_cfg);
Free arena configuration.

Custom Operators

RegisterCustomOpsLibrary_V2

OrtStatus* (*RegisterCustomOpsLibrary_V2)(
    OrtSessionOptions* options,
    const ORTCHAR_T* library_path);
Register custom operators from a shared library. Parameters:
  • options: Session options
  • library_path: Path to shared library (.dll, .so, .dylib)
Expected Entry Point:
OrtStatus* RegisterCustomOps(OrtSessionOptions* options, const OrtApiBase* api);

EnableOrtCustomOps

OrtStatus* (*EnableOrtCustomOps)(OrtSessionOptions* options);
Enable built-in custom operators from onnxruntime-extensions.

Provider Priority

Providers are tried in the order they are added. Add the most preferred provider first:
// Try TensorRT first, fall back to CUDA, then CPU
api->SessionOptionsAppendExecutionProvider_TensorRT(options, &trt_options);
api->SessionOptionsAppendExecutionProvider_CUDA(options, &cuda_options);
// CPU provider is always available as fallback

Complete Example

OrtEnv* env;
api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env);

OrtSessionOptions* session_options;
api->CreateSessionOptions(&session_options);

// Configure CUDA provider
OrtCUDAProviderOptions cuda_options = {
    .device_id = 0,
    .gpu_mem_limit = SIZE_MAX,
    .arena_extend_strategy = 0,
    .do_copy_in_default_stream = 1
};

OrtStatus* status = api->SessionOptionsAppendExecutionProvider_CUDA(
    session_options, &cuda_options);

if (status != NULL) {
    printf("CUDA not available: %s\n", api->GetErrorMessage(status));
    api->ReleaseStatus(status);
    // Continue with CPU
}

// Create session
OrtSession* session;
api->CreateSession(env, model_path, session_options, &session);

// Run inference...

api->ReleaseSession(session);
api->ReleaseSessionOptions(session_options);
api->ReleaseEnv(env);

See Also