Skip to main content
The generator functions provide the core text generation capabilities, allowing you to generate tokens iteratively or process complete sequences.

Generator Creation

OgaCreateGeneratorParams
function
OgaResult* OGA_API_CALL OgaCreateGeneratorParams(const OgaModel* model, OgaGeneratorParams** out);
Creates generator parameters from a model.
Parameters:
  • model: The model to use for generation
  • out: Pointer to store the created generator params
Returns: NULL on success, or OgaResult* containing error message on failure
OgaDestroyGeneratorParams
function
void OGA_API_CALL OgaDestroyGeneratorParams(OgaGeneratorParams* params);
Destroys generator parameters.
Parameters:
  • params: The generator params to destroy
OgaCreateGenerator
function
OgaResult* OGA_API_CALL OgaCreateGenerator(
    const OgaModel* model,
    const OgaGeneratorParams* params,
    OgaGenerator** out
);
Creates a generator from a model and parameters.
Parameters:
  • model: The model to use for generation
  • params: The parameters to use for generation
  • out: Pointer to store the created generator
Returns: NULL on success, or OgaResult* containing error message on failure
Example:
// Step 1: build the generator parameters from the already-loaded model.
OgaGeneratorParams* params = NULL;
OgaResult* params_error = OgaCreateGeneratorParams(model, &params);
if (params_error != NULL) {
    fprintf(stderr, "Error: %s\n", OgaResultGetError(params_error));
    OgaDestroyResult(params_error);
    return -1;
}

// Step 2: create the generator. If this fails, the params from step 1
// still exist and have to be released before bailing out.
OgaGenerator* generator = NULL;
OgaResult* generator_error = OgaCreateGenerator(model, params, &generator);
if (generator_error != NULL) {
    fprintf(stderr, "Error: %s\n", OgaResultGetError(generator_error));
    OgaDestroyResult(generator_error);
    OgaDestroyGeneratorParams(params);
    return -1;
}

// Use generator...

// Release the generator first, then the params it was created from.
OgaDestroyGenerator(generator);
OgaDestroyGeneratorParams(params);
OgaDestroyGenerator
function
void OGA_API_CALL OgaDestroyGenerator(OgaGenerator* generator);
Destroys a generator.
Parameters:
  • generator: The generator to destroy

Generator Parameters

Search Parameters

OgaGeneratorParamsSetSearchNumber
function
OgaResult* OGA_API_CALL OgaGeneratorParamsSetSearchNumber(
    OgaGeneratorParams* params,
    const char* name,
    double value
);
Sets a numerical search parameter.
Parameters:
  • params: The generator params to modify
  • name: The parameter name (e.g., “max_length”, “temperature”, “top_p”, “top_k”)
  • value: The numerical value to set
Returns: NULL on success, or OgaResult* containing error message on failure
Example:
OgaGeneratorParamsSetSearchNumber(params, "max_length", 200);
OgaGeneratorParamsSetSearchNumber(params, "temperature", 0.7);
OgaGeneratorParamsSetSearchNumber(params, "top_p", 0.9);
OgaGeneratorParamsSetSearchNumber(params, "top_k", 50);
OgaGeneratorParamsSetSearchBool
function
OgaResult* OGA_API_CALL OgaGeneratorParamsSetSearchBool(
    OgaGeneratorParams* params,
    const char* name,
    bool value
);
Sets a boolean search parameter.
Parameters:
  • params: The generator params to modify
  • name: The parameter name
  • value: The boolean value to set
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGeneratorParamsGetSearchNumber
function
OgaResult* OGA_API_CALL OgaGeneratorParamsGetSearchNumber(
    const OgaGeneratorParams* params,
    const char* name,
    double* value
);
Gets a numerical search parameter value.
Parameters:
  • params: The generator params to query
  • name: The parameter name
  • value: Pointer to store the parameter value
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGeneratorParamsGetSearchBool
function
OgaResult* OGA_API_CALL OgaGeneratorParamsGetSearchBool(
    const OgaGeneratorParams* params,
    const char* name,
    bool* value
);
Gets a boolean search parameter value.
Parameters:
  • params: The generator params to query
  • name: The parameter name
  • value: Pointer to store the parameter value
Returns: NULL on success, or OgaResult* containing error message on failure

Guided Generation

OgaGeneratorParamsSetGuidance
function
OgaResult* OGA_API_CALL OgaGeneratorParamsSetGuidance(
    OgaGeneratorParams* params,
    const char* type,
    const char* data,
    bool enable_ff_tokens
);
Sets guidance for constrained generation.
Parameters:
  • params: The generator params to modify
  • type: The guidance type (“json_schema”, “regex”, or “lark_grammar”)
  • data: The guidance specification
  • enable_ff_tokens: Whether to enable fast-forward tokens (only valid with batch_size=1 and beam_size=1)
Returns: NULL on success, or OgaResult* containing error message on failure
Example:
// JSON schema guidance
const char* json_schema = "{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}}";
OgaResult* result = OgaGeneratorParamsSetGuidance(params, "json_schema", json_schema, false);

Input Management

OgaGenerator_AppendTokenSequences
function
OgaResult* OGA_API_CALL OgaGenerator_AppendTokenSequences(
    OgaGenerator* generator,
    const OgaSequences* p_sequences
);
Adds input token sequences to the generator.
Parameters:
  • generator: The generator to add tokens to
  • p_sequences: The input token sequences
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGenerator_AppendTokens
function
OgaResult* OGA_API_CALL OgaGenerator_AppendTokens(
    OgaGenerator* generator,
    const int32_t* input_ids,
    size_t input_ids_count
);
Adds input tokens directly to the generator.
Parameters:
  • generator: The generator to add tokens to
  • input_ids: Array of token IDs
  • input_ids_count: Number of tokens (batch_size × sequence_length)
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGenerator_TokenCount
function
size_t OGA_API_CALL OgaGenerator_TokenCount(const OgaGenerator* generator);
Returns the total number of tokens that have been added to the generator.
Parameters:
  • generator: The generator to query
Returns: Number of tokens
OgaGenerator_SetModelInput
function
OgaResult* OGA_API_CALL OgaGenerator_SetModelInput(
    OgaGenerator* generator,
    const char* name,
    OgaTensor* tensor
);
Sets additional model inputs that GenAI doesn’t handle automatically (e.g., LoRA inputs).
Parameters:
  • generator: The generator to set inputs on
  • name: Name of the model input (must match the model’s input name)
  • tensor: The tensor containing the input data
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGenerator_SetInputs
function
OgaResult* OGA_API_CALL OgaGenerator_SetInputs(
    OgaGenerator* generator,
    const OgaNamedTensors* named_tensors
);
Sets multiple model inputs at once.
Parameters:
  • generator: The generator to set inputs on
  • named_tensors: Collection of named tensors
Returns: NULL on success, or OgaResult* containing error message on failure

Generation Loop

OgaGenerator_GenerateNextToken
function
OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator);
Generates the next token based on the current state. This computes logits from the model and updates the generator’s internal state.
Parameters:
  • generator: The generator to advance
Returns: NULL on success, or OgaResult* containing error message on failure
Example:
// Generation loop: advance one step at a time until the generator reports
// that it is done.
while (!OgaGenerator_IsDone(generator)) {
    OgaResult* step_error = OgaGenerator_GenerateNextToken(generator);
    if (step_error == NULL) {
        continue;  // step succeeded, keep generating
    }

    // The step failed: report the message, release the error object, stop.
    fprintf(stderr, "Error: %s\n", OgaResultGetError(step_error));
    OgaDestroyResult(step_error);
    break;
}
OgaGenerator_GetNextTokens
function
OgaResult* OGA_API_CALL OgaGenerator_GetNextTokens(
    const OgaGenerator* generator,
    const int32_t** out,
    size_t* out_count
);
Returns the most recently generated tokens. The count matches the batch size.
Parameters:
  • generator: The generator to query
  • out: Pointer to store the token array (valid until next generator call)
  • out_count: Pointer to store the number of tokens
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGenerator_IsDone
function
bool OGA_API_CALL OgaGenerator_IsDone(OgaGenerator* generator);
Returns true if the generator has finished generating all sequences.
Parameters:
  • generator: The generator to check
Returns: true if generation is complete, false otherwise
OgaGenerator_IsSessionTerminated
function
bool OGA_API_CALL OgaGenerator_IsSessionTerminated(const OgaGenerator* generator);
Returns true if the session has been terminated.
Parameters:
  • generator: The generator to check
Returns: true if session is terminated, false otherwise

Sequence Access

OgaGenerator_GetSequenceCount
function
size_t OGA_API_CALL OgaGenerator_GetSequenceCount(const OgaGenerator* generator, size_t index);
Returns the number of tokens in the sequence at the given index.
Parameters:
  • generator: The generator to query
  • index: The sequence index
Returns: Number of tokens in the sequence
OgaGenerator_GetSequenceData
function
const int32_t* OGA_API_CALL OgaGenerator_GetSequenceData(
    const OgaGenerator* generator,
    size_t index
);
Returns a pointer to the sequence data at the given index.
Parameters:
  • generator: The generator to query
  • index: The sequence index
Returns: Pointer to the sequence data (owned by generator, valid until generator is destroyed)
Example:
// Get the first sequence
size_t seq_length = OgaGenerator_GetSequenceCount(generator, 0);
const int32_t* seq_data = OgaGenerator_GetSequenceData(generator, 0);

// Decode the sequence
const char* output = NULL;
OgaResult* result = OgaTokenizerDecode(tokenizer, seq_data, seq_length, &output);
if (result == NULL) {
    printf("Output: %s\n", output);
    OgaDestroyString(output);
}

Logits Access and Modification

OgaGenerator_GetLogits
function
OgaResult* OGA_API_CALL OgaGenerator_GetLogits(OgaGenerator* generator, OgaTensor** out);
Returns a copy of the logits from the model as a CPU tensor. Only contains the last token logits.
Parameters:
  • generator: The generator to get logits from
  • out: Pointer to store the logits tensor
Returns: NULL on success, or OgaResult* containing error message on failure
Note: The returned tensor must be destroyed with OgaDestroyTensor().
OgaGenerator_SetLogits
function
OgaResult* OGA_API_CALL OgaGenerator_SetLogits(OgaGenerator* generator, OgaTensor* tensor);
Sets the logits for the generator. Useful for guided generation.
Parameters:
  • generator: The generator to set logits on
  • tensor: The logits tensor (must have same shape as GetLogits output)
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGenerator_GetInput
function
OgaResult* OGA_API_CALL OgaGenerator_GetInput(
    const OgaGenerator* generator,
    const char* name,
    OgaTensor** out
);
Returns a copy of a model input as a CPU tensor.
Parameters:
  • generator: The generator to query
  • name: Name of the input tensor
  • out: Pointer to store the input tensor
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGenerator_GetOutput
function
OgaResult* OGA_API_CALL OgaGenerator_GetOutput(
    const OgaGenerator* generator,
    const char* name,
    OgaTensor** out
);
Returns a copy of a model output as a CPU tensor.
Parameters:
  • generator: The generator to query
  • name: Name of the output tensor
  • out: Pointer to store the output tensor
Returns: NULL on success, or OgaResult* containing error message on failure

Advanced Features

OgaGenerator_RewindTo
function
OgaResult* OGA_API_CALL OgaGenerator_RewindTo(OgaGenerator* generator, size_t new_length);
Rewinds the generator to a specific token length. Useful for backtracking during generation.
Parameters:
  • generator: The generator to rewind
  • new_length: The desired token length after rewinding
Returns: NULL on success, or OgaResult* containing error message on failure
OgaGenerator_SetRuntimeOption
function
OgaResult* OGA_API_CALL OgaGenerator_SetRuntimeOption(
    OgaGenerator* generator,
    const char* key,
    const char* value
);
Sets a runtime option for the generator.
Parameters:
  • generator: The generator to configure
  • key: The runtime option name
  • value: The runtime option value
Returns: NULL on success, or OgaResult* containing error message on failure

Complete Generation Example

#include "ort_genai_c.h"
#include <stdio.h>
#include <stdbool.h>

int main() {
    OgaResult* result = NULL;
    
    // Load model
    OgaModel* model = NULL;
    result = OgaCreateModel("/path/to/model", &model);
    if (result != NULL) {
        fprintf(stderr, "Error: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        return -1;
    }
    
    // Create tokenizer
    OgaTokenizer* tokenizer = NULL;
    result = OgaCreateTokenizer(model, &tokenizer);
    if (result != NULL) {
        fprintf(stderr, "Error: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        OgaDestroyModel(model);
        return -1;
    }
    
    // Encode input
    OgaSequences* sequences = NULL;
    OgaCreateSequences(&sequences);
    const char* prompt = "The quick brown fox";
    result = OgaTokenizerEncode(tokenizer, prompt, sequences);
    if (result != NULL) {
        fprintf(stderr, "Error: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        OgaDestroySequences(sequences);
        OgaDestroyTokenizer(tokenizer);
        OgaDestroyModel(model);
        return -1;
    }
    
    // Create generator params
    OgaGeneratorParams* params = NULL;
    result = OgaCreateGeneratorParams(model, &params);
    if (result != NULL) {
        fprintf(stderr, "Error: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        OgaDestroySequences(sequences);
        OgaDestroyTokenizer(tokenizer);
        OgaDestroyModel(model);
        return -1;
    }
    
    // Set generation parameters
    OgaGeneratorParamsSetSearchNumber(params, "max_length", 100);
    OgaGeneratorParamsSetSearchNumber(params, "temperature", 0.8);
    OgaGeneratorParamsSetSearchNumber(params, "top_p", 0.95);
    
    // Create generator
    OgaGenerator* generator = NULL;
    result = OgaCreateGenerator(model, params, &generator);
    if (result != NULL) {
        fprintf(stderr, "Error: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        OgaDestroyGeneratorParams(params);
        OgaDestroySequences(sequences);
        OgaDestroyTokenizer(tokenizer);
        OgaDestroyModel(model);
        return -1;
    }
    
    // Add input tokens
    result = OgaGenerator_AppendTokenSequences(generator, sequences);
    if (result != NULL) {
        fprintf(stderr, "Error: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        OgaDestroyGenerator(generator);
        OgaDestroyGeneratorParams(params);
        OgaDestroySequences(sequences);
        OgaDestroyTokenizer(tokenizer);
        OgaDestroyModel(model);
        return -1;
    }
    
    // Generate tokens
    printf("Generating...\n");
    while (!OgaGenerator_IsDone(generator)) {
        result = OgaGenerator_GenerateNextToken(generator);
        if (result != NULL) {
            fprintf(stderr, "Error during generation: %s\n", OgaResultGetError(result));
            OgaDestroyResult(result);
            break;
        }
    }
    
    // Get generated sequence
    size_t seq_length = OgaGenerator_GetSequenceCount(generator, 0);
    const int32_t* seq_data = OgaGenerator_GetSequenceData(generator, 0);
    
    // Decode output
    const char* output = NULL;
    result = OgaTokenizerDecode(tokenizer, seq_data, seq_length, &output);
    if (result == NULL) {
        printf("Generated text: %s\n", output);
        OgaDestroyString(output);
    } else {
        fprintf(stderr, "Error decoding: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
    }
    
    // Cleanup
    OgaDestroyGenerator(generator);
    OgaDestroyGeneratorParams(params);
    OgaDestroySequences(sequences);
    OgaDestroyTokenizer(tokenizer);
    OgaDestroyModel(model);
    OgaShutdown();
    
    return 0;
}

Streaming Generation Example

#include "ort_genai_c.h"
#include <stdio.h>

/*
 * Streaming example: generate one step at a time and print each decoded text
 * chunk as soon as it is available.
 *
 * Model, tokenizer and generator setup are elided; see the complete example.
 * Any failing call inside the loop leaves its error object in `result` and
 * stops the loop, so the error is reported and released in one place.
 *
 * Returns 0 on success, -1 if any step failed.
 */
int main(void) {
    int exit_code = 0;

    // ... (model and tokenizer creation as above)

    // Create tokenizer stream for incremental decoding
    OgaTokenizerStream* stream = NULL;
    OgaResult* result = OgaCreateTokenizerStream(tokenizer, &stream);
    if (result != NULL) {
        fprintf(stderr, "Error: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        // ... (destroy the tokenizer and model created above)
        return -1;
    }

    // ... (generator creation and input setup as above)

    // Generate with streaming output
    printf("Output: ");
    fflush(stdout);

    while (!OgaGenerator_IsDone(generator)) {
        result = OgaGenerator_GenerateNextToken(generator);
        if (result != NULL) {
            break;
        }

        // Get the newly generated tokens. The count matches the batch size,
        // and this example streams only the first entry.
        const int32_t* tokens = NULL;
        size_t token_count = 0;
        result = OgaGenerator_GetNextTokens(generator, &tokens, &token_count);
        if (result != NULL) {
            break;
        }
        if (token_count == 0) {
            continue;
        }

        // Decode token incrementally.
        // NOTE(review): chunk is not freed here; it is presumably owned by
        // the stream — confirm against ort_genai_c.h.
        const char* chunk = NULL;
        result = OgaTokenizerStreamDecode(stream, tokens[0], &chunk);
        if (result != NULL) {
            break;
        }
        if (chunk != NULL && chunk[0] != '\0') {
            printf("%s", chunk);
            fflush(stdout);
        }
    }

    // A non-NULL result here is the error that stopped the loop; report it
    // and release it so it does not leak.
    if (result != NULL) {
        fprintf(stderr, "\nError: %s\n", OgaResultGetError(result));
        OgaDestroyResult(result);
        exit_code = -1;
    }
    printf("\n");

    // Cleanup
    OgaDestroyTokenizerStream(stream);
    OgaDestroyGenerator(generator);
    // ... (other cleanup)

    return exit_code;
}

See Also

C API Overview

Learn about memory management and error handling

Model Functions

Create and configure models

Build docs developers (and LLMs) love