The OgaGeneratorParams class configures how text generation is performed. It provides methods to set search options like temperature, top-p, max length, and other parameters that control the generation process.
Class Definition
struct OgaGeneratorParams : OgaAbstract {
static std::unique_ptr<OgaGeneratorParams> Create(const OgaModel& model);
void SetSearchOption(const char* name, double value);
void SetSearchOptionBool(const char* name, bool value);
void SetGuidance(const char* type, const char* data, bool enable_ff_tokens = false);
double GetSearchNumber(const char* name) const;
bool GetSearchBool(const char* name) const;
};
Defined in: ~/workspace/source/src/ort_genai.h:412
Methods
Create()
Create generator parameters for a model.
static std::unique_ptr<OgaGeneratorParams> Create(const OgaModel& model)
`model`: The model to create parameters for
Returns: std::unique_ptr<OgaGeneratorParams> - A unique pointer to the created parameters
Throws: std::runtime_error if parameter creation fails
Example
auto model = OgaModel::Create("phi-2");
auto params = OgaGeneratorParams::Create(*model);
SetSearchOption()
Set a numeric search option for generation.
void SetSearchOption(const char* name, double value)
`name`: The name of the search option
`value`: The numeric value to assign to the option
Throws: std::runtime_error if the option name is invalid or value is out of range
Common Search Options
| Option | Type | Description | Default |
|---|---|---|---|
| max_length | int | Maximum total sequence length (prompt + generation) | 2048 |
| min_length | int | Minimum sequence length | 0 |
| batch_size | int | Number of sequences to generate | 1 |
| temperature | float | Sampling temperature (higher = more random) | 1.0 |
| top_p | float | Nucleus sampling threshold | 1.0 |
| top_k | int | Top-K sampling threshold | 50 |
| repetition_penalty | float | Penalty for repeated tokens | 1.0 |
| length_penalty | float | Penalty for sequence length | 1.0 |
| num_beams | int | Number of beams for beam search | 1 |
| num_return_sequences | int | Number of sequences to return | 1 |
| diversity_penalty | float | Penalty for diverse beam search | 0.0 |
Example
auto params = OgaGeneratorParams::Create(*model);
// Set maximum generation length
params->SetSearchOption("max_length", 200);
// Set batch size
params->SetSearchOption("batch_size", 1);
// Configure sampling
params->SetSearchOption("temperature", 0.7);
params->SetSearchOption("top_p", 0.9);
params->SetSearchOption("top_k", 40);
// Set repetition penalty
params->SetSearchOption("repetition_penalty", 1.1);
From ~/workspace/source/src/ort_genai.h:30
SetSearchOptionBool()
Set a boolean search option.
void SetSearchOptionBool(const char* name, bool value)
`name`: The name of the boolean search option
`value`: The boolean value to assign to the option
Common Boolean Options
| Option | Description | Default |
|---|---|---|
| do_sample | Enable sampling (vs greedy decoding) | false |
| early_stopping | Stop beam search when enough candidates found | false |
| past_present_share_buffer | Share KV cache buffers for memory efficiency | true |
Example
// Enable sampling
params->SetSearchOptionBool("do_sample", true);
// Enable early stopping for beam search
params->SetSearchOptionBool("early_stopping", true);
SetGuidance()
Set guidance for constrained generation (e.g., JSON schema, function calling).
void SetGuidance(const char* type, const char* data, bool enable_ff_tokens = false)
`type`: The guidance type (e.g., "json", "regex")
`data`: The guidance data (e.g., JSON schema, regex pattern)
`enable_ff_tokens`: Enable fast-forward tokens optimization (default: false)
Throws: std::runtime_error if guidance setup fails
Example
// JSON schema guidance for structured output
const char* json_schema = R"({
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "number"}
},
"required": ["name", "age"]
})";
params->SetGuidance("json", json_schema);
From ~/workspace/source/examples/c/src/model_qa.cpp:99
GetSearchNumber()
Get the current value of a numeric search option.
double GetSearchNumber(const char* name) const
`name`: The name of the search option
Returns: double - The current value of the option
Throws: std::runtime_error if the option name is invalid
Example
double max_length = params->GetSearchNumber("max_length");
std::cout << "Max length: " << max_length << std::endl;
GetSearchBool()
Get the current value of a boolean search option.
bool GetSearchBool(const char* name) const
`name`: The name of the boolean search option
Returns: bool - The current value of the option
Throws: std::runtime_error if the option name is invalid
Example
bool do_sample = params->GetSearchBool("do_sample");
std::cout << "Sampling enabled: " << (do_sample ? "yes" : "no") << std::endl;
Generation Strategies
Greedy Decoding (Default)
Simply select the most likely token at each step.
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOption("max_length", 100);
// No additional options needed - greedy is default
Sampling
Randomly sample from the probability distribution.
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOptionBool("do_sample", true);
params->SetSearchOption("temperature", 0.8);
params->SetSearchOption("top_p", 0.95);
params->SetSearchOption("top_k", 50);
Beam Search
Explore multiple hypotheses in parallel.
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOption("num_beams", 5);
params->SetSearchOption("num_return_sequences", 3);
params->SetSearchOptionBool("early_stopping", true);
Diverse Beam Search
Generate diverse outputs using beam search.
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOption("num_beams", 5);
params->SetSearchOption("num_return_sequences", 3);
params->SetSearchOption("diversity_penalty", 1.0);
Complete Examples
Basic Configuration
From ~/workspace/source/src/ort_genai.h:29:
#include "ort_genai.h"
#include <iostream>
int main() {
OgaHandle handle;
try {
auto model = OgaModel::Create("phi-2");
auto tokenizer = OgaTokenizer::Create(*model);
// Create and configure generation parameters
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOption("max_length", 200);
params->SetSearchOption("batch_size", 1);
// Encode prompt
auto sequences = OgaSequences::Create();
tokenizer->Encode("A great recipe for Kung Pao chicken is ", *sequences);
// Create generator with parameters
auto generator = OgaGenerator::Create(*model, *params);
generator->AppendTokenSequences(*sequences);
// Generate...
while (!generator->IsDone()) {
generator->GenerateNextToken();
}
// Decode output
auto output_sequence = generator->GetSequenceData(0);
auto output_length = generator->GetSequenceCount(0);
auto output_string = tokenizer->Decode(output_sequence, output_length);
std::cout << "Output: " << output_string << std::endl;
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return -1;
}
return 0;
}
Advanced Sampling Configuration
// Configure advanced sampling with temperature, top-p, and penalties
auto params = OgaGeneratorParams::Create(*model);
// Basic length constraints
params->SetSearchOption("max_length", 512);
params->SetSearchOption("min_length", 50);
// Enable sampling
params->SetSearchOptionBool("do_sample", true);
// Sampling parameters
params->SetSearchOption("temperature", 0.7); // Lower = more focused
params->SetSearchOption("top_p", 0.9); // Nucleus sampling
params->SetSearchOption("top_k", 40); // Limit to top 40 tokens
// Penalties to reduce repetition
params->SetSearchOption("repetition_penalty", 1.2);
params->SetSearchOption("length_penalty", 1.0);
// Create generator
auto generator = OgaGenerator::Create(*model, *params);
Constrained Generation with JSON Schema
From ~/workspace/source/examples/c/src/model_qa.cpp:98:
// Define JSON schema for structured output
const char* json_schema = R"({
"type": "object",
"properties": {
"answer": {"type": "string"},
"confidence": {"type": "number", "minimum": 0, "maximum": 1},
"sources": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["answer", "confidence"]
})";
// Configure parameters with guidance
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOption("max_length", 500);
params->SetGuidance("json", json_schema);
// Generate with schema constraints
auto generator = OgaGenerator::Create(*model, *params);
// ... generation code ...
Multi-Sequence Beam Search
// Configure beam search to return multiple diverse outputs
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOption("max_length", 200);
params->SetSearchOption("num_beams", 5); // Use 5 beams
params->SetSearchOption("num_return_sequences", 3); // Return top 3
params->SetSearchOption("diversity_penalty", 0.5); // Encourage diversity
params->SetSearchOptionBool("early_stopping", true); // Stop when done
auto generator = OgaGenerator::Create(*model, *params);
generator->AppendTokenSequences(*sequences);
while (!generator->IsDone()) {
generator->GenerateNextToken();
}
// Get all returned sequences
for (size_t i = 0; i < 3; ++i) {
auto seq_data = generator->GetSequenceData(i);
auto seq_length = generator->GetSequenceCount(i);
auto text = tokenizer->Decode(seq_data, seq_length);
std::cout << "Sequence " << i << ": " << text << "\n\n";
}
Parameter Validation
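The library validates search parameters, and invalid values cause an exception to be thrown. Depending on the option, the error may be raised when the option is set or when the generator is created, so wrap both calls in the same try block: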
try {
auto params = OgaGeneratorParams::Create(*model);
// This might throw if value is invalid
params->SetSearchOption("temperature", -1.0); // Temperature must be > 0
auto generator = OgaGenerator::Create(*model, *params);
} catch (const std::runtime_error& e) {
std::cerr << "Invalid parameter: " << e.what() << std::endl;
}
See Also