The OgaTokenizer class provides methods to encode text into token sequences and decode token sequences back into text. It’s essential for preparing input for the model and interpreting the generated output.
Class Definition
struct OgaTokenizer : OgaAbstract {
static std::unique_ptr<OgaTokenizer> Create(const OgaModel& model);
void Encode(const char* str, OgaSequences& sequences) const;
OgaString Decode(const int32_t* tokens_data, size_t tokens_length) const;
int32_t GetBosTokenId() const;
std::span<const int32_t> GetEosTokenIds() const;
int32_t GetPadTokenId() const;
int32_t ToTokenId(const char* str) const;
std::unique_ptr<OgaTensor> EncodeBatch(const char** strings, size_t count) const;
std::unique_ptr<OgaStringArray> DecodeBatch(const OgaTensor& tensor) const;
OgaString ApplyChatTemplate(const char* template_str, const char* messages,
const char* tools, bool add_generation_prompt) const;
void UpdateOptions(const char* const* keys, const char* const* values, size_t num_options);
};
Defined in: ~/workspace/source/src/ort_genai.h:301
Methods
Create()
Create a tokenizer instance from a model.
static std::unique_ptr<OgaTokenizer> Create(const OgaModel& model)
model: The model to create the tokenizer from.
Returns: std::unique_ptr<OgaTokenizer> - A unique pointer to the created tokenizer
Example
auto model = OgaModel::Create("phi-2");
auto tokenizer = OgaTokenizer::Create(*model);
Encode()
Encode a string into token sequences.
void Encode(const char* str, OgaSequences& sequences) const
str: The null-terminated text string to encode.
sequences: The sequences object in which to store the encoded tokens.
Throws: std::runtime_error if encoding fails
Example
auto sequences = OgaSequences::Create();
tokenizer->Encode("A great recipe for Kung Pao chicken is ", *sequences);
// Access the encoded tokens
const int32_t* tokens = sequences->SequenceData(0);
size_t token_count = sequences->SequenceCount(0);
Decode()
Decode token sequences back into text.
OgaString Decode(const int32_t* tokens_data, size_t tokens_length) const
tokens_data: Pointer to the token data array.
tokens_length: Number of tokens in the array.
Returns: OgaString - The decoded text string
Example
// Decode tokens from generator output
auto output_sequence = generator->GetSequenceData(0);
size_t output_length = generator->GetSequenceCount(0);
auto output_string = tokenizer->Decode(output_sequence, output_length);
std::cout << "Output: " << output_string << std::endl;
Example with std::span (C++20)
#if OGA_USE_SPAN
std::span<const int32_t> tokens = generator->GetSequence(0);
auto output_string = tokenizer->Decode(tokens);
#endif
GetBosTokenId()
Get the beginning-of-sequence token ID.
int32_t GetBosTokenId() const
Returns: int32_t - The BOS token ID
Example
int32_t bos_id = tokenizer->GetBosTokenId();
std::cout << "BOS token ID: " << bos_id << std::endl;
GetEosTokenIds()
Get the end-of-sequence token IDs.
std::span<const int32_t> GetEosTokenIds() const // C++20
std::vector<int32_t> GetEosTokenIds() const // Pre-C++20
Returns: A span or vector of EOS token IDs
Example
auto eos_ids = tokenizer->GetEosTokenIds();
for (int32_t eos_id : eos_ids) {
std::cout << "EOS token ID: " << eos_id << std::endl;
}
GetPadTokenId()
Get the padding token ID.
int32_t GetPadTokenId() const
Returns: int32_t - The padding token ID
ToTokenId()
Convert a string to its corresponding token ID.
int32_t ToTokenId(const char* str) const
Returns: int32_t - The token ID for the string
Example
int32_t token_id = tokenizer->ToTokenId("Hello");
std::cout << "Token ID for 'Hello': " << token_id << std::endl;
EncodeBatch()
Encode multiple strings in a batch.
std::unique_ptr<OgaTensor> EncodeBatch(const char** strings, size_t count) const
strings: Array of C strings to encode.
count: Number of strings in the array.
Returns: std::unique_ptr<OgaTensor> - Tensor containing the encoded batch
DecodeBatch()
Decode a batch of token sequences.
std::unique_ptr<OgaStringArray> DecodeBatch(const OgaTensor& tensor) const
tensor: Tensor containing the token sequences to decode.
Returns: std::unique_ptr<OgaStringArray> - Array of decoded strings
ApplyChatTemplate()
Apply a chat template to format messages.
OgaString ApplyChatTemplate(const char* template_str, const char* messages,
const char* tools, bool add_generation_prompt) const
template_str: The chat template string (can be nullptr to use the model's default template).
messages: JSON string containing the chat messages.
tools: JSON string containing tool definitions (optional; may be nullptr).
add_generation_prompt: Whether to add the generation prompt to the formatted output.
Returns: OgaString - The formatted prompt with chat template applied
Example
const char* messages = R"([
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the weather?"}
])";
auto prompt = tokenizer->ApplyChatTemplate(nullptr, messages, nullptr, true);
std::cout << "Formatted prompt: " << prompt << std::endl;
UpdateOptions()
Update tokenizer options.
void UpdateOptions(const char* const* keys, const char* const* values, size_t num_options)
keys
const char* const*
required
Array of option keys
values
const char* const*
required
Array of option values, parallel to keys
num_options
size_t
required
Number of key/value pairs in the arrays
Streaming Tokenization
For streaming output, use OgaTokenizerStream to decode tokens one at a time:
OgaTokenizerStream
struct OgaTokenizerStream : OgaAbstract {
static std::unique_ptr<OgaTokenizerStream> Create(const OgaTokenizer& tokenizer);
const char* Decode(int32_t token);
};
Defined in: ~/workspace/source/src/ort_genai.h:385
Example
// Create a streaming tokenizer
auto stream = OgaTokenizerStream::Create(*tokenizer);
// Decode tokens one at a time during generation
while (!generator->IsDone()) {
generator->GenerateNextToken();
const auto new_token = generator->GetNextTokens()[0];
const char* chunk = stream->Decode(new_token);
std::cout << chunk << std::flush;
}
Complete Example
From ~/workspace/source/examples/c/src/model_qa.cpp:126:
#include "ort_genai.h"
#include <iostream>
int main() {
OgaHandle handle;
try {
// Create model and tokenizer
auto model = OgaModel::Create("phi-2");
auto tokenizer = OgaTokenizer::Create(*model);
auto stream = OgaTokenizerStream::Create(*tokenizer);
// Encode input text
auto sequences = OgaSequences::Create();
tokenizer->Encode("A great recipe for Kung Pao chicken is ", *sequences);
// Create generator and append tokens
auto params = OgaGeneratorParams::Create(*model);
params->SetSearchOption("max_length", 200);
auto generator = OgaGenerator::Create(*model, *params);
generator->AppendTokenSequences(*sequences);
// Generate and decode output
std::cout << "Output: ";
while (!generator->IsDone()) {
generator->GenerateNextToken();
const auto new_token = generator->GetNextTokens()[0];
std::cout << stream->Decode(new_token) << std::flush;
}
std::cout << std::endl;
// Or decode the full sequence at once
auto output_sequence = generator->GetSequenceData(0);
auto output_length = generator->GetSequenceCount(0);
auto output_string = tokenizer->Decode(output_sequence, output_length);
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return -1;
}
return 0;
}
See Also