OgaTokenizer
The OgaTokenizer class provides methods to encode text into token sequences and decode token sequences back into text. It’s essential for preparing input for the model and interpreting the generated output.

Class Definition

struct OgaTokenizer : OgaAbstract {
  static std::unique_ptr<OgaTokenizer> Create(const OgaModel& model);
  
  void Encode(const char* str, OgaSequences& sequences) const;
  OgaString Decode(const int32_t* tokens_data, size_t tokens_length) const;
  
  int32_t GetBosTokenId() const;
  std::span<const int32_t> GetEosTokenIds() const;  // C++20; returns std::vector<int32_t> pre-C++20
  int32_t GetPadTokenId() const;
  int32_t ToTokenId(const char* str) const;
  
  std::unique_ptr<OgaTensor> EncodeBatch(const char** strings, size_t count) const;
  std::unique_ptr<OgaStringArray> DecodeBatch(const OgaTensor& tensor) const;
  
  OgaString ApplyChatTemplate(const char* template_str, const char* messages, 
                              const char* tools, bool add_generation_prompt) const;
  void UpdateOptions(const char* const* keys, const char* const* values, size_t num_options);
};
Defined in: ~/workspace/source/src/ort_genai.h:301

Methods

Create()

Create a tokenizer instance from a model.
static std::unique_ptr<OgaTokenizer> Create(const OgaModel& model)
model
const OgaModel&
required
The model to create the tokenizer from
Returns: std::unique_ptr<OgaTokenizer> - A unique pointer to the created tokenizer

Example

auto model = OgaModel::Create("phi-2");
auto tokenizer = OgaTokenizer::Create(*model);

Encode()

Encode a string into token sequences.
void Encode(const char* str, OgaSequences& sequences) const
str
const char*
required
The input text to encode
sequences
OgaSequences&
required
The sequences object to store the encoded tokens
Throws: std::runtime_error if encoding fails

Example

auto sequences = OgaSequences::Create();
tokenizer->Encode("A great recipe for Kung Pao chicken is ", *sequences);

// Access the encoded tokens
const int32_t* tokens = sequences->SequenceData(0);
size_t token_count = sequences->SequenceCount(0);

Decode()

Decode token sequences back into text.
OgaString Decode(const int32_t* tokens_data, size_t tokens_length) const
tokens_data
const int32_t*
required
Pointer to the token data array
tokens_length
size_t
required
Number of tokens in the array
Returns: OgaString - The decoded text string

Example

// Decode tokens from generator output
auto output_sequence = generator->GetSequenceData(0);
size_t output_length = generator->GetSequenceCount(0);
auto output_string = tokenizer->Decode(output_sequence, output_length);

std::cout << "Output: " << output_string << std::endl;

Example with std::span (C++20)

#if OGA_USE_SPAN
std::span<const int32_t> tokens = generator->GetSequence(0);
auto output_string = tokenizer->Decode(tokens);
#endif

GetBosTokenId()

Get the beginning-of-sequence token ID.
int32_t GetBosTokenId() const
Returns: int32_t - The BOS token ID

Example

int32_t bos_id = tokenizer->GetBosTokenId();
std::cout << "BOS token ID: " << bos_id << std::endl;

GetEosTokenIds()

Get the end-of-sequence token IDs.
std::span<const int32_t> GetEosTokenIds() const  // C++20
std::vector<int32_t> GetEosTokenIds() const      // Pre-C++20
Returns: A span or vector of EOS token IDs

Example

auto eos_ids = tokenizer->GetEosTokenIds();
for (int32_t eos_id : eos_ids) {
  std::cout << "EOS token ID: " << eos_id << std::endl;
}

GetPadTokenId()

Get the padding token ID.
int32_t GetPadTokenId() const
Returns: int32_t - The padding token ID

ToTokenId()

Convert a string to its corresponding token ID.
int32_t ToTokenId(const char* str) const
str
const char*
required
The string to convert
Returns: int32_t - The token ID for the string

Example

int32_t token_id = tokenizer->ToTokenId("Hello");
std::cout << "Token ID for 'Hello': " << token_id << std::endl;

EncodeBatch()

Encode multiple strings in a batch.
std::unique_ptr<OgaTensor> EncodeBatch(const char** strings, size_t count) const
strings
const char**
required
Array of strings to encode
count
size_t
required
Number of strings in the array
Returns: std::unique_ptr<OgaTensor> - Tensor containing the encoded batch

DecodeBatch()

Decode a batch of token sequences.
std::unique_ptr<OgaStringArray> DecodeBatch(const OgaTensor& tensor) const
tensor
const OgaTensor&
required
Tensor containing the token sequences to decode
Returns: std::unique_ptr<OgaStringArray> - Array of decoded strings

ApplyChatTemplate()

Apply a chat template to format messages.
OgaString ApplyChatTemplate(const char* template_str, const char* messages, 
                           const char* tools, bool add_generation_prompt) const
template_str
const char*
required
The chat template string (can be nullptr to use default)
messages
const char*
required
JSON string containing the chat messages
tools
const char*
JSON string containing tool definitions (optional)
add_generation_prompt
bool
required
Whether to add the generation prompt
Returns: OgaString - The formatted prompt with chat template applied

Example

const char* messages = R"([
  {"role": "system", "content": "You are a helpful assistant."},
  {"role": "user", "content": "What is the weather?"}
])";

auto prompt = tokenizer->ApplyChatTemplate(nullptr, messages, nullptr, true);
std::cout << "Formatted prompt: " << prompt << std::endl;

UpdateOptions()

Update tokenizer options.
void UpdateOptions(const char* const* keys, const char* const* values, size_t num_options)
keys
const char* const*
required
Array of option keys
values
const char* const*
required
Array of option values
num_options
size_t
required
Number of options

Streaming Tokenization

For streaming output, use OgaTokenizerStream to decode tokens one at a time:

OgaTokenizerStream

struct OgaTokenizerStream : OgaAbstract {
  static std::unique_ptr<OgaTokenizerStream> Create(const OgaTokenizer& tokenizer);
  const char* Decode(int32_t token);
};
Defined in: ~/workspace/source/src/ort_genai.h:385

Example

// Create a streaming tokenizer
auto stream = OgaTokenizerStream::Create(*tokenizer);

// Decode tokens one at a time during generation
while (!generator->IsDone()) {
  generator->GenerateNextToken();
  
  const auto new_token = generator->GetNextTokens()[0];
  const char* chunk = stream->Decode(new_token);
  std::cout << chunk << std::flush;
}

Complete Example

From ~/workspace/source/examples/c/src/model_qa.cpp:126:
#include "ort_genai.h"
#include <iostream>

int main() {
  OgaHandle handle;
  
  try {
    // Create model and tokenizer
    auto model = OgaModel::Create("phi-2");
    auto tokenizer = OgaTokenizer::Create(*model);
    auto stream = OgaTokenizerStream::Create(*tokenizer);
    
    // Encode input text
    auto sequences = OgaSequences::Create();
    tokenizer->Encode("A great recipe for Kung Pao chicken is ", *sequences);
    
    // Create generator and append tokens
    auto params = OgaGeneratorParams::Create(*model);
    params->SetSearchOption("max_length", 200);
    
    auto generator = OgaGenerator::Create(*model, *params);
    generator->AppendTokenSequences(*sequences);
    
    // Generate and decode output
    std::cout << "Output: ";
    while (!generator->IsDone()) {
      generator->GenerateNextToken();
      const auto new_token = generator->GetNextTokens()[0];
      std::cout << stream->Decode(new_token) << std::flush;
    }
    std::cout << std::endl;
    
    // Or decode the full sequence at once
    auto output_sequence = generator->GetSequenceData(0);
    auto output_length = generator->GetSequenceCount(0);
    auto output_string = tokenizer->Decode(output_sequence, output_length);
    
  } catch (const std::exception& e) {
    std::cerr << "Error: " << e.what() << std::endl;
    return -1;
  }
  
  return 0;
}

See Also

- OgaModel — create the model a tokenizer is built from
- OgaSequences — container for encoded token sequences
- OgaGenerator / OgaGeneratorParams — generate tokens to decode
- OgaTokenizerStream — incremental decoding for streaming output