Skip to main content

Overview

The useTokenizer hook manages a tokenizer instance for encoding text to token IDs and decoding token IDs back to text. It’s useful for text preprocessing, analyzing token counts, and understanding model inputs.

Import

import { useTokenizer } from 'react-native-executorch';

Hook Signature

useTokenizer({ tokenizer, preventLoad }: TokenizerProps): TokenizerType

Parameters

tokenizer
object
required
Object containing the tokenizer source — a `tokenizerSource` field pointing to a remote URL or a bundled asset (e.g. `require('./tokenizer.json')`), as shown in the examples below.
preventLoad
boolean
default: false
If true, prevents automatic tokenizer loading and downloading when the hook mounts

Return Value

Returns an object with the following properties and methods:

State Properties

isReady
boolean
Indicates whether the tokenizer has successfully loaded and is ready for use.
isGenerating
boolean
Indicates whether the tokenizer is currently processing data.
downloadProgress
number
Download progress as a value between 0 and 1.
error
RnExecutorchError | null
Contains error details if the tokenizer fails to load or encounters an error during processing.

Methods

encode
function
Converts a string into an array of token IDs.
encode(text: string): Promise<number[]>
Returns a promise that resolves to an array of token IDs.
decode
function
Converts an array of token IDs into a string.
decode(tokens: number[], skipSpecialTokens?: boolean): Promise<string>
Returns a promise that resolves to the decoded text string.
getVocabSize
function
Returns the size of the tokenizer’s vocabulary.
getVocabSize(): Promise<number>
Returns a promise that resolves to the vocabulary size.
idToToken
function
Returns the token string associated with the given ID.
idToToken(id: number): Promise<string>
Returns a promise that resolves to the token string.
tokenToId
function
Returns the ID associated with the given token string.
tokenToId(token: string): Promise<number>
Returns a promise that resolves to the token ID.

Usage Examples

Basic Encoding and Decoding

import { useTokenizer } from 'react-native-executorch';
import { useEffect, useState } from 'react';

// Demonstrates round-tripping text through the tokenizer: encode the
// current input into token IDs, then decode those IDs back to a string.
function TokenizerDemo() {
  const [text, setText] = useState('Hello, world!');
  const [tokenIds, setTokenIds] = useState<number[]>([]);
  const [roundTrip, setRoundTrip] = useState('');
  
  const tokenizer = useTokenizer({
    tokenizer: {
      tokenizerSource: 'https://huggingface.co/.../tokenizer.json',
    },
  });
  
  // Encode the current input once the tokenizer has finished loading.
  const encodeCurrentText = async () => {
    if (!tokenizer.isReady) return;
    
    try {
      const ids = await tokenizer.encode(text);
      setTokenIds(ids);
      console.log('Token IDs:', ids);
    } catch (error) {
      console.error('Encoding failed:', error);
    }
  };
  
  // Decode the stored token IDs back into text, skipping special tokens.
  const decodeTokenIds = async () => {
    if (!tokenizer.isReady || tokenIds.length === 0) return;
    
    try {
      const decoded = await tokenizer.decode(tokenIds, true);
      setRoundTrip(decoded);
      console.log('Decoded text:', decoded);
    } catch (error) {
      console.error('Decoding failed:', error);
    }
  };
  
  return (
    <View>
      <Text>Status: {tokenizer.isReady ? 'Ready' : 'Loading...'}</Text>
      
      <TextInput
        value={text}
        onChangeText={setText}
        placeholder="Enter text to tokenize"
      />
      
      <Button
        title="Encode"
        onPress={encodeCurrentText}
        disabled={!tokenizer.isReady}
      />
      
      <Text>Tokens: {JSON.stringify(tokenIds)}</Text>
      <Text>Token count: {tokenIds.length}</Text>
      
      <Button
        title="Decode"
        onPress={decodeTokenIds}
        disabled={!tokenizer.isReady || tokenIds.length === 0}
      />
      
      <Text>Decoded: {roundTrip}</Text>
    </View>
  );
}

Token Count Calculator

import { useTokenizer } from 'react-native-executorch';
import { useRef, useState } from 'react';

// Live token counter: re-tokenizes the input on every keystroke and shows
// the token count alongside the character count.
function TokenCounter() {
  const [text, setText] = useState('');
  const [count, setCount] = useState(0);
  // encode() is async and fires on every keystroke, so responses can
  // resolve out of order. A monotonically increasing request id lets us
  // discard stale results so the displayed count always matches the
  // latest input.
  const requestId = useRef(0);
  
  const tokenizer = useTokenizer({
    tokenizer: {
      tokenizerSource: require('./tokenizer.json'),
    },
  });
  
  const countTokens = async (input: string) => {
    if (!tokenizer.isReady) return;
    
    const id = ++requestId.current;
    try {
      const tokens = await tokenizer.encode(input);
      // Only the most recent request may update the UI.
      if (id === requestId.current) {
        setCount(tokens.length);
      }
    } catch (error) {
      console.error('Token counting failed:', error);
    }
  };
  
  return (
    <View>
      <TextInput
        value={text}
        onChangeText={(value) => {
          setText(value);
          countTokens(value);
        }}
        multiline
        placeholder="Enter text to count tokens..."
      />
      
      <Text>Token count: {count}</Text>
      <Text>Character count: {text.length}</Text>
    </View>
  );
}

Vocabulary Inspector

import { useTokenizer } from 'react-native-executorch';
import { useEffect, useState } from 'react';

// Inspects the tokenizer vocabulary: shows its size and lets the user look
// up a token string by ID and an ID by token string.
function VocabularyInspector() {
  const [vocabSize, setVocabSize] = useState(0);
  const [tokenId, setTokenId] = useState('');
  const [tokenString, setTokenString] = useState('');
  
  const tokenizer = useTokenizer({
    tokenizer: {
      tokenizerSource: 'https://example.com/tokenizer.json',
    },
  });
  
  // Fetch the vocabulary size once the tokenizer becomes ready.
  useEffect(() => {
    const loadVocabInfo = async () => {
      if (!tokenizer.isReady) return;
      
      try {
        const size = await tokenizer.getVocabSize();
        setVocabSize(size);
      } catch (error) {
        console.error('Failed to get vocab size:', error);
      }
    };
    
    loadVocabInfo();
  }, [tokenizer.isReady]);
  
  const lookupToken = async (id: number) => {
    // Guard against calls before the tokenizer has loaded (methods throw
    // if invoked while the tokenizer is not ready).
    if (!tokenizer.isReady) return;
    
    try {
      const token = await tokenizer.idToToken(id);
      setTokenString(token);
    } catch (error) {
      console.error('Token lookup failed:', error);
    }
  };
  
  const lookupId = async (token: string) => {
    if (!tokenizer.isReady) return;
    
    try {
      const id = await tokenizer.tokenToId(token);
      setTokenId(id.toString());
    } catch (error) {
      console.error('ID lookup failed:', error);
    }
  };
  
  return (
    <View>
      <Text>Vocabulary size: {vocabSize}</Text>
      
      <TextInput
        placeholder="Enter token ID"
        keyboardType="numeric"
        onChangeText={(value) => {
          // Explicit radix: token IDs are always decimal.
          const id = parseInt(value, 10);
          if (!isNaN(id)) lookupToken(id);
        }}
      />
      <Text>Token: {tokenString}</Text>
      
      <TextInput
        placeholder="Enter token string"
        onChangeText={lookupId}
      />
      <Text>ID: {tokenId}</Text>
    </View>
  );
}

Decode with Special Tokens

import { useTokenizer } from 'react-native-executorch';
// Bug fix: the snippet uses useEffect below but previously imported only
// useState, so the example did not compile as written.
import { useEffect, useState } from 'react';

// Decodes the same token-ID sequence twice to illustrate the effect of the
// skipSpecialTokens flag on decode().
function SpecialTokensDemo() {
  const [tokens] = useState([1, 15043, 29892, 3186, 29991, 2]); // Example tokens
  const [withSpecial, setWithSpecial] = useState('');
  const [withoutSpecial, setWithoutSpecial] = useState('');
  
  const tokenizer = useTokenizer({
    tokenizer: {
      tokenizerSource: require('./tokenizer.json'),
    },
  });
  
  useEffect(() => {
    const decodeTokens = async () => {
      if (!tokenizer.isReady) return;
      
      try {
        // Decode with special tokens (skipSpecialTokens = false)
        const decoded1 = await tokenizer.decode(tokens, false);
        setWithSpecial(decoded1);
        
        // Decode without special tokens (skipSpecialTokens = true)
        const decoded2 = await tokenizer.decode(tokens, true);
        setWithoutSpecial(decoded2);
      } catch (error) {
        console.error('Decoding failed:', error);
      }
    };
    
    decodeTokens();
  }, [tokenizer.isReady]);
  
  return (
    <View>
      <Text>Tokens: {JSON.stringify(tokens)}</Text>
      <Text>With special tokens: "{withSpecial}"</Text>
      <Text>Without special tokens: "{withoutSpecial}"</Text>
    </View>
  );
}

Error Handling

import { useTokenizer } from 'react-native-executorch';
import { useEffect } from 'react';

// Surfaces load/processing errors reported by the hook and shows a
// guard-first pattern for calling tokenizer methods safely.
function TokenizerWithErrorHandling() {
  const tokenizer = useTokenizer({
    tokenizer: {
      tokenizerSource: 'https://example.com/tokenizer.json',
    },
  });
  
  // Log error details whenever the hook reports a failure.
  useEffect(() => {
    if (!tokenizer.error) return;
    console.error('Tokenizer error:', tokenizer.error.message);
    console.error('Error code:', tokenizer.error.code);
  }, [tokenizer.error]);
  
  // Encodes with explicit readiness/busy checks; rethrows so the caller
  // can handle the failure itself.
  const safeEncode = async (input: string) => {
    if (!tokenizer.isReady) {
      throw new Error('Tokenizer is not ready');
    }
    
    if (tokenizer.isGenerating) {
      throw new Error('Tokenizer is busy');
    }
    
    try {
      return await tokenizer.encode(input);
    } catch (error) {
      console.error('Encoding failed:', error);
      throw error;
    }
  };
  
  return (
    <View>
      {tokenizer.error ? (
        <Text style={{ color: 'red' }}>
          Error: {tokenizer.error.message}
        </Text>
      ) : null}
    </View>
  );
}

Notes

The tokenizer automatically loads when the hook mounts unless preventLoad is set to true.
Do not call tokenizer methods while isGenerating is true. Each method will throw an error if called during processing.
Use the tokenizer to calculate token counts for LLM inputs and manage context windows effectively.

See Also

Build docs developers (and LLMs) love