Skip to main content

count_tokens

Count the number of tokens in content without making a generation request.

Method Signature

def count_tokens(
    self,
    *,
    model: str,
    contents: Union[ContentListUnion, ContentListUnionDict],
    config: Optional[CountTokensConfig] = None,
) -> CountTokensResponse
async def count_tokens(
    self,
    *,
    model: str,
    contents: Union[ContentListUnion, ContentListUnionDict],
    config: Optional[CountTokensConfig] = None,
) -> CountTokensResponse

Description

Counts the number of tokens in the given content. This is useful for:
  • Estimating API costs before making requests
  • Ensuring content fits within model token limits
  • Managing context windows
  • Optimizing prompt engineering
Supports multimodal input for Gemini models.

Parameters

model
str
required
The model to use for token counting. Different models have different tokenizers. Examples:
  • 'gemini-2.0-flash'
  • 'gemini-2.0-flash-exp'
  • 'gemini-1.5-pro'
contents
ContentListUnion
required
The content to count tokens for. Can be:
  • A string: 'What is your name?'
  • A list of Content objects
  • A list of Part objects
  • Multimodal content with text, images, video, and audio
config
CountTokensConfig
Configuration for token counting.

Response

total_tokens
int
Total number of tokens in the content
cached_content_token_count
int
Number of tokens from cached content (if using context caching)

Code Examples

Basic Token Counting

from google import genai

# Create a Gemini Developer API client.
client = genai.Client(api_key='your-api-key')

# Count tokens without performing a generation request.
token_count = client.models.count_tokens(
    model='gemini-2.0-flash',
    contents='What is your name?',
)

print(f"Total tokens: {token_count.total_tokens}")
# Output: Total tokens: 5

Count Tokens in Conversation

from google.genai import types

# Build a multi-turn conversation; the token count covers every turn.
# NOTE: Part.from_text takes a keyword-only `text` argument in the
# google-genai SDK — passing it positionally raises TypeError.
conversation = [
    types.Content(role='user', parts=[types.Part.from_text(text='Hello!')]),
    types.Content(role='model', parts=[types.Part.from_text(text='Hi there! How can I help you?')]),
    types.Content(role='user', parts=[types.Part.from_text(text='Tell me about Python')]),
]

response = client.models.count_tokens(
    model='gemini-2.0-flash',
    contents=conversation,
)

print(f"Conversation tokens: {response.total_tokens}")

Count Multimodal Tokens

# Count tokens for mixed text + image content.
# NOTE: Part.from_text takes a keyword-only `text` argument in the
# google-genai SDK — passing it positionally raises TypeError.
response = client.models.count_tokens(
    model='gemini-2.0-flash',
    contents=[
        types.Part.from_text(text='Describe this image'),
        types.Part.from_uri(
            file_uri='gs://generativeai-downloads/images/scones.jpg',
            mime_type='image/jpeg'
        ),
    ],
)

print(f"Multimodal tokens: {response.total_tokens}")
# Includes both text and image tokens

With System Instructions (Vertex AI)

# A Vertex AI client is required: including system instructions and tools
# in the token count is only supported on Vertex AI.
client = genai.Client(vertexai=True, project='my-project', location='us-central1')

response = client.models.count_tokens(
    model='gemini-2.0-flash',
    contents='What is the weather?',
    # CountTokensConfig folds the system instruction and tool declarations
    # into the total.
    config=types.CountTokensConfig(
        system_instruction='You are a helpful weather assistant.',
        # NOTE(review): `weather_tool` must be defined before this example runs.
        tools=[weather_tool],
    ),
)

print(f"Total tokens (including system instruction and tools): {response.total_tokens}")

Check Before Generation

def safe_generate(client, model, contents, max_tokens=30000):
    """Generate content only if within token limit.

    Counts the tokens in *contents* first and raises ValueError when the
    total is over *max_tokens*; otherwise forwards the request to
    generate_content and returns its response.
    """
    counted = client.models.count_tokens(model=model, contents=contents)
    total = counted.total_tokens
    if total > max_tokens:
        raise ValueError(f"Content exceeds {max_tokens} tokens: {total}")
    return client.models.generate_content(model=model, contents=contents)

# Use it
try:
    response = safe_generate(client, 'gemini-2.0-flash', 'What is AI?')
    print(response.text)
except ValueError as e:
    # Raised by safe_generate when the prompt exceeds the token budget.
    print(f"Error: {e}")

compute_tokens

Returns detailed token information including individual token IDs and strings.

Method Signature

def compute_tokens(
    self,
    *,
    model: str,
    contents: Union[ContentListUnion, ContentListUnionDict],
    config: Optional[ComputeTokensConfig] = None,
) -> ComputeTokensResponse
async def compute_tokens(
    self,
    *,
    model: str,
    contents: Union[ContentListUnion, ContentListUnionDict],
    config: Optional[ComputeTokensConfig] = None,
) -> ComputeTokensResponse

Description

Given a list of contents, returns a corresponding TokensInfo containing the list of tokens and list of token IDs. This method is only supported by Vertex AI API (not Gemini Developer API). Useful for:
  • Understanding model tokenization
  • Debugging prompt engineering
  • Analyzing token distribution
  • Building custom tokenization tools

Parameters

model
str
required
The model to use for tokenization. Examples:
  • 'gemini-2.0-flash'
  • 'gemini-1.5-pro'
contents
ContentListUnion
required
The content to compute tokens for.
config
ComputeTokensConfig
Configuration for token computation (reserved for future use)

Response

tokens_info
list[TokensInfo]
List of token information for each content.

Code Examples

Basic Token Computation

from google import genai

# compute_tokens is only supported on Vertex AI, so a Vertex client is required.
client = genai.Client(vertexai=True, project='my-project', location='us-central1')

response = client.models.compute_tokens(
    model='gemini-2.0-flash',
    contents='What is your name?',
)

# tokens_info holds one entry per content item; each entry pairs the raw
# token byte strings with their corresponding token IDs.
print(f"Tokens: {response.tokens_info[0].tokens}")
print(f"Token IDs: {response.tokens_info[0].token_ids}")
# Output:
# Tokens: [b'What', b' is', b' your', b' name', b'?']
# Token IDs: ['1841', '374', '574', '836', '30']

Analyze Tokenization

# Inspect how the model splits a sentence into individual tokens.
text = "The quick brown fox jumps over the lazy dog."

result = client.models.compute_tokens(
    model='gemini-2.0-flash',
    contents=text,
)

info = result.tokens_info[0]
print(f"Original text: {text}")
print(f"Number of tokens: {len(info.tokens)}")
print("\nTokenization:")
# Each token is a bytes object paired with its ID.
for tok, tok_id in zip(info.tokens, info.token_ids):
    print(f"  '{tok.decode('utf-8')}' -> ID: {tok_id}")

Compare Tokenization Across Content

from google.genai import types

# Two contents to tokenize independently.
# NOTE: Part.from_text takes a keyword-only `text` argument in the
# google-genai SDK — passing it positionally raises TypeError.
contents = [
    types.Content(role='user', parts=[types.Part.from_text(text='Hello!')]),
    types.Content(role='model', parts=[types.Part.from_text(text='Hi there!')]),
]

response = client.models.compute_tokens(
    model='gemini-2.0-flash',
    contents=contents,
)

# tokens_info has one entry per content item, preserving order.
for i, tokens_info in enumerate(response.tokens_info):
    print(f"\nContent {i+1} ({tokens_info.role}):")
    print(f"  Tokens: {len(tokens_info.tokens)}")
    print(f"  Token breakdown: {[t.decode('utf-8') for t in tokens_info.tokens]}")

Analyze Special Characters

special_text = "Hello! 你好 🌍"

response = client.models.compute_tokens(
    model='gemini-2.0-flash',
    contents=special_text,
)

tokens_info = response.tokens_info[0]
print("Token analysis for special characters:")
for token, token_id in zip(tokens_info.tokens, tokens_info.token_ids):
    try:
        decoded = token.decode('utf-8')
    # Catch only decode failures — a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit. Multi-byte characters can be split
    # across tokens, leaving fragments that are not valid UTF-8 on their own.
    except UnicodeDecodeError:
        decoded = f"<binary: {token.hex()}>"
    print(f"  '{decoded}' -> ID: {token_id}")

Async Usage

import asyncio
from google import genai

client = genai.Client(api_key='your-api-key')

async def count():
    """Count tokens via the async client surface (client.aio)."""
    # count_tokens
    response = await client.aio.models.count_tokens(
        model='gemini-2.0-flash',
        contents='What is AI?',
    )
    print(f"Tokens: {response.total_tokens}")

asyncio.run(count())
import asyncio
from google import genai

# compute_tokens requires a Vertex AI client.
client = genai.Client(vertexai=True, project='my-project', location='us-central1')

async def compute():
    """Compute per-token details via the async client surface (client.aio)."""
    # compute_tokens (Vertex AI only)
    response = await client.aio.models.compute_tokens(
        model='gemini-2.0-flash',
        contents='What is AI?',
    )
    print(f"Token IDs: {response.tokens_info[0].token_ids}")

asyncio.run(compute())

Comparison

Feature             | count_tokens           | compute_tokens
Availability        | Gemini API & Vertex AI | Vertex AI only
Returns             | Token count            | Token strings + IDs
Use Case            | Quick count            | Detailed analysis
Performance         | Faster                 | Slightly slower
System Instructions | Vertex AI only         | Not supported

Notes

  • Token counts may vary slightly between model versions
  • Multimodal tokens (images, video, audio) are counted differently than text
  • Use count_tokens before generation to avoid exceeding limits
  • Use compute_tokens to understand how models tokenize your input
  • compute_tokens is only available on Vertex AI
  • System instructions and tools can be included in token count (Vertex AI only)
  • Context caching can significantly reduce token usage for repeated content

Build docs developers (and LLMs) love