Tinbox provides a flexible translation system with multiple algorithms for different use cases. The system uses a protocol-based interface for translator implementations.
ModelInterface
Protocol defining the interface for LLM translation models.
from typing import Protocol
from tinbox.core.translation import ModelInterface, TranslationRequest, TranslationResponse
class ModelInterface(Protocol):
    async def translate(
        self,
        request: TranslationRequest,
    ) -> TranslationResponse:
        """Translate content using the model."""
        ...

    async def validate_model(self) -> bool:
        """Check if the model is available and properly configured."""
        ...
translate
Translate content according to a translation request.
request
TranslationRequest
required
Translation request containing source/target languages, content, and configuration.
Translation response with translated text, token usage, cost, and timing information.
Raised if translation fails due to API errors, network issues, or invalid configuration.
validate_model
Validate that the model is properly configured and accessible.
True if the model is available and can be used for translation.
TranslationRequest
Configuration for a single translation request.
from tinbox.core.translation import TranslationRequest
from tinbox.core.types import ModelType
# Basic text translation
request = TranslationRequest(
    source_lang="en",
    target_lang="fr",
    content="Hello, world!",
    content_type="text/plain",
    model=ModelType.OPENAI,
    model_params={"model_name": "gpt-4o"},
)
# Translation with context
request = TranslationRequest(
    source_lang="en",
    target_lang="de",
    content="He said it was great.",
    context="[PREVIOUS_CHUNK]\nThe restaurant opened yesterday.\n[/PREVIOUS_CHUNK]",
    content_type="text/plain",
    model=ModelType.ANTHROPIC,
    model_params={"model_name": "claude-3-sonnet"},
)
# Translation with glossary and reasoning
from tinbox.core.types import Glossary, GlossaryEntry

glossary = Glossary(entries={"API": "API", "cloud": "Cloud"})
request = TranslationRequest(
    source_lang="en",
    target_lang="ja",
    content="The API connects to the cloud.",
    content_type="text/plain",
    model=ModelType.GEMINI,
    model_params={"model_name": "gemini-2.5-pro"},
    glossary=glossary,
    reasoning_effort="high",
)
Source language code (e.g., "en", "fr", "ja").
Target language code (e.g., "de", "es", "zh").
Content to translate:
str for text content
bytes for image content (PNG format for scanned documents)
MIME type of the content. Must match pattern ^(text|image)/.+$.
"text/plain" - Plain text content
"image/png" - Image content (scanned PDFs)
Model provider to use. Options:
ModelType.OPENAI - OpenAI models
ModelType.ANTHROPIC - Anthropic Claude models
ModelType.GEMINI - Google Gemini models
ModelType.OLLAMA - Local Ollama models
Optional context information to improve translation quality and consistency. Context-aware algorithm provides:
[PREVIOUS_CHUNK] tags with previous content
[PREVIOUS_CHUNK_TRANSLATION] tags with previous translation
[NEXT_CHUNK] tags with upcoming content
context = """[PREVIOUS_CHUNK]
The meeting started at 9 AM.
[/PREVIOUS_CHUNK]
[PREVIOUS_CHUNK_TRANSLATION]
Die Besprechung begann um 9 Uhr.
[/PREVIOUS_CHUNK_TRANSLATION]
Use this context to maintain consistency in terminology and style."""
Additional model-specific parameters. Common parameters:
model_name: Specific model to use (e.g., "gpt-4o", "claude-3-sonnet")
temperature: Sampling temperature (if supported)
max_tokens: Maximum output tokens (if supported)
glossary
Glossary | None
default: "None"
Optional glossary for consistent term translations. The model will use these terms when translating.
from tinbox.core.types import Glossary

glossary = Glossary(entries={
    "API": "API",
    "cloud computing": "Cloud-Computing",
    "database": "Datenbank",
})
reasoning_effort
Literal['minimal', 'low', 'medium', 'high']
default: "minimal"
Model reasoning effort level:
"minimal" - Fast, cost-effective
"low" - Slight improvement, moderate cost increase
"medium" - Better quality, higher cost
"high" - Best quality, significantly higher cost
Higher reasoning efforts can multiply costs by 3-10x.
TranslationRequest is immutable (frozen=True).
TranslationResponse
Response from a translation request or algorithm.
from tinbox import translate_document, load_document, create_translator, TranslationConfig
from pathlib import Path
content = await load_document(Path("document.pdf"))
config = TranslationConfig(...)
translator = create_translator(config)
response = await translate_document(content, config, translator)
print(f"Translated text: {response.text[:100]}...")
print(f"Tokens used: {response.tokens_used:,}")
print(f"Cost: ${response.cost:.4f}")
print(f"Time taken: {response.time_taken:.2f}s")
if response.failed_pages:
    print(f"Failed pages: {response.failed_pages}")
    for page, error in response.page_errors.items():
        print(f"  Page {page}: {error}")
if response.warnings:
    for warning in response.warnings:
        print(f"Warning: {warning}")
if response.glossary_updates:
    print(f"New glossary entries: {len(response.glossary_updates)}")
    for entry in response.glossary_updates:
        print(f"  {entry.term} -> {entry.translation}")
The translated text. For page-by-page algorithm with failed pages, contains placeholders: [TRANSLATION_FAILED: Page 3]
Reason: API timeout
[/TRANSLATION_FAILED]
Total number of tokens used (input + output). Must be >= 0.
Total cost in USD. Must be >= 0.0.
Time taken in seconds. Must be >= 0.0.
glossary_updates
list[GlossaryEntry]
default: "[]"
New glossary entries discovered during translation (when glossary is enabled). Each entry contains:
term: Term in source language
translation: Translation in target language
List of page numbers that failed to translate (page-by-page algorithm only). Page numbers are 1-indexed.
page_errors
dict[int, str]
default: "{}"
Mapping from page number to error message for failed pages.
if response.page_errors:
    for page, error in response.page_errors.items():
        print(f"Page {page} failed: {error}")
Non-fatal warnings during translation. Common warnings:
Incomplete translation due to failed pages
Cost approaching threshold
Algorithm-specific issues
TranslationResponse is immutable (frozen=True).
Translation Algorithms
Tinbox provides three translation algorithms, each optimized for different scenarios.
Page-by-Page
Translates each page independently without context.
config = TranslationConfig(
    ...,
    algorithm="page",
)
Characteristics:
Fastest algorithm
No context between pages
Good for documents with independent sections
Best for simple documents or when speed is priority
Supports resume from checkpoint
Can continue despite individual page failures
Best for:
Simple documents
Presentations with independent slides
Documents where each page is self-contained
Quick translations where context isn’t critical
Sliding Window
Processes text using overlapping windows for continuity.
config = TranslationConfig(
    ...,
    algorithm="sliding-window",
    window_size=3000,   # Characters per window
    overlap_size=300,   # Overlap between windows
)
Characteristics:
Good balance between speed and quality
Overlapping windows maintain some continuity
Not suitable for image content (text only)
Windows are merged intelligently after translation
Supports resume from checkpoint
Best for:
Long text documents
Content requiring some continuity
When context-aware overhead is too high
Technical documentation with cross-references
Configuration:
window_size: Size of each window (default: 2000 characters)
overlap_size: Overlap between windows (default: 200 characters)
Context-Aware
Splits text at natural boundaries with full context from adjacent chunks.
config = TranslationConfig(
    ...,
    algorithm="context-aware",
    context_size=2500,  # Target chunk size
)
# Or with custom split token
config = TranslationConfig(
    ...,
    algorithm="context-aware",
    custom_split_token="\n---\n",  # Split on horizontal rules
)
Characteristics:
Highest quality translations
Splits text at natural boundaries (paragraphs, sentences, etc.)
Provides previous/next chunk context for each translation
Higher input token usage (3-4x multiplier due to context)
Not suitable for image content (text only)
Supports resume from checkpoint
Best for:
Literary works and books
Content requiring high consistency
Documents with narrative flow
Technical manuals with interconnected sections
Configuration:
context_size: Target chunk size in characters (default: 2000)
custom_split_token: Custom token to split on, ignoring context_size
Context Format:
The algorithm provides context in tagged format:
[PREVIOUS_CHUNK]
The previous section's content...
[/PREVIOUS_CHUNK]
[PREVIOUS_CHUNK_TRANSLATION]
Die Übersetzung des vorherigen Abschnitts...
[/PREVIOUS_CHUNK_TRANSLATION]
[NEXT_CHUNK]
The next section's content...
[/NEXT_CHUNK]
Use this context to maintain consistency in terminology and style.
TranslationError
Exception raised when translation fails.
from tinbox.core.translation import TranslationError
from tinbox import translate_document
try:
    result = await translate_document(content, config, translator)
except TranslationError as e:
    print(f"Translation failed: {e}")
Common causes:
API authentication failures
Network errors
Rate limiting
Invalid model configuration
Cost exceeding max_cost threshold
Unknown algorithm specified
Model-specific errors (context length exceeded, etc.)
Examples
Using Different Algorithms
from tinbox import translate_document, load_document, create_translator, TranslationConfig
from pathlib import Path
# Load document once
content = await load_document(Path("document.pdf"))

# Page-by-page (fastest)
config_page = TranslationConfig(
    source_lang="en",
    target_lang="de",
    model="openai",
    model_name="gpt-4o",
    algorithm="page",
    input_file=Path("document.pdf"),
)
translator = create_translator(config_page)
result_page = await translate_document(content, config_page, translator)

# Sliding window (balanced)
config_window = config_page.model_copy(update={
    "algorithm": "sliding-window",
    "window_size": 3000,
    "overlap_size": 300,
})
result_window = await translate_document(content, config_window, translator)

# Context-aware (highest quality)
config_context = config_page.model_copy(update={
    "algorithm": "context-aware",
    "context_size": 2500,
    "use_glossary": True,
})
result_context = await translate_document(content, config_context, translator)

print(f"Page: {result_page.cost:.2f}, {result_page.time_taken:.1f}s")
print(f"Window: {result_window.cost:.2f}, {result_window.time_taken:.1f}s")
print(f"Context: {result_context.cost:.2f}, {result_context.time_taken:.1f}s")
Handling Translation Errors
from tinbox.core.translation import TranslationError
try:
    result = await translate_document(content, config, translator)
    # Check for partial failures
    if result.failed_pages:
        print(f"Warning: {len(result.failed_pages)} pages failed")
        for page in result.failed_pages:
            error = result.page_errors.get(page, "Unknown error")
            print(f"  Page {page}: {error}")
    # Check warnings
    for warning in result.warnings:
        print(f"Warning: {warning}")
except TranslationError as e:
    print(f"Translation failed completely: {e}")
    # Handle total failure
Custom Translation with Context
from tinbox.core.translation import TranslationRequest, create_translator
from tinbox.core.types import ModelType
# Create translator
config = TranslationConfig(...)
translator = create_translator(config)

# Manual translation with custom context
request = TranslationRequest(
    source_lang="en",
    target_lang="fr",
    content="This improves the quality significantly.",
    context="""[PREVIOUS_CHUNK]
We implemented the new algorithm.
[/PREVIOUS_CHUNK]
[PREVIOUS_CHUNK_TRANSLATION]
Nous avons implémenté le nouvel algorithme.
[/PREVIOUS_CHUNK_TRANSLATION]
Use this context to maintain consistency.""",
    content_type="text/plain",
    model=ModelType.ANTHROPIC,
    model_params={"model_name": "claude-3-sonnet"},
)
response = await translator.translate(request)
print(response.text)