Skip to main content
Core functions provide the main entry points for translating documents, loading files, estimating costs, and creating translator instances.

translate_document

Translate a document using the specified translation algorithm.
from tinbox import translate_document, load_document, create_translator, TranslationConfig
from pathlib import Path

# Load document
content = await load_document(Path("document.pdf"))

# Create configuration
config = TranslationConfig(
    source_lang="en",
    target_lang="de",
    model="openai",
    model_name="gpt-4o",
    algorithm="page",
    input_file=Path("document.pdf"),
)

# Create translator and translate
translator = create_translator(config)
result = await translate_document(content, config, translator)
print(f"Translation: {result.text}")
print(f"Cost: ${result.cost:.2f}")
content
DocumentContent
required
The document content to translate. Obtain this using load_document().
config
TranslationConfig
required
Translation configuration including source/target languages, model settings, and algorithm.
translator
ModelInterface
required
Model interface to use for translation. Create using create_translator().
progress
Progress | None
default:"None"
Optional Rich progress bar instance for tracking translation progress.
checkpoint_manager
CheckpointManager | None
default:"None"
Optional checkpoint manager for saving/resuming translation state.
glossary_manager
GlossaryManager | None
default:"None"
Optional glossary manager for maintaining consistent term translations.
text
str
The translated text.
tokens_used
int
Total number of tokens used during translation.
cost
float
Total cost in USD.
time_taken
float
Time taken in seconds.
failed_pages
list[int]
default:"[]"
List of page numbers that failed to translate (page-by-page algorithm only).
page_errors
dict[int, str]
default:"{}"
Mapping from page number to error message for failed pages.
warnings
list[str]
default:"[]"
Non-fatal warnings during translation.

load_document

Load a document and prepare it for translation.
from tinbox import load_document
from pathlib import Path

# Load a PDF document
content = await load_document(Path("document.pdf"))

# Load with custom processor settings (e.g., higher DPI for PDFs)
content = await load_document(
    Path("document.pdf"),
    processor_settings={"dpi": 300}
)

print(f"Pages: {len(content.pages)}")
print(f"Content type: {content.content_type}")
print(f"Metadata: {content.metadata}")
file_path
Path
required
Path to the document to load. Supported formats: .pdf, .docx, .txt.
processor_settings
dict[str, Any] | None
default:"None"
Optional settings passed to the document processor. For PDFs, you can specify {"dpi": 300} to control image resolution.
pages
list[str | bytes]
Individual pages ready for translation. Text files contain strings, while image-based content (like scanned PDFs) contains bytes.
content_type
str
MIME type of the content. Either "text/plain" for text content or "image/png" for image content.
metadata
dict[str, Any]
Document metadata including file type, total pages, title, author, and creation/modification dates.

estimate_cost

Estimate the cost and time required to translate a document.
from tinbox.core.cost import estimate_cost
from tinbox.core.types import ModelType
from pathlib import Path

# Basic cost estimation
estimate = estimate_cost(
    file_path=Path("document.pdf"),
    model=ModelType.OPENAI,
    algorithm="page"
)

print(f"Estimated tokens: {estimate.estimated_tokens:,}")
print(f"Estimated cost: ${estimate.estimated_cost:.2f}")
print(f"Estimated time: {estimate.estimated_time:.1f}s")
print(f"Cost level: {estimate.cost_level.value}")

for warning in estimate.warnings:
    print(f"Warning: {warning}")

# Estimate with glossary and custom reasoning
estimate = estimate_cost(
    file_path=Path("document.pdf"),
    model=ModelType.ANTHROPIC,
    algorithm="context-aware",
    max_cost=10.0,
    use_glossary=True,
    reasoning_effort="high"
)
file_path
Path
required
Path to the document to estimate. Used to calculate approximate token count.
model
ModelType
required
Model provider to use for cost calculation. Options: ModelType.OPENAI, ModelType.ANTHROPIC, ModelType.GEMINI, ModelType.OLLAMA.
algorithm
str
default:"page"
Translation algorithm. Options: "page", "sliding-window", "context-aware". Context-aware uses more input tokens due to context overhead.
max_cost
float | None
default:"None"
Optional maximum cost threshold in USD. If provided, warnings are generated if the estimate exceeds this value.
use_glossary
bool
default:"False"
Whether glossary is enabled. Glossary adds approximately 20% token overhead.
reasoning_effort
str
default:"minimal"
Model reasoning effort level. Options: "minimal", "low", "medium", "high". Higher levels significantly increase cost and time.
estimated_tokens
int
Total estimated tokens (input + output).
estimated_cost
float
Estimated cost in USD.
estimated_time
float
Estimated time in seconds.
cost_level
CostLevel
Cost classification: `"low"` (< $1), `"medium"` ($1–$5), `"high"` ($5–$20), or `"very_high"` (> $20).
warnings
list[str]
List of warning messages about document size, cost thresholds, algorithm overhead, or reasoning effort.

create_translator

Create a translator instance for performing translations.
from tinbox import create_translator, TranslationConfig
from pathlib import Path

# Create translator from configuration
config = TranslationConfig(
    source_lang="en",
    target_lang="fr",
    model="anthropic",
    model_name="claude-3-sonnet",
    algorithm="context-aware",
    input_file=Path("document.txt"),
)

translator = create_translator(config)

# Validate the translator is properly configured
is_valid = await translator.validate_model()
if not is_valid:
    print("Model validation failed")
config
TranslationConfig
required
Translation configuration. The model provider is determined from config.model.
return
ModelInterface
Configured translator instance implementing the ModelInterface protocol. Currently returns a LiteLLMTranslator instance that supports all model providers.
Model selection (provider and model name) is handled per-request via TranslationRequest.model and TranslationRequest.model_params, not at translator construction time. This allows using the same translator instance with different models.

Build docs developers (and LLMs) love