Function Signature
The zerox() function is an asynchronous API that performs OCR (Optical Character Recognition) to markdown using vision models. It processes PDF files, images, and various document formats and converts them into markdown format.
async def zerox(
    cleanup: bool = True,
    concurrency: int = 10,
    file_path: Optional[str] = "",
    image_density: int = 300,
    image_height: tuple[Optional[int], int] = (None, 1056),
    maintain_format: bool = False,
    model: str = "gpt-4o-mini",
    output_dir: Optional[str] = None,
    temp_dir: Optional[str] = None,
    custom_system_prompt: Optional[str] = None,
    select_pages: Optional[Union[int, Iterable[int]]] = None,
    **kwargs
) -> ZeroxOutput
Import
from pyzerox import zerox
Basic Usage
The Python SDK uses async/await syntax. Make sure to call zerox() within an async function and use asyncio.run() to execute.
Process a PDF from URL
import asyncio
from pyzerox import zerox
import os

# Set up your API key
os.environ["OPENAI_API_KEY"] = "your-api-key"

async def main():
    result = await zerox(
        file_path="https://example.com/document.pdf",
        model="gpt-4o-mini"
    )
    print(result)

# Run the async function
asyncio.run(main())
Process a Local File
import asyncio
from pyzerox import zerox
import os

os.environ["OPENAI_API_KEY"] = "your-api-key"

async def main():
    result = await zerox(
        file_path="./path/to/document.pdf",
        model="gpt-4o-mini",
        output_dir="./output"  # Save markdown to file
    )
    return result

result = asyncio.run(main())
Process Specific Pages
import asyncio
from pyzerox import zerox

async def main():
    # Process only pages 1, 3, and 5
    result = await zerox(
        file_path="document.pdf",
        model="gpt-4o-mini",
        select_pages=[1, 3, 5]
    )
    return result

result = asyncio.run(main())
Maintain Format Across Pages
import asyncio
from pyzerox import zerox

async def main():
    # Useful for documents with tables spanning multiple pages
    result = await zerox(
        file_path="document.pdf",
        model="gpt-4o-mini",
        maintain_format=True
    )
    return result

result = asyncio.run(main())
Using Different Model Providers
The Python SDK uses LiteLLM to support multiple vision model providers.
OpenAI
import os

os.environ["OPENAI_API_KEY"] = "your-api-key"

result = await zerox(
    file_path="document.pdf",
    model="gpt-4o-mini"  # or "gpt-4o"
)
Azure OpenAI
import os

os.environ["AZURE_API_KEY"] = "your-azure-api-key"
os.environ["AZURE_API_BASE"] = "https://example-endpoint.openai.azure.com"
os.environ["AZURE_API_VERSION"] = "2023-05-15"

result = await zerox(
    file_path="document.pdf",
    model="azure/gpt-4o-mini"  # Format: azure/<deployment_name>
)
Google Gemini
import os

os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

result = await zerox(
    file_path="document.pdf",
    model="gemini/gemini-1.5-flash"  # Format: gemini/<model_name>
)
Anthropic Claude
import os

os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-api-key"

result = await zerox(
    file_path="document.pdf",
    model="claude-3-opus-20240229"
)
AWS Bedrock
import os

os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key"
os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-key"
os.environ["AWS_REGION_NAME"] = "us-east-1"

result = await zerox(
    file_path="document.pdf",
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0"
)
Vertex AI
import json

# Load service account credentials
with open('path/to/vertex_ai_service_account.json', 'r') as f:
    vertex_credentials = json.load(f)

vertex_credentials_json = json.dumps(vertex_credentials)

result = await zerox(
    file_path="document.pdf",
    model="vertex_ai/gemini-1.5-flash-001",
    vertex_credentials=vertex_credentials_json
)
Custom System Prompts
The Python SDK supports custom system prompts, unlike the Node.js SDK which uses a fixed prompt.
from pyzerox import zerox, DEFAULT_SYSTEM_PROMPT

# Use a custom prompt
custom_prompt = """
Convert this document to markdown.
Focus on extracting tables and charts.
Return only markdown with no explanations.
"""

result = await zerox(
    file_path="document.pdf",
    model="gpt-4o-mini",
    custom_system_prompt=custom_prompt
)
Advanced Configuration
result = await zerox(
    file_path="document.pdf",
    model="gpt-4o-mini",

    # Processing options
    concurrency=5,               # Process 5 pages concurrently
    maintain_format=True,        # Maintain formatting across pages
    select_pages=[1, 2, 3],      # Process only specific pages

    # Image conversion options
    image_density=300,           # DPI for image conversion
    image_height=(None, 1056),   # Max height for images

    # File management
    output_dir="./output",       # Save markdown to file
    temp_dir="./temp",           # Custom temp directory
    cleanup=True,                # Clean up temp files after processing

    # Model customization
    custom_system_prompt="...",  # Override default prompt

    # Additional LiteLLM parameters
    temperature=0.1,
    max_tokens=4096
)
Parameters Detailed parameter documentation
Response Response structure and fields