curl --request POST \
--url https://api.example.com/api/batch/process \
--header 'Content-Type: application/json' \
--data '
{
"config": "<string>"
}
'
{
"success": true,
"total_documents": 123,
"processed_count": 123,
"failed_count": 123,
"output_csv_url": "<string>",
"summary_report": {
"documents": [
{}
],
"errors": [
{}
]
},
"processing_time": 123
}
Synchronous batch processing endpoint - legacy CSV upload
curl --request POST \
--url https://api.example.com/api/batch/process \
--header 'Content-Type: application/json' \
--data '
{
"config": "<string>"
}
'
{
"success": true,
"total_documents": 123,
"processed_count": 123,
"failed_count": 123,
"output_csv_url": "<string>",
"summary_report": {
"documents": [
{}
],
"errors": [
{}
]
},
"processing_time": 123
}
/api/batch/ws/{job_id} instead.
Authorization: Bearer your_access_token
multipart/form-data
.csv extension.
Required columns:
title - Document title
file_source_type - Source type: url, s3, or local
file_path - Path or URL to the file
description - Document description
publishing_date - Publication date
file_size - File size
TaggingConfig object:
"openai/gpt-4o-mini")
.txt or .pdf)
Contains words/phrases to exclude from generated tags:
# are ignored
exclusion_words in config if provided
{
"success": true,
"total_documents": 10,
"processed_count": 9,
"failed_count": 1,
"output_csv_url": "https://storage.example.com/results/batch_20250115_123456.csv",
"summary_report": {
"documents": [
{
"title": "Training Manual",
"file_path": "https://example.com/doc1.pdf",
"success": true,
"tags": [
"employee-training",
"onboarding-procedures",
"workplace-safety",
"compliance-guidelines"
],
"error": null
},
{
"title": "Annual Report",
"file_path": "https://example.com/doc2.pdf",
"success": false,
"tags": [],
"error": "Failed to download file: HTTP 404"
}
],
"errors": [
{
"row": 2,
"title": "Annual Report",
"error": "Failed to download file: HTTP 404"
}
]
},
"processing_time": 45.2
}
title,file_source_type,file_path
"Document 1",url,https://example.com/doc1.pdf
"Document 2",url,https://example.com/doc2.pdf
title,description,file_source_type,file_path,publishing_date,file_size
"Training Manual","Employee onboarding guide",url,https://example.com/doc1.pdf,2025-01-15,1.2MB
"Q4 Report","Financial results",s3,company-docs/q4-2024.pdf,2024-12-31,2.5MB
# Upload a CSV batch, its tagging config and an exclusion list as multipart form fields.
curl --request POST \
  --header "Authorization: Bearer your_access_token" \
  --form "csv_file=@batch_documents.csv" \
  --form 'config={
"api_key": "your_openrouter_key",
"model_name": "openai/gpt-4o-mini",
"num_pages": 3,
"num_tags": 8
}' \
  --form "exclusion_file=@exclusion_list.txt" \
  "http://localhost:8000/api/batch/process"
/**
 * Tagging options that processBatchCSV JSON-serializes into the `config`
 * multipart form field.
 */
interface TaggingConfig {
/** API key forwarded to the service; the usage example passes an OpenRouter key placeholder. */
api_key: string;
/** Model identifier, e.g. 'openai/gpt-4o-mini' as used in the examples. */
model_name?: string;
/** NOTE(review): presumably the number of pages read per document - confirm against the API docs. */
num_pages?: number;
/** NOTE(review): presumably the number of tags generated per document - confirm against the API docs. */
num_tags?: number;
/** NOTE(review): presumably words/phrases to exclude from generated tags - confirm against the API docs. */
exclusion_words?: string[];
}
/**
 * Shape that processBatchCSV assigns to the JSON body returned by
 * POST /api/batch/process. The payload is not validated at runtime.
 */
interface BatchProcessResponse {
/** Overall success flag reported by the server. */
success: boolean;
/** Count fields; the sample response shows total 10 = processed 9 + failed 1. */
total_documents: number;
processed_count: number;
failed_count: number;
/** Location of the result CSV; the sample response shows an https URL ending in .csv. */
output_csv_url: string;
summary_report: {
/** One entry per document; in the sample, failed entries carry `tags: []` and a non-null `error`. */
documents: Array<{
title: string;
file_path: string;
success: boolean;
tags: string[];
error: string | null;
}>;
/** Failures only. NOTE(review): `row` presumably refers to the CSV row - confirm whether it is 0- or 1-based. */
errors: Array<{
row: number;
title: string;
error: string;
}>;
};
/** NOTE(review): the usage example prints this with an `s` suffix, so presumably seconds - confirm. */
processing_time: number;
}
/**
 * Uploads a CSV of documents to the synchronous batch endpoint and resolves
 * with the parsed JSON response.
 *
 * @param csvFile - CSV file sent as the `csv_file` form field.
 * @param config - Tagging options, JSON-serialized into the `config` form field.
 * @param exclusionFile - File sent as the `exclusion_file` form field; pass
 *   `undefined` to omit it. Declared as `File | undefined` rather than
 *   optional because a required parameter (`token`) follows it.
 * @param token - Bearer token placed in the `Authorization` header.
 * @returns The response body, typed as BatchProcessResponse (not validated at runtime).
 * @throws Error when the server responds with a non-2xx status.
 */
async function processBatchCSV(
  csvFile: File,
  config: TaggingConfig,
  exclusionFile: File | undefined,
  token: string
): Promise<BatchProcessResponse> {
  const formData = new FormData();
  formData.append('csv_file', csvFile);
  formData.append('config', JSON.stringify(config));
  if (exclusionFile) {
    formData.append('exclusion_file', exclusionFile);
  }

  // No explicit Content-Type: fetch derives the multipart boundary from the FormData body.
  const response = await fetch('http://localhost:8000/api/batch/process', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${token}`,
    },
    body: formData,
  });

  if (!response.ok) {
    // statusText can be empty (e.g. over HTTP/2), so always include the numeric status.
    throw new Error(
      `Batch processing failed: ${response.status} ${response.statusText}`.trimEnd()
    );
  }

  return response.json();
}
// Usage
// getElementById returns HTMLElement | null, so narrow to a file input before reading `.files`.
const csvInput = document.getElementById('csv-input');
if (!(csvInput instanceof HTMLInputElement)) {
  throw new Error('Expected an <input type="file"> element with id "csv-input"');
}

// `files` is null for non-file inputs and empty until the user has picked a file.
const csvFile = csvInput.files?.[0];
if (!csvFile) {
  throw new Error('No CSV file selected');
}

const result = await processBatchCSV(
  csvFile,
  {
    api_key: 'your_openrouter_key',
    model_name: 'openai/gpt-4o-mini',
    num_pages: 3,
    num_tags: 8,
  },
  undefined, // no exclusion file
  'your_access_token'
);

console.log(`Processed ${result.processed_count}/${result.total_documents} documents`);
console.log(`Processing time: ${result.processing_time}s`);
import json
from contextlib import ExitStack
from typing import Dict, Optional

import requests
def process_batch_csv(
    csv_path: str,
    config: Dict,
    token: str,
    exclusion_file_path: Optional[str] = None,
    base_url: str = "http://localhost:8000"
) -> Dict:
    """Upload a CSV batch to ``{base_url}/api/batch/process`` and return the parsed JSON body.

    Args:
        csv_path: Path of the CSV file sent as the ``csv_file`` form field.
        config: Tagging configuration, JSON-serialized into the ``config`` form field.
        token: Bearer token placed in the ``Authorization`` header.
        exclusion_file_path: Optional path of a file sent as ``exclusion_file``;
            omitted from the request when falsy.
        base_url: Scheme and host of the API server.

    Returns:
        The decoded JSON response.

    Raises:
        OSError: If either input file cannot be opened.
        requests.HTTPError: If the server responds with an error status.
    """
    # ExitStack closes every handle opened so far, even when a later open()
    # or json.dumps() raises before the request is sent.
    with ExitStack() as stack:
        files = {
            'csv_file': stack.enter_context(open(csv_path, 'rb')),
        }
        if exclusion_file_path:
            files['exclusion_file'] = stack.enter_context(
                open(exclusion_file_path, 'rb')
            )
        data = {
            'config': json.dumps(config)
        }
        response = requests.post(
            f"{base_url}/api/batch/process",
            headers={"Authorization": f"Bearer {token}"},
            files=files,
            data=data,
            timeout=600  # 10 minute timeout for large batches
        )
        response.raise_for_status()
        return response.json()
# Usage
# Tagging options; serialized to JSON by process_batch_csv.
config = dict(
    api_key="your_openrouter_key",
    model_name="openai/gpt-4o-mini",
    num_pages=3,
    num_tags=8,
)

# Submit the batch together with an exclusion list.
result = process_batch_csv(
    "batch_documents.csv",
    config,
    "your_access_token",
    exclusion_file_path="exclusion_list.txt",
)

# Report the outcome fields of the response.
print(f"Success: {result['success']}")
print(f"Processed: {result['processed_count']}/{result['total_documents']}")
print(f"Failed: {result['failed_count']}")
print(f"Time: {result['processing_time']}s")
print(f"Output: {result['output_csv_url']}")
{
"detail": "Invalid file type"
}
{
"detail": "Empty CSV file"
}
{
"detail": "Invalid config JSON format"
}
{
"detail": "Failed to parse exclusion file: [error details]"
}
{
"detail": "Internal server error: [error details]"
}