Skip to main content
MarkItDown integrates seamlessly with OpenAI’s API to enable powerful document analysis workflows, from basic Q&A to advanced multi-modal processing with vision capabilities.

Why MarkItDown + OpenAI?

MarkItDown’s Markdown output is ideal for OpenAI models because:
  • GPT models are extensively trained on Markdown and understand it natively
  • Markdown is highly token-efficient compared to raw text or HTML
  • Document structure (headers, lists, tables) is preserved for better comprehension
  • Vision models can generate descriptions for images during document conversion
  • Clean, structured output improves prompt engineering and response quality
MarkItDown has built-in support for OpenAI’s vision models to generate image descriptions during conversion.

Installation

pip install 'markitdown[all]' openai

Basic Document Analysis

Convert a document and analyze it with GPT:
from markitdown import MarkItDown
from openai import OpenAI

# Initialize clients
client = OpenAI(api_key="your-api-key")
md = MarkItDown()

# Convert document to Markdown
result = md.convert("quarterly_report.pdf")

# The converted Markdown is embedded directly into the user prompt.
analysis_prompt = f"""Analyze this quarterly report and provide:
            1. Key financial highlights
            2. Major risks
            3. Growth opportunities
            
            Document:
            {result.text_content}"""

# temperature=0 keeps the analysis deterministic.
response = client.chat.completions.create(
    model="gpt-4",
    temperature=0,
    messages=[
        {"role": "system",
         "content": "You are a financial analyst. Analyze documents and provide insights."},
        {"role": "user", "content": analysis_prompt},
    ],
)

print(response.choices[0].message.content)

Document Q&A System

Build an interactive document Q&A system:
from markitdown import MarkItDown
from openai import OpenAI

class DocumentQA:
    """Conversational question-answering over a single converted document.

    The document is converted to Markdown once and kept in the chat
    history, so follow-up questions retain the full context.
    """

    def __init__(self, api_key: str, model: str = "gpt-4"):
        self.md = MarkItDown()
        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.document_content = None
        self.conversation_history = []

    def load_document(self, file_path: str):
        """Convert the file to Markdown and seed the chat context with it."""
        conversion = self.md.convert(file_path)
        self.document_content = conversion.text_content

        system_text = """You are a helpful assistant that answers questions 
                about documents. Provide accurate, concise answers based solely on 
                the document content. Cite specific sections when possible."""

        # Three seed turns: instructions, the document itself, and a priming
        # acknowledgement so later turns can be plain questions.
        self.conversation_history = [
            {"role": "system", "content": system_text},
            {"role": "user",
             "content": f"Here is the document to analyze:\n\n{self.document_content}"},
            {"role": "assistant",
             "content": "I've received the document and I'm ready to answer questions about it."},
        ]

        print(f"Loaded document ({len(self.document_content)} characters)")

    def ask(self, question: str) -> str:
        """Answer one question; context accumulates across calls."""
        if not self.document_content:
            return "Please load a document first."

        # Record the question, query the model over the whole history,
        # then record the reply so the next question sees it.
        self.conversation_history.append({"role": "user", "content": question})

        completion = self.client.chat.completions.create(
            model=self.model,
            messages=self.conversation_history,
            temperature=0,
        )
        reply = completion.choices[0].message.content

        self.conversation_history.append({"role": "assistant", "content": reply})
        return reply

    def reset(self):
        """Drop every turn after the three seed messages."""
        del self.conversation_history[3:]

# Usage
# Build the QA session, load one document, then ask questions; each
# ask() call reuses the accumulated conversation history.
qa = DocumentQA(api_key="your-api-key")
qa.load_document("employee_handbook.pdf")

print(qa.ask("What is the vacation policy?"))
print(qa.ask("How many days of paid leave do employees get?"))
print(qa.ask("Are there any restrictions on when vacation can be taken?"))

Vision-Enhanced Conversion

Use OpenAI’s vision models to describe images during conversion:
from markitdown import MarkItDown
from openai import OpenAI

# Initialize with vision support: the same client powers both the
# per-image descriptions during conversion and the final analysis.
client = OpenAI(api_key="your-api-key")
md = MarkItDown(
    llm_client=client,
    llm_model="gpt-4o",
    llm_prompt="Describe this image in detail, focusing on key information.",
)

# Convert presentation with images; descriptions are generated inline,
# so the Markdown below already contains them.
result = md.convert("product_launch.pptx")
print(result.text_content)

# Summarize the image-enriched Markdown.
summary_prompt = f"""Summarize this product launch presentation:
            
            {result.text_content}"""
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": summary_prompt}],
)

print(response.choices[0].message.content)

Structured Data Extraction

Extract structured data using function calling:
from markitdown import MarkItDown
from openai import OpenAI
import json

def extract_invoice_data(file_path: str, api_key: str):
    """Extract structured invoice fields from a document.

    Converts the file to Markdown with MarkItDown, then uses OpenAI tool
    calling to pull typed fields out of the text. The original version
    used the deprecated ``functions`` / ``function_call`` parameters;
    this uses their replacements, ``tools`` / ``tool_choice``.

    Args:
        file_path: Path to the invoice document.
        api_key: OpenAI API key.

    Returns:
        dict parsed from the model's tool-call arguments (invoice_number,
        date, vendor_name, total_amount, currency, line_items).
    """
    # Convert invoice to Markdown
    md = MarkItDown()
    result = md.convert(file_path)

    # JSON Schema describing the fields we want back.
    invoice_schema = {
        "type": "object",
        "properties": {
            "invoice_number": {"type": "string"},
            "date": {"type": "string"},
            "vendor_name": {"type": "string"},
            "total_amount": {"type": "number"},
            "currency": {"type": "string"},
            "line_items": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "description": {"type": "string"},
                        "quantity": {"type": "number"},
                        "unit_price": {"type": "number"},
                        "total": {"type": "number"}
                    }
                }
            }
        },
        "required": ["invoice_number", "total_amount"]
    }

    tools = [
        {
            "type": "function",
            "function": {
                "name": "extract_invoice",
                "description": "Extract structured information from an invoice",
                "parameters": invoice_schema
            }
        }
    ]

    # Force the model to call our tool so the reply is always structured.
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "user",
                "content": f"Extract invoice data from this document:\n\n{result.text_content}"
            }
        ],
        tools=tools,
        tool_choice={"type": "function", "function": {"name": "extract_invoice"}},
        temperature=0  # deterministic extraction
    )

    # The arguments arrive as a JSON string; parse into a plain dict.
    tool_call = response.choices[0].message.tool_calls[0]
    return json.loads(tool_call.function.arguments)

# Usage
# extract_invoice_data returns a plain dict, so it serializes directly.
invoice_data = extract_invoice_data("invoice.pdf", "your-api-key")
print(json.dumps(invoice_data, indent=2))

Document Summarization

Summarize long documents efficiently:
from markitdown import MarkItDown
from openai import OpenAI

def summarize_document(
    file_path: str,
    api_key: str,
    summary_type: str = "concise"
):
    """Summarize a document after converting it to Markdown.

    summary_type picks one of four prompt styles ("concise", "executive",
    "detailed", "bullet"); unknown values fall back to "concise".
    """
    # Instruction text for each supported summary style.
    style_prompts = {
        "concise": "Provide a concise 2-3 paragraph summary of the key points.",
        "executive": """Create an executive summary with:
            - Overview (2-3 sentences)
            - Key findings (bullet points)
            - Recommendations (bullet points)""",
        "detailed": "Provide a detailed summary covering all major sections and key points.",
        "bullet": "Summarize the main points as a bulleted list."
    }
    instruction = style_prompts.get(summary_type, style_prompts["concise"])

    # Convert the source file to Markdown.
    converter = MarkItDown()
    converted = converter.convert(file_path)

    # A little creative latitude (temperature 0.3) reads better for summaries.
    completion = OpenAI(api_key=api_key).chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system",
             "content": "You are an expert at summarizing documents clearly and accurately."},
            {"role": "user",
             "content": f"{instruction}\n\nDocument:\n{converted.text_content}"},
        ],
        temperature=0.3,
    )
    return completion.choices[0].message.content

# Usage
# summary_type selects one of the prompt styles defined inside the
# function ("concise", "executive", "detailed", "bullet").
summary = summarize_document(
    "research_paper.pdf",
    "your-api-key",
    summary_type="executive"
)
print(summary)

Multi-Document Analysis

Analyze and compare multiple documents:
from markitdown import MarkItDown
from openai import OpenAI
from pathlib import Path

def compare_documents(file_paths: list, analysis_task: str, api_key: str):
    """Run one analysis task across several documents in a single prompt."""
    md = MarkItDown()
    client = OpenAI(api_key=api_key)

    # Convert every file, keyed by bare filename for readable separators.
    converted = {Path(path).name: md.convert(path).text_content
                 for path in file_paths}

    # Stitch the documents together under labelled headers.
    formatted_docs = "\n\n".join(
        f"=== Document: {name} ===\n{content}"
        for name, content in converted.items()
    )

    combined_prompt = f"""{analysis_task}
                
                Documents:
                {formatted_docs}"""

    # temperature=0 keeps the comparison deterministic.
    response = client.chat.completions.create(
        model="gpt-4",
        temperature=0,
        messages=[
            {"role": "system",
             "content": "You are an expert at analyzing and comparing documents."},
            {"role": "user", "content": combined_prompt},
        ],
    )
    return response.choices[0].message.content

# Usage
# All three versions are concatenated into one prompt, so keep the
# combined size within the model's context window.
analysis = compare_documents(
    file_paths=[
        "contract_draft_v1.pdf",
        "contract_draft_v2.pdf",
        "contract_final.pdf"
    ],
    analysis_task="""Compare these contract versions and identify:
        1. Major changes between versions
        2. New clauses added
        3. Terms that were modified
        4. Any concerning changes""",
    api_key="your-api-key"
)

print(analysis)

Batch Processing

Process multiple documents efficiently:
from markitdown import MarkItDown
from openai import OpenAI
from pathlib import Path
import asyncio

class BatchDocumentProcessor:
    """Process multiple documents in batch.

    The original implementation declared its methods ``async`` but called
    the blocking MarkItDown converter and the synchronous OpenAI client
    directly, with no ``await`` inside — so ``asyncio.gather`` ran the
    documents strictly one after another while blocking the event loop.
    Blocking work is now pushed onto worker threads with
    ``asyncio.to_thread`` so documents are processed concurrently.
    """

    def __init__(self, api_key: str):
        self.md = MarkItDown()
        self.client = OpenAI(api_key=api_key)

    def _analyze_sync(self, file_path: str, task: str) -> dict:
        """Blocking convert-then-analyze pipeline for one document."""
        # Convert
        result = self.md.convert(file_path)

        # Analyze (temperature=0 for deterministic extraction)
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "user",
                    "content": f"{task}\n\nDocument:\n{result.text_content}"
                }
            ],
            temperature=0
        )

        return {
            "file": file_path,
            "result": response.choices[0].message.content
        }

    async def process_document(self, file_path: str, task: str):
        """Process a single document without blocking the event loop."""
        return await asyncio.to_thread(self._analyze_sync, file_path, task)

    async def process_directory(
        self,
        directory: str,
        task: str,
        glob_pattern: str = "*.pdf"
    ):
        """Process every matching document in a directory concurrently.

        Returns a list of {"file", "result"} dicts, one per matched file.
        """
        coros = [
            self.process_document(str(file_path), task)
            for file_path in Path(directory).glob(glob_pattern)
        ]
        return await asyncio.gather(*coros)

# Usage
async def main():
    """Run the extraction task over every PDF under ./invoices."""
    processor = BatchDocumentProcessor(api_key="your-api-key")
    
    results = await processor.process_directory(
        directory="./invoices",
        task="Extract: invoice number, date, total amount, and vendor name.",
        glob_pattern="*.pdf"
    )
    
    # One {"file", "result"} dict per processed document.
    for result in results:
        print(f"\nFile: {result['file']}")
        print(result['result'])

asyncio.run(main())

Assistants API Integration

Use MarkItDown with OpenAI Assistants:
from markitdown import MarkItDown
from openai import OpenAI
import time

class DocumentAssistant:
    """OpenAI Assistant for document analysis.

    Documents are converted to Markdown with MarkItDown and posted into an
    Assistants API thread; questions are then answered in that thread.
    """

    def __init__(self, api_key: str):
        self.md = MarkItDown()
        self.client = OpenAI(api_key=api_key)

        # Create assistant (one server-side Assistant per instance)
        self.assistant = self.client.beta.assistants.create(
            name="Document Analyst",
            instructions="""You are a document analysis assistant. 
            Analyze documents and provide clear, accurate insights.
            When asked about specific details, cite the relevant sections.""",
            model="gpt-4-1106-preview"
        )

        # No thread until a document is loaded.
        self.thread = None

    def load_document(self, file_path: str):
        """Load a document into a new conversation thread."""
        # Convert document
        result = self.md.convert(file_path)

        # Create thread with document
        self.thread = self.client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": f"Please analyze this document:\n\n{result.text_content}"
                }
            ]
        )

        print(f"Loaded {file_path}")

    def ask(self, question: str) -> str:
        """Ask a question about the loaded document.

        Raises:
            RuntimeError: if the run ends in any state other than
            ``completed`` (e.g. failed, cancelled, expired,
            requires_action).
        """
        if not self.thread:
            return "Please load a document first."

        # Add message
        self.client.beta.threads.messages.create(
            thread_id=self.thread.id,
            role="user",
            content=question
        )

        # Run assistant
        run = self.client.beta.threads.runs.create(
            thread_id=self.thread.id,
            assistant_id=self.assistant.id
        )

        # Poll until the run reaches a terminal state.
        while run.status in ["queued", "in_progress"]:
            time.sleep(0.5)
            run = self.client.beta.threads.runs.retrieve(
                thread_id=self.thread.id,
                run_id=run.id
            )

        # Previously the latest message was returned regardless of the run
        # outcome, silently returning the wrong content on failure.
        if run.status != "completed":
            raise RuntimeError(f"Assistant run ended with status: {run.status}")

        # messages.list returns newest first, so data[0] is the assistant's
        # reply to this run.
        messages = self.client.beta.threads.messages.list(
            thread_id=self.thread.id
        )

        return messages.data[0].content[0].text.value

# Usage
# Each DocumentAssistant() constructor call creates a new Assistant
# server-side, so reuse one instance rather than building one per query.
assistant = DocumentAssistant(api_key="your-api-key")
assistant.load_document("policy_document.pdf")

print(assistant.ask("What are the key policies outlined in this document?"))
print(assistant.ask("Are there any compliance requirements mentioned?"))

Custom Analysis Templates

Create reusable analysis templates:
from markitdown import MarkItDown
from openai import OpenAI
from typing import Dict, List

class DocumentAnalyzer:
    """Template-driven document analyzer.

    Each entry in TEMPLATES pairs a system persona with a review
    checklist prompt; analyze() applies one template to one document.
    """

    TEMPLATES = {
        "contract_review": {
            "system": "You are a legal contract reviewer.",
            "prompt": """Review this contract and identify:
                1. Key terms and conditions
                2. Obligations and responsibilities
                3. Termination clauses
                4. Potential risks or concerns
                5. Missing or unclear provisions"""
        },
        "financial_analysis": {
            "system": "You are a financial analyst.",
            "prompt": """Analyze this financial document:
                1. Key financial metrics and trends
                2. Revenue and profitability analysis
                3. Major expenses and cost drivers
                4. Financial risks and concerns
                5. Recommendations"""
        },
        "technical_review": {
            "system": "You are a technical documentation reviewer.",
            "prompt": """Review this technical document:
                1. Clarity and completeness
                2. Technical accuracy
                3. Missing information
                4. Suggested improvements"""
        }
    }

    def __init__(self, api_key: str):
        self.md = MarkItDown()
        self.client = OpenAI(api_key=api_key)

    def analyze(self, file_path: str, template: str, model: str = "gpt-4"):
        """Run the named template against the converted document."""
        # Convert first, then resolve the template.
        markdown = self.md.convert(file_path).text_content

        config = self.TEMPLATES.get(template)
        if config is None:
            raise ValueError(f"Unknown template: {template}")

        # temperature=0 keeps repeated reviews consistent.
        completion = self.client.chat.completions.create(
            model=model,
            temperature=0,
            messages=[
                {"role": "system", "content": config["system"]},
                {"role": "user",
                 "content": f"{config['prompt']}\n\nDocument:\n{markdown}"},
            ],
        )
        return completion.choices[0].message.content

# Usage
analyzer = DocumentAnalyzer(api_key="your-api-key")

# Contract review
# Uses the "contract_review" template's legal-reviewer persona.
contract_analysis = analyzer.analyze(
    "service_agreement.pdf",
    template="contract_review"
)
print(contract_analysis)

# Financial analysis
# Same analyzer instance, different template key.
financial_analysis = analyzer.analyze(
    "quarterly_report.pdf",
    template="financial_analysis"
)
print(financial_analysis)

Best Practices

Token Management: For large documents, consider chunking the Markdown output or using GPT-4 Turbo with its 128K context window.
Vision Integration: Enable llm_client and llm_model in MarkItDown for documents with important visual content (charts, diagrams, images).
Structured Output: Use function calling for extracting structured data - MarkItDown’s clean Markdown makes extraction more reliable.
Temperature Settings: Use temperature=0 for factual extraction, 0.3-0.7 for summaries and creative analysis.
System Messages: Leverage system messages to define the assistant’s role and analysis approach - this improves consistency.

Resources

Build docs developers (and LLMs) love