Skip to main content

Overview

The Document Management module provides comprehensive document control with revision tracking, metadata management, AI-powered search, and collaborative features like PDF annotations.

Key Features

Version Control

Track document revisions with complete history

AI Search

Semantic search using Google Gemini embeddings

PDF Annotations

Add comments and pins directly on PDF pages

Metadata Management

Flexible custom metadata fields

Automated Processing

n8n integration for text extraction and analysis

Access Control

Fine-grained permissions and traceability

Data Model

Documento (Document)

documentos/models.py
class Documento(models.Model):
    """Controlled document record.

    Holds the identification, classification, related asset/location and
    lifecycle state of a document.
    """

    # Lifecycle states as (stored value, human-readable label) pairs.
    ESTADOS = [
        ('BORRADOR', 'Draft'),
        ('EN_REVISION', 'In Review'),
        ('APROBADO', 'Approved'),
        ('OBSOLETO', 'Obsolete'),
    ]

    # Identification
    codigo = models.CharField(max_length=100, unique=True)
    titulo = models.CharField(max_length=500)
    descripcion = models.TextField(blank=True)

    # Classification
    # PROTECT: a document type cannot be deleted while documents use it.
    tipo = models.ForeignKey('TipoDocumento', on_delete=models.PROTECT)
    # SET_NULL is only valid on nullable columns; Django's system check
    # (fields.E320) rejects the model otherwise, hence null=True here and on
    # the other SET_NULL relations below.
    disciplina = models.ForeignKey(
        'Disciplina', on_delete=models.SET_NULL, null=True, blank=True
    )

    # Relationships
    activo = models.ForeignKey(
        'activos.Activo', on_delete=models.SET_NULL, null=True, blank=True
    )
    ubicacion = models.ForeignKey(
        'activos.Ubicacion', on_delete=models.SET_NULL, null=True, blank=True
    )

    # Status
    # Default to the first lifecycle state so rows created without an explicit
    # state do not end up with '' (which is not one of ESTADOS).
    estado = models.CharField(
        max_length=20, choices=ESTADOS, default='BORRADOR'
    )

    # Metadata
    # Kept as NULL when the creating user is deleted.
    creado_por = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, blank=True
    )
    fecha_creacion = models.DateTimeField(auto_now_add=True)

Revision (Document Revision)

documentos/models.py
class Revision(models.Model):
    """One uploaded file version of a ``Documento``.

    Stores the file itself, the text and embedding produced by the processing
    pipeline, and who uploaded it and when.
    """

    # related_name='revisiones' makes ``documento.revisiones`` and lookups such
    # as ``revisiones__texto_extraido`` (used by the hybrid search) resolve.
    documento = models.ForeignKey(
        'Documento', on_delete=models.CASCADE, related_name='revisiones'
    )
    numero_revision = models.CharField(max_length=20)

    # File storage (MinIO/S3)
    archivo = models.FileField(upload_to='documentos/')
    nombre_archivo = models.CharField(max_length=255)
    tamano = models.BigIntegerField()  # bytes

    # AI processing
    texto_extraido = models.TextField(blank=True)
    # Nullable until embeddings are generated; the dimension must match the
    # output size of the embedding model in use.
    embedding = VectorField(dimensions=768, blank=True, null=True)
    procesado = models.BooleanField(default=False)

    # Tracking
    # SET_NULL requires a nullable column (Django check fields.E320).
    subido_por = models.ForeignKey(
        User, on_delete=models.SET_NULL, null=True, blank=True
    )
    fecha_subida = models.DateTimeField(auto_now_add=True)

    # Change description
    comentarios = models.TextField(blank=True)

MetadatoConfig and MetadatoValor (Custom Metadata)

documentos/models.py
class MetadatoConfig(models.Model):
    """Definition of a custom metadata field for one document type."""
    # Supported field kinds as (stored value, human-readable label) pairs.
    TIPOS = [
        ('TEXT', 'Text'),
        ('NUMBER', 'Number'),
        ('DATE', 'Date'),
        ('SELECT', 'Dropdown'),
    ]
    
    nombre = models.CharField(max_length=100)
    tipo = models.CharField(max_length=20, choices=TIPOS)
    # Deleting the document type deletes its metadata definitions (CASCADE).
    tipo_documento = models.ForeignKey('TipoDocumento', on_delete=models.CASCADE)
    obligatorio = models.BooleanField(default=False)
    # Optional JSON payload; per the original note it is used by SELECT fields.
    # NOTE(review): the expected JSON shape is not visible here - confirm.
    opciones = models.JSONField(blank=True, null=True)  # For SELECT type

class MetadatoValor(models.Model):
    """Value of one configured metadata field for one document."""
    # Both relations cascade: the value row is removed together with either
    # its document or its field definition.
    documento = models.ForeignKey('Documento', on_delete=models.CASCADE)
    metadato = models.ForeignKey('MetadatoConfig', on_delete=models.CASCADE)
    # Stored as text whatever the configured field type is.
    valor = models.TextField()

Document Upload and Processing

Upload Workflow

1

Upload File

User uploads document file to MinIO storage
documentos/views.py
def upload_documento(request):
    """Create a document and its initial revision from an uploaded file.

    Reads the file from ``request.FILES['archivo']`` and the ``titulo`` and
    ``tipo`` values from ``request.POST``.

    Returns:
        JsonResponse: status 201 with the ids of the created document and
        revision, or status 400 listing the missing form fields.
    """
    # Function-scope imports keep this snippet self-contained.
    from django.db import transaction
    from django.http import JsonResponse

    archivo = request.FILES.get('archivo')
    titulo = request.POST.get('titulo')
    tipo_id = request.POST.get('tipo')

    # Answer with a 400 instead of letting a missing field surface as a
    # MultiValueDictKeyError / IntegrityError (HTTP 500).
    faltantes = []
    if archivo is None:
        faltantes.append('archivo')
    if not titulo:
        faltantes.append('titulo')
    if not tipo_id:
        faltantes.append('tipo')
    if faltantes:
        return JsonResponse(
            {'error': 'Missing required fields', 'campos': faltantes},
            status=400,
        )

    # Create both rows atomically so a failed revision insert does not leave
    # behind a document without any file.
    with transaction.atomic():
        documento = Documento.objects.create(
            codigo=generate_codigo(),
            titulo=titulo,
            tipo_id=tipo_id,
            creado_por=request.user,
        )

        revision = Revision.objects.create(
            documento=documento,
            numero_revision='0',
            archivo=archivo,
            # Required columns on Revision, taken from the uploaded file.
            nombre_archivo=archivo.name,
            tamano=archivo.size,
            subido_por=request.user,
        )

    # A Django view must return a response object.
    return JsonResponse(
        {'documento_id': documento.id, 'revision_id': revision.id},
        status=201,
    )
2

Trigger n8n Workflow

Send webhook to n8n for processing
documentos/tasks.py
@shared_task
def procesar_documento(revision_id):
    """Ask the n8n workflow to process the file of a revision.

    Posts the revision id, the file URL and the callback URL to the webhook
    configured in ``settings.N8N_PROCESS_DOCUMENT_WEBHOOK_URL``.

    Args:
        revision_id: Primary key of the ``Revision`` to process.

    Raises:
        Revision.DoesNotExist: If no revision has that id.
        requests.RequestException: If the webhook cannot be reached, times
            out, or answers with an HTTP error status.
    """
    revision = Revision.objects.get(id=revision_id)

    # Trigger n8n workflow
    response = requests.post(
        settings.N8N_PROCESS_DOCUMENT_WEBHOOK_URL,
        json={
            'revision_id': revision.id,
            'archivo_url': revision.archivo.url,
            'callback_url': f'{settings.INTERNAL_SITE_URL}/documentos/callback/'
        },
        # requests waits indefinitely by default; bound the wait so a stuck
        # webhook cannot block the worker forever. Overridable via settings.
        timeout=getattr(settings, 'N8N_WEBHOOK_TIMEOUT', 30),
    )
    # Surface 4xx/5xx answers as task failures instead of ignoring them.
    response.raise_for_status()
3

Extract Text (n8n)

n8n extracts text from PDF using OCR/PDF libraries
4

Generate Embeddings

Create vector embeddings using Gemini API
documentos/tasks.py
@shared_task
def generar_embeddings(revision_id):
    """Compute and store the Gemini embedding of a revision's extracted text.

    Args:
        revision_id: Primary key of the ``Revision`` to embed.

    Raises:
        Revision.DoesNotExist: If no revision has that id.
    """
    revision = Revision.objects.get(id=revision_id)

    # Nothing to embed yet (text extraction missing or empty): leave the
    # revision unprocessed rather than sending an empty request to the API.
    if not (revision.texto_extraido or '').strip():
        return

    # Generate embedding with Gemini
    import google.generativeai as genai
    genai.configure(api_key=settings.GEMINI_API_KEY)

    result = genai.embed_content(
        model="models/embedding-001",
        content=revision.texto_extraido
    )

    # Store in pgvector
    revision.embedding = result['embedding']
    revision.procesado = True
    # Write only the two columns changed here so values modified elsewhere
    # while the API call was in flight are not overwritten.
    revision.save(update_fields=['embedding', 'procesado'])
5

Index for Search

Document is now searchable via text and semantic search
Semantic search: find documents by meaning, not just keywords:
documentos/views.py
def buscar_semantico(request):
    """Return the processed revisions closest in meaning to the ``q`` parameter.

    The query text is embedded with Gemini and compared to stored revision
    embeddings by cosine distance; at most 20 results with distance below 0.5
    are returned, closest first.

    Returns:
        JsonResponse: ``{'resultados': [...]}`` where each item holds the
        document code, its title and ``similarity`` (1 - cosine distance).
        The list is empty when ``q`` is missing or blank.
    """
    query = (request.GET.get('q') or '').strip()
    if not query:
        # No text to embed; skip the API call.
        return JsonResponse({'resultados': []})

    # Generate query embedding
    import google.generativeai as genai
    genai.configure(api_key=settings.GEMINI_API_KEY)

    query_embedding = genai.embed_content(
        model="models/embedding-001",
        content=query
    )['embedding']

    # Vector similarity search with pgvector
    from pgvector.django import CosineDistance

    resultados = Revision.objects.filter(
        procesado=True
    ).select_related(
        'documento'  # fetch the document in the same query (avoids N+1)
    ).annotate(
        distancia=CosineDistance('embedding', query_embedding)
    ).filter(
        distancia__lt=0.5  # Similarity threshold
    ).order_by('distancia')[:20]

    return JsonResponse({
        'resultados': [
            {
                'documento': r.documento.codigo,
                'titulo': r.documento.titulo,
                'similarity': 1 - r.distancia
            }
            for r in resultados
        ]
    })
Hybrid search: combine keyword and semantic search:
documentos/views.py
def buscar_hibrido(request):
    """Search documents by keyword and by meaning, then merge both result sets.

    Returns:
        JsonResponse: ``{'resultados': ...}`` with the output of
        ``merge_results``; an empty list when ``q`` is missing or blank.
    """
    query = (request.GET.get('q') or '').strip()
    if not query:
        # icontains=None raises ValueError and '' matches every document, so
        # answer an absent/blank query with no results.
        return JsonResponse({'resultados': []})

    # Keyword search
    keyword_results = Documento.objects.filter(
        Q(titulo__icontains=query) |
        Q(codigo__icontains=query) |
        Q(revisiones__texto_extraido__icontains=query)
    ).distinct()  # the revision join can repeat a document

    # Semantic search
    semantic_results = buscar_semantico_interno(query)

    # Merge and rank results
    combined = merge_results(keyword_results, semantic_results)

    return JsonResponse({'resultados': combined})

PDF Annotations

ComentarioDocumento (PDF Comment)

documentos/models.py
class ComentarioDocumento(models.Model):
    """Comment pinned to a position on a page of a document revision."""
    # Comments are removed together with their document or revision (CASCADE).
    documento = models.ForeignKey('Documento', on_delete=models.CASCADE)
    revision = models.ForeignKey('Revision', on_delete=models.CASCADE)
    
    # Position on PDF
    # NOTE(review): whether page numbers start at 0 or 1 is not visible here.
    pagina = models.IntegerField()
    posicion_x = models.FloatField()  # Percentage 0-100
    posicion_y = models.FloatField()  # Percentage 0-100
    
    # Content
    texto = models.TextField()
    # Deleting the author deletes their comments (CASCADE).
    autor = models.ForeignKey(User, on_delete=models.CASCADE)
    fecha = models.DateTimeField(auto_now_add=True)
    
    # Thread
    # Optional parent comment; replies are reachable as ``parent.respuestas``
    # and are deleted with their parent (CASCADE).
    respuesta_a = models.ForeignKey('self', on_delete=models.CASCADE,
                                   null=True, blank=True,
                                   related_name='respuestas')

Interactive PDF Viewer

Display PDF with clickable comment pins:
/**
 * Add a comment pin to the PDF by posting it to the comments endpoint.
 *
 * The pixel coordinates are converted to percentages of the page size.
 * Reads documentoId, pageWidth, pageHeight, commentText and csrfToken from
 * the enclosing scope.
 *
 * @param {number} pageNum - Page the pin belongs to.
 * @param {number} x - Horizontal position, in the same unit as pageWidth.
 * @param {number} y - Vertical position, in the same unit as pageHeight.
 * @returns {Promise<Response>} Resolves with the server response; rejects on
 *   network failure or when the server answers with a non-2xx status.
 */
function addComment(pageNum, x, y) {
  const comentario = {
    documento_id: documentoId,
    pagina: pageNum,
    posicion_x: (x / pageWidth) * 100,
    posicion_y: (y / pageHeight) * 100,
    texto: commentText
  };

  // Return the promise so callers can wait for the save and handle failures.
  return fetch('/documentos/comentarios/', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-CSRFToken': csrfToken
    },
    body: JSON.stringify(comentario)
  }).then((response) => {
    // fetch only rejects on network errors; treat HTTP errors as failures too.
    if (!response.ok) {
      throw new Error(`Could not save comment (HTTP ${response.status})`);
    }
    return response;
  });
}

AI Chat Assistant

N8nChatHistory (Chat History)

documentos/models.py
class N8nChatHistory(models.Model):
    """One question/answer exchange between a user and the chat assistant."""
    # History rows are removed together with their document or user (CASCADE).
    documento = models.ForeignKey('Documento', on_delete=models.CASCADE)
    usuario = models.ForeignKey(User, on_delete=models.CASCADE)
    
    pregunta = models.TextField()
    respuesta = models.TextField()
    
    fecha = models.DateTimeField(auto_now_add=True)
    # NOTE(review): presumably groups exchanges of one conversation; how the
    # value is generated is not visible here - confirm.
    session_id = models.CharField(max_length=100)

Chat Interface

Ask questions about document content:
documentos/views.py
def chat_documento(request, documento_id):
    """Answer a question about a document through the n8n chat workflow.

    Sends the question and the first 2000 characters of the latest revision's
    extracted text to ``settings.N8N_CHAT_WEBHOOK_URL`` and stores the
    exchange in ``N8nChatHistory``.

    Args:
        request: POST request carrying the ``pregunta`` form field.
        documento_id: Primary key of the ``Documento`` being asked about.

    Returns:
        JsonResponse: ``{'respuesta': ...}`` on success; status 400 when the
        question is missing; status 502 when the chat workflow fails or
        returns an unexpected payload.

    Raises:
        Http404: If the document does not exist.
    """
    from django.shortcuts import get_object_or_404

    # An unknown id is a client error (404), not a server error.
    documento = get_object_or_404(Documento, id=documento_id)

    pregunta = (request.POST.get('pregunta') or '').strip()
    if not pregunta:
        return JsonResponse({'error': 'pregunta is required'}, status=400)

    # A document may have no revision yet; send an empty context then.
    ultima_revision = documento.ultima_revision
    contexto = (
        ultima_revision.texto_extraido[:2000]
        if ultima_revision is not None
        else ''
    )

    # Send to n8n chat workflow
    try:
        response = requests.post(
            settings.N8N_CHAT_WEBHOOK_URL,
            json={
                'documento_id': documento.id,
                'pregunta': pregunta,
                'contexto': contexto
            },
            # Bound the wait so a stuck workflow cannot hold the request open
            # indefinitely. Overridable via settings.
            timeout=getattr(settings, 'N8N_WEBHOOK_TIMEOUT', 30),
        )
        response.raise_for_status()
        respuesta = response.json()['respuesta']
    except (requests.RequestException, ValueError, KeyError):
        # Unreachable/failed workflow, non-JSON body, or missing 'respuesta'.
        return JsonResponse(
            {'error': 'The chat service is unavailable'}, status=502
        )

    # Save to history
    N8nChatHistory.objects.create(
        documento=documento,
        usuario=request.user,
        pregunta=pregunta,
        respuesta=respuesta
    )

    return JsonResponse({'respuesta': respuesta})

API Endpoints

Document Search

Search with filters and AI

Upload & Processing

Upload files and track processing

Annotations

Manage PDF comments and pins

AI Chat

Chat with document content

Best Practices

Naming Convention: Use consistent document codes (e.g., DWG-ELEC-001 for electrical drawings)
Large Files: Files over 50 MB may time out during processing - consider splitting them
Metadata Quality: Rich metadata improves search accuracy significantly

Assets

Link documents to equipment

Projects

Associate with projects

n8n Integration

Automation workflows

Build docs developers (and LLMs) love