Backend Stack
Django 1.11
Web framework for the backend
Django REST Framework
RESTful API toolkit
PostgreSQL 12
Relational database
Celery
Distributed task queue
Project Structure
Backend code lives in contentcuration/contentcuration/:
contentcuration/contentcuration/
├── models.py # Django models
├── viewsets/ # DRF viewsets
│ ├── base.py # Base classes
│ ├── channel.py # Channel viewsets
│ ├── contentnode.py # Content node viewsets
│ ├── assessmentitem.py # Assessment viewsets
│ ├── file.py # File viewsets
│ └── sync/ # Sync endpoint
│ ├── endpoint.py # Main sync endpoint
│ ├── constants.py # Change type constants
│ └── utils.py # Sync utilities
├── tests/ # Python tests
├── management/ # Django management commands
└── db/ # Custom DB utilities
└── models/
├── manager.py # Custom managers
└── query.py # Custom query classes
Django Models
Core Models
Studio’s main models are defined in models.py:
from django.db import models
from mptt.models import MPTTModel, TreeForeignKey
class Channel(models.Model):
"""Represents a content channel."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4)
name = models.CharField(max_length=200)
description = models.TextField(blank=True)
thumbnail = models.TextField(blank=True)
public = models.BooleanField(default=False)
published_data = models.JSONField(default=dict)
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
class Meta:
db_table = 'contentcuration_channel'
class ContentNode(MPTTModel):
"""Represents a node in the content tree."""
id = models.UUIDField(primary_key=True, default=uuid.uuid4)
title = models.CharField(max_length=200)
description = models.TextField(blank=True)
kind_id = models.CharField(max_length=200, choices=content_kinds.choices)
parent = TreeForeignKey('self', null=True, blank=True,
related_name='children',
on_delete=models.CASCADE)
channel = models.ForeignKey(Channel, related_name='nodes',
on_delete=models.CASCADE)
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
class Meta:
db_table = 'contentcuration_contentnode'
Model Relationships
- One-to-Many
- Many-to-Many
- Tree Structure (MPTT)
class Channel(models.Model):
# ...
class ContentNode(models.Model):
channel = models.ForeignKey(
Channel,
related_name='nodes',
on_delete=models.CASCADE
)
# Usage
channel = Channel.objects.get(id=channel_id)
nodes = channel.nodes.all() # Access via related_name
class User(AbstractBaseUser):
# ...
class Channel(models.Model):
editors = models.ManyToManyField(
User,
related_name='editable_channels',
through='ChannelUser'
)
class ChannelUser(models.Model):
"""Through model for user-channel relationship."""
user = models.ForeignKey(User, on_delete=models.CASCADE)
channel = models.ForeignKey(Channel, on_delete=models.CASCADE)
can_edit = models.BooleanField(default=False)
can_view = models.BooleanField(default=False)
from mptt.models import MPTTModel, TreeForeignKey
class ContentNode(MPTTModel):
parent = TreeForeignKey(
'self',
null=True,
blank=True,
related_name='children',
on_delete=models.CASCADE
)
# MPTT adds these fields automatically:
# - tree_id: Tree identifier
# - lft, rght: Nested set fields
# - level: Depth in tree
# Usage
node = ContentNode.objects.get(id=node_id)
ancestors = node.get_ancestors() # All parent nodes
descendants = node.get_descendants() # All child nodes
siblings = node.get_siblings() # Same level, same parent
ViewSets
Custom Base ViewSet
Studio uses ValuesViewset for optimized read performance:
# viewsets/base.py
from rest_framework.viewsets import GenericViewSet
class ValuesViewset(GenericViewSet):
"""
Viewset that uses .values() for reads instead of serializer.
Significantly faster for large querysets.
"""
values = () # Tuple of fields to return
field_map = {} # Dict to rename/transform fields
def get_queryset(self):
"""Override to filter queryset."""
return self.queryset
def prefetch_queryset(self, queryset):
"""Add prefetch_related/select_related here."""
return queryset
def annotate_queryset(self, queryset):
"""Add annotations before .values() call."""
return queryset
def consolidate(self, items, queryset):
"""Post-process list of dicts after .values() call."""
return items
Example ViewSet
# viewsets/contentnode.py
from rest_framework.permissions import IsAuthenticated
from rest_framework.decorators import action
from .base import ValuesViewset, BulkModelSerializer, BulkListSerializer
from contentcuration.models import ContentNode
class ContentNodeViewset(ValuesViewset):
queryset = ContentNode.objects.all()
permission_classes = [IsAuthenticated]
values = (
'id',
'title',
'description',
'kind_id',
'parent_id',
'channel_id',
'created',
'modified',
)
field_map = {
'kind': 'kind_id', # Rename kind_id to kind
}
def get_queryset(self):
"""Filter to nodes user can access."""
return ContentNode.objects.filter(
channel__editors=self.request.user
)
def annotate_queryset(self, queryset):
"""Add computed fields."""
return queryset.annotate(
child_count=Count('children')
)
@action(detail=True, methods=['post'])
def copy(self, request, pk=None):
"""Copy a node to another location."""
node = self.get_object()
target_id = request.data.get('target')
# Copy logic...
return Response({'id': copied_node.id})
Serializers
Use BulkModelSerializer for bulk operations:
from .base import BulkModelSerializer, BulkListSerializer
class ContentNodeSerializer(BulkModelSerializer):
class Meta:
model = ContentNode
fields = (
'id',
'title',
'description',
'kind',
'parent',
'channel',
)
list_serializer_class = BulkListSerializer
def validate_title(self, value):
"""Custom validation."""
if not value.strip():
raise ValidationError("Title cannot be empty")
return value
def create(self, validated_data):
"""Custom create logic."""
# Add automatic fields
validated_data['created_by'] = self.context['request'].user
return super().create(validated_data)
def update(self, instance, validated_data):
"""Custom update logic."""
# Track modification
validated_data['modified_by'] = self.context['request'].user
return super().update(instance, validated_data)
Bulk Operations
The base viewset provides bulk create, update, and delete:
# Client sends
POST /api/contentnodes/bulk_create/
[
{"title": "Node 1", "kind": "video", "parent": "abc123"},
{"title": "Node 2", "kind": "document", "parent": "abc123"},
]
PATCH /api/contentnodes/bulk_update/
[
{"id": "node1", "title": "Updated Title"},
{"id": "node2", "description": "New description"},
]
DELETE /api/contentnodes/bulk_delete/
["node1", "node2", "node3"]
class ContentNodeViewset(ValuesViewset):
serializer_class = ContentNodeSerializer
def perform_bulk_create(self, serializer):
"""Called during bulk create."""
# Add default values
serializer.save(created_by=self.request.user)
def perform_bulk_update(self, serializer):
"""Called during bulk update."""
serializer.save(modified_by=self.request.user)
Sync Endpoint
The sync endpoint handles batched changes from the frontend:
# viewsets/sync/endpoint.py
from collections import OrderedDict
from rest_framework.decorators import api_view
from .constants import CREATED, UPDATED, DELETED, MOVED
SYNC_MODELS = OrderedDict([
('contentnode', ContentNodeViewset),
('channel', ChannelViewset),
('assessmentitem', AssessmentItemViewset),
('file', FileViewset),
])
@api_view(['POST'])
def sync(request):
"""Process batched changes from frontend."""
changes = request.data.get('changes', [])
results = []
errors = []
# Group changes by table and type
grouped = group_changes(changes)
for table, change_type, items in grouped:
viewset = SYNC_MODELS[table]
try:
if change_type == CREATED:
viewset.bulk_create(items)
elif change_type == UPDATED:
viewset.bulk_update(items)
elif change_type == DELETED:
viewset.bulk_delete(items)
# ...
except ValidationError as e:
errors.append({'table': table, 'error': str(e)})
return Response({
'changes': results,
'errors': errors,
})
Database Queries
Efficient Querying
- Select Related
- Prefetch Related
- Annotations
- Values
Use select_related() for foreign key lookups:
# Bad - N+1 query problem
nodes = ContentNode.objects.all()
for node in nodes:
print(node.channel.name) # Queries DB for each node!
# Good - single JOIN query
nodes = ContentNode.objects.select_related('channel')
for node in nodes:
print(node.channel.name) # No additional queries
Use prefetch_related() for reverse foreign keys and many-to-many:
# Bad - N+1 queries
channels = Channel.objects.all()
for channel in channels:
print(channel.nodes.count()) # Query per channel!
# Good - prefetch in 2 queries total
channels = Channel.objects.prefetch_related('nodes')
for channel in channels:
print(channel.nodes.count()) # No additional queries
Use annotate() for computed values:
from django.db.models import Count, Sum
channels = Channel.objects.annotate(
node_count=Count('nodes'),
total_size=Sum('nodes__files__file_size'),
)
for channel in channels:
print(f"{channel.name}: {channel.node_count} nodes")
Use .values() to return dicts instead of model instances:
# Returns list of ContentNode objects
nodes = ContentNode.objects.all()
# Returns list of dicts - faster, less memory
node_data = ContentNode.objects.values(
'id', 'title', 'description'
)
# [{'id': '...', 'title': '...', 'description': '...'}, ...]
Complex Queries
from django.db.models import Q, F, Exists, OuterRef, Subquery
# Q objects for complex filters
nodes = ContentNode.objects.filter(
Q(kind='video') | Q(kind='audio'),
Q(published=True) & Q(public=True)
)
# F objects for field comparisons
nodes = ContentNode.objects.filter(
modified__gt=F('created') + timedelta(days=7)
)
# Exists subquery
from django.db.models import Exists, OuterRef
has_files = File.objects.filter(contentnode=OuterRef('pk'))
nodes_with_files = ContentNode.objects.filter(
Exists(has_files)
)
# Subquery for related values
latest_file = File.objects.filter(
contentnode=OuterRef('pk')
).order_by('-created')
nodes = ContentNode.objects.annotate(
latest_file_id=Subquery(latest_file.values('id')[:1])
)
Celery Tasks
Define async tasks for long-running operations:
from celery import shared_task
from contentcuration.models import Channel
@shared_task(bind=True)
def export_channel(self, channel_id):
"""
Export channel to Kolibri format.
"""
channel = Channel.objects.get(id=channel_id)
# Update task progress
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 100}
)
# Perform export...
for i, node in enumerate(channel.nodes.all()):
# Export node...
self.update_state(
state='PROGRESS',
meta={'current': i, 'total': channel.nodes.count()}
)
return {'status': 'complete', 'channel_id': channel_id}
# Trigger from viewset
from .tasks import export_channel
class ChannelViewset(ValuesViewset):
@action(detail=True, methods=['post'])
def export(self, request, pk=None):
task = export_channel.delay(pk)
return Response({'task_id': task.id})
Testing
Write tests using pytest:
from contentcuration.tests.base import StudioTestCase
from contentcuration.tests import testdata
from contentcuration.models import ContentNode
class ContentNodeTest(StudioTestCase):
def setUp(self):
super().setUp()
self.channel = testdata.channel()
self.user = testdata.user()
def test_create_node(self):
"""Test creating a content node."""
node = ContentNode.objects.create(
title='Test Node',
kind_id='video',
channel=self.channel,
)
self.assertEqual(node.title, 'Test Node')
self.assertEqual(node.kind_id, 'video')
def test_node_permissions(self):
"""Test user can only access their channels."""
node = testdata.node({'channel': self.channel})
other_channel = testdata.channel()
other_node = testdata.node({'channel': other_channel})
# User can access own channel's nodes
self.channel.editors.add(self.user)
accessible = ContentNode.objects.filter(
channel__editors=self.user
)
self.assertIn(node, accessible)
self.assertNotIn(other_node, accessible)
Best Practices
Use ValuesViewset for read-heavy endpoints
Use ValuesViewset for read-heavy endpoints
ValuesViewset is much faster than standard DRF serializers for reads. Use it when returning lists of objects.
Optimize queries with select_related/prefetch_related
Optimize queries with select_related/prefetch_related
Always use select_related() for foreign keys and prefetch_related() for reverse and many-to-many relations to avoid N+1 queries.
Use bulk operations for multiple items
Use bulk operations for multiple items
Bulk create, update, and delete are much faster than individual operations. Always batch when possible.
Add indexes for filtered fields
Add indexes for filtered fields
Add database indexes for fields frequently used in WHERE clauses:
class ContentNode(models.Model):
# ...
class Meta:
indexes = [
models.Index(fields=['channel', 'kind']),
models.Index(fields=['modified']),
]
Use Celery for long-running tasks
Use Celery for long-running tasks
Operations taking > 5 seconds should be async Celery tasks to avoid blocking the request/response cycle.
Next Steps
Testing
Write comprehensive backend tests
Frontend Development
Connect backend APIs to frontend
