Task structures, scheduling, and multitasking architecture in Aeolos
Aeolos implements preemptive multitasking with a priority-based round-robin scheduler. Tasks can run in kernel mode or user mode, with each task having its own kernel stack and optional virtual address space.
// Per-task control block (TCB).
// NOTE(review): kstack_top must remain the FIRST field — the context-switch
// assembly loads it with `mov (%rdi), %rsp` in finish_context_switch.
typedef struct task_t {
    void* kstack_top;                        // kernel stack top; holds the saved stack pointer while the task is not running
    uint64_t cr3;                            // virtual address space (page-table root loaded into CR3)
    tid_t tid;                               // task id; doubles as the task's index into the global task vector
    priority_t priority;                     // task priority
    tstatus_t status;                        // current status of task: bitmask of tstatus_t flags, 0 == TASK_READY
    timeval_t wakeuptime;                    // time at which task should wake up (meaningful while TASK_ASLEEP is set)
    tmode_t mode;                            // kernel mode or usermode
    void* kstack_limit;                      // kernel stack limit (lowest address; top is kstack_limit + KSTACK_SIZE)
    vec_struct(vfs_node_desc_t*) openfiles;  // stores open files
    struct task_t* next;                     // intrusive list links used by the tqueue_t sleep/dead queues
    struct task_t* prev;
} task_t;
// Task status flags. These combine as a bitmask; a task with NO flags set
// (status == 0) is ready to run.
typedef enum {
    TASK_RUNNING = 1 << 0, // Currently executing on a CPU
    TASK_BLOCKED = 1 << 1, // Blocked waiting for an event
    TASK_ASLEEP  = 1 << 2, // Sleeping for a specified time
    TASK_DEAD    = 1 << 3  // Marked for cleanup
} tstatus_t;

// Ready to run (no flags set). A #define rather than an enumerator because
// it is the absence of all flags, not a flag of its own; the scheduler
// tests it with `status == TASK_READY`.
#define TASK_READY 0
The 4KB kernel stack is relatively small. Deep call chains or large stack allocations could cause overflow. Stack guards are not currently implemented, so an overflow would silently corrupt whatever memory lies adjacent to the stack rather than faulting.
// Protects all scheduler state below (task vector, queues, per-CPU arrays).
static lock_t sched_lock;
// an idle task for each cpu
static task_t* tasks_idle[CPU_MAX];
// currently running tasks on each cpu
static task_t* tasks_running[CPU_MAX];
// list of all tasks, indexed by tid; a NULL slot means that tid is not
// schedulable (idle task, killed task, or free slot)
vec_new_static(task_t*, tasks_all);
// asleep and dead tasks respectively
static tqueue_t tasks_asleep, tasks_dead;
void sched_init(void (*first_task)(tid_t)){ uint16_t cpu_id = smp_get_current_info()->cpu_id; /* create idle task for current core and set it to running idle tasks are removed from task list to prevent the scheduler from attempting to schedule them */ tid_t tid_idle = task_add(idle, 0, TASK_KERNEL_MODE, NULL, 0); tasks_idle[cpu_id] = tasks_all.data[tid_idle]; tasks_running[cpu_id] = tasks_all.data[tid_idle]; tasks_all.data[tid_idle] = NULL; // scheduler has been started on the bsp, do some initialization if (first_task) { task_add(first_task, PRIORITY_MID, TASK_KERNEL_MODE, NULL, 0); task_add(sched_janitor, PRIORITY_MIN, TASK_KERNEL_MODE, NULL, 0); klog_ok("started on bsp\n"); } // configure and start the lapic timer apic_timer_set_period(TIMESLICE_DEFAULT); apic_timer_set_mode(APIC_TIMER_MODE_ONESHOT); apic_timer_set_handler(init_context_switch); apic_timer_start();}
/* Context-switch stubs.
 *
 * init_context_switch is installed as the APIC timer interrupt handler.
 * It saves all general-purpose registers on the interrupted task's kernel
 * stack and hands the resulting save-area pointer to _do_context_switch.
 * _do_context_switch does not return here: it calls finish_context_switch
 * with the chosen task, which switches stacks and iretq's into that task.
 *
 * NOTE(review): the pop sequence in finish_context_switch is the exact
 * reverse of the push sequence above it — keep both in sync if the saved
 * register set ever changes.
 */
.global init_context_switch
.global finish_context_switch
.extern _do_context_switch

init_context_switch:
    push %rax
    push %rbx
    push %rcx
    push %rdx
    push %rsi
    push %rdi
    push %rbp
    push %r8
    push %r9
    push %r10
    push %r11
    push %r12
    push %r13
    push %r14
    push %r15
    mov %rsp, %rdi          # first C argument: pointer to the saved state
    call _do_context_switch # never returns to this point

finish_context_switch:
    # %rdi = task_t* of the next task; its first field is kstack_top,
    # so this loads the next task's saved kernel stack pointer.
    mov (%rdi), %rsp
    pop %r15
    pop %r14
    pop %r13
    pop %r12
    pop %r11
    pop %r10
    pop %r9
    pop %r8
    pop %rbp
    pop %rdi
    pop %rsi
    pop %rdx
    pop %rcx
    pop %rbx
    pop %rax
    iretq                   # resume the interrupted context of the next task
/* Scheduler core. Called from the timer interrupt stub with a pointer to
 * the interrupted task's register save area (which lives on that task's
 * kernel stack).
 *
 * Saves the current task, wakes expired sleepers, selects the next READY
 * task round-robin starting just after the current tid, and switches to
 * it. Never returns: finish_context_switch() loads the next task's stack
 * and iretq's into it.
 */
void _do_context_switch(task_state_t* state)
{
    lock_wait(&sched_lock);
    uint16_t cpu = smp_get_current_info()->cpu_id;

    // save state of current task
    task_t* curr = tasks_running[cpu];
    curr->kstack_top = state;
    curr->status &= ~TASK_RUNNING;

    // Wake up tasks whose deadline has passed. Sample the clock once:
    // it is loop-invariant, and hoisting it avoids a (slow) HPET read on
    // every iteration.
    timeval_t now = hpet_get_nanos();
    while (tasks_asleep.back && tasks_asleep.back->wakeuptime < now) {
        task_t* t = tq_pop_back(&tasks_asleep);
        t->status &= ~TASK_ASLEEP;
    }

    // Round-robin selection: scan tids after curr first, then wrap around
    // to the start. The second loop includes curr itself so it can keep
    // running when it is the only READY task. NULL slots (idle/killed
    // tids) are skipped.
    task_t* next = NULL;
    for (tid_t i = curr->tid + 1; i < tasks_all.len; i++) {
        task_t* t = tasks_all.data[i];
        if (t != NULL && t->status == TASK_READY) {
            next = t;
            goto task_chosen;
        }
    }
    for (tid_t i = 0; i <= curr->tid; i++) {
        task_t* t = tasks_all.data[i];
        if (t != NULL && t->status == TASK_READY) {
            next = t;
            goto task_chosen;
        }
    }
task_chosen:
    // could not find a runnable task, so idle
    if (!next)
        next = tasks_idle[cpu];

    // we've chosen next task
    next->status |= TASK_RUNNING;
    tasks_running[cpu] = next;

    // Interrupts arriving while in user mode load the kernel stack pointer
    // from the TSS; point rsp0 at the top of the next task's kernel stack.
    smp_get_current_info()->tss.rsp0 = (uint64_t)(next->kstack_limit + KSTACK_SIZE);

    apic_timer_set_period(TIMESLICE_DEFAULT);
    apic_send_eoi();

    // TODO(race): after sched_lock is released, curr has TASK_RUNNING
    // cleared, so another CPU may select curr and start running on the
    // very stack we are still executing on; the window lasts until
    // finish_context_switch switches %rsp.
    lock_release(&sched_lock);
    finish_context_switch(next);
}
/* Mark the task identified by tid as dead. Actual teardown is deferred to
 * the janitor; here the task is only unlinked from the scheduler's view.
 *
 * Returns 0 on success, -1 if tid does not name a live task.
 * Does not return at all when a task kills itself.
 */
int64_t sched_kill(tid_t tid)
{
    lock_wait(&sched_lock);
    if (!IS_TID_VALID(tid)) {
        klog_err("there's no task with tid %d\n", tid);
        lock_release(&sched_lock);
        return -1;
    }

    task_t* t = tasks_all.data[tid];
    // An in-range tid can still have an empty slot: idle tasks are NULLed
    // out in sched_init and killed tasks are NULLed below. Without this
    // check, killing a tid twice dereferences NULL.
    if (t == NULL) {
        klog_err("there's no task with tid %d\n", tid);
        lock_release(&sched_lock);
        return -1;
    }

    // mark the task as dead, it will be cleaned up later by the janitor
    t->status |= TASK_DEAD;
    tasks_all.data[tid] = NULL;
    tq_push_front(&tasks_dead, t);

    // if it was sleeping, remove it from the sleeping list
    if (t->status & TASK_ASLEEP) {
        tq_remove(t, &tasks_asleep);
        t->status &= ~TASK_ASLEEP;
    }

    bool is_current = (t == tasks_running[smp_get_current_info()->cpu_id]);
    lock_release(&sched_lock);

    // A task cannot return from killing itself: halt until the scheduler
    // switches away. This must be a loop — hlt resumes on ANY enabled
    // interrupt, so after a non-timer interrupt a single hlt would let
    // the dead task fall through and keep executing.
    if (is_current) {
        while (1)
            asm volatile("hlt");
    }
    return 0;
}
The janitor runs once per second, so a task marked as dead may linger for up to a second before its resources are reclaimed — but its TID slot is cleared as soon as sched_kill returns, making the TID immediately eligible for reuse. Code that holds a TID across a kill must therefore not assume it still refers to the same task.