Validation ensures your optimized kernel produces correct results and meets performance targets. The challenge provides multiple layers of validation, from unit tests to submission benchmarks.
Correctness before performance. Always ensure your kernel is correct before optimizing for speed.
class Tests(unittest.TestCase):
    # Excerpt of the test suite; each method below is one validation layer.

    def test_ref_kernels(self):
        """Test the reference kernels against each other"""
        # Body omitted in this excerpt.

    def test_kernel_trace(self):
        # Full-scale example for performance testing
        # Same positional arguments as test_kernel_cycles, but with
        # trace=True and prints=False passed explicitly.
        do_kernel_test(10, 16, 256, trace=True, prints=False)

    def test_kernel_cycles(self):
        # Runs do_kernel_test with (10, 16, 256) and its default keyword options.
        do_kernel_test(10, 16, 256)
The commented-out test_kernel_correctness test can be uncommented for additional debugging with various parameter combinations.
# Debug slot: compare the scratch location tmp_idx against the reference
# trace entry keyed by (round, i, "idx").
body.append(("debug", ("compare", tmp_idx, (round, i, "idx"))))
# ALU slot: presumably tmp_addr = scratch["inp_values_p"] + i_const
# (operand order is not shown here; confirm against the ALU engine).
body.append(("alu", ("+", tmp_addr, self.scratch["inp_values_p"], i_const)))
# Load slot: presumably reads memory at the address held in tmp_addr into
# tmp_val (confirm against the load engine).
body.append(("load", ("load", tmp_val, tmp_addr)))
# Debug slot: compare tmp_val against the reference trace entry keyed by
# (round, i, "val").
body.append(("debug", ("compare", tmp_val, (round, i, "val"))))
The debug engine compares your kernel’s value against the reference:
problem.py:366-374
if name == "debug":
    # Debug slots are skipped entirely when debugging is disabled.
    if not self.enable_debug:
        continue
    for slot in slots:
        if slot[0] == "compare":
            # slot = ("compare", scratch location, trace key)
            loc, key = slot[1], slot[2]
            # Expected value recorded in the value trace under this key.
            ref = self.value_trace[key]
            # Actual value the kernel left in this core's scratch space.
            res = core.scratch[loc]
            # On mismatch, report both values, the key, and the program counter.
            assert res == ref, f"{res} != {ref} for {key} at pc={core.pc}"
The reference kernel uses `yield` to pause at checkpoints:
problem.py:535-568
# Generator form of the reference kernel: it yields `mem` at each checkpoint
# so a caller can compare memory state step by step.
# NOTE(review): `trace` defaults to a mutable dict that is shared across
# calls; the callers shown in this document always pass one explicitly.
def reference_kernel2(mem: list[int], trace: dict[Any, int] = {}):
    # ... initialization ...
    yield mem  # Initial state
    for h in range(rounds):
        for i in range(batch_size):
            # ... computation ...
    yield mem  # Final state
for i, ref_mem in enumerate(reference_kernel2(mem, value_trace)): machine.run() # Run until next pause inp_values_p = ref_mem[6] assert ( machine.mem[inp_values_p : inp_values_p + len(inp.values)] == ref_mem[inp_values_p : inp_values_p + len(inp.values)] ), f"Incorrect result on round {i}"
Important: Pause instructions must match the reference kernel’s yields during development testing, but are ignored by the submission simulator.
def test_ref_kernels(self):
    """Test the reference kernels against each other"""
    # Fixed seed so the ten generated cases are reproducible.
    random.seed(123)
    for i in range(10):
        f = Tree.generate(4)
        inp = Input.generate(f, 10, 6)
        # Build the flat memory image before running reference_kernel, so it
        # captures the input as generated.
        mem = build_mem_image(f, inp)
        # Presumably updates `inp` in place (the asserts below read it back).
        reference_kernel(f, inp)
        # Drain the generator so reference_kernel2 runs to completion on `mem`.
        for _ in reference_kernel2(mem, {}):
            pass
        # mem[5] and mem[6] hold the start offsets of the indices and values
        # regions; both kernels must agree on both regions.
        assert inp.indices == mem[mem[5] : mem[5] + len(inp.indices)]
        assert inp.values == mem[mem[6] : mem[6] + len(inp.values)]