Week: 11
Points: 20
Time: ~5 hours
Deliverable: toolchain/compiler/codegen.py + end-to-end VirtusLang → simulation + diary/week-11.md
What you ship
toolchain/compiler/codegen.pytoolchain/compiler/symtable.pycompiler/week-11/Counter.vl— test classcompiler/week-11/Counter.vm— generated VM bytecodecompiler/week-11/Counter_expected.vm— expected output (hand-written)- Simulation result showing correct output
diary/week-11.md
Lab 11.1: Write the symbol table
Write toolchain/compiler/symtable.py:
from enum import Enum, auto
from dataclasses import dataclass
class VarKind(Enum):
STATIC = auto()
FIELD = auto()
LOCAL = auto()
ARGUMENT = auto()
@dataclass
class VarInfo:
kind: VarKind
type_: str
index: int
class SymbolTable:
def __init__(self):
self._class_scope: dict[str, VarInfo] = {}
self._sub_scope: dict[str, VarInfo] = {}
self._counts: dict[VarKind, int] = {k: 0 for k in VarKind}
def start_subroutine(self):
"""Reset subroutine scope at the start of each subroutine."""
self._sub_scope = {}
self._counts[VarKind.LOCAL] = 0
self._counts[VarKind.ARGUMENT] = 0
def define(self, name: str, type_: str, kind: VarKind):
info = VarInfo(kind, type_, self._counts[kind])
self._counts[kind] += 1
if kind in (VarKind.STATIC, VarKind.FIELD):
self._class_scope[name] = info
else:
self._sub_scope[name] = info
def lookup(self, name: str) -> VarInfo | None:
# Subroutine scope shadows class scope
return self._sub_scope.get(name) or self._class_scope.get(name)
def var_count(self, kind: VarKind) -> int:
return self._counts[kind]
def kind_of(self, name: str) -> VarKind | None:
info = self.lookup(name)
return info.kind if info else None
def type_of(self, name: str) -> str | None:
info = self.lookup(name)
return info.type_ if info else None
def index_of(self, name: str) -> int | None:
info = self.lookup(name)
return info.index if info else None
SEGMENT = {
VarKind.STATIC: 'static',
VarKind.FIELD: 'this',
VarKind.LOCAL: 'local',
VarKind.ARGUMENT: 'argument',
}
Test it independently: define a class with two static fields, two instance fields, and a method with two local variables. Verify that lookup('fieldName') returns the correct kind and index.
Lab 11.2: Write the code generator
Write toolchain/compiler/codegen.py:
from symtable import SymbolTable, VarKind, SEGMENT
from parser import ParseNode
class CodeGen:
def __init__(self):
self.output: list[str] = []
self._sym = SymbolTable()
self._class_name= ''
self._label_n = 0
def _emit(self, *lines): self.output.extend(lines)
def _new_label(self) -> str:
n = self._label_n; self._label_n += 1; return f"L{n}"
def _push_pop_var(self, name: str, push: bool):
info = self._sym.lookup(name)
if info is None:
raise NameError(f"Undeclared variable: {name}")
seg = SEGMENT[info.kind]
if push: self._emit(f"push {seg} {info.index}")
else: self._emit(f"pop {seg} {info.index}")
def compile_class(self, tree: ParseNode):
# tree.children: 'class', className, '{', classVarDec*, subroutineDec*, '}'
self._class_name = tree.children[1].value
for child in tree.children:
if child.tag == 'classVarDec':
self._compile_class_var_dec(child)
elif child.tag == 'subroutineDec':
self._compile_subroutine(child)
def _compile_class_var_dec(self, node: ParseNode):
kind_map = {'static': VarKind.STATIC, 'field': VarKind.FIELD}
kind = kind_map[node.children[0].value]
type_ = node.children[1].value
for child in node.children[2:]:
if child.tag == 'identifier':
self._sym.define(child.value, type_, kind)
def _compile_subroutine(self, node: ParseNode):
self._sym.start_subroutine()
sub_type = node.children[0].value # constructor|function|method
ret_type = node.children[1].value
sub_name = node.children[2].value
full_name = f"{self._class_name}.{sub_name}"
# For methods: argument 0 = 'this'
if sub_type == 'method':
self._sym.define('this', self._class_name, VarKind.ARGUMENT)
# Compile parameter list
param_list = next(c for c in node.children if c.tag == 'parameterList')
params = [c for c in param_list.children if c.tag not in ('symbol',)]
i = 0
while i < len(params):
type_ = params[i].value; i += 1
pname = params[i].value; i += 1
self._sym.define(pname, type_, VarKind.ARGUMENT)
i += 1 # skip comma node if present
# Compile var declarations to count locals
body = next(c for c in node.children if c.tag == 'subroutineBody')
for child in body.children:
if child.tag == 'varDec':
self._compile_var_dec(child)
n_locals = self._sym.var_count(VarKind.LOCAL)
self._emit(f"function {full_name} {n_locals}")
# Preamble: constructor allocates 'this'; method sets 'this'
if sub_type == 'constructor':
n_fields = self._sym.var_count(VarKind.FIELD)
self._emit(f"push constant {n_fields}",
"call Memory.alloc 1",
"pop pointer 0")
elif sub_type == 'method':
self._emit("push argument 0", "pop pointer 0")
stmts = next(c for c in body.children if c.tag == 'statements')
self._compile_statements(stmts)
def _compile_var_dec(self, node: ParseNode):
type_ = node.children[1].value
for child in node.children[2:]:
if child.tag == 'identifier':
self._sym.define(child.value, type_, VarKind.LOCAL)
def _compile_statements(self, node: ParseNode):
for stmt in node.children:
tag = stmt.tag
if tag == 'letStatement': self._compile_let(stmt)
elif tag == 'ifStatement': self._compile_if(stmt)
elif tag == 'whileStatement': self._compile_while(stmt)
elif tag == 'doStatement': self._compile_do(stmt)
elif tag == 'returnStatement': self._compile_return(stmt)
def _compile_let(self, node: ParseNode):
varname = node.children[1].value
# Check for array assignment
if node.children[2].value == '[':
self._push_pop_var(varname, push=True)
self._compile_expression(node.children[3])
self._emit("add")
self._compile_expression(node.children[-2])
self._emit("pop temp 0", "pop pointer 1",
"push temp 0", "pop that 0")
else:
self._compile_expression(node.children[-2])
self._push_pop_var(varname, push=False)
def _compile_if(self, node: ParseNode):
false_l = self._new_label()
end_l = self._new_label()
self._compile_expression(next(c for c in node.children if c.tag == 'expression'))
self._emit("not", f"if-goto {false_l}")
# true branch
true_stmts = [c for c in node.children if c.tag == 'statements']
self._compile_statements(true_stmts[0])
self._emit(f"goto {end_l}")
self._emit(f"label {false_l}")
if len(true_stmts) > 1:
self._compile_statements(true_stmts[1])
self._emit(f"label {end_l}")
def _compile_while(self, node: ParseNode):
loop_l = self._new_label()
exit_l = self._new_label()
self._emit(f"label {loop_l}")
self._compile_expression(next(c for c in node.children if c.tag == 'expression'))
self._emit("not", f"if-goto {exit_l}")
self._compile_statements(next(c for c in node.children if c.tag == 'statements'))
self._emit(f"goto {loop_l}", f"label {exit_l}")
def _compile_do(self, node: ParseNode):
# skip 'do', then subroutine call, then ';'
children = node.children[1:-1]
self._compile_subroutine_call(children)
self._emit("pop temp 0") # discard return value
def _compile_return(self, node: ParseNode):
exprs = [c for c in node.children if c.tag == 'expression']
if exprs:
self._compile_expression(exprs[0])
else:
self._emit("push constant 0") # void return
self._emit("return")
def _compile_subroutine_call(self, children: list):
name_node = children[0]
if len(children) > 1 and children[1].value == '.':
class_or_var = name_node.value
method_name = children[2].value
expr_list = next(c for c in children if c.tag == 'expressionList')
info = self._sym.lookup(class_or_var)
if info:
# method call on object variable
self._push_pop_var(class_or_var, push=True)
n_args = self._compile_expression_list(expr_list) + 1
full = f"{info.type_}.{method_name}"
else:
# function/constructor call on class name
n_args = self._compile_expression_list(expr_list)
full = f"{class_or_var}.{method_name}"
else:
# local method call (implicit 'this')
method_name = name_node.value
expr_list = next(c for c in children if c.tag == 'expressionList')
self._emit("push pointer 0")
n_args = self._compile_expression_list(expr_list) + 1
full = f"{self._class_name}.{method_name}"
self._emit(f"call {full} {n_args}")
def _compile_expression(self, node: ParseNode):
children = [c for c in node.children]
self._compile_term(children[0])
i = 1
while i < len(children):
op = children[i].value; i += 1
self._compile_term(children[i]); i += 1
op_vm = {'+':'add','-':'sub','*':'call Math.multiply 2',
'/':'call Math.divide 2','&':'and','|':'or',
'<':'lt','>':'gt','=':'eq'}[op]
self._emit(op_vm)
def _compile_term(self, node: ParseNode):
ch = node.children
if not ch: return
first = ch[0]
if first.tag == 'integerConstant':
self._emit(f"push constant {first.value}")
elif first.tag == 'stringConstant':
s = first.value
self._emit(f"push constant {len(s)}", "call String.new 1")
for c in s:
self._emit(f"push constant {ord(c)}", "call String.appendChar 2")
elif first.value in ('true', 'false', 'null', 'this'):
if first.value == 'true': self._emit("push constant 0", "not")
elif first.value in ('false', 'null'): self._emit("push constant 0")
elif first.value == 'this': self._emit("push pointer 0")
elif first.value == '(':
self._compile_expression(ch[1])
elif first.value in ('-', '~'):
self._compile_term(ch[1])
self._emit("neg" if first.value == '-' else "not")
else:
# identifier — variable, array access, or subroutine call
if len(ch) > 1 and ch[1].value == '[':
self._push_pop_var(first.value, push=True)
self._compile_expression(ch[2])
self._emit("add", "pop pointer 1", "push that 0")
elif len(ch) > 1 and ch[1].value in ('.', '('):
self._compile_subroutine_call(ch)
else:
self._push_pop_var(first.value, push=True)
def _compile_expression_list(self, node: ParseNode) -> int:
exprs = [c for c in node.children if c.tag == 'expression']
for expr in exprs:
self._compile_expression(expr)
return len(exprs)
Lab 11.3: End-to-end — Counter.vl
Create and compile compiler/week-11/Counter.vl:
class Counter {
field int value;
constructor Counter new(int start) {
let value = start;
return this;
}
method void increment() {
let value = value + 1;
return;
}
method int get() {
return value;
}
method void dispose() {
do Memory.deAlloc(this);
return;
}
}
And a main class compiler/week-11/Main.vl:
class Main {
function void main() {
var Counter c;
let c = Counter.new(0);
do c.increment();
do c.increment();
do c.increment();
do Output.printInt(c.get());
do Output.println();
do c.dispose();
return;
}
}
Run the compiler, VM translator, assembler, linker, and simulation:
python3 toolchain/compiler/compiler.py compiler/week-11/ -o build/vm/
for f in build/vm/*.vm; do
python3 toolchain/vm-translator/translator.py "$f" \
-o "build/asm/$(basename ${f%.vm}.s)"
done
# Assemble + link against your Virtus OS stubs
vvp cpu_sim 2>&1 | grep -E 'print|output|[0-9]'
Expected output: 3
Lab 11.4: Five-category baseline measurement
In diary/week-11.md, record the instruction count for each of the five compiler output categories. Run the simulation with instruction tracing enabled:
| Category | What to measure | How to measure |
|---|---|---|
| Arithmetic ops | Instructions for a + b |
Count ADD, SUB in Counter.vm |
| Function calls | Instructions per call |
Count CALL+RETURN boilerplate |
| Conditionals | Instructions for if block |
Count branch overhead in generated if |
| Loops | Instructions for while body |
Count GOTO+branch per iteration |
| Method dispatch | Extra instructions for push pointer 0 |
Count pointer push per method call |
This baseline is the forward promise to CSA-201: the peephole optimizer will target exactly these five categories. Record the numbers now; you will compare them to optimized output in CSA-201.
Lab 11.5: Ghidra diagnostic
Load your compiled Counter.vof (or the final linked binary) into Ghidra. Use the RISC-V disassembler. Locate the generated code for Counter.increment.
In diary/week-11.md, write one paragraph answering: does the disassembly match what you expected from reading Counter.vm? Note any instruction that surprised you and explain why your compiler emitted it.
This exercise is the prescribed diagnostic for the symbol-table scope bug described in the Week 11 lecture. If Ghidra shows an unexpected push argument 0 where you expected push this 0, trace back through the symbol table definition order.
Grading
| Component | Points |
|---|---|
symtable.py: two scopes, define/lookup/start_subroutine, correct shadow behavior |
4 |
codegen.py: compiles class/subroutine/statements/expressions correctly |
10 |
Counter.vl end-to-end simulation produces output 3 |
3 |
| Lab 11.4: five-category baseline numbers in diary | 2 |
| Lab 11.5: Ghidra paragraph, honest observation | 1 |