Files
sx/src/sema.zig
2026-02-11 14:22:25 +02:00

1129 lines
43 KiB
Zig

const std = @import("std");
const ast = @import("ast.zig");
const Node = ast.Node;
const Span = ast.Span;
const Type = @import("types.zig").Type;
const errors = @import("errors.zig");
const Diagnostic = errors.Diagnostic;
/// Classification of a declared name recorded in the symbol table.
pub const SymbolKind = enum {
/// Name introduced by a `var_decl`.
variable,
/// `const_decl` whose value is neither a type expression nor a lambda.
constant,
/// `fn_decl`, or a `const_decl` whose value is a lambda.
function,
/// `enum_decl`; the analyzer also records `union_decl` names with this kind.
enum_type,
/// `struct_decl`.
struct_type,
/// `const_decl` whose value is a type expression.
type_alias,
/// Parameter of a function or lambda.
param,
/// `namespace_decl`.
namespace,
};
/// One declared name, as recorded by `Analyzer.addSymbol`.
pub const Symbol = struct {
/// Declared identifier; the slice is stored as given, not copied.
name: []const u8,
/// What sort of declaration introduced the name.
kind: SymbolKind,
/// Resolved or inferred type, or null when none was determined
/// (e.g. namespaces, or an unannotated top-level `var_decl`).
ty: ?Type,
/// Span passed at declaration time (the declaring node's span, or the
/// parameter's name span for params).
def_span: Span,
/// Analyzer scope depth at the moment the symbol was added (0 = top level).
scope_depth: u32,
};
/// A resolved use of an identifier.
pub const Reference = struct {
/// Span of the identifier occurrence.
span: Span,
/// Index into the analyzer's symbol list of the declaration it resolved to.
symbol_index: u32,
};
/// Callable signature stored in the `fn_signatures` registry.
pub const FnSignature = struct {
/// One entry per declared parameter; a variadic parameter is recorded as a
/// slice type of its element name.
param_types: []const Type,
/// Declared return type; `.void_type` when none was declared or resolvable.
return_type: Type,
/// True when at least one parameter of a `fn_decl` is variadic.
is_variadic: bool = false,
};
/// Field layout of a struct declaration. `field_names` and `field_types` are
/// parallel: lookups find a name's position and read the type at that index.
pub const StructTypeInfo = struct {
/// Field names as they appear on the `struct_decl` node.
field_names: []const []const u8,
/// Resolved type per field; fields whose type expression does not resolve
/// are recorded as `Type.s(64)`.
field_types: []const Type,
};
/// Maps an AST node (keyed by its pointer) to the type inferred for it.
pub const TypeMap = std.AutoHashMap(*const Node, Type);
/// Everything `Analyzer.analyze` produces. The slices are owned slices taken
/// from the analyzer's lists; the maps are the analyzer's own registries
/// handed over by value.
pub const SemaResult = struct {
/// All symbols in declaration order; `Reference.symbol_index` indexes here.
symbols: []const Symbol,
/// Every identifier use that resolved to a symbol.
references: []const Reference,
/// Diagnostics emitted during analysis.
diagnostics: []const Diagnostic,
/// Function name (or "namespace.name") -> signature.
fn_signatures: std.StringHashMap(FnSignature),
/// Struct name -> field names and types.
struct_types: std.StringHashMap(StructTypeInfo),
/// Enum name -> variant list as stored on the `enum_decl` node.
enum_types: std.StringHashMap([]const []const u8),
/// Alias name -> name of the aliased type.
type_aliases: std.StringHashMap([]const u8),
/// Inferred type per expression node.
type_map: TypeMap,
};
/// Two-pass semantic analyzer: walks the AST collecting symbols, identifier
/// references, diagnostics and type registries.
pub const Analyzer = struct {
/// Allocator used for list appends, owned slices and qualified-name keys.
allocator: std.mem.Allocator,
/// Every symbol declared so far, in declaration order. Entries are never
/// removed when a scope closes; lookups filter by `scope_depth` instead.
symbols: std.ArrayList(Symbol),
/// Identifier uses that resolved to an entry in `symbols`.
references: std.ArrayList(Reference),
/// Diagnostics emitted so far (duplicate declarations, undefined names).
diagnostics: std.ArrayList(Diagnostic),
/// Current nesting depth: 0 at top level, +1 per `pushScope`.
scope_depth: u32,
/// Stack of symbol counts at each scope entry. `addSymbol` uses the top
/// entry to bound its duplicate check; `popScope` only pops this stack and
/// does not remove any symbols.
scope_starts: std.ArrayList(u32),
// Type registries
/// Function name (or "namespace.name") -> signature.
fn_signatures: std.StringHashMap(FnSignature),
/// Struct name -> field names and types.
struct_types: std.StringHashMap(StructTypeInfo),
/// Enum name -> variant list.
enum_types: std.StringHashMap([]const []const u8),
/// Alias name -> name of the aliased type.
type_aliases: std.StringHashMap([]const u8),
/// Inferred type per expression node, filled in by `analyzeNode`.
type_map: TypeMap,
/// Build an analyzer at scope depth 0 with empty lists and empty registries.
/// The hash-map registries capture `allocator`; the lists receive it on each
/// call through the stored `allocator` field.
pub fn init(allocator: std.mem.Allocator) Analyzer {
    const fresh: Analyzer = .{
        .allocator = allocator,
        // Growable lists start with no backing storage.
        .symbols = .empty,
        .references = .empty,
        .diagnostics = .empty,
        .scope_starts = .empty,
        .scope_depth = 0,
        // Registries.
        .fn_signatures = .init(allocator),
        .struct_types = .init(allocator),
        .enum_types = .init(allocator),
        .type_aliases = .init(allocator),
        .type_map = .init(allocator),
    };
    return fresh;
}
/// Run both passes over `root` and hand back the collected results.
///
/// Returns `error.InvalidRoot` when `root` is not a `.root` node, and
/// propagates any allocation failure. On success the symbol, reference and
/// diagnostic lists are converted to owned slices and the registries are
/// moved into the result by value.
pub fn analyze(self: *Analyzer, root: *Node) !SemaResult {
    const decls = switch (root.data) {
        .root => |r| r.decls,
        else => return error.InvalidRoot,
    };

    // Pass 1: register every top-level name first so forward references work.
    for (decls) |decl| try self.registerTopLevelDecl(decl);

    // Pass 2: walk bodies and values with all top-level names in scope.
    for (decls) |decl| try self.analyzeTopLevelDecl(decl);

    const symbols = try self.symbols.toOwnedSlice(self.allocator);
    const references = try self.references.toOwnedSlice(self.allocator);
    const diagnostics = try self.diagnostics.toOwnedSlice(self.allocator);

    return .{
        .symbols = symbols,
        .references = references,
        .diagnostics = diagnostics,
        .fn_signatures = self.fn_signatures,
        .struct_types = self.struct_types,
        .enum_types = self.enum_types,
        .type_aliases = self.type_aliases,
        .type_map = self.type_map,
    };
}
/// Pass 1 entry point: record the name, kind and type of one top-level
/// declaration without a namespace prefix. Bodies and value expressions are
/// not visited here.
fn registerTopLevelDecl(self: *Analyzer, node: *Node) !void {
    return self.registerTopLevelDeclPrefixed(node, null);
}
/// Pass 1 worker: add the symbol for `node` and fill the matching type
/// registry, without analysing bodies or value expressions.
///
/// `ns_prefix` is the enclosing namespace name when recursing into a
/// `namespace_decl`; function signatures are then keyed as "<prefix>.<name>"
/// (the key is allocated with `self.allocator`). The alias, enum and struct
/// registries are always keyed by the bare name. Node kinds not listed below
/// are ignored.
///
/// Intermediate allocations (type lists, owned slices, qualified keys) are
/// released again if a later step fails, so an allocation error does not leak.
fn registerTopLevelDeclPrefixed(self: *Analyzer, node: *Node, ns_prefix: ?[]const u8) !void {
    switch (node.data) {
        .fn_decl => |fd| {
            const ret_ty = resolveReturnType(fd);
            try self.addSymbol(fd.name, .function, ret_ty, node.span);

            // Populate fn_signatures registry
            var param_types = std.ArrayList(Type).empty;
            errdefer param_types.deinit(self.allocator);
            var has_variadic = false;
            for (fd.params) |param| {
                if (param.is_variadic) {
                    has_variadic = true;
                    // Variadic param becomes a slice type
                    const elem_name = if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "s32";
                    try param_types.append(self.allocator, .{ .slice_type = .{ .element_name = elem_name } });
                } else {
                    // Unresolvable parameter types fall back to s64.
                    const pt = Type.fromTypeExpr(param.type_expr) orelse Type.s(64);
                    try param_types.append(self.allocator, pt);
                }
            }
            const owned_params = try param_types.toOwnedSlice(self.allocator);
            errdefer self.allocator.free(owned_params);

            const key = if (ns_prefix) |pfx|
                try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ pfx, fd.name })
            else
                fd.name;
            // Only the qualified key is heap-allocated; the bare name is borrowed.
            errdefer if (ns_prefix != null) self.allocator.free(key);

            try self.fn_signatures.put(key, .{
                .param_types = owned_params,
                .return_type = ret_ty orelse .void_type,
                .is_variadic = has_variadic,
            });
        },
        .const_decl => |cd| {
            const ty = resolveTypeAnnotation(cd.type_annotation) orelse inferValueType(cd.value);
            const kind = classifyConstDecl(cd);
            try self.addSymbol(cd.name, kind, ty, node.span);
            // Populate type_aliases registry
            if (cd.value.data == .type_expr) {
                try self.type_aliases.put(cd.name, cd.value.data.type_expr.name);
            }
            // Lambda as function
            if (cd.value.data == .lambda) {
                const lam = cd.value.data.lambda;
                var param_types = std.ArrayList(Type).empty;
                errdefer param_types.deinit(self.allocator);
                for (lam.params) |param| {
                    const pt = Type.fromTypeExpr(param.type_expr) orelse Type.s(64);
                    try param_types.append(self.allocator, pt);
                }
                const owned_params = try param_types.toOwnedSlice(self.allocator);
                errdefer self.allocator.free(owned_params);

                const ret = if (lam.return_type) |rt| Type.fromTypeExpr(rt) orelse .void_type else .void_type;
                const key = if (ns_prefix) |pfx|
                    try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ pfx, cd.name })
                else
                    cd.name;
                errdefer if (ns_prefix != null) self.allocator.free(key);

                try self.fn_signatures.put(key, .{
                    .param_types = owned_params,
                    .return_type = ret,
                });
            }
        },
        .var_decl => |vd| {
            // Only the annotation is consulted in pass 1; the value is not inspected.
            const ty = resolveTypeAnnotation(vd.type_annotation);
            try self.addSymbol(vd.name, .variable, ty, node.span);
        },
        .enum_decl => |ed| {
            try self.addSymbol(ed.name, .enum_type, .{ .enum_type = ed.name }, node.span);
            try self.enum_types.put(ed.name, ed.variants);
        },
        .struct_decl => |sd| {
            try self.addSymbol(sd.name, .struct_type, .{ .struct_type = sd.name }, node.span);
            // Populate struct_types registry
            var field_types = std.ArrayList(Type).empty;
            errdefer field_types.deinit(self.allocator);
            for (sd.field_types) |ft| {
                const resolved = Type.fromTypeExpr(ft) orelse Type.s(64);
                try field_types.append(self.allocator, resolved);
            }
            const owned_fields = try field_types.toOwnedSlice(self.allocator);
            errdefer self.allocator.free(owned_fields);

            try self.struct_types.put(sd.name, .{
                .field_names = sd.field_names,
                .field_types = owned_fields,
            });
        },
        .union_decl => |ud| {
            // Unions share the `.enum_type` symbol kind but carry a union type.
            try self.addSymbol(ud.name, .enum_type, .{ .union_type = ud.name }, node.span);
        },
        .namespace_decl => |ns| {
            try self.addSymbol(ns.name, .namespace, null, node.span);
            // Recurse into namespace decls with qualified prefix (in own scope
            // so inner names don't collide with flat imports of the same names)
            try self.pushScope();
            // Keep the scope stack balanced even when a nested registration fails.
            defer self.popScope();
            for (ns.decls) |d| {
                try self.registerTopLevelDeclPrefixed(d, ns.name);
            }
        },
        else => {},
    }
}
/// Resolve a type annotation node to a `Type`.
///
/// Tries `Type.fromTypeExpr` first, then handles array (`[N]T`), slice
/// (`[]T`), pointer (`*T`) and many-pointer (`[*]T`) expressions by resolving
/// the element type recursively and storing its display name. Named types
/// (`type_expr` or `identifier`) are looked up as primitives, then aliases
/// (to a primitive, struct or enum), then enums, then structs.
///
/// Returns `.void_type` when `type_node` is null, when the node cannot be
/// resolved, for parameterized types (left to codegen), when an array length
/// is not an integer literal that fits in `u32`, or when building a display
/// name fails.
pub fn resolveTypeNode(self: *Analyzer, type_node: ?*Node) Type {
    const tn = type_node orelse return .void_type;
    if (Type.fromTypeExpr(tn)) |t| return t;
    // Array type: [N]T
    if (tn.data == .array_type_expr) {
        const ate = tn.data.array_type_expr;
        // Only literal lengths can be evaluated here; anything else (or a
        // negative / oversized literal) is reported as unresolved instead of
        // tripping an inactive-field access or an out-of-range cast.
        if (ate.length.data != .int_literal) return .void_type;
        const length = std.math.cast(u32, ate.length.data.int_literal.value) orelse return .void_type;
        const elem_type = self.resolveTypeNode(ate.element_type);
        const elem_name = elem_type.displayName(self.allocator) catch return .void_type;
        return .{ .array_type = .{ .element_name = elem_name, .length = length } };
    }
    // Slice type: []T
    if (tn.data == .slice_type_expr) {
        const ste = tn.data.slice_type_expr;
        const elem_type = self.resolveTypeNode(ste.element_type);
        const elem_name = elem_type.displayName(self.allocator) catch return .void_type;
        return .{ .slice_type = .{ .element_name = elem_name } };
    }
    // Pointer type: *T
    if (tn.data == .pointer_type_expr) {
        const pte = tn.data.pointer_type_expr;
        const pointee_type = self.resolveTypeNode(pte.pointee_type);
        const pointee_name = pointee_type.displayName(self.allocator) catch return .void_type;
        return .{ .pointer_type = .{ .pointee_name = pointee_name } };
    }
    // Many-pointer type: [*]T
    if (tn.data == .many_pointer_type_expr) {
        const mpte = tn.data.many_pointer_type_expr;
        const elem_type = self.resolveTypeNode(mpte.element_type);
        const elem_name = elem_type.displayName(self.allocator) catch return .void_type;
        return .{ .many_pointer_type = .{ .element_name = elem_name } };
    }
    // Sema does not resolve generics; codegen handles instantiation
    if (tn.data == .parameterized_type_expr) {
        return .void_type;
    }
    // type_expr or identifier — check aliases, enums, structs
    if (tn.data == .type_expr or tn.data == .identifier) {
        const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name;
        if (Type.fromName(name)) |t| return t;
        if (self.type_aliases.get(name)) |target| {
            if (Type.fromName(target)) |t| return t;
            if (self.struct_types.contains(target)) return .{ .struct_type = target };
            // Aliases of enums resolve the same way aliases of structs do.
            if (self.enum_types.contains(target)) return .{ .enum_type = target };
        }
        if (self.enum_types.contains(name)) return .{ .enum_type = name };
        if (self.struct_types.contains(name)) return .{ .struct_type = name };
    }
    return .void_type;
}
/// Infer the type of an expression node without LLVM.
///
/// Uses `fn_signatures` for call return types, `struct_types` for field
/// access, the symbol list for identifiers and `Type.widen` for arithmetic.
/// Calls written as `ns.f(...)` are looked up under the qualified key
/// "ns.f" first (the key pass 1 uses for namespaced functions) and then under
/// the bare name. Unknown identifiers, unknown callees and unresolved element
/// or field types fall back to `Type.s(64)`; node kinds with no value yield
/// `.void_type`.
pub fn inferExprType(self: *Analyzer, node: *const Node) Type {
    return switch (node.data) {
        .int_literal => Type.s(64),
        // NOTE(review): `inferValueType` maps float literals to f64 while this
        // maps them to f32 — confirm which width is intended.
        .float_literal => .f32,
        .bool_literal => .boolean,
        .string_literal => .string_type,
        .insert_expr => .void_type,
        .comptime_expr => |ct| self.inferExprType(ct.expr),
        .binary_op => |binop| {
            switch (binop.op) {
                // Comparisons and logical operators always produce a boolean.
                .eq, .neq, .lt, .lte, .gt, .gte, .and_op, .or_op => return .boolean,
                else => {
                    const lhs_ty = self.inferExprType(binop.lhs);
                    const rhs_ty = self.inferExprType(binop.rhs);
                    return Type.widen(lhs_ty, rhs_ty);
                },
            }
        },
        .chained_comparison => .boolean,
        .identifier => |ident| {
            // Search symbols backwards for matching name at or above current scope
            var i = self.symbols.items.len;
            while (i > 0) {
                i -= 1;
                const sym = self.symbols.items[i];
                if (sym.scope_depth <= self.scope_depth and std.mem.eql(u8, sym.name, ident.name)) {
                    return sym.ty orelse Type.s(64);
                }
            }
            return Type.s(64);
        },
        .if_expr => |ie| {
            // The then-branch alone determines the type.
            return self.inferExprType(ie.then_branch);
        },
        .block => |blk| {
            // A block takes the type of its last statement.
            if (blk.stmts.len > 0) {
                return self.inferExprType(blk.stmts[blk.stmts.len - 1]);
            }
            return .void_type;
        },
        .match_expr => |me| {
            // First arm that is not a `break` arm decides the type.
            for (me.arms) |arm| {
                if (!arm.is_break) return self.inferExprType(arm.body);
            }
            return .void_type;
        },
        .call => |call_node| {
            // Namespaced call `ns.f(...)`: try the qualified registry key
            // first. The key is formatted into a stack buffer because the map
            // hashes key contents, so no allocation is needed for a lookup.
            // Names too long for the buffer simply skip this step.
            if (call_node.callee.data == .field_access) {
                const fa = call_node.callee.data.field_access;
                if (fa.object.data == .identifier) {
                    var buf: [256]u8 = undefined;
                    const qualified: ?[]const u8 = std.fmt.bufPrint(&buf, "{s}.{s}", .{ fa.object.data.identifier.name, fa.field }) catch null;
                    if (qualified) |qname| {
                        if (self.fn_signatures.get(qname)) |sig| return sig.return_type;
                    }
                }
            }
            const callee_name = self.resolveCalleeName(call_node) orelse return Type.s(64);
            // Check fn_signatures registry
            if (self.fn_signatures.get(callee_name)) |sig| {
                return sig.return_type;
            }
            // Built-in: sqrt returns same type as argument
            const base = if (std.mem.lastIndexOfScalar(u8, callee_name, '.')) |idx| callee_name[idx + 1 ..] else callee_name;
            if (std.mem.eql(u8, base, "sqrt")) {
                if (call_node.args.len > 0) return self.inferExprType(call_node.args[0]);
                return .f32;
            }
            return Type.s(64);
        },
        .unary_op => |unop| {
            return self.inferExprType(unop.operand);
        },
        .field_access => |fa| {
            const obj_ty = self.inferExprType(fa.object);
            if (obj_ty == .string_type) {
                if (std.mem.eql(u8, fa.field, "len")) return Type.s(64);
                if (std.mem.eql(u8, fa.field, "ptr")) return .string_type;
            }
            if (obj_ty.isStruct()) {
                if (self.struct_types.get(obj_ty.struct_type)) |info| {
                    for (info.field_names, 0..) |fname, idx| {
                        if (std.mem.eql(u8, fname, fa.field)) {
                            return info.field_types[idx];
                        }
                    }
                }
            }
            if (obj_ty.isArray()) {
                return Type.fromName(obj_ty.array_type.element_name) orelse Type.s(64);
            }
            return Type.s(64);
        },
        .index_expr => |ie| {
            const obj_ty = self.inferExprType(ie.object);
            if (obj_ty == .string_type) return Type.u(8);
            if (obj_ty.isArray()) {
                return Type.fromName(obj_ty.array_type.element_name) orelse Type.s(64);
            }
            // Indexing a slice yields its element type, mirroring arrays.
            if (obj_ty.isSlice()) {
                return Type.fromName(obj_ty.slice_type.element_name) orelse Type.s(64);
            }
            return Type.s(64);
        },
        .slice_expr => |se| {
            const obj_ty = self.inferExprType(se.object);
            if (obj_ty == .string_type) return .string_type;
            if (obj_ty.isArray()) return .{ .slice_type = .{ .element_name = obj_ty.array_type.element_name } };
            if (obj_ty.isSlice()) return obj_ty;
            return .void_type;
        },
        .while_expr => .void_type,
        .for_expr => .void_type,
        .spread_expr => .void_type,
        .break_expr => .void_type,
        .continue_expr => .void_type,
        // The enum name is not known from the literal alone.
        .enum_literal => .{ .enum_type = "" },
        .union_literal => |ul| {
            if (ul.union_name) |name| return .{ .union_type = name };
            return .void_type;
        },
        .struct_literal => |sl| {
            if (sl.struct_name) |name| {
                if (self.struct_types.contains(name)) return .{ .struct_type = name };
                if (self.type_aliases.get(name)) |target| {
                    if (self.struct_types.contains(target)) return .{ .struct_type = target };
                }
            } else if (sl.type_expr) |te| {
                // Handle parameterized struct: List(s32).{} parses as call node
                if (te.data == .call) {
                    if (self.resolveCalleeName(te.data.call)) |callee| {
                        if (self.struct_types.contains(callee)) return .{ .struct_type = callee };
                    }
                }
                return self.inferExprType(te);
            }
            return .void_type;
        },
        .deref_expr => |de| {
            const ptr_ty = self.inferExprType(de.operand);
            if (ptr_ty.isPointer()) return ptr_ty.pointerPointeeType() orelse .void_type;
            return .void_type;
        },
        .null_literal => .void_type,
        .array_literal => .void_type,
        .type_expr => |te| .{ .meta_type = .{ .name = te.name } },
        .parameterized_type_expr => |pte| {
            if (self.struct_types.contains(pte.name)) return .{ .struct_type = pte.name };
            return .void_type;
        },
        else => .void_type,
    };
}
/// Extract the name a call refers to.
///
/// A plain identifier callee yields its name. A `object.field` callee whose
/// object is an identifier yields only `field` — the qualifier is dropped
/// because no allocation is performed here. Any other callee shape yields null.
fn resolveCalleeName(self: *Analyzer, call_node: ast.Call) ?[]const u8 {
    _ = self;
    return switch (call_node.callee.data) {
        .identifier => |ident| ident.name,
        .field_access => |access| if (access.object.data == .identifier) access.field else null,
        else => null,
    };
}
/// Pass 2: walk the body or value of one top-level declaration. Its own
/// symbol was already added in pass 1, so it is not added again here.
///
/// Namespaces open a scope, register their members without a prefix, analyse
/// them, and close the scope again. Anything not handled explicitly is passed
/// to `analyzeNode`.
fn analyzeTopLevelDecl(self: *Analyzer, node: *Node) !void {
    switch (node.data) {
        // Nothing to walk for these at top level.
        .enum_decl, .struct_decl, .union_decl, .array_type_expr, .slice_type_expr, .array_literal, .parameterized_type_expr, .index_expr, .slice_expr, .insert_expr => {},
        .const_decl => |decl| try self.analyzeNode(decl.value),
        .var_decl => |decl| {
            const initial = decl.value orelse return;
            try self.analyzeNode(initial);
        },
        .fn_decl => |func| {
            // Parameters live in the function's own scope.
            try self.pushScope();
            for (func.params) |p| {
                try self.addSymbol(p.name, .param, Type.fromTypeExpr(p.type_expr), p.name_span);
            }
            try self.analyzeNode(func.body);
            self.popScope();
        },
        .namespace_decl => |space| {
            try self.pushScope();
            for (space.decls) |member| try self.registerTopLevelDecl(member);
            for (space.decls) |member| try self.analyzeTopLevelDecl(member);
            self.popScope();
        },
        else => try self.analyzeNode(node),
    }
}
/// Enter a nested scope: remember how many symbols exist right now and
/// increase the depth by one.
fn pushScope(self: *Analyzer) !void {
    const symbol_count: u32 = @intCast(self.symbols.items.len);
    try self.scope_starts.append(self.allocator, symbol_count);
    self.scope_depth += 1;
}
/// Leave the innermost scope. Does nothing when no scope is open. Symbols
/// declared inside the scope stay in the symbol list.
fn popScope(self: *Analyzer) void {
    if (self.scope_starts.items.len == 0) return;
    _ = self.scope_starts.pop();
    self.scope_depth -= 1;
}
/// Append a symbol at the current scope depth.
///
/// Before appending, the symbols added since the innermost scope was opened
/// are scanned; if one at the same depth has the same name, a single
/// "duplicate declaration" warning is emitted at `span`. The symbol is
/// appended regardless.
fn addSymbol(self: *Analyzer, name: []const u8, kind: SymbolKind, ty: ?Type, span: Span) !void {
    const open_scopes = self.scope_starts.items;
    const window_start: usize = if (open_scopes.len == 0) 0 else open_scopes[open_scopes.len - 1];
    const depth = self.scope_depth;

    const window = self.symbols.items[window_start..];
    for (window) |existing| {
        if (existing.scope_depth != depth) continue;
        if (!std.mem.eql(u8, existing.name, name)) continue;
        try self.diagnostics.append(self.allocator, .{
            .level = .warn,
            .span = span,
            .message = "duplicate declaration",
        });
        break;
    }

    const entry: Symbol = .{
        .name = name,
        .kind = kind,
        .ty = ty,
        .def_span = span,
        .scope_depth = depth,
    };
    try self.symbols.append(self.allocator, entry);
}
/// Resolve one identifier use.
///
/// The symbol list is scanned from newest to oldest for a symbol with this
/// name whose depth is at or above the current depth; the first hit is
/// recorded as a reference. The names `io`, `true`, `false` and `cast` are
/// accepted silently when undeclared. Anything else produces an
/// "undefined variable" warning at `span`.
fn resolveIdentifier(self: *Analyzer, name: []const u8, span: Span) !void {
    var remaining = self.symbols.items.len;
    while (remaining > 0) : (remaining -= 1) {
        const idx = remaining - 1;
        const candidate = self.symbols.items[idx];
        if (candidate.scope_depth > self.scope_depth) continue;
        if (!std.mem.eql(u8, candidate.name, name)) continue;
        try self.references.append(self.allocator, .{
            .span = span,
            .symbol_index = @intCast(idx),
        });
        return;
    }

    // Built-in names that aren't declared in source
    const builtin_names = [_][]const u8{ "io", "true", "false", "cast" };
    for (builtin_names) |builtin| {
        if (std.mem.eql(u8, name, builtin)) return;
    }

    try self.diagnostics.append(self.allocator, .{
        .level = .warn,
        .span = span,
        .message = "undefined variable",
    });
}
/// Recursively analyse `node`: declare nested symbols, resolve identifier
/// uses, and record the inferred type of expression nodes in `type_map`.
///
/// Declarations met here (nested functions, locals, nested types) add their
/// own symbol, unlike `analyzeTopLevelDecl`, where pass 1 already did so.
/// Functions, blocks and lambdas open a scope for their contents. Allocation
/// failures — including a failed `type_map` insertion — are propagated.
fn analyzeNode(self: *Analyzer, node: *Node) !void {
    switch (node.data) {
        .fn_decl => |fd| {
            try self.addSymbol(fd.name, .function, resolveReturnType(fd), node.span);
            try self.pushScope();
            // Add params as symbols
            for (fd.params) |param| {
                const param_type = Type.fromTypeExpr(param.type_expr);
                try self.addSymbol(param.name, .param, param_type, param.name_span);
            }
            try self.analyzeNode(fd.body);
            self.popScope();
        },
        .block => |blk| {
            try self.pushScope();
            for (blk.stmts) |stmt| {
                try self.analyzeNode(stmt);
            }
            self.popScope();
        },
        .const_decl => |cd| {
            // Analyze value first (so it can't reference itself)
            try self.analyzeNode(cd.value);
            const ty = resolveTypeAnnotation(cd.type_annotation) orelse inferValueType(cd.value);
            const kind = classifyConstDecl(cd);
            try self.addSymbol(cd.name, kind, ty, node.span);
        },
        .var_decl => |vd| {
            if (vd.value) |val| {
                try self.analyzeNode(val);
            }
            // Annotation wins; otherwise infer from the initialiser, if any.
            const ty = resolveTypeAnnotation(vd.type_annotation) orelse
                if (vd.value) |val| self.inferExprType(val) else null;
            try self.addSymbol(vd.name, .variable, ty, node.span);
        },
        .enum_decl => |ed| {
            try self.addSymbol(ed.name, .enum_type, .{ .enum_type = ed.name }, node.span);
        },
        .struct_decl => |sd| {
            try self.addSymbol(sd.name, .struct_type, .{ .struct_type = sd.name }, node.span);
        },
        .identifier => |id| {
            try self.resolveIdentifier(id.name, node.span);
        },
        .binary_op => |bop| {
            try self.analyzeNode(bop.lhs);
            try self.analyzeNode(bop.rhs);
        },
        .chained_comparison => |cc| {
            for (cc.operands) |operand| {
                try self.analyzeNode(operand);
            }
        },
        .unary_op => |uop| {
            try self.analyzeNode(uop.operand);
        },
        .call => |call| {
            try self.analyzeNode(call.callee);
            for (call.args) |arg| {
                try self.analyzeNode(arg);
            }
        },
        .field_access => |fa| {
            // Only the object is resolved; the field name is not a scope lookup.
            try self.analyzeNode(fa.object);
        },
        .if_expr => |ie| {
            try self.analyzeNode(ie.condition);
            try self.analyzeNode(ie.then_branch);
            if (ie.else_branch) |eb| {
                try self.analyzeNode(eb);
            }
        },
        .match_expr => |me| {
            try self.analyzeNode(me.subject);
            for (me.arms) |arm| {
                try self.analyzeNode(arm.body);
            }
        },
        .while_expr => |we| {
            try self.analyzeNode(we.condition);
            try self.analyzeNode(we.body);
        },
        .for_expr => |fe| {
            try self.analyzeNode(fe.iterable);
            try self.analyzeNode(fe.body);
        },
        .spread_expr => |se| try self.analyzeNode(se.operand),
        .break_expr, .continue_expr => {},
        .assignment => |asgn| {
            try self.analyzeNode(asgn.target);
            try self.analyzeNode(asgn.value);
        },
        .return_stmt => |ret| {
            if (ret.value) |val| {
                try self.analyzeNode(val);
            }
        },
        .defer_stmt => |ds| {
            try self.analyzeNode(ds.expr);
        },
        .comptime_expr => |ct| {
            try self.analyzeNode(ct.expr);
        },
        .insert_expr => |ins| {
            try self.analyzeNode(ins.expr);
        },
        .lambda => |lam| {
            try self.pushScope();
            for (lam.params) |param| {
                const param_type = Type.fromTypeExpr(param.type_expr);
                try self.addSymbol(param.name, .param, param_type, param.name_span);
            }
            try self.analyzeNode(lam.body);
            self.popScope();
        },
        .struct_literal => |sl| {
            if (sl.type_expr) |te| try self.analyzeNode(te);
            for (sl.field_inits) |fi| {
                try self.analyzeNode(fi.value);
            }
        },
        .union_decl => |ud| {
            try self.addSymbol(ud.name, .enum_type, .{ .union_type = ud.name }, node.span);
        },
        .union_literal => |ul| {
            if (ul.payload) |p| {
                try self.analyzeNode(p);
            }
        },
        // Treated as leaves — nothing is recursed into.
        // NOTE(review): index_expr, slice_expr and array_literal have child
        // expressions that are therefore never resolved; confirm intended.
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .enum_literal,
        .type_expr,
        .param,
        .match_arm,
        .undef_literal,
        .builtin_expr,
        .import_decl,
        .array_type_expr,
        .slice_type_expr,
        .pointer_type_expr,
        .many_pointer_type_expr,
        .null_literal,
        .array_literal,
        .parameterized_type_expr,
        .index_expr,
        .slice_expr,
        => {},
        .deref_expr => |de| {
            try self.analyzeNode(de.operand);
        },
        .namespace_decl => |ns| {
            for (ns.decls) |d| {
                try self.analyzeNode(d);
            }
        },
        .root => {
            // Should not appear nested
        },
    }
    // Populate TypeMap for expression nodes
    switch (node.data) {
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .identifier,
        .binary_op,
        .chained_comparison,
        .unary_op,
        .call,
        .field_access,
        .if_expr,
        .match_expr,
        .block,
        .comptime_expr,
        .enum_literal,
        .struct_literal,
        .union_literal,
        .array_literal,
        .index_expr,
        .slice_expr,
        .deref_expr,
        .null_literal,
        .type_expr,
        .insert_expr,
        .while_expr,
        .for_expr,
        .spread_expr,
        .break_expr,
        .continue_expr,
        => {
            const ty = self.inferExprType(node);
            // Propagate allocation failure like every other insertion in this
            // function, rather than silently leaving the node untyped.
            try self.type_map.put(node, ty);
        },
        else => {},
    }
}
/// Type of a function's declared return type expression, or null when the
/// function declares none (or the expression does not resolve).
fn resolveReturnType(fd: ast.FnDecl) ?Type {
    const return_node = fd.return_type orelse return null;
    return Type.fromTypeExpr(return_node);
}
/// Type named by an optional annotation node; null when there is no
/// annotation or `Type.fromTypeExpr` cannot resolve it.
fn resolveTypeAnnotation(type_node: ?*Node) ?Type {
    const annotation = type_node orelse return null;
    return Type.fromTypeExpr(annotation);
}
/// Type of a constant's value when it is a plain literal: s64 for integers,
/// f64 for floats, boolean and string for their literals. Every other value
/// kind (type expressions, lambdas, comptime/insert expressions, ...) has no
/// value type here and yields null.
fn inferValueType(value: *Node) ?Type {
    switch (value.data) {
        .int_literal => return Type.s(64),
        .float_literal => return .f64,
        .bool_literal => return .boolean,
        .string_literal => return .string_type,
        else => return null,
    }
}
/// Symbol kind for a constant declaration, decided by its value: a type
/// expression makes it a type alias, a lambda makes it a function, anything
/// else a plain constant.
fn classifyConstDecl(cd: ast.ConstDecl) SymbolKind {
    if (cd.value.data == .type_expr) return .type_alias;
    if (cd.value.data == .lambda) return .function;
    return .constant;
}
};
/// Convenience wrapper: run a fresh `Analyzer` over an already-parsed `root`
/// and return its result. Parsing is the caller's job.
pub fn analyzeSource(allocator: std.mem.Allocator, root: *Node) !SemaResult {
    var sema = Analyzer.init(allocator);
    const result = try sema.analyze(root);
    return result;
}
/// Index of the first symbol whose definition span contains `offset`
/// (start inclusive, end exclusive), or null when none does.
pub fn findSymbolAtOffset(symbols: []const Symbol, offset: u32) ?usize {
    var idx: usize = 0;
    while (idx < symbols.len) : (idx += 1) {
        const span = symbols[idx].def_span;
        if (offset < span.start) continue;
        if (offset >= span.end) continue;
        return idx;
    }
    return null;
}
/// Index of the first reference whose span contains `offset`
/// (start inclusive, end exclusive), or null when none does.
pub fn findReferenceAtOffset(references: []const Reference, offset: u32) ?usize {
    var idx: usize = 0;
    while (idx < references.len) : (idx += 1) {
        const span = references[idx].span;
        if (offset < span.start) continue;
        if (offset >= span.end) continue;
        return idx;
    }
    return null;
}
/// Walk the AST to find the innermost node whose span contains `offset`.
///
/// Returns null when `offset` lies outside `node.span` (start inclusive, end
/// exclusive). Otherwise children are searched in order and the first match
/// is returned; if no child contains the offset, `node` itself is returned.
/// Struct literals search their type expression as well as their field
/// values, matching what the analyzer walks for them.
pub fn findNodeAtOffset(node: *Node, offset: u32) ?*Node {
    if (offset < node.span.start or offset >= node.span.end) return null;
    // Try to find a more specific child node
    switch (node.data) {
        .root => |r| {
            for (r.decls) |decl| {
                if (findNodeAtOffset(decl, offset)) |found| return found;
            }
        },
        .fn_decl => |fd| {
            if (fd.return_type) |rt| {
                if (findNodeAtOffset(rt, offset)) |found| return found;
            }
            if (findNodeAtOffset(fd.body, offset)) |found| return found;
        },
        .block => |blk| {
            for (blk.stmts) |stmt| {
                if (findNodeAtOffset(stmt, offset)) |found| return found;
            }
        },
        .const_decl => |cd| {
            if (cd.type_annotation) |ta| {
                if (findNodeAtOffset(ta, offset)) |found| return found;
            }
            if (findNodeAtOffset(cd.value, offset)) |found| return found;
        },
        .var_decl => |vd| {
            if (vd.type_annotation) |ta| {
                if (findNodeAtOffset(ta, offset)) |found| return found;
            }
            if (vd.value) |val| {
                if (findNodeAtOffset(val, offset)) |found| return found;
            }
        },
        .binary_op => |bop| {
            if (findNodeAtOffset(bop.lhs, offset)) |found| return found;
            if (findNodeAtOffset(bop.rhs, offset)) |found| return found;
        },
        .chained_comparison => |cc| {
            for (cc.operands) |operand| {
                if (findNodeAtOffset(operand, offset)) |found| return found;
            }
        },
        .unary_op => |uop| {
            if (findNodeAtOffset(uop.operand, offset)) |found| return found;
        },
        .call => |call| {
            if (findNodeAtOffset(call.callee, offset)) |found| return found;
            for (call.args) |arg| {
                if (findNodeAtOffset(arg, offset)) |found| return found;
            }
        },
        .field_access => |fa| {
            if (findNodeAtOffset(fa.object, offset)) |found| return found;
        },
        .if_expr => |ie| {
            if (findNodeAtOffset(ie.condition, offset)) |found| return found;
            if (findNodeAtOffset(ie.then_branch, offset)) |found| return found;
            if (ie.else_branch) |eb| {
                if (findNodeAtOffset(eb, offset)) |found| return found;
            }
        },
        .match_expr => |me| {
            if (findNodeAtOffset(me.subject, offset)) |found| return found;
            for (me.arms) |arm| {
                if (findNodeAtOffset(arm.body, offset)) |found| return found;
                if (arm.pattern) |pat| {
                    if (findNodeAtOffset(pat, offset)) |found| return found;
                }
            }
        },
        .while_expr => |we| {
            if (findNodeAtOffset(we.condition, offset)) |found| return found;
            if (findNodeAtOffset(we.body, offset)) |found| return found;
        },
        .for_expr => |fe| {
            if (findNodeAtOffset(fe.iterable, offset)) |found| return found;
            if (findNodeAtOffset(fe.body, offset)) |found| return found;
        },
        .spread_expr => |se| {
            if (findNodeAtOffset(se.operand, offset)) |found| return found;
        },
        .break_expr, .continue_expr => {},
        .assignment => |asgn| {
            if (findNodeAtOffset(asgn.target, offset)) |found| return found;
            if (findNodeAtOffset(asgn.value, offset)) |found| return found;
        },
        .return_stmt => |ret| {
            if (ret.value) |val| {
                if (findNodeAtOffset(val, offset)) |found| return found;
            }
        },
        .defer_stmt => |ds| {
            if (findNodeAtOffset(ds.expr, offset)) |found| return found;
        },
        .comptime_expr => |ct| {
            if (findNodeAtOffset(ct.expr, offset)) |found| return found;
        },
        .insert_expr => |ins| {
            if (findNodeAtOffset(ins.expr, offset)) |found| return found;
        },
        .lambda => |lam| {
            if (findNodeAtOffset(lam.body, offset)) |found| return found;
        },
        .struct_literal => |sl| {
            // The type expression (e.g. `List(s32)` in `List(s32).{}`) is
            // analysed and can hold references, so it must be findable too.
            if (sl.type_expr) |te| {
                if (findNodeAtOffset(te, offset)) |found| return found;
            }
            for (sl.field_inits) |fi| {
                if (findNodeAtOffset(fi.value, offset)) |found| return found;
            }
        },
        .union_literal => |ul| {
            if (ul.payload) |p| {
                if (findNodeAtOffset(p, offset)) |found| return found;
            }
        },
        // Treated as leaves: no children are searched.
        .identifier,
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .enum_literal,
        .type_expr,
        .param,
        .match_arm,
        .undef_literal,
        .builtin_expr,
        .enum_decl,
        .struct_decl,
        .union_decl,
        .import_decl,
        .array_type_expr,
        .slice_type_expr,
        .pointer_type_expr,
        .many_pointer_type_expr,
        .null_literal,
        .array_literal,
        .parameterized_type_expr,
        .index_expr,
        .slice_expr,
        => {},
        .deref_expr => |de| {
            if (findNodeAtOffset(de.operand, offset)) |found| return found;
        },
        .namespace_decl => |ns| {
            for (ns.decls) |d| {
                if (findNodeAtOffset(d, offset)) |found| return found;
            }
        },
    }
    // No child matched: this node is the innermost one containing the offset.
    return node;
}
test "sema: collect top-level declarations" {
    const Parser = @import("parser.zig").Parser;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    // One function declaration with a trivial body.
    var parser = Parser.init(arena_alloc, "main :: () { 42; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(arena_alloc);
    const result = try sema.analyze(tree);

    // Exactly one symbol is expected: the function `main`.
    try std.testing.expectEqual(@as(usize, 1), result.symbols.len);
    const main_sym = result.symbols[0];
    try std.testing.expectEqualStrings("main", main_sym.name);
    try std.testing.expectEqual(SymbolKind.function, main_sym.kind);
}
test "sema: function params as symbols" {
    const Parser = @import("parser.zig").Parser;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var parser = Parser.init(arena_alloc, "add :: (a: s32, b: s32) -> s32 { a + b; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(arena_alloc);
    const result = try sema.analyze(tree);

    // Expected symbols, in order: the function, then its two parameters.
    try std.testing.expectEqual(@as(usize, 3), result.symbols.len);

    const fn_sym = result.symbols[0];
    try std.testing.expectEqualStrings("add", fn_sym.name);
    try std.testing.expectEqual(SymbolKind.function, fn_sym.kind);

    const first_param = result.symbols[1];
    try std.testing.expectEqualStrings("a", first_param.name);
    try std.testing.expectEqual(SymbolKind.param, first_param.kind);

    const second_param = result.symbols[2];
    try std.testing.expectEqualStrings("b", second_param.name);
    try std.testing.expectEqual(SymbolKind.param, second_param.kind);

    // Both uses in the body (`a` and `b`) must have resolved.
    try std.testing.expect(result.references.len >= 2);
}
test "sema: variable declaration and reference" {
    const Parser = @import("parser.zig").Parser;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var parser = Parser.init(arena_alloc, "main :: () { x := 42; x; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(arena_alloc);
    const result = try sema.analyze(tree);

    // Expected symbols: `main` (function) followed by `x` (variable).
    try std.testing.expectEqual(@as(usize, 2), result.symbols.len);
    try std.testing.expectEqualStrings("main", result.symbols[0].name);
    const local = result.symbols[1];
    try std.testing.expectEqualStrings("x", local.name);
    try std.testing.expectEqual(SymbolKind.variable, local.kind);

    // The use of `x` must be recorded and must point at symbol index 1.
    try std.testing.expect(result.references.len >= 1);
    const first_ref = result.references[0];
    try std.testing.expectEqual(@as(u32, 1), first_ref.symbol_index);
}
test "sema: undefined variable diagnostic" {
    const Parser = @import("parser.zig").Parser;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    // `y` is used but never declared.
    var parser = Parser.init(arena_alloc, "main :: () { y; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(arena_alloc);
    const result = try sema.analyze(tree);

    // At least one diagnostic, and the first one reports the undefined name.
    try std.testing.expect(result.diagnostics.len >= 1);
    const first_diag = result.diagnostics[0];
    try std.testing.expectEqualStrings("undefined variable", first_diag.message);
}
test "sema: enum and struct declarations" {
    const Parser = @import("parser.zig").Parser;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var parser = Parser.init(arena_alloc, "Color :: enum { red; green; blue; } Vec2 :: struct { x, y: f32; } main :: () { 0; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(arena_alloc);
    const result = try sema.analyze(tree);

    // Expected symbols, in source order: Color (enum), Vec2 (struct), main.
    try std.testing.expectEqual(@as(usize, 3), result.symbols.len);

    const enum_sym = result.symbols[0];
    try std.testing.expectEqualStrings("Color", enum_sym.name);
    try std.testing.expectEqual(SymbolKind.enum_type, enum_sym.kind);

    const struct_sym = result.symbols[1];
    try std.testing.expectEqualStrings("Vec2", struct_sym.name);
    try std.testing.expectEqual(SymbolKind.struct_type, struct_sym.kind);

    try std.testing.expectEqualStrings("main", result.symbols[2].name);
}
test "sema: var_decl infers struct type from parameterized struct literal" {
    const Parser = @import("parser.zig").Parser;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var parser = Parser.init(arena_alloc, "List :: struct { len: s64; } main :: () { list := List.{}; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(arena_alloc);
    const result = try sema.analyze(tree);

    // Locate the `list` variable; its absence fails the test.
    const list_sym: Symbol = for (result.symbols) |sym| {
        if (std.mem.eql(u8, sym.name, "list")) break sym;
    } else return error.TestUnexpectedResult;

    try std.testing.expectEqual(SymbolKind.variable, list_sym.kind);
    // The struct type must have been inferred from the literal.
    const inferred = list_sym.ty orelse return error.TestUnexpectedResult;
    try std.testing.expect(inferred == .struct_type);
    try std.testing.expectEqualStrings("List", inferred.struct_type);
}
test "sema: var_decl infers struct type from parameterized call literal" {
    const Parser = @import("parser.zig").Parser;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    // List(s32).{} — parser produces struct_literal with type_expr = call node
    var parser = Parser.init(arena_alloc, "List :: struct { len: s64; } main :: () { list := List(s32).{}; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(arena_alloc);
    const result = try sema.analyze(tree);

    // Locate the `list` variable; its absence fails the test.
    const list_sym: Symbol = for (result.symbols) |sym| {
        if (std.mem.eql(u8, sym.name, "list")) break sym;
    } else return error.TestUnexpectedResult;

    try std.testing.expectEqual(SymbolKind.variable, list_sym.kind);
    const inferred = list_sym.ty orelse return error.TestUnexpectedResult;
    try std.testing.expect(inferred == .struct_type);
    try std.testing.expectEqualStrings("List", inferred.struct_type);
}