1129 lines
43 KiB
Zig
1129 lines
43 KiB
Zig
const std = @import("std");
|
|
const ast = @import("ast.zig");
|
|
const Node = ast.Node;
|
|
const Span = ast.Span;
|
|
const Type = @import("types.zig").Type;
|
|
const errors = @import("errors.zig");
|
|
const Diagnostic = errors.Diagnostic;
|
|
|
|
/// Classification of a name recorded in the analyzer's symbol table.
pub const SymbolKind = enum {
    /// Name introduced by a `var_decl`.
    variable,
    /// `const_decl` whose value is neither a type expression nor a lambda.
    constant,
    /// `fn_decl`, or a `const_decl` whose value is a lambda.
    function,
    /// `enum_decl`; `union_decl` names are recorded under this kind as well.
    enum_type,
    /// `struct_decl`.
    struct_type,
    /// `const_decl` whose value is a type expression.
    type_alias,
    /// Parameter of a function or lambda.
    param,
    /// `namespace_decl`.
    namespace,
};
|
|
|
|
/// One entry of the symbol table built by `Analyzer`.
pub const Symbol = struct {
    /// Declared name, exactly as it appears in the AST.
    name: []const u8,
    /// What sort of declaration introduced the name.
    kind: SymbolKind,
    /// Resolved type, or `null` when none could be determined.
    ty: ?Type,
    /// Span recorded at definition: the whole declaration node for
    /// declarations, the parameter's name span for parameters.
    def_span: Span,
    /// Value of `Analyzer.scope_depth` at the moment the symbol was added.
    scope_depth: u32,
};
|
|
|
|
/// A resolved use of a name.
pub const Reference = struct {
    /// Source span of the identifier that was resolved.
    span: Span,
    /// Index of the matching entry in the analyzer's symbol list.
    symbol_index: u32,
};
|
|
|
|
/// Callable signature stored in the `fn_signatures` registry.
pub const FnSignature = struct {
    /// One entry per declared parameter, in declaration order.
    param_types: []const Type,
    /// Declared return type; the analyzer stores `.void_type` when none resolves.
    return_type: Type,
    /// Set when at least one parameter was declared variadic.
    is_variadic: bool = false,
};
|
|
|
|
/// Field layout of a struct declaration, stored in the `struct_types` registry.
/// `field_names[i]` is looked up together with `field_types[i]`.
pub const StructTypeInfo = struct {
    field_names: []const []const u8,
    field_types: []const Type,
};
|
|
|
|
/// Inferred type per expression node, keyed by node address.
pub const TypeMap = std.AutoHashMap(*const Node, Type);
|
|
|
|
/// Everything `Analyzer.analyze` produces.
///
/// The three slices are owned slices taken from the analyzer's lists; the maps
/// are the analyzer's own registries, handed over by value.
pub const SemaResult = struct {
    symbols: []const Symbol,
    references: []const Reference,
    diagnostics: []const Diagnostic,
    /// Function and lambda signatures keyed by (possibly namespace-qualified) name.
    fn_signatures: std.StringHashMap(FnSignature),
    /// Struct layouts keyed by struct name.
    struct_types: std.StringHashMap(StructTypeInfo),
    /// Variant lists keyed by enum name.
    enum_types: std.StringHashMap([]const []const u8),
    /// Alias name -> name of the aliased type.
    type_aliases: std.StringHashMap([]const u8),
    /// Inferred type for each analyzed expression node.
    type_map: TypeMap,
};
|
|
|
|
pub const Analyzer = struct {
|
|
allocator: std.mem.Allocator,
|
|
symbols: std.ArrayList(Symbol),
|
|
references: std.ArrayList(Reference),
|
|
diagnostics: std.ArrayList(Diagnostic),
|
|
scope_depth: u32,
|
|
/// Stack of symbol counts at each scope entry, for popScope cleanup.
|
|
scope_starts: std.ArrayList(u32),
|
|
// Type registries
|
|
fn_signatures: std.StringHashMap(FnSignature),
|
|
struct_types: std.StringHashMap(StructTypeInfo),
|
|
enum_types: std.StringHashMap([]const []const u8),
|
|
type_aliases: std.StringHashMap([]const u8),
|
|
type_map: TypeMap,
|
|
|
|
/// Create an analyzer with empty tables. `allocator` is stored and used for
/// every later allocation made by the analyzer.
pub fn init(allocator: std.mem.Allocator) Analyzer {
    return .{
        .allocator = allocator,
        .scope_depth = 0,
        // Lists start empty and take the allocator per call.
        .symbols = .empty,
        .references = .empty,
        .diagnostics = .empty,
        .scope_starts = .empty,
        // Registries keep their own copy of the allocator.
        .fn_signatures = .init(allocator),
        .struct_types = .init(allocator),
        .enum_types = .init(allocator),
        .type_aliases = .init(allocator),
        .type_map = .init(allocator),
    };
}
|
|
|
|
/// Run both passes over `root` and return the collected tables.
///
/// Fails with `error.InvalidRoot` when `root` is not a `.root` node; errors
/// from either pass are propagated unchanged.
pub fn analyze(self: *Analyzer, root: *Node) !SemaResult {
    const decls = switch (root.data) {
        .root => |r| r.decls,
        else => return error.InvalidRoot,
    };

    // Pass 1 registers every top-level name first so that pass 2 can resolve
    // forward references.
    for (decls) |decl| try self.registerTopLevelDecl(decl);

    // Pass 2 walks bodies and value expressions.
    for (decls) |decl| try self.analyzeTopLevelDecl(decl);

    const symbols = try self.symbols.toOwnedSlice(self.allocator);
    const references = try self.references.toOwnedSlice(self.allocator);
    const diagnostics = try self.diagnostics.toOwnedSlice(self.allocator);

    return .{
        .symbols = symbols,
        .references = references,
        .diagnostics = diagnostics,
        .fn_signatures = self.fn_signatures,
        .struct_types = self.struct_types,
        .enum_types = self.enum_types,
        .type_aliases = self.type_aliases,
        .type_map = self.type_map,
    };
}
|
|
|
|
/// Pass 1 entry point: record name, kind and type of a top-level declaration
/// with no namespace prefix. Bodies and value expressions are not visited.
fn registerTopLevelDecl(self: *Analyzer, node: *Node) !void {
    return self.registerTopLevelDeclPrefixed(node, null);
}
|
|
|
|
/// Pass 1 worker: add the symbol for `node` and fill the matching registry
/// (`fn_signatures`, `type_aliases`, `enum_types`, `struct_types`).
///
/// `ns_prefix`, when set, is the enclosing namespace name; function and lambda
/// signatures are then stored under `"<ns_prefix>.<name>"`. Node kinds that
/// are not declarations are ignored.
///
/// Temporary lists and keys allocated here are released again if a later
/// step fails.
fn registerTopLevelDeclPrefixed(self: *Analyzer, node: *Node, ns_prefix: ?[]const u8) !void {
    switch (node.data) {
        .fn_decl => |fd| {
            const ret_ty = resolveReturnType(fd);
            try self.addSymbol(fd.name, .function, ret_ty, node.span);

            var param_types = std.ArrayList(Type).empty;
            errdefer param_types.deinit(self.allocator);
            var has_variadic = false;
            for (fd.params) |param| {
                if (param.is_variadic) {
                    has_variadic = true;
                    // A variadic parameter is recorded as a slice of its element type.
                    const elem_name = if (param.type_expr.data == .type_expr) param.type_expr.data.type_expr.name else "s32";
                    try param_types.append(self.allocator, .{ .slice_type = .{ .element_name = elem_name } });
                } else {
                    const pt = Type.fromTypeExpr(param.type_expr) orelse Type.s(64);
                    try param_types.append(self.allocator, pt);
                }
            }
            try self.putFnSignature(ns_prefix, fd.name, &param_types, ret_ty orelse .void_type, has_variadic);
        },
        .const_decl => |cd| {
            const ty = resolveTypeAnnotation(cd.type_annotation) orelse inferValueType(cd.value);
            const kind = classifyConstDecl(cd);
            try self.addSymbol(cd.name, kind, ty, node.span);

            // `Name :: SomeType` is a type alias.
            if (cd.value.data == .type_expr) {
                try self.type_aliases.put(cd.name, cd.value.data.type_expr.name);
            }

            // A constant bound to a lambda is callable, so it gets a signature too.
            if (cd.value.data == .lambda) {
                const lam = cd.value.data.lambda;
                var param_types = std.ArrayList(Type).empty;
                errdefer param_types.deinit(self.allocator);
                for (lam.params) |param| {
                    const pt = Type.fromTypeExpr(param.type_expr) orelse Type.s(64);
                    try param_types.append(self.allocator, pt);
                }
                const ret = if (lam.return_type) |rt| Type.fromTypeExpr(rt) orelse .void_type else .void_type;
                try self.putFnSignature(ns_prefix, cd.name, &param_types, ret, false);
            }
        },
        .var_decl => |vd| {
            const ty = resolveTypeAnnotation(vd.type_annotation);
            try self.addSymbol(vd.name, .variable, ty, node.span);
        },
        .enum_decl => |ed| {
            try self.addSymbol(ed.name, .enum_type, .{ .enum_type = ed.name }, node.span);
            try self.enum_types.put(ed.name, ed.variants);
        },
        .struct_decl => |sd| {
            try self.addSymbol(sd.name, .struct_type, .{ .struct_type = sd.name }, node.span);

            var field_types = std.ArrayList(Type).empty;
            errdefer field_types.deinit(self.allocator);
            for (sd.field_types) |ft| {
                const resolved = Type.fromTypeExpr(ft) orelse Type.s(64);
                try field_types.append(self.allocator, resolved);
            }
            const owned_fields = try field_types.toOwnedSlice(self.allocator);
            errdefer self.allocator.free(owned_fields);
            try self.struct_types.put(sd.name, .{
                .field_names = sd.field_names,
                .field_types = owned_fields,
            });
        },
        .union_decl => |ud| {
            try self.addSymbol(ud.name, .enum_type, .{ .union_type = ud.name }, node.span);
        },
        .namespace_decl => |ns| {
            try self.addSymbol(ns.name, .namespace, null, node.span);
            // Members are registered in their own scope so their names do not
            // collide with flat imports of the same names.
            try self.pushScope();
            for (ns.decls) |d| {
                try self.registerTopLevelDeclPrefixed(d, ns.name);
            }
            self.popScope();
        },
        else => {},
    }
}

/// Store a signature under `name`, or under `"<ns_prefix>.<name>"` when a
/// prefix is given. Takes over the contents of `param_types`; if anything
/// fails, memory allocated by this helper is freed and `param_types` is left
/// for the caller to release.
fn putFnSignature(
    self: *Analyzer,
    ns_prefix: ?[]const u8,
    name: []const u8,
    param_types: *std.ArrayList(Type),
    return_type: Type,
    is_variadic: bool,
) !void {
    const key = if (ns_prefix) |pfx|
        try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ pfx, name })
    else
        name;
    // Only the qualified key is heap-allocated; the bare name is borrowed.
    errdefer if (ns_prefix != null) self.allocator.free(key);

    const owned_params = try param_types.toOwnedSlice(self.allocator);
    errdefer self.allocator.free(owned_params);

    try self.fn_signatures.put(key, .{
        .param_types = owned_params,
        .return_type = return_type,
        .is_variadic = is_variadic,
    });
}
|
|
|
|
/// Resolve a type annotation node to a `Type`.
///
/// Tries `Type.fromTypeExpr` first, then array (`[N]T`), slice (`[]T`),
/// pointer (`*T`) and many-pointer (`[*]T`) forms, and finally named types via
/// the alias, enum and struct registries. Parameterized types are not resolved
/// here. Returns `.void_type` for a missing node, for anything that cannot be
/// resolved, when an element name cannot be produced, and for an array whose
/// length is not an integer literal that fits in `u32`.
pub fn resolveTypeNode(self: *Analyzer, type_node: ?*Node) Type {
    const tn = type_node orelse return .void_type;
    if (Type.fromTypeExpr(tn)) |t| return t;

    // Array type: [N]T
    if (tn.data == .array_type_expr) {
        const ate = tn.data.array_type_expr;
        // The length must be a literal that fits in u32; reading the payload
        // of another node kind or casting an out-of-range value is illegal.
        if (ate.length.data != .int_literal) return .void_type;
        const length = std.math.cast(u32, ate.length.data.int_literal.value) orelse return .void_type;
        const elem_type = self.resolveTypeNode(ate.element_type);
        const elem_name = elem_type.displayName(self.allocator) catch return .void_type;
        return .{ .array_type = .{ .element_name = elem_name, .length = length } };
    }

    // Slice type: []T
    if (tn.data == .slice_type_expr) {
        const ste = tn.data.slice_type_expr;
        const elem_type = self.resolveTypeNode(ste.element_type);
        const elem_name = elem_type.displayName(self.allocator) catch return .void_type;
        return .{ .slice_type = .{ .element_name = elem_name } };
    }

    // Pointer type: *T
    if (tn.data == .pointer_type_expr) {
        const pte = tn.data.pointer_type_expr;
        const pointee_type = self.resolveTypeNode(pte.pointee_type);
        const pointee_name = pointee_type.displayName(self.allocator) catch return .void_type;
        return .{ .pointer_type = .{ .pointee_name = pointee_name } };
    }

    // Many-pointer type: [*]T
    if (tn.data == .many_pointer_type_expr) {
        const mpte = tn.data.many_pointer_type_expr;
        const elem_type = self.resolveTypeNode(mpte.element_type);
        const elem_name = elem_type.displayName(self.allocator) catch return .void_type;
        return .{ .many_pointer_type = .{ .element_name = elem_name } };
    }

    // Sema does not resolve generics; codegen handles instantiation.
    if (tn.data == .parameterized_type_expr) return .void_type;

    // Named type: primitives first, then aliases, enums and structs.
    if (tn.data == .type_expr or tn.data == .identifier) {
        const name = if (tn.data == .type_expr) tn.data.type_expr.name else tn.data.identifier.name;
        if (Type.fromName(name)) |t| return t;
        if (self.type_aliases.get(name)) |target| {
            if (Type.fromName(target)) |t| return t;
            if (self.struct_types.contains(target)) return .{ .struct_type = target };
        }
        if (self.enum_types.contains(name)) return .{ .enum_type = name };
        if (self.struct_types.contains(name)) return .{ .struct_type = name };
    }

    return .void_type;
}
|
|
|
|
/// Infer the type of an expression node using only the analyzer's tables.
///
/// Call return types come from `fn_signatures`, field types from
/// `struct_types`, identifier types from the symbol list, and arithmetic
/// results from `Type.widen`. Several fallbacks yield `Type.s(64)`; node kinds
/// without a value type yield `.void_type`.
pub fn inferExprType(self: *Analyzer, node: *const Node) Type {
    switch (node.data) {
        // Literals and other fixed answers.
        .int_literal => return Type.s(64),
        .float_literal => return .f32,
        .bool_literal, .chained_comparison => return .boolean,
        .string_literal => return .string_type,
        .enum_literal => return .{ .enum_type = "" },
        .type_expr => |te| return .{ .meta_type = .{ .name = te.name } },
        .insert_expr,
        .while_expr,
        .for_expr,
        .spread_expr,
        .break_expr,
        .continue_expr,
        .null_literal,
        .array_literal,
        => return .void_type,

        // Wrappers take the type of the wrapped expression.
        .comptime_expr => |ct| return self.inferExprType(ct.expr),
        .unary_op => |unop| return self.inferExprType(unop.operand),
        // Only the then-branch is consulted.
        .if_expr => |ie| return self.inferExprType(ie.then_branch),

        .binary_op => |binop| switch (binop.op) {
            .eq, .neq, .lt, .lte, .gt, .gte, .and_op, .or_op => return .boolean,
            else => {
                const left = self.inferExprType(binop.lhs);
                const right = self.inferExprType(binop.rhs);
                return Type.widen(left, right);
            },
        },

        .identifier => |ident| {
            // Newest matching symbol at or above the current scope depth wins.
            var remaining = self.symbols.items.len;
            while (remaining > 0) : (remaining -= 1) {
                const sym = self.symbols.items[remaining - 1];
                if (sym.scope_depth > self.scope_depth) continue;
                if (!std.mem.eql(u8, sym.name, ident.name)) continue;
                return sym.ty orelse Type.s(64);
            }
            return Type.s(64);
        },

        .block => |blk| {
            // A block takes the type of its last statement.
            if (blk.stmts.len == 0) return .void_type;
            return self.inferExprType(blk.stmts[blk.stmts.len - 1]);
        },

        .match_expr => |me| {
            // The first arm that is not a break decides the type.
            for (me.arms) |arm| {
                if (arm.is_break) continue;
                return self.inferExprType(arm.body);
            }
            return .void_type;
        },

        .call => |call_node| {
            const callee_name = self.resolveCalleeName(call_node) orelse return Type.s(64);
            if (self.fn_signatures.get(callee_name)) |sig| return sig.return_type;

            // Built-in `sqrt` returns the type of its argument.
            const base = if (std.mem.lastIndexOfScalar(u8, callee_name, '.')) |dot|
                callee_name[dot + 1 ..]
            else
                callee_name;
            if (!std.mem.eql(u8, base, "sqrt")) return Type.s(64);
            if (call_node.args.len > 0) return self.inferExprType(call_node.args[0]);
            return .f32;
        },

        .field_access => |fa| {
            const obj_ty = self.inferExprType(fa.object);
            if (obj_ty == .string_type) {
                if (std.mem.eql(u8, fa.field, "len")) return Type.s(64);
                if (std.mem.eql(u8, fa.field, "ptr")) return .string_type;
            }
            if (obj_ty.isStruct()) {
                if (self.struct_types.get(obj_ty.struct_type)) |info| {
                    for (info.field_names, 0..) |field_name, field_idx| {
                        if (std.mem.eql(u8, field_name, fa.field)) return info.field_types[field_idx];
                    }
                }
            }
            if (obj_ty.isArray()) {
                return Type.fromName(obj_ty.array_type.element_name) orelse Type.s(64);
            }
            return Type.s(64);
        },

        .index_expr => |ie| {
            const obj_ty = self.inferExprType(ie.object);
            if (obj_ty == .string_type) return Type.u(8);
            if (obj_ty.isArray()) {
                return Type.fromName(obj_ty.array_type.element_name) orelse Type.s(64);
            }
            return Type.s(64);
        },

        .slice_expr => |se| {
            const obj_ty = self.inferExprType(se.object);
            if (obj_ty == .string_type) return .string_type;
            if (obj_ty.isArray()) {
                return .{ .slice_type = .{ .element_name = obj_ty.array_type.element_name } };
            }
            if (obj_ty.isSlice()) return obj_ty;
            return .void_type;
        },

        .union_literal => |ul| {
            const union_name = ul.union_name orelse return .void_type;
            return .{ .union_type = union_name };
        },

        .struct_literal => |sl| {
            if (sl.struct_name) |name| {
                if (self.struct_types.contains(name)) return .{ .struct_type = name };
                if (self.type_aliases.get(name)) |target| {
                    if (self.struct_types.contains(target)) return .{ .struct_type = target };
                }
            } else if (sl.type_expr) |te| {
                // A parameterized struct such as `List(s32).{}` parses as a call node.
                if (te.data == .call) {
                    if (self.resolveCalleeName(te.data.call)) |callee| {
                        if (self.struct_types.contains(callee)) return .{ .struct_type = callee };
                    }
                }
                return self.inferExprType(te);
            }
            return .void_type;
        },

        .deref_expr => |de| {
            const ptr_ty = self.inferExprType(de.operand);
            if (!ptr_ty.isPointer()) return .void_type;
            return ptr_ty.pointerPointeeType() orelse .void_type;
        },

        .parameterized_type_expr => |pte| {
            if (self.struct_types.contains(pte.name)) return .{ .struct_type = pte.name };
            return .void_type;
        },

        else => return .void_type,
    }
}
|
|
|
|
/// Extract the name used to look up a call's callee.
///
/// A plain identifier yields its name. `object.field` with an identifier
/// object yields only `field` (no qualified name is built, so nothing is
/// allocated). Any other callee shape yields `null`.
fn resolveCalleeName(self: *Analyzer, call_node: ast.Call) ?[]const u8 {
    _ = self;
    return switch (call_node.callee.data) {
        .identifier => |ident| ident.name,
        .field_access => |fa| if (fa.object.data == .identifier) fa.field else null,
        else => null,
    };
}
|
|
|
|
/// Pass 2: walk the body or value of a top-level declaration.
/// The declaration's own symbol was already added during pass 1.
fn analyzeTopLevelDecl(self: *Analyzer, node: *Node) !void {
    switch (node.data) {
        // Nothing to walk for these at top level.
        .enum_decl,
        .struct_decl,
        .union_decl,
        .array_type_expr,
        .slice_type_expr,
        .array_literal,
        .parameterized_type_expr,
        .index_expr,
        .slice_expr,
        .insert_expr,
        => {},

        .const_decl => |cd| try self.analyzeNode(cd.value),

        .var_decl => |vd| {
            if (vd.value) |initial| try self.analyzeNode(initial);
        },

        .fn_decl => |fd| {
            // Parameters live in a scope of their own around the body.
            try self.pushScope();
            for (fd.params) |p| {
                try self.addSymbol(p.name, .param, Type.fromTypeExpr(p.type_expr), p.name_span);
            }
            try self.analyzeNode(fd.body);
            self.popScope();
        },

        .namespace_decl => |ns| {
            // Members are registered again (unprefixed) inside a fresh scope,
            // then analyzed with all of them visible.
            try self.pushScope();
            for (ns.decls) |member| try self.registerTopLevelDecl(member);
            for (ns.decls) |member| try self.analyzeTopLevelDecl(member);
            self.popScope();
        },

        else => try self.analyzeNode(node),
    }
}
|
|
|
|
/// Enter a nested scope: remember how many symbols exist right now and
/// increase the depth by one.
fn pushScope(self: *Analyzer) !void {
    const first_symbol: u32 = @intCast(self.symbols.items.len);
    try self.scope_starts.append(self.allocator, first_symbol);
    self.scope_depth += 1;
}
|
|
|
|
/// Leave the innermost scope. Does nothing when no scope is open.
/// Symbols added inside the scope stay in the symbol list.
fn popScope(self: *Analyzer) void {
    if (self.scope_starts.items.len == 0) return;
    _ = self.scope_starts.pop();
    self.scope_depth -= 1;
}
|
|
|
|
/// Append a symbol at the current scope depth.
///
/// If a symbol with the same name and depth already exists among the symbols
/// added since the innermost scope was opened, a "duplicate declaration"
/// warning is recorded first. The new symbol is appended either way.
fn addSymbol(self: *Analyzer, name: []const u8, kind: SymbolKind, ty: ?Type, span: Span) !void {
    const open_scopes = self.scope_starts.items;
    const window_start: usize = if (open_scopes.len == 0) 0 else open_scopes[open_scopes.len - 1];

    const already_declared = for (self.symbols.items[window_start..]) |existing| {
        if (existing.scope_depth != self.scope_depth) continue;
        if (std.mem.eql(u8, existing.name, name)) break true;
    } else false;

    if (already_declared) {
        try self.diagnostics.append(self.allocator, .{
            .level = .warn,
            .span = span,
            .message = "duplicate declaration",
        });
    }

    try self.symbols.append(self.allocator, .{
        .name = name,
        .kind = kind,
        .ty = ty,
        .def_span = span,
        .scope_depth = self.scope_depth,
    });
}
|
|
|
|
/// Resolve a use of `name` at `span`.
///
/// The symbol list is scanned from newest to oldest; the first symbol with a
/// matching name whose depth does not exceed the current depth gets a
/// `Reference`. Names that resolve to nothing produce an "undefined variable"
/// warning, except for the built-ins `io`, `true`, `false` and `cast`.
fn resolveIdentifier(self: *Analyzer, name: []const u8, span: Span) !void {
    var remaining = self.symbols.items.len;
    while (remaining > 0) : (remaining -= 1) {
        const idx = remaining - 1;
        const candidate = self.symbols.items[idx];
        if (candidate.scope_depth > self.scope_depth) continue;
        if (!std.mem.eql(u8, candidate.name, name)) continue;

        try self.references.append(self.allocator, .{
            .span = span,
            .symbol_index = @intCast(idx),
        });
        return;
    }

    // Built-in names are never declared in source.
    const builtin_names = [_][]const u8{ "io", "true", "false", "cast" };
    for (builtin_names) |builtin| {
        if (std.mem.eql(u8, name, builtin)) return;
    }

    try self.diagnostics.append(self.allocator, .{
        .level = .warn,
        .span = span,
        .message = "undefined variable",
    });
}
|
|
|
|
/// Recursively analyze `node`: declare symbols, resolve identifier uses, and
/// record an inferred type for expression nodes in `type_map`.
///
/// Scopes are opened for function bodies, lambdas and blocks. All allocation
/// failures are propagated, including a failure to store the inferred type.
fn analyzeNode(self: *Analyzer, node: *Node) !void {
    switch (node.data) {
        .fn_decl => |fd| {
            try self.addSymbol(fd.name, .function, resolveReturnType(fd), node.span);
            try self.pushScope();
            // Parameters are symbols of the function's own scope.
            for (fd.params) |param| {
                const param_type = Type.fromTypeExpr(param.type_expr);
                try self.addSymbol(param.name, .param, param_type, param.name_span);
            }
            try self.analyzeNode(fd.body);
            self.popScope();
        },
        .block => |blk| {
            try self.pushScope();
            for (blk.stmts) |stmt| {
                try self.analyzeNode(stmt);
            }
            self.popScope();
        },
        .const_decl => |cd| {
            // The value is analyzed before the name exists, so it cannot
            // reference itself.
            try self.analyzeNode(cd.value);
            const ty = resolveTypeAnnotation(cd.type_annotation) orelse inferValueType(cd.value);
            const kind = classifyConstDecl(cd);
            try self.addSymbol(cd.name, kind, ty, node.span);
        },
        .var_decl => |vd| {
            if (vd.value) |val| {
                try self.analyzeNode(val);
            }
            const ty = resolveTypeAnnotation(vd.type_annotation) orelse
                if (vd.value) |val| self.inferExprType(val) else null;
            try self.addSymbol(vd.name, .variable, ty, node.span);
        },
        .enum_decl => |ed| {
            try self.addSymbol(ed.name, .enum_type, .{ .enum_type = ed.name }, node.span);
        },
        .struct_decl => |sd| {
            try self.addSymbol(sd.name, .struct_type, .{ .struct_type = sd.name }, node.span);
        },
        .union_decl => |ud| {
            try self.addSymbol(ud.name, .enum_type, .{ .union_type = ud.name }, node.span);
        },
        .identifier => |id| {
            try self.resolveIdentifier(id.name, node.span);
        },
        .binary_op => |bop| {
            try self.analyzeNode(bop.lhs);
            try self.analyzeNode(bop.rhs);
        },
        .chained_comparison => |cc| {
            for (cc.operands) |operand| {
                try self.analyzeNode(operand);
            }
        },
        .unary_op => |uop| {
            try self.analyzeNode(uop.operand);
        },
        .call => |call| {
            try self.analyzeNode(call.callee);
            for (call.args) |arg| {
                try self.analyzeNode(arg);
            }
        },
        .field_access => |fa| {
            // Only the object is resolved; the field name is not a symbol use.
            try self.analyzeNode(fa.object);
        },
        .if_expr => |ie| {
            try self.analyzeNode(ie.condition);
            try self.analyzeNode(ie.then_branch);
            if (ie.else_branch) |eb| {
                try self.analyzeNode(eb);
            }
        },
        .match_expr => |me| {
            try self.analyzeNode(me.subject);
            for (me.arms) |arm| {
                try self.analyzeNode(arm.body);
            }
        },
        .while_expr => |we| {
            try self.analyzeNode(we.condition);
            try self.analyzeNode(we.body);
        },
        .for_expr => |fe| {
            try self.analyzeNode(fe.iterable);
            try self.analyzeNode(fe.body);
        },
        .spread_expr => |se| try self.analyzeNode(se.operand),
        .break_expr, .continue_expr => {},
        .assignment => |asgn| {
            try self.analyzeNode(asgn.target);
            try self.analyzeNode(asgn.value);
        },
        .return_stmt => |ret| {
            if (ret.value) |val| {
                try self.analyzeNode(val);
            }
        },
        .defer_stmt => |ds| {
            try self.analyzeNode(ds.expr);
        },
        .comptime_expr => |ct| {
            try self.analyzeNode(ct.expr);
        },
        .insert_expr => |ins| {
            try self.analyzeNode(ins.expr);
        },
        .lambda => |lam| {
            try self.pushScope();
            for (lam.params) |param| {
                const param_type = Type.fromTypeExpr(param.type_expr);
                try self.addSymbol(param.name, .param, param_type, param.name_span);
            }
            try self.analyzeNode(lam.body);
            self.popScope();
        },
        .struct_literal => |sl| {
            if (sl.type_expr) |te| try self.analyzeNode(te);
            for (sl.field_inits) |fi| {
                try self.analyzeNode(fi.value);
            }
        },
        .union_literal => |ul| {
            if (ul.payload) |p| {
                try self.analyzeNode(p);
            }
        },
        .deref_expr => |de| {
            try self.analyzeNode(de.operand);
        },
        .namespace_decl => |ns| {
            for (ns.decls) |d| {
                try self.analyzeNode(d);
            }
        },
        // Treated as leaves: nothing is recursed into.
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .enum_literal,
        .type_expr,
        .param,
        .match_arm,
        .undef_literal,
        .builtin_expr,
        .import_decl,
        .array_type_expr,
        .slice_type_expr,
        .pointer_type_expr,
        .many_pointer_type_expr,
        .null_literal,
        .array_literal,
        .parameterized_type_expr,
        .index_expr,
        .slice_expr,
        => {},
        .root => {
            // Should not appear nested.
        },
    }

    // Record the inferred type of expression nodes.
    switch (node.data) {
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .identifier,
        .binary_op,
        .chained_comparison,
        .unary_op,
        .call,
        .field_access,
        .if_expr,
        .match_expr,
        .block,
        .comptime_expr,
        .enum_literal,
        .struct_literal,
        .union_literal,
        .array_literal,
        .index_expr,
        .slice_expr,
        .deref_expr,
        .null_literal,
        .type_expr,
        .insert_expr,
        .while_expr,
        .for_expr,
        .spread_expr,
        .break_expr,
        .continue_expr,
        => {
            const ty = self.inferExprType(node);
            // Propagate allocation failure instead of silently leaving the
            // node without a type.
            try self.type_map.put(node, ty);
        },
        else => {},
    }
}
|
|
|
|
/// Type of a function's return annotation, or `null` when there is no
/// annotation or `Type.fromTypeExpr` cannot resolve it.
fn resolveReturnType(fd: ast.FnDecl) ?Type {
    const annotation = fd.return_type orelse return null;
    return Type.fromTypeExpr(annotation);
}
|
|
|
|
/// Type of an optional annotation node, or `null` when the node is absent or
/// `Type.fromTypeExpr` cannot resolve it.
fn resolveTypeAnnotation(type_node: ?*Node) ?Type {
    const annotation = type_node orelse return null;
    return Type.fromTypeExpr(annotation);
}
|
|
|
|
/// Type of a constant's value when it is a plain literal; `null` for anything
/// else (type expressions, lambdas, comptime and insert expressions included).
// NOTE(review): float literals map to `.f64` here but to `.f32` in
// `inferExprType` — confirm which one is intended.
fn inferValueType(value: *Node) ?Type {
    return switch (value.data) {
        .int_literal => Type.s(64),
        .float_literal => .f64,
        .bool_literal => .boolean,
        .string_literal => .string_type,
        else => null,
    };
}
|
|
|
|
/// Symbol kind for a constant declaration, decided by its value: a type
/// expression makes a type alias, a lambda makes a function, anything else is
/// a plain constant.
fn classifyConstDecl(cd: ast.ConstDecl) SymbolKind {
    const value = cd.value.data;
    if (value == .type_expr) return .type_alias;
    if (value == .lambda) return .function;
    return .constant;
}
|
|
};
|
|
|
|
/// Convenience: analyze an already-parsed `root` with a fresh `Analyzer`.
///
/// No parsing happens here; the caller supplies the AST. The returned slices
/// and maps were allocated with `allocator` and belong to the caller.
pub fn analyzeSource(allocator: std.mem.Allocator, root: *Node) !SemaResult {
    var analyzer = Analyzer.init(allocator);
    // The scope stack is internal bookkeeping and not part of the result, so
    // its buffer is released here instead of leaking with the local analyzer.
    defer analyzer.scope_starts.deinit(allocator);
    return analyzer.analyze(root);
}
|
|
|
|
/// Index of the first symbol whose definition span contains `offset`
/// (start inclusive, end exclusive), or `null` when there is none.
pub fn findSymbolAtOffset(symbols: []const Symbol, offset: u32) ?usize {
    return for (symbols, 0..) |sym, idx| {
        const span = sym.def_span;
        if (offset < span.start or offset >= span.end) continue;
        break idx;
    } else null;
}
|
|
|
|
/// Index of the first reference whose span contains `offset`
/// (start inclusive, end exclusive), or `null` when there is none.
pub fn findReferenceAtOffset(references: []const Reference, offset: u32) ?usize {
    return for (references, 0..) |reference, idx| {
        const span = reference.span;
        if (offset < span.start or offset >= span.end) continue;
        break idx;
    } else null;
}
|
|
|
|
/// Find the innermost node whose span contains `offset` (start inclusive,
/// end exclusive).
///
/// Returns `null` when `node` itself does not contain the offset. Otherwise
/// the children this function knows about are probed in order, and `node` is
/// returned when none of them contains the offset.
pub fn findNodeAtOffset(node: *Node, offset: u32) ?*Node {
    const inside = offset >= node.span.start and offset < node.span.end;
    if (!inside) return null;

    switch (node.data) {
        // Containers of declarations or statements.
        .root => |r| {
            for (r.decls) |child| {
                if (findNodeAtOffset(child, offset)) |hit| return hit;
            }
        },
        .namespace_decl => |ns| {
            for (ns.decls) |child| {
                if (findNodeAtOffset(child, offset)) |hit| return hit;
            }
        },
        .block => |blk| {
            for (blk.stmts) |child| {
                if (findNodeAtOffset(child, offset)) |hit| return hit;
            }
        },

        // Declarations.
        .fn_decl => |fd| {
            if (fd.return_type) |rt| {
                if (findNodeAtOffset(rt, offset)) |hit| return hit;
            }
            if (findNodeAtOffset(fd.body, offset)) |hit| return hit;
        },
        .const_decl => |cd| {
            if (cd.type_annotation) |annotation| {
                if (findNodeAtOffset(annotation, offset)) |hit| return hit;
            }
            if (findNodeAtOffset(cd.value, offset)) |hit| return hit;
        },
        .var_decl => |vd| {
            if (vd.type_annotation) |annotation| {
                if (findNodeAtOffset(annotation, offset)) |hit| return hit;
            }
            if (vd.value) |initial| {
                if (findNodeAtOffset(initial, offset)) |hit| return hit;
            }
        },

        // Expressions with a single child.
        .unary_op => |uop| {
            if (findNodeAtOffset(uop.operand, offset)) |hit| return hit;
        },
        .field_access => |fa| {
            if (findNodeAtOffset(fa.object, offset)) |hit| return hit;
        },
        .spread_expr => |se| {
            if (findNodeAtOffset(se.operand, offset)) |hit| return hit;
        },
        .deref_expr => |de| {
            if (findNodeAtOffset(de.operand, offset)) |hit| return hit;
        },
        .defer_stmt => |ds| {
            if (findNodeAtOffset(ds.expr, offset)) |hit| return hit;
        },
        .comptime_expr => |ct| {
            if (findNodeAtOffset(ct.expr, offset)) |hit| return hit;
        },
        .insert_expr => |ins| {
            if (findNodeAtOffset(ins.expr, offset)) |hit| return hit;
        },
        .lambda => |lam| {
            if (findNodeAtOffset(lam.body, offset)) |hit| return hit;
        },
        .return_stmt => |ret| {
            if (ret.value) |val| {
                if (findNodeAtOffset(val, offset)) |hit| return hit;
            }
        },
        .union_literal => |ul| {
            if (ul.payload) |payload| {
                if (findNodeAtOffset(payload, offset)) |hit| return hit;
            }
        },

        // Expressions with several children, probed in source order of the
        // original implementation.
        .binary_op => |bop| {
            if (findNodeAtOffset(bop.lhs, offset)) |hit| return hit;
            if (findNodeAtOffset(bop.rhs, offset)) |hit| return hit;
        },
        .chained_comparison => |cc| {
            for (cc.operands) |operand| {
                if (findNodeAtOffset(operand, offset)) |hit| return hit;
            }
        },
        .call => |call| {
            if (findNodeAtOffset(call.callee, offset)) |hit| return hit;
            for (call.args) |arg| {
                if (findNodeAtOffset(arg, offset)) |hit| return hit;
            }
        },
        .if_expr => |ie| {
            if (findNodeAtOffset(ie.condition, offset)) |hit| return hit;
            if (findNodeAtOffset(ie.then_branch, offset)) |hit| return hit;
            if (ie.else_branch) |otherwise| {
                if (findNodeAtOffset(otherwise, offset)) |hit| return hit;
            }
        },
        .match_expr => |me| {
            if (findNodeAtOffset(me.subject, offset)) |hit| return hit;
            for (me.arms) |arm| {
                // Body is probed before the pattern.
                if (findNodeAtOffset(arm.body, offset)) |hit| return hit;
                if (arm.pattern) |pattern| {
                    if (findNodeAtOffset(pattern, offset)) |hit| return hit;
                }
            }
        },
        .while_expr => |we| {
            if (findNodeAtOffset(we.condition, offset)) |hit| return hit;
            if (findNodeAtOffset(we.body, offset)) |hit| return hit;
        },
        .for_expr => |fe| {
            if (findNodeAtOffset(fe.iterable, offset)) |hit| return hit;
            if (findNodeAtOffset(fe.body, offset)) |hit| return hit;
        },
        .assignment => |asgn| {
            if (findNodeAtOffset(asgn.target, offset)) |hit| return hit;
            if (findNodeAtOffset(asgn.value, offset)) |hit| return hit;
        },
        .struct_literal => |sl| {
            for (sl.field_inits) |field_init| {
                if (findNodeAtOffset(field_init.value, offset)) |hit| return hit;
            }
        },

        // Treated as leaves: no children are probed.
        .break_expr,
        .continue_expr,
        .identifier,
        .int_literal,
        .float_literal,
        .bool_literal,
        .string_literal,
        .enum_literal,
        .type_expr,
        .param,
        .match_arm,
        .undef_literal,
        .builtin_expr,
        .enum_decl,
        .struct_decl,
        .union_decl,
        .import_decl,
        .array_type_expr,
        .slice_type_expr,
        .pointer_type_expr,
        .many_pointer_type_expr,
        .null_literal,
        .array_literal,
        .parameterized_type_expr,
        .index_expr,
        .slice_expr,
        => {},
    }

    return node;
}
|
|
|
|
test "sema: collect top-level declarations" {
    const parser_mod = @import("parser.zig");

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const gpa = arena_state.allocator();

    var parser = parser_mod.Parser.init(gpa, "main :: () { 42; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(gpa);
    const result = try sema.analyze(tree);

    // `main` is the only declaration, so it must be the only symbol.
    try std.testing.expectEqual(@as(usize, 1), result.symbols.len);
    const main_symbol = result.symbols[0];
    try std.testing.expectEqualStrings("main", main_symbol.name);
    try std.testing.expectEqual(SymbolKind.function, main_symbol.kind);
}
|
|
|
|
test "sema: function params as symbols" {
    const parser_mod = @import("parser.zig");

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const gpa = arena_state.allocator();

    var parser = parser_mod.Parser.init(gpa, "add :: (a: s32, b: s32) -> s32 { a + b; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(gpa);
    const result = try sema.analyze(tree);

    // Expected table, in order: the function itself, then its two parameters.
    try std.testing.expectEqual(@as(usize, 3), result.symbols.len);

    try std.testing.expectEqualStrings("add", result.symbols[0].name);
    try std.testing.expectEqual(SymbolKind.function, result.symbols[0].kind);

    try std.testing.expectEqualStrings("a", result.symbols[1].name);
    try std.testing.expectEqual(SymbolKind.param, result.symbols[1].kind);

    try std.testing.expectEqualStrings("b", result.symbols[2].name);
    try std.testing.expectEqual(SymbolKind.param, result.symbols[2].kind);

    // Both parameters are used in the body, so each must have been resolved.
    try std.testing.expect(result.references.len >= 2);
}
|
|
|
|
test "sema: variable declaration and reference" {
    const parser_mod = @import("parser.zig");

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const gpa = arena_state.allocator();

    var parser = parser_mod.Parser.init(gpa, "main :: () { x := 42; x; }");
    const tree = try parser.parse();

    var sema = Analyzer.init(gpa);
    const result = try sema.analyze(tree);

    // Expected table, in order: `main`, then the local `x`.
    try std.testing.expectEqual(@as(usize, 2), result.symbols.len);
    try std.testing.expectEqualStrings("main", result.symbols[0].name);
    try std.testing.expectEqualStrings("x", result.symbols[1].name);
    try std.testing.expectEqual(SymbolKind.variable, result.symbols[1].kind);

    // The use of `x` must resolve to symbol index 1.
    try std.testing.expect(result.references.len >= 1);
    try std.testing.expectEqual(@as(u32, 1), result.references[0].symbol_index);
}
|
|
|
|
test "sema: undefined variable diagnostic" {
    // Using the undeclared name `y` must produce at least one diagnostic,
    // and the first one must carry the message "undefined variable".
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var src_parser = @import("parser.zig").Parser.init(arena_alloc, "main :: () { y; }");
    const tree = try src_parser.parse();

    var sema_pass = Analyzer.init(arena_alloc);
    const analysis = try sema_pass.analyze(tree);

    const diags = analysis.diagnostics;
    try std.testing.expect(diags.len > 0);
    try std.testing.expectEqualStrings("undefined variable", diags[0].message);
}
|
|
|
|
test "sema: enum and struct declarations" {
    // Top-level enum, struct and function declarations are each expected to
    // produce one symbol, in source order.
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var src_parser = @import("parser.zig").Parser.init(
        arena_alloc,
        "Color :: enum { red; green; blue; } Vec2 :: struct { x, y: f32; } main :: () { 0; }",
    );
    const tree = try src_parser.parse();

    var sema_pass = Analyzer.init(arena_alloc);
    const analysis = try sema_pass.analyze(tree);

    // Symbol table: Color (enum), Vec2 (struct), main (function).
    const syms = analysis.symbols;
    try std.testing.expectEqual(@as(usize, 3), syms.len);

    const color_sym = syms[0];
    try std.testing.expectEqualStrings("Color", color_sym.name);
    try std.testing.expectEqual(SymbolKind.enum_type, color_sym.kind);

    const vec2_sym = syms[1];
    try std.testing.expectEqualStrings("Vec2", vec2_sym.name);
    try std.testing.expectEqual(SymbolKind.struct_type, vec2_sym.kind);

    // Only the name of the trailing function symbol is checked here.
    try std.testing.expectEqualStrings("main", syms[2].name);
}
|
|
|
|
test "sema: var_decl infers struct type from parameterized struct literal" {
    // A variable initialised from a struct literal must be given the struct
    // type named by that literal.
    // NOTE(review): despite "parameterized" in the test name, the literal
    // used here is the bare `List.{}` form with no type arguments.
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var src_parser = @import("parser.zig").Parser.init(
        arena_alloc,
        "List :: struct { len: s64; } main :: () { list := List.{}; }",
    );
    const tree = try src_parser.parse();

    var sema_pass = Analyzer.init(arena_alloc);
    const analysis = try sema_pass.analyze(tree);

    // Locate the first symbol named "list"; the test fails if none exists.
    const list_sym = for (analysis.symbols) |candidate| {
        if (std.mem.eql(u8, candidate.name, "list")) break candidate;
    } else return error.TestUnexpectedResult;

    try std.testing.expectEqual(SymbolKind.variable, list_sym.kind);

    // The symbol must carry an inferred type, and that type must be the
    // struct type `List`.
    const inferred = list_sym.ty orelse return error.TestUnexpectedResult;
    try std.testing.expect(inferred == .struct_type);
    try std.testing.expectEqualStrings("List", inferred.struct_type);
}
|
|
|
|
test "sema: var_decl infers struct type from parameterized call literal" {
    // List(s32).{} — parser produces struct_literal with type_expr = call node.
    // The declared variable must still be typed as the struct `List`.
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena_alloc = arena_state.allocator();

    var src_parser = @import("parser.zig").Parser.init(
        arena_alloc,
        "List :: struct { len: s64; } main :: () { list := List(s32).{}; }",
    );
    const tree = try src_parser.parse();

    var sema_pass = Analyzer.init(arena_alloc);
    const analysis = try sema_pass.analyze(tree);

    // Locate the first symbol named "list"; the test fails if none exists.
    const list_sym = for (analysis.symbols) |candidate| {
        if (std.mem.eql(u8, candidate.name, "list")) break candidate;
    } else return error.TestUnexpectedResult;

    try std.testing.expectEqual(SymbolKind.variable, list_sym.kind);

    // A missing type or a non-struct type both count as failures.
    const inferred = list_sym.ty orelse return error.TestUnexpectedResult;
    try std.testing.expect(inferred == .struct_type);
    try std.testing.expectEqualStrings("List", inferred.struct_type);
}
|