From a3be9cce7c73a4c567df70933230c45fd5376424 Mon Sep 17 00:00:00 2001 From: agra Date: Sun, 15 Feb 2026 17:26:15 +0200 Subject: [PATCH] ... --- src/codegen.zig | 211 ++++++++++++++++++++++++++++++------------------ src/parser.zig | 113 +++++++++++--------------- src/sema.zig | 97 ++++++++++++++-------- src/types.zig | 195 +++++++++++++++++++++++++------------------- 4 files changed, 356 insertions(+), 260 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index faa3754..93fe37d 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -117,6 +117,8 @@ pub const CodeGen = struct { tagged_enum_types: std.StringHashMap(TaggedEnumInfo), // Union registry: maps name to field info + LLVM type (untagged, C-style) union_types: std.StringHashMap(UnionInfo), + // Unified type registry: single lookup for all named types + type_registry: std.StringHashMap(TypeRegistryEntry), // Flags enum registry: tracks which enum names are flags flags_enum_types: std.StringHashMap(void), // Enum variant values: maps enum name → resolved i64 values per variant @@ -271,6 +273,14 @@ pub const CodeGen = struct { promoted_fields: std.StringHashMap(PromotedField), }; + const TypeRegistryEntry = union(enum) { + struct_info: StructInfo, + tagged_enum: TaggedEnumInfo, + union_info: UnionInfo, + plain_enum: []const []const u8, + alias: []const u8, + }; + // Scope stack entry: records what a name mapped to before being shadowed const ScopeEntry = struct { name: []const u8, @@ -326,6 +336,7 @@ pub const CodeGen = struct { .struct_types = std.StringHashMap(StructInfo).init(allocator), .tagged_enum_types = std.StringHashMap(TaggedEnumInfo).init(allocator), .union_types = std.StringHashMap(UnionInfo).init(allocator), + .type_registry = std.StringHashMap(TypeRegistryEntry).init(allocator), .flags_enum_types = std.StringHashMap(void).init(allocator), .enum_variant_values = std.StringHashMap([]const i64).init(allocator), .enum_backing_types = std.StringHashMap(c.LLVMTypeRef).init(allocator), @@ -361,6 +372,7 @@ pub const CodeGen = struct { self.struct_types.deinit(); self.tagged_enum_types.deinit(); self.union_types.deinit(); + self.type_registry.deinit(); self.comptime_globals.deinit(); self.enum_backing_types.deinit(); self.generic_templates.deinit(); @@ -422,9 +434,21 @@ pub const CodeGen = struct { return c.LLVMBuildAlloca(tmp_builder, ty, name); } + /// Convert a Zig slice to a null-terminated C string using a caller-provided stack buffer. + /// Returns the stack-based result when it fits, or falls back to allocator.dupeZ. + fn nameToCStr(self: *CodeGen, name: []const u8, buf: *[256]u8) [*:0]const u8 { + if (name.len < 256) { + @memcpy(buf[0..name.len], name); + buf[name.len] = 0; + return @ptrCast(buf[0..name.len :0]); + } + const duped = self.allocator.dupeZ(u8, name) catch unreachable; + return duped.ptr; + } + fn buildNamedAlloca(self: *CodeGen, ty: c.LLVMTypeRef, name: []const u8) !c.LLVMValueRef { - const name_z = try self.allocator.dupeZ(u8, name); - return self.buildEntryBlockAlloca(ty, name_z.ptr); + var buf: [256]u8 = undefined; + return self.buildEntryBlockAlloca(ty, self.nameToCStr(name, &buf)); } pub fn typeToLLVM(self: *CodeGen, ty: Type) c.LLVMTypeRef { @@ -860,8 +884,12 @@ pub const CodeGen = struct { } fn pushScope(self: *CodeGen) !void { - try self.scope_saves.append(self.allocator, std.ArrayList(ScopeEntry).empty); - try self.defer_stack.append(self.allocator, std.ArrayList(*Node).empty); + var saves = std.ArrayList(ScopeEntry).empty; + try saves.ensureTotalCapacity(self.allocator, 8); + try self.scope_saves.append(self.allocator, saves); + var defers = std.ArrayList(*Node).empty; + try defers.ensureTotalCapacity(self.allocator, 4); + try self.defer_stack.append(self.allocator, defers); } fn popScope(self: *CodeGen) !void { @@ -955,6 +983,7 @@ pub const CodeGen = struct { } else { // Payload-less enum try self.enum_types.put(ed.name, ed.variant_names); + try self.type_registry.put(ed.name, .{ .plain_enum = ed.variant_names }); _ = try self.getAnyTypeId(ed.name, .{ .enum_type = ed.name }); if (ed.is_flags) { @@ -997,6 +1026,7 @@ pub const CodeGen = struct { try self.registerLambdaAsFunction(cd.name, cd.value.data.lambda); } else if (cd.value.data == .type_expr) { try self.type_aliases.put(cd.name, cd.value.data.type_expr.name); + try self.type_registry.put(cd.name, .{ .alias = cd.value.data.type_expr.name }); } else if (cd.value.data == .call) { // Check if this is a generic struct or type function instantiation const callee_name = if (cd.value.data.call.callee.data == .identifier) @@ -1009,20 +1039,24 @@ pub const CodeGen = struct { const result_ty = try self.instantiateGenericStruct(cn, tmpl, cd.value.data.call.args); if (result_ty.isStruct()) { try self.type_aliases.put(cd.name, result_ty.struct_type); + try self.type_registry.put(cd.name, .{ .alias = result_ty.struct_type }); } } else if (self.generic_templates.get(cn)) |tmpl| { // Type-returning function: Foo :: Complex(u32); const result_ty = try self.instantiateTypeFunction(cd.name, cn, tmpl, cd.value.data.call.args); if (result_ty.isStruct()) { try self.type_aliases.put(cd.name, result_ty.struct_type); + try self.type_registry.put(cd.name, .{ .alias = result_ty.struct_type }); } else if (result_ty.isUnion()) { try self.type_aliases.put(cd.name, result_ty.union_type); + try self.type_registry.put(cd.name, .{ .alias = result_ty.union_type }); } } else if (self.builtin_functions.contains(cn)) { // Builtin type function (e.g., Vector(4, f32), Array(5, s32)) if (self.resolveBuiltinType(cn, cd.value.data.call.args)) |result_ty| { const display = try result_ty.displayName(self.allocator); try self.type_aliases.put(cd.name, display); + try self.type_registry.put(cd.name, .{ .alias = display }); } else { try self.registerTopLevelConstant(cd); } @@ -1054,26 +1088,6 @@ pub const CodeGen = struct { } } - // Pre-register all known types for Any type ID assignment - { - var it = self.struct_types.iterator(); - while (it.next()) |entry| { - _ = try self.getAnyTypeId(entry.key_ptr.*, .{ .struct_type = entry.key_ptr.* }); - } - } - { - var it = self.enum_types.iterator(); - while (it.next()) |entry| { - _ = try self.getAnyTypeId(entry.key_ptr.*, .{ .enum_type = entry.key_ptr.* }); - } - } - { - var it = self.tagged_enum_types.iterator(); - while (it.next()) |entry| { - _ = try self.getAnyTypeId(entry.key_ptr.*, .{ .union_type = entry.key_ptr.* }); - } - } - // Pass 2: Generate all function bodies // Functions with Any parameters (like any_to_string) are deferred to Pass 3 // so that all types are registered before their type-match expressions are compiled. @@ -1311,30 +1325,42 @@ pub const CodeGen = struct { if (self.type_param_bindings) |bindings| { if (bindings.get(name)) |t| return t; } - // Check type aliases - if (self.type_aliases.get(name)) |target| { - if (Type.fromName(target)) |t| return t; - if (self.struct_types.contains(target)) return .{ .struct_type = target }; - if (self.tagged_enum_types.contains(target)) return .{ .union_type = target }; - if (self.union_types.contains(target)) return .{ .union_type = target }; + // Unified type registry lookup + if (self.type_registry.get(name)) |entry| { + switch (entry) { + .struct_info => return .{ .struct_type = name }, + .tagged_enum => return .{ .union_type = name }, + .union_info => return .{ .union_type = name }, + .plain_enum => return .{ .enum_type = name }, + .alias => |target| { + if (Type.fromName(target)) |t| return t; + if (self.type_registry.get(target)) |inner| { + switch (inner) { + .struct_info => return .{ .struct_type = target }, + .tagged_enum => return .{ .union_type = target }, + .union_info => return .{ .union_type = target }, + .plain_enum => return .{ .enum_type = target }, + .alias => {}, + } + } + }, + } } - // Check enum types - if (self.enum_types.contains(name)) return .{ .enum_type = name }; - // Check struct types - if (self.struct_types.contains(name)) return .{ .struct_type = name }; - // Check union types (tagged enums and C-style unions) - if (self.tagged_enum_types.contains(name)) return .{ .union_type = name }; - if (self.union_types.contains(name)) return .{ .union_type = name }; } // Safety net: inline declarations that should have been hoisted if (tn.data == .struct_decl) { const sn = tn.data.struct_decl.name; - if (self.struct_types.contains(sn)) return .{ .struct_type = sn }; + if (self.type_registry.get(sn)) |e| { + if (e == .struct_info) return .{ .struct_type = sn }; + } } if (tn.data == .enum_decl) { const en = tn.data.enum_decl.name; - if (self.tagged_enum_types.contains(en)) return .{ .union_type = en }; - if (self.enum_types.contains(en)) return .{ .enum_type = en }; + if (self.type_registry.get(en)) |e| switch (e) { + .tagged_enum => return .{ .union_type = en }, + .plain_enum => return .{ .enum_type = en }, + else => {}, + }; } return .void_type; } @@ -1432,7 +1458,7 @@ pub const CodeGen = struct { } } - try self.struct_types.put(mangled_name, .{ + const si = StructInfo{ .field_names = sd.field_names, .field_types = build.field_sx_types, .field_defaults = resolved_defaults, @@ -1441,7 +1467,9 @@ pub const CodeGen = struct { .type_param_names = try tp_names.toOwnedSlice(self.allocator), .type_param_types = try tp_types.toOwnedSlice(self.allocator), .template_name = template_name, - }); + }; + try self.struct_types.put(mangled_name, si); + try self.type_registry.put(mangled_name, .{ .struct_info = si }); _ = try self.getAnyTypeId(mangled_name, .{ .struct_type = mangled_name }); return .{ .struct_type = mangled_name }; @@ -1492,13 +1520,15 @@ pub const CodeGen = struct { const resolved_defaults = try self.allocator.dupe(?*Node, struct_decl.field_defaults); const display_name = try self.allocator.dupe(u8, alias_name); - try self.struct_types.put(mangled_name, .{ + const si2 = StructInfo{ .field_names = struct_decl.field_names, .field_types = build.field_sx_types, .field_defaults = resolved_defaults, .llvm_type = build.llvm_type, .display_name = display_name, - }); + }; + try self.struct_types.put(mangled_name, si2); + try self.type_registry.put(mangled_name, .{ .struct_info = si2 }); _ = try self.getAnyTypeId(mangled_name, .{ .struct_type = mangled_name }); return .{ .struct_type = mangled_name }; @@ -1507,13 +1537,15 @@ pub const CodeGen = struct { fn registerInstantiatedTaggedEnum(self: *CodeGen, mangled_name: []const u8, union_decl: ast.EnumDecl) !Type { const build = try self.buildUnionFields(mangled_name, union_decl.variant_types); - try self.tagged_enum_types.put(mangled_name, .{ + const tei = TaggedEnumInfo{ .variant_names = union_decl.variant_names, .variant_types = build.variant_sx_types, .llvm_type = build.llvm_type, .max_payload_size = build.max_payload_size, .payload_field_index = build.payload_field_index, - }); + }; + try self.tagged_enum_types.put(mangled_name, tei); + try self.type_registry.put(mangled_name, .{ .tagged_enum = tei }); _ = try self.getAnyTypeId(mangled_name, .{ .union_type = mangled_name }); return .{ .union_type = mangled_name }; @@ -1868,9 +1900,12 @@ pub const CodeGen = struct { try self.registerTaggedEnum(ed); const qualified_u = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, ed.name }); try self.type_aliases.put(qualified_u, ed.name); + try self.type_registry.put(qualified_u, .{ .alias = ed.name }); } else { const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, ed.name }); try self.enum_types.put(qualified, ed.variant_names); + try self.type_registry.put(qualified, .{ .plain_enum = ed.variant_names }); + _ = try self.getAnyTypeId(qualified, .{ .enum_type = qualified }); if (ed.backing_type) |bt_node| { const bt = self.resolveType(bt_node); try self.enum_backing_types.put(qualified, self.typeToLLVM(bt)); @@ -1882,11 +1917,13 @@ pub const CodeGen = struct { // Register qualified alias so rl.Color resolves to Color const qualified_s = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, sd.name }); try self.type_aliases.put(qualified_s, sd.name); + try self.type_registry.put(qualified_s, .{ .alias = sd.name }); }, .union_decl => |ud| { try self.registerUnionType(ud); const qualified_u = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, ud.name }); try self.type_aliases.put(qualified_u, ud.name); + try self.type_registry.put(qualified_u, .{ .alias = ud.name }); }, .const_decl => |cd| { if (cd.value.data == .builtin_expr) { @@ -1897,6 +1934,7 @@ pub const CodeGen = struct { } else if (cd.value.data == .type_expr) { const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns.name, cd.name }); try self.type_aliases.put(qualified, cd.value.data.type_expr.name); + try self.type_registry.put(qualified, .{ .alias = cd.value.data.type_expr.name }); } }, .library_decl => |ld| { @@ -1948,8 +1986,8 @@ pub const CodeGen = struct { // For function calls, look up the registered function's return type if (expr.data == .call) { if (self.resolveCalleeName(expr.data.call)) |callee_name| { - const callee_name_z = self.allocator.dupeZ(u8, callee_name) catch return Type.s(64); - const callee_fn = c.LLVMGetNamedFunction(self.module, callee_name_z.ptr) orelse return Type.s(64); + var cnbuf: [256]u8 = undefined; + const callee_fn = c.LLVMGetNamedFunction(self.module, self.nameToCStr(callee_name, &cnbuf)) orelse return Type.s(64); const fn_type = c.LLVMGlobalGetValueType(callee_fn); const ret_llvm = c.LLVMGetReturnType(fn_type); return self.llvmTypeToSxType(ret_llvm); @@ -2947,14 +2985,14 @@ pub const CodeGen = struct { } // Fall back to function name → function pointer value { - const name_z = try self.allocator.dupeZ(u8, ident.name); - var fn_val = c.LLVMGetNamedFunction(self.module, name_z.ptr); + var nbuf: [256]u8 = undefined; + var fn_val = c.LLVMGetNamedFunction(self.module, self.nameToCStr(ident.name, &nbuf)); if (fn_val == null) { // Try qualified name with current namespace if (self.current_namespace) |ns| { const qualified = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns, ident.name }); - const q_z = try self.allocator.dupeZ(u8, qualified); - fn_val = c.LLVMGetNamedFunction(self.module, q_z.ptr); + var qbuf: [256]u8 = undefined; + fn_val = c.LLVMGetNamedFunction(self.module, self.nameToCStr(qualified, &qbuf)); } } if (fn_val != null) return fn_val.?; @@ -3301,6 +3339,7 @@ pub const CodeGen = struct { try self.registerTaggedEnum(hoisted); } else { try self.enum_types.put(synthetic_name, inline_ed.variant_names); + try self.type_registry.put(synthetic_name, .{ .plain_enum = inline_ed.variant_names }); _ = try self.getAnyTypeId(synthetic_name, .{ .enum_type = synthetic_name }); if (inline_ed.backing_type) |bt_node| { const bt = self.resolveType(bt_node); @@ -3342,12 +3381,14 @@ pub const CodeGen = struct { } } - try self.struct_types.put(sd.name, .{ + const sinfo = StructInfo{ .field_names = sd.field_names, .field_types = build.field_sx_types, .field_defaults = resolved_defaults, .llvm_type = build.llvm_type, - }); + }; + try self.struct_types.put(sd.name, sinfo); + try self.type_registry.put(sd.name, .{ .struct_info = sinfo }); _ = try self.getAnyTypeId(sd.name, .{ .struct_type = sd.name }); } @@ -3376,13 +3417,15 @@ pub const CodeGen = struct { } } - try self.tagged_enum_types.put(ud.name, .{ + const tei_layout = TaggedEnumInfo{ .variant_names = ud.variant_names, .variant_types = try variant_sx_types.toOwnedSlice(self.allocator), .llvm_type = layout.llvm_type, .max_payload_size = layout.payload_size, .payload_field_index = layout.payload_field_index, - }); + }; + try self.tagged_enum_types.put(ud.name, tei_layout); + try self.type_registry.put(ud.name, .{ .tagged_enum = tei_layout }); } else { // Primitive backing type (e.g. enum u32 { ... }) if (ud.backing_type) |bt_node| { @@ -3392,13 +3435,15 @@ pub const CodeGen = struct { const build = try self.buildUnionFields(ud.name, ud.variant_types); - try self.tagged_enum_types.put(ud.name, .{ + const tei_build = TaggedEnumInfo{ .variant_names = ud.variant_names, .variant_types = build.variant_sx_types, .llvm_type = build.llvm_type, .max_payload_size = build.max_payload_size, .payload_field_index = build.payload_field_index, - }); + }; + try self.tagged_enum_types.put(ud.name, tei_build); + try self.type_registry.put(ud.name, .{ .tagged_enum = tei_build }); } _ = try self.getAnyTypeId(ud.name, .{ .union_type = ud.name }); @@ -3575,13 +3620,15 @@ pub const CodeGen = struct { } } - try self.union_types.put(ud.name, .{ + const uinfo = UnionInfo{ .field_names = ud.field_names, .field_types = resolved_field_types, .llvm_type = llvm_type, .total_size = max_size, .promoted_fields = promoted, - }); + }; + try self.union_types.put(ud.name, uinfo); + try self.type_registry.put(ud.name, .{ .union_info = uinfo }); // Note: C-style unions are not registered with the Any type system. // They can't be meaningfully printed as a whole — access individual fields instead. } @@ -5112,22 +5159,22 @@ pub const CodeGen = struct { return self.genMemcpy(call_node.args); } - const name_z = try self.allocator.dupeZ(u8, callee_name); - var callee_fn = c.LLVMGetNamedFunction(self.module, name_z.ptr); + var nbuf: [256]u8 = undefined; + var callee_fn = c.LLVMGetNamedFunction(self.module, self.nameToCStr(callee_name, &nbuf)); // Foreign function fallback: qualified name "ns.Func" → try unqualified "Func" (the C symbol) if (callee_fn == null) { if (std.mem.lastIndexOfScalar(u8, callee_name, '.')) |dot_idx| { const base_name = callee_name[dot_idx + 1 ..]; - const base_z = try self.allocator.dupeZ(u8, base_name); - callee_fn = c.LLVMGetNamedFunction(self.module, base_z.ptr); + var bbuf: [256]u8 = undefined; + callee_fn = c.LLVMGetNamedFunction(self.module, self.nameToCStr(base_name, &bbuf)); } } // Intra-namespace fallback: try qualified name if (callee_fn == null) { if (self.current_namespace) |ns| { const qualified2 = try std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns, callee_name }); - const qualified_z = try self.allocator.dupeZ(u8, qualified2); - callee_fn = c.LLVMGetNamedFunction(self.module, qualified_z.ptr); + var qbuf: [256]u8 = undefined; + callee_fn = c.LLVMGetNamedFunction(self.module, self.nameToCStr(qualified2, &qbuf)); } } // Function pointer indirect call: callee is a variable with function_type @@ -6677,13 +6724,23 @@ pub const CodeGen = struct { fn resolveTypeFromName(self: *CodeGen, name: []const u8) ?Type { // Primitives if (Type.fromName(name)) |t| return t; - // Structs - if (self.struct_types.contains(name)) return .{ .struct_type = name }; - // Unions (tagged enums and C-style) - if (self.tagged_enum_types.contains(name)) return .{ .union_type = name }; - if (self.union_types.contains(name)) return .{ .union_type = name }; - // Enums - if (self.enum_types.contains(name)) return .{ .enum_type = name }; + // Unified type registry lookup + if (self.type_registry.get(name)) |entry| switch (entry) { + .struct_info => return .{ .struct_type = name }, + .tagged_enum => return .{ .union_type = name }, + .union_info => return .{ .union_type = name }, + .plain_enum => return .{ .enum_type = name }, + .alias => |target| { + if (Type.fromName(target)) |t| return t; + if (self.type_registry.get(target)) |inner| switch (inner) { + .struct_info => return .{ .struct_type = target }, + .tagged_enum => return .{ .union_type = target }, + .union_info => return .{ .union_type = target }, + .plain_enum => return .{ .enum_type = target }, + .alias => {}, + }; + }, + }; // Vector display name: "Vector(N,T)" if (name.len > 8 and std.mem.startsWith(u8, name, "Vector(") and name[name.len - 1] == ')') { const inner = name[7 .. name.len - 1]; // "N,T" @@ -6847,14 +6904,14 @@ pub const CodeGen = struct { if (self.function_return_types.get(qualified)) |ret_ty| return ret_ty; } // Fallback: check non-generic LLVM functions - const callee_name_z = self.allocator.dupeZ(u8, callee_name) catch return Type.s(64); - var callee_fn_opt = c.LLVMGetNamedFunction(self.module, callee_name_z.ptr); + var cnbuf2: [256]u8 = undefined; + var callee_fn_opt = c.LLVMGetNamedFunction(self.module, self.nameToCStr(callee_name, &cnbuf2)); // Intra-namespace fallback if (callee_fn_opt == null) { if (self.current_namespace) |ns2| { const q = std.fmt.allocPrint(self.allocator, "{s}.{s}", .{ ns2, callee_name }) catch return Type.s(64); - const qz = self.allocator.dupeZ(u8, q) catch return Type.s(64); - callee_fn_opt = c.LLVMGetNamedFunction(self.module, qz.ptr); + var qbuf2: [256]u8 = undefined; + callee_fn_opt = c.LLVMGetNamedFunction(self.module, self.nameToCStr(q, &qbuf2)); } } if (callee_fn_opt) |callee_fn| { diff --git a/src/parser.zig b/src/parser.zig index 6074cca..9e34a8e 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -711,16 +711,11 @@ pub const Parser = struct { /// Collect generic type params and comptime value params from parameter annotations. fn collectTypeParams(self: *Parser, params: []const ast.Param) ![]const ast.StructTypeParam { var type_params = std.ArrayList(ast.StructTypeParam).empty; + var seen = std.StringHashMap(void).init(self.allocator); for (params) |param| { if (param.is_comptime) { - var found = false; - for (type_params.items) |existing| { - if (std.mem.eql(u8, existing.name, param.name)) { - found = true; - break; - } - } - if (!found) { + if (!seen.contains(param.name)) { + try seen.put(param.name, {}); try type_params.append(self.allocator, .{ .name = param.name, .constraint = param.type_expr }); } } else { @@ -728,14 +723,8 @@ pub const Parser = struct { var generic_names = std.ArrayList([]const u8).empty; collectGenericNames(param.type_expr, &generic_names, self.allocator); for (generic_names.items) |gen_name| { - var found = false; - for (type_params.items) |existing| { - if (std.mem.eql(u8, existing.name, gen_name)) { - found = true; - break; - } - } - if (!found) { + if (!seen.contains(gen_name)) { + try seen.put(gen_name, {}); const type_constraint = self.createNode(param.type_expr.span.start, .{ .type_expr = .{ .name = "Type" } }) catch continue; type_params.append(self.allocator, .{ .name = gen_name, .constraint = type_constraint }) catch {}; } @@ -1506,8 +1495,30 @@ pub const Parser = struct { return try self.createNode(start_pos, .{ .match_expr = .{ .subject = subject, .arms = try arms.toOwnedSlice(self.allocator) } }); } + /// Save state, skip past matching parens, return the tag of the next token, then restore. + /// Returns null if no matching ')' found before EOF. + fn peekPastParens(self: *Parser) ?Tag { + const saved_lexer = self.lexer; + const saved_current = self.current; + const saved_prev_end = self.prev_end; + defer { + self.lexer = saved_lexer; + self.current = saved_current; + self.prev_end = saved_prev_end; + } + self.advance(); // skip '(' + var depth: u32 = 1; + while (depth > 0 and self.current.tag != .eof) { + if (self.current.tag == .l_paren) depth += 1; + if (self.current.tag == .r_paren) depth -= 1; + if (depth > 0) self.advance(); + } + if (self.current.tag != .r_paren) return null; + self.advance(); // skip ')' + return self.current.tag; + } + fn isLambda(self: *Parser) bool { - // Peek ahead: save state, scan to matching ), check if => or -> ... => follows const saved_lexer = self.lexer; const saved_current = self.current; const saved_prev_end = self.prev_end; @@ -1517,32 +1528,23 @@ pub const Parser = struct { self.prev_end = saved_prev_end; } - self.advance(); // skip '(' - var depth: u32 = 1; - while (depth > 0 and self.current.tag != .eof) { - if (self.current.tag == .l_paren) depth += 1; - if (self.current.tag == .r_paren) depth -= 1; - if (depth > 0) self.advance(); - } - if (self.current.tag == .r_paren) { - self.advance(); // skip ')' - if (self.current.tag == .fat_arrow) return true; - // (params) -> ReturnType => expr - if (self.current.tag == .arrow) { - self.advance(); // skip '->' - // Skip past the return type tokens until we see '=>' or something unexpected - while (self.current.tag != .eof) { - if (self.current.tag == .fat_arrow) return true; - // Return type tokens: identifiers, dots, parens, type keywords, dollar, brackets - if (self.current.tag == .identifier or self.current.tag.isTypeKeyword() or - self.current.tag == .dot or self.current.tag == .dollar or - self.current.tag == .l_bracket or self.current.tag == .r_bracket or - self.current.tag == .l_paren or self.current.tag == .r_paren or - self.current.tag == .comma or self.current.tag == .int_literal) - { - self.advance(); - } else break; - } + // Use shared paren-scanning, then check for lambda patterns + const tag = self.peekPastParens() orelse return false; + if (tag == .fat_arrow) return true; + // (params) -> ReturnType => expr + if (tag == .arrow) { + self.advance(); // skip '->' + // Skip past the return type tokens until we see '=>' or something unexpected + while (self.current.tag != .eof) { + if (self.current.tag == .fat_arrow) return true; + if (self.current.tag == .identifier or self.current.tag.isTypeKeyword() or + self.current.tag == .dot or self.current.tag == .dollar or + self.current.tag == .l_bracket or self.current.tag == .r_bracket or + self.current.tag == .l_paren or self.current.tag == .r_paren or + self.current.tag == .comma or self.current.tag == .int_literal) + { + self.advance(); + } else break; } } return false; @@ -1573,29 +1575,8 @@ pub const Parser = struct { // ---- Helpers ---- fn isFunctionDef(self: *Parser) bool { - // Peek ahead: save state, scan to matching ), check what follows - const saved_lexer = self.lexer; - const saved_current = self.current; - const saved_prev_end = self.prev_end; - defer { - self.lexer = saved_lexer; - self.current = saved_current; - self.prev_end = saved_prev_end; - } - - self.advance(); // skip '(' - var depth: u32 = 1; - while (depth > 0 and self.current.tag != .eof) { - if (self.current.tag == .l_paren) depth += 1; - if (self.current.tag == .r_paren) depth -= 1; - if (depth > 0) self.advance(); - } - if (self.current.tag == .r_paren) { - self.advance(); // skip ')' - // Function if followed by '{', '->', '#builtin', '#foreign', or '=>' - return self.current.tag == .l_brace or self.current.tag == .arrow or self.current.tag == .hash_builtin or self.current.tag == .hash_foreign or self.current.tag == .fat_arrow; - } - return false; + const tag = self.peekPastParens() orelse return false; + return tag == .l_brace or tag == .arrow or tag == .hash_builtin or tag == .hash_foreign or tag == .fat_arrow; } fn isAssignOp(self: *const Parser) bool { diff --git a/src/sema.zig b/src/sema.zig index cca5f87..51235b1 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -68,6 +68,8 @@ pub const Analyzer = struct { scope_depth: u32, /// Stack of symbol counts at each scope entry, for popScope cleanup. scope_starts: std.ArrayList(u32), + /// Hash index: name → list of indices into symbols array for O(1) lookup + symbol_index: std.StringHashMap(std.ArrayList(u32)), // Type registries fn_signatures: std.StringHashMap(FnSignature), struct_types: std.StringHashMap(StructTypeInfo), @@ -83,6 +85,7 @@ pub const Analyzer = struct { .diagnostics = std.ArrayList(Diagnostic).empty, .scope_depth = 0, .scope_starts = std.ArrayList(u32).empty, + .symbol_index = std.StringHashMap(std.ArrayList(u32)).init(allocator), .fn_signatures = std.StringHashMap(FnSignature).init(allocator), .struct_types = std.StringHashMap(StructTypeInfo).init(allocator), .enum_types = std.StringHashMap([]const []const u8).init(allocator), @@ -315,13 +318,15 @@ pub const Analyzer = struct { }, .chained_comparison => .boolean, .identifier => |ident| { - // Search symbols backwards for matching name at or above current scope - var i = self.symbols.items.len; - while (i > 0) { - i -= 1; - const sym = self.symbols.items[i]; - if (sym.scope_depth <= self.scope_depth and std.mem.eql(u8, sym.name, ident.name)) { - return sym.ty orelse Type.s(64); + // Use symbol index for O(1) name lookup + if (self.symbol_index.get(ident.name)) |indices| { + var j = indices.items.len; + while (j > 0) { + j -= 1; + const sym = self.symbols.items[indices.items[j]]; + if (sym.scope_depth <= self.scope_depth) { + return sym.ty orelse Type.s(64); + } } } return Type.s(64); @@ -508,19 +513,24 @@ pub const Analyzer = struct { } fn addSymbol(self: *Analyzer, name: []const u8, kind: SymbolKind, ty: ?Type, span: Span) !void { - // Check for duplicate only within the current scope window. - const scope_start: usize = if (self.scope_starts.items.len > 0) - self.scope_starts.items[self.scope_starts.items.len - 1] - else - 0; - for (self.symbols.items[scope_start..]) |sym| { - if (sym.scope_depth == self.scope_depth and std.mem.eql(u8, sym.name, name)) { - try self.diagnostics.append(self.allocator, .{ - .level = .warn, - .span = span, - .message = "duplicate declaration", - }); - break; + // Check for duplicate using the symbol index + if (self.symbol_index.get(name)) |indices| { + const scope_start: u32 = if (self.scope_starts.items.len > 0) + self.scope_starts.items[self.scope_starts.items.len - 1] + else + 0; + for (indices.items) |idx| { + if (idx >= scope_start) { + const sym = self.symbols.items[idx]; + if (sym.scope_depth == self.scope_depth) { + try self.diagnostics.append(self.allocator, .{ + .level = .warn, + .span = span, + .message = "duplicate declaration", + }); + break; + } + } } } @@ -531,26 +541,42 @@ pub const Analyzer = struct { .def_span = span, .scope_depth = self.scope_depth, }); + // Update symbol index + const idx: u32 = @intCast(self.symbols.items.len - 1); + const gop = try self.symbol_index.getOrPut(name); + if (!gop.found_existing) { + gop.value_ptr.* = std.ArrayList(u32).empty; + } + try gop.value_ptr.append(self.allocator, idx); } /// Pre-register an imported symbol so references in this file can resolve to it. pub fn preRegisterSymbol(self: *Analyzer, sym: Symbol) !void { try self.symbols.append(self.allocator, sym); + // Update symbol index + const idx: u32 = @intCast(self.symbols.items.len - 1); + const gop = try self.symbol_index.getOrPut(sym.name); + if (!gop.found_existing) { + gop.value_ptr.* = std.ArrayList(u32).empty; + } + try gop.value_ptr.append(self.allocator, idx); } fn resolveIdentifier(self: *Analyzer, name: []const u8, span: Span) !void { - // Search backwards to find the most recent declaration with this name - // that is at or above the current scope depth. - var i = self.symbols.items.len; - while (i > 0) { - i -= 1; - const sym = self.symbols.items[i]; - if (sym.scope_depth <= self.scope_depth and std.mem.eql(u8, sym.name, name)) { - try self.references.append(self.allocator, .{ - .span = span, - .symbol_index = @intCast(i), - }); - return; + // Use symbol index for O(1) name lookup, then walk backwards through indices + if (self.symbol_index.get(name)) |indices| { + var j = indices.items.len; + while (j > 0) { + j -= 1; + const idx = indices.items[j]; + const sym = self.symbols.items[idx]; + if (sym.scope_depth <= self.scope_depth) { + try self.references.append(self.allocator, .{ + .span = span, + .symbol_index = idx, + }); + return; + } } } @@ -787,9 +813,10 @@ pub const Analyzer = struct { const name = tn.data.type_expr.name; // Check type aliases first const resolved = self.type_aliases.get(name) orelse name; - for (self.symbols.items) |sym| { - if (!std.mem.eql(u8, sym.name, resolved)) continue; - if (sym.ty) |ty| return ty; + if (self.symbol_index.get(resolved)) |indices| { + for (indices.items) |idx| { + if (self.symbols.items[idx].ty) |ty| return ty; + } } } } diff --git a/src/types.zig b/src/types.zig index 1782301..c40c613 100644 --- a/src/types.zig +++ b/src/types.zig @@ -98,49 +98,65 @@ pub const Type = union(enum) { } pub fn fromName(name: []const u8) ?Type { - // Named types (check before variable-width integers since "string" starts with 's') - if (std.mem.eql(u8, name, "string")) return .string_type; - if (std.mem.eql(u8, name, "bool")) return .boolean; - if (std.mem.eql(u8, name, "f32")) return .f32; - if (std.mem.eql(u8, name, "f64")) return .f64; - if (std.mem.eql(u8, name, "Any")) return .any_type; - // Sentinel-terminated slice: [:0]T → string_type when T is u8 - if (name.len >= 5 and name[0] == '[' and name[1] == ':') { - // Find closing ']' - if (std.mem.indexOfScalar(u8, name, ']')) |close| { - const sentinel = name[2..close]; - const elem = name[close + 1 ..]; - if (std.mem.eql(u8, sentinel, "0") and std.mem.eql(u8, elem, "u8")) { - return .string_type; + if (name.len == 0) return null; + return switch (name[0]) { + 's' => { + if (std.mem.eql(u8, name, "string")) return .string_type; + if (name.len >= 2) { + const width = std.fmt.parseInt(u8, name[1..], 10) catch return null; + if (width >= 1 and width <= 64) return Type.s(width); } - } - } - // Many-pointer: [*]T - if (name.len >= 4 and name[0] == '[' and name[1] == '*' and name[2] == ']') { - return .{ .many_pointer_type = .{ .element_name = name[3..] } }; - } - // Pointer: *T - if (name.len >= 2 and name[0] == '*') { - return .{ .pointer_type = .{ .pointee_name = name[1..] } }; - } - // Vector: Vector(N,T) - if (name.len >= 10 and std.mem.startsWith(u8, name, "Vector(") and name[name.len - 1] == ')') { - const inner = name[7 .. name.len - 1]; // contents between ( and ) - if (std.mem.indexOfScalar(u8, inner, ',')) |comma| { - const length = std.fmt.parseInt(u32, inner[0..comma], 10) catch return null; - const elem_name = inner[comma + 1 ..]; - if (elem_name.len > 0) { - return .{ .vector_type = .{ .element_name = elem_name, .length = length } }; + return null; + }, + 'u' => { + if (name.len >= 2) { + const width = std.fmt.parseInt(u8, name[1..], 10) catch return null; + if (width >= 1 and width <= 64) return Type.u(width); } - } - } - // Variable-width integers: s1..s64, u1..u64 - if (name.len >= 2 and (name[0] == 's' or name[0] == 'u')) { - const width = std.fmt.parseInt(u8, name[1..], 10) catch return null; - if (width < 1 or width > 64) return null; - return if (name[0] == 's') Type.s(width) else Type.u(width); - } - return null; + return null; + }, + 'b' => if (std.mem.eql(u8, name, "bool")) .boolean else null, + 'f' => { + if (std.mem.eql(u8, name, "f32")) return .f32; + if (std.mem.eql(u8, name, "f64")) return .f64; + return null; + }, + 'A' => if (std.mem.eql(u8, name, "Any")) .any_type else null, + 'v' => if (std.mem.eql(u8, name, "void")) .void_type else null, + '[' => { + // Sentinel-terminated slice: [:0]u8 → string_type + if (name.len >= 5 and name[1] == ':') { + if (std.mem.indexOfScalar(u8, name, ']')) |close| { + const sentinel = name[2..close]; + const elem = name[close + 1 ..]; + if (std.mem.eql(u8, sentinel, "0") and std.mem.eql(u8, elem, "u8")) { + return .string_type; + } + } + } + // Many-pointer: [*]T + if (name.len >= 4 and name[1] == '*' and name[2] == ']') { + return .{ .many_pointer_type = .{ .element_name = name[3..] } }; + } + return null; + }, + '*' => if (name.len >= 2) .{ .pointer_type = .{ .pointee_name = name[1..] } } else null, + 'V' => { + // Vector(N,T) + if (name.len >= 10 and std.mem.startsWith(u8, name, "Vector(") and name[name.len - 1] == ')') { + const inner = name[7 .. name.len - 1]; + if (std.mem.indexOfScalar(u8, inner, ',')) |comma| { + const length = std.fmt.parseInt(u32, inner[0..comma], 10) catch return null; + const elem_name = inner[comma + 1 ..]; + if (elem_name.len > 0) { + return .{ .vector_type = .{ .element_name = elem_name, .length = length } }; + } + } + } + return null; + }, + else => null, + }; } pub fn fromTypeExpr(node: *Node) ?Type { @@ -363,20 +379,14 @@ pub const Type = union(enum) { pub fn displayName(self: Type, allocator: std.mem.Allocator) ![]const u8 { return switch (self) { .signed => |w| { - var buf = std.ArrayList(u8).empty; - try buf.append(allocator, 's'); - var tmp: [4]u8 = undefined; - const width_str = std.fmt.bufPrint(&tmp, "{d}", .{w}) catch unreachable; - try buf.appendSlice(allocator, width_str); - return try buf.toOwnedSlice(allocator); + var buf: [4]u8 = undefined; + const result = std.fmt.bufPrint(&buf, "s{d}", .{w}) catch unreachable; + return try allocator.dupe(u8, result); }, .unsigned => |w| { - var buf = std.ArrayList(u8).empty; - try buf.append(allocator, 'u'); - var tmp: [4]u8 = undefined; - const width_str = std.fmt.bufPrint(&tmp, "{d}", .{w}) catch unreachable; - try buf.appendSlice(allocator, width_str); - return try buf.toOwnedSlice(allocator); + var buf: [4]u8 = undefined; + const result = std.fmt.bufPrint(&buf, "u{d}", .{w}) catch unreachable; + return try allocator.dupe(u8, result); }, .f32 => "f32", .f64 => "f64", @@ -388,43 +398,64 @@ pub const Type = union(enum) { .struct_type => |name| name, .union_type => |name| name, .slice_type => |info| { - var buf = std.ArrayList(u8).empty; - try buf.appendSlice(allocator, "[]"); - try buf.appendSlice(allocator, info.element_name); - return try buf.toOwnedSlice(allocator); + var buf: [128]u8 = undefined; + const result = std.fmt.bufPrint(&buf, "[]{s}", .{info.element_name}) catch { + // Fall back to dynamic allocation for very long element names + var dyn = std.ArrayList(u8).empty; + try dyn.appendSlice(allocator, "[]"); + try dyn.appendSlice(allocator, info.element_name); + return try dyn.toOwnedSlice(allocator); + }; + return try allocator.dupe(u8, result); }, .pointer_type => |info| { - var buf = std.ArrayList(u8).empty; - try buf.append(allocator, '*'); - try buf.appendSlice(allocator, info.pointee_name); - return try buf.toOwnedSlice(allocator); + var buf: [128]u8 = undefined; + const result = std.fmt.bufPrint(&buf, "*{s}", .{info.pointee_name}) catch { + var dyn = std.ArrayList(u8).empty; + try dyn.appendSlice(allocator, "*"); + try dyn.appendSlice(allocator, info.pointee_name); + return try dyn.toOwnedSlice(allocator); + }; + return try allocator.dupe(u8, result); }, .many_pointer_type => |info| { - var buf = std.ArrayList(u8).empty; - try buf.appendSlice(allocator, "[*]"); - try buf.appendSlice(allocator, info.element_name); - return try buf.toOwnedSlice(allocator); + var buf: [128]u8 = undefined; + const result = std.fmt.bufPrint(&buf, "[*]{s}", .{info.element_name}) catch { + var dyn = std.ArrayList(u8).empty; + try dyn.appendSlice(allocator, "[*]"); + try dyn.appendSlice(allocator, info.element_name); + return try dyn.toOwnedSlice(allocator); + }; + return try allocator.dupe(u8, result); }, .array_type => |info| { - var buf = std.ArrayList(u8).empty; - try buf.append(allocator, '['); - var tmp: [10]u8 = undefined; - const len_str = std.fmt.bufPrint(&tmp, "{d}", .{info.length}) catch unreachable; - try buf.appendSlice(allocator, len_str); - try buf.append(allocator, ']'); - try buf.appendSlice(allocator, info.element_name); - return try buf.toOwnedSlice(allocator); + var buf: [128]u8 = undefined; + const result = std.fmt.bufPrint(&buf, "[{d}]{s}", .{ info.length, info.element_name }) catch { + var dyn = std.ArrayList(u8).empty; + try dyn.appendSlice(allocator, "["); + var tmp: [10]u8 = undefined; + const len_str = std.fmt.bufPrint(&tmp, "{d}", .{info.length}) catch unreachable; + try dyn.appendSlice(allocator, len_str); + try dyn.appendSlice(allocator, "]"); + try dyn.appendSlice(allocator, info.element_name); + return try dyn.toOwnedSlice(allocator); + }; + return try allocator.dupe(u8, result); }, .vector_type => |info| { - var buf = std.ArrayList(u8).empty; - try buf.appendSlice(allocator, "Vector("); - var tmp: [10]u8 = undefined; - const len_str = std.fmt.bufPrint(&tmp, "{d}", .{info.length}) catch unreachable; - try buf.appendSlice(allocator, len_str); - try buf.appendSlice(allocator, ","); - try buf.appendSlice(allocator, info.element_name); - try buf.append(allocator, ')'); - return try buf.toOwnedSlice(allocator); + var buf: [128]u8 = undefined; + const result = std.fmt.bufPrint(&buf, "Vector({d},{s})", .{ info.length, info.element_name }) catch { + var dyn = std.ArrayList(u8).empty; + try dyn.appendSlice(allocator, "Vector("); + var tmp: [10]u8 = undefined; + const len_str = std.fmt.bufPrint(&tmp, "{d}", .{info.length}) catch unreachable; + try dyn.appendSlice(allocator, len_str); + try dyn.appendSlice(allocator, ","); + try dyn.appendSlice(allocator, info.element_name); + try dyn.appendSlice(allocator, ")"); + return try dyn.toOwnedSlice(allocator); + }; + return try allocator.dupe(u8, result); }, .function_type => |info| { var buf = std.ArrayList(u8).empty;