From 67e02a20a574f4661c71648fb2eb12b12f74d550 Mon Sep 17 00:00:00 2001 From: agra Date: Wed, 4 Mar 2026 09:18:24 +0200 Subject: [PATCH] ... --- specs.md | 30 +++++++++++++ src/ir/emit_llvm.zig | 33 ++++++++++++++ src/ir/inst.zig | 2 + src/ir/interp.zig | 8 ++++ src/ir/lower.zig | 102 +++++++++++++++++++++++++++++++++---------- src/ir/print.zig | 1 + 6 files changed, 153 insertions(+), 23 deletions(-) diff --git a/specs.md b/specs.md index da4ad7b..98e26e4 100644 --- a/specs.md +++ b/specs.md @@ -370,6 +370,36 @@ Arena :: struct { allocators : [2]Allocator = .[xx gpa, xx arena]; // protocol values in array ``` +#### Ownership and Lifetime + +Protocol values have two ownership modes depending on how they are created: + +| Conversion | `ctx` points to | Lifetime | Who frees | +|------------|----------------|----------|-----------| +| `xx value` | Heap-allocated copy | Until `free(p)` | Caller | +| `xx @ptr` | Original pointee | Tied to pointee | Caller manages pointee | + +**`xx value`** — the concrete data is heap-copied so the protocol value is self-contained. +It can be stored in containers, returned from functions, and can outlive the scope where it was created. +Call `free(p)` to release the backing memory when done: +```sx +s : Sizable = xx Widget.{ value = 42 }; // heap-copies Widget +print("{}\n", s.size()); +free(s); // frees the heap-allocated Widget copy +``` + +**`xx @ptr`** — the protocol borrows the pointer. The protocol value is only valid as long as +the pointee is alive. Mutations through the protocol are visible through the original pointer: +```sx +w := Widget.{ value = 0 }; +s : Sizable = xx @w; // borrows @w (no copy) +s.add(5); // modifies w through ctx +print("{}\n", w.value); // 5 +// do NOT free(s) — s only borrows w; free(s) would try to release w's own storage +``` + +**Vtables** are global constants — shared across all protocol values of the same `(Protocol, ConcreteType)` pair. They are never allocated or freed at runtime. + #### Default Methods Protocol methods can have bodies. 
`self` dispatches through the vtable (dynamic dispatch): ```sx diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index 930a946..8ff5cc8 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -192,6 +192,9 @@ pub const LLVMEmitter = struct { self.declareFunction(&func, @intCast(i)); } + // Pass 1.5: Initialize vtable globals (needs function declarations from Pass 1) + self.initVtableGlobals(); + // Pass 2: Emit function bodies for (self.ir_mod.functions.items, 0..) |func, i| { if (func.is_extern or func.blocks.items.len == 0) continue; @@ -279,6 +282,7 @@ pub const LLVMEmitter = struct { .float => |v| c.LLVMConstReal(llvm_ty, v), .boolean => |v| c.LLVMConstInt(llvm_ty, @intFromBool(v), 0), .string => |sid| self.emitConstStringGlobal(self.ir_mod.types.getString(sid)), + .vtable => c.LLVMConstNull(llvm_ty), // placeholder — initialized in initVtableGlobals after function declarations else => c.LLVMConstNull(llvm_ty), }; c.LLVMSetInitializer(llvm_global, init_val); @@ -290,6 +294,35 @@ pub const LLVMEmitter = struct { } } + /// Initialize vtable globals with function pointer constants. + /// Must run after Pass 1 (function declarations) so func_map is populated. + fn initVtableGlobals(self: *LLVMEmitter) void { + for (self.ir_mod.globals.items, 0..) 
|global, i| { + const iv = global.init_val orelse continue; + const func_ids = switch (iv) { + .vtable => |ids| ids, + else => continue, + }; + + const llvm_global = self.global_map.get(@intCast(i)) orelse continue; + const llvm_ty = self.toLLVMType(global.ty); + + // Build constant struct of function pointers + var field_vals = std.ArrayList(c.LLVMValueRef).empty; + defer field_vals.deinit(self.alloc); + for (func_ids) |fid| { + const llvm_func = self.func_map.get(fid.index()) orelse { + field_vals.append(self.alloc, c.LLVMConstNull(self.cached_ptr)) catch unreachable; + continue; + }; + field_vals.append(self.alloc, llvm_func) catch unreachable; + } + const init_val = c.LLVMConstNamedStruct(llvm_ty, field_vals.items.ptr, @intCast(field_vals.items.len)); + c.LLVMSetInitializer(llvm_global, init_val); + c.LLVMSetGlobalConstant(llvm_global, 1); + } + } + fn valueToLLVMConst(self: *LLVMEmitter, val: Value, llvm_ty: c.LLVMTypeRef) c.LLVMValueRef { _ = self; return switch (val) { diff --git a/src/ir/inst.zig b/src/ir/inst.zig index 1cc3062..19fd2f7 100644 --- a/src/ir/inst.zig +++ b/src/ir/inst.zig @@ -462,5 +462,7 @@ pub const ConstantValue = union(enum) { undef, zeroinit, aggregate: []const ConstantValue, + /// Vtable constant: struct of function pointers, used for protocol vtable globals. + vtable: []const FuncId, }; diff --git a/src/ir/interp.zig b/src/ir/interp.zig index 1b0deff..7e378d3 100644 --- a/src/ir/interp.zig +++ b/src/ir/interp.zig @@ -1124,6 +1124,14 @@ pub const Interpreter = struct { } return .{ .aggregate = fields }; }, + .vtable => |func_ids| { + // Vtable is a struct of function refs — represent as aggregate of func_ref values + const fields = self.alloc.alloc(Value, func_ids.len) catch return .undef; + for (func_ids, 0..) 
|fid, i| { + fields[i] = .{ .func_ref = fid }; + } + return .{ .aggregate = fields }; + }, }; } diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 7e321f6..e2e8ed3 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -100,6 +100,7 @@ pub const Lowering = struct { protocol_ast_map: std.StringHashMap(*const ast.ProtocolDecl) = std.StringHashMap(*const ast.ProtocolDecl).init(std.heap.page_allocator), // protocol name → AST node protocol_thunk_map: std.StringHashMap([]const FuncId) = std.StringHashMap([]const FuncId).init(std.heap.page_allocator), // "Proto\x00Type" → thunk FuncIds protocol_vtable_type_map: std.StringHashMap(TypeId) = std.StringHashMap(TypeId).init(std.heap.page_allocator), // protocol name → vtable struct TypeId + protocol_vtable_global_map: std.StringHashMap(inst_mod.GlobalId) = std.StringHashMap(inst_mod.GlobalId).init(std.heap.page_allocator), // "Proto\x00Type" → vtable GlobalId struct_const_map: std.StringHashMap(StructConstInfo) = std.StringHashMap(StructConstInfo).init(std.heap.page_allocator), // "Struct.CONST" → value info module_const_map: std.StringHashMap(ModuleConstInfo) = std.StringHashMap(ModuleConstInfo).init(std.heap.page_allocator), // module-level value constants (e.g. 
AF_INET :s32: 2) foreign_name_map: std.StringHashMap([]const u8) = std.StringHashMap([]const u8).init(std.heap.page_allocator), // sx name → C name for #foreign renames @@ -3701,6 +3702,15 @@ pub const Lowering = struct { } // Check builtins first (these are handled natively by interpreter and emitter) if (resolveBuiltin(id.name)) |bid| { + // free(protocol_value) → extract ctx (field 0) and free it + if (bid == .free and args.items.len == 1) { + const arg_ty = self.builder.getRefType(args.items[0]); + if (self.getProtocolInfo(arg_ty) != null) { + const void_ptr_ty = self.module.types.ptrTo(.void); + const ctx_ref = self.builder.emit(.{ .struct_get = .{ .base = args.items[0], .field_index = 0 } }, void_ptr_ty); + return self.builder.emit(.{ .heap_free = .{ .operand = ctx_ref } }, .void); + } + } const ret_ty: TypeId = switch (bid) { .malloc => .s64, // pointer .size_of => .s64, @@ -7500,18 +7510,38 @@ pub const Lowering = struct { /// Build a protocol value from a concrete pointer. /// For inline protocols: struct_init { ctx, thunk1, thunk2, ... } /// For vtable protocols: struct_init { ctx, vtable_ptr } where vtable is stack-allocated - fn buildProtocolValue(self: *Lowering, concrete_ptr: Ref, proto_name: []const u8, concrete_type_name: []const u8, proto_ty: TypeId) Ref { + /// When `heap_copy` is true, the concrete data is heap-copied so the protocol value + /// outlives the current stack frame (used when source is a value, not an explicit pointer). + /// When false, the pointer is used directly (user manages the pointee's lifetime). 
+ fn buildProtocolValue(self: *Lowering, concrete_ptr: Ref, proto_name: []const u8, concrete_type_name: []const u8, proto_ty: TypeId, concrete_ty: TypeId, heap_copy: bool) Ref { const pd = self.protocol_decl_map.get(proto_name) orelse return concrete_ptr; const thunks = self.getOrCreateThunks(proto_name, concrete_type_name); if (thunks.len != pd.methods.len) return concrete_ptr; const void_ptr_ty = self.module.types.ptrTo(.void); + // When source is a value (not an explicit pointer), heap-allocate + // so the protocol value outlives the current stack frame. + // When source is an explicit pointer (xx @obj), use it directly — + // the user is responsible for the pointee's lifetime. + var ctx_ptr = concrete_ptr; + if (heap_copy) { + const concrete_size = self.module.types.typeSizeBytes(concrete_ty); + const size_ref = self.builder.constInt(@intCast(concrete_size), .s64); + const heap_ptr = self.builder.emit(.{ .heap_alloc = .{ .operand = size_ref } }, void_ptr_ty); + const memcpy_args = self.alloc.dupe(Ref, &.{ heap_ptr, concrete_ptr, size_ref }) catch unreachable; + _ = self.builder.emit(.{ .call_builtin = .{ + .builtin = inst_mod.BuiltinId.memcpy, + .args = memcpy_args, + } }, void_ptr_ty); + ctx_ptr = heap_ptr; + } + if (pd.is_inline) { // Inline: { ctx, fn1, fn2, ... 
} var field_vals = std.ArrayList(Ref).empty; defer field_vals.deinit(self.alloc); - field_vals.append(self.alloc, concrete_ptr) catch unreachable; + field_vals.append(self.alloc, ctx_ptr) catch unreachable; for (thunks) |thunk_id| { const fn_ref = self.builder.emit(.{ .func_ref = thunk_id }, void_ptr_ty); field_vals.append(self.alloc, fn_ref) catch unreachable; @@ -7520,24 +7550,37 @@ pub const Lowering = struct { return self.builder.emit(.{ .struct_init = .{ .fields = owned } }, proto_ty); } else { // Vtable: { ctx, vtable_ptr } - // Build vtable struct on stack: alloca + store fn_ptrs + // Vtable is a global constant (same function pointers for every instance + // of the same Protocol+ConcreteType pair). Cached per pair. const vtable_ty = self.protocol_vtable_type_map.get(proto_name) orelse return concrete_ptr; - var vtable_fields = std.ArrayList(Ref).empty; - defer vtable_fields.deinit(self.alloc); - for (thunks) |thunk_id| { - const fn_ref = self.builder.emit(.{ .func_ref = thunk_id }, void_ptr_ty); - vtable_fields.append(self.alloc, fn_ref) catch unreachable; - } - const vtable_fields_owned = self.alloc.dupe(Ref, vtable_fields.items) catch unreachable; - const vtable_val = self.builder.emit(.{ .struct_init = .{ .fields = vtable_fields_owned } }, vtable_ty); - const vtable_alloca = self.builder.alloca(vtable_ty); - self.builder.store(vtable_alloca, vtable_val); + + // Build cache key: "Proto\x00Type" + const key = std.fmt.allocPrint(self.alloc, "{s}\x00{s}", .{ proto_name, concrete_type_name }) catch unreachable; + + const vtable_global_id = self.protocol_vtable_global_map.get(key) orelse blk: { + // Create vtable global with function pointer initializer + const global_name = std.fmt.allocPrint(self.alloc, "__{s}__{s}__vtable", .{ proto_name, concrete_type_name }) catch unreachable; + const global_name_id = self.module.types.strings.intern(self.alloc, global_name); + const thunk_ids = self.alloc.dupe(FuncId, thunks) catch unreachable; + const gid = 
self.module.addGlobal(.{ + .name = global_name_id, + .ty = vtable_ty, + .init_val = .{ .vtable = thunk_ids }, + .is_const = true, + }); + self.protocol_vtable_global_map.put(key, gid) catch unreachable; // a dropped cache entry would register a duplicate vtable global on the next conversion + break :blk gid; + }; + + // Reference the vtable global's address + const vtable_ptr_ty = self.module.types.ptrTo(vtable_ty); + const vtable_addr = self.builder.emit(.{ .global_addr = vtable_global_id }, vtable_ptr_ty); // Build protocol struct: { ctx, &vtable } var proto_fields = std.ArrayList(Ref).empty; defer proto_fields.deinit(self.alloc); - proto_fields.append(self.alloc, concrete_ptr) catch unreachable; - proto_fields.append(self.alloc, vtable_alloca) catch unreachable; + proto_fields.append(self.alloc, ctx_ptr) catch unreachable; + proto_fields.append(self.alloc, vtable_addr) catch unreachable; const proto_owned = self.alloc.dupe(Ref, proto_fields.items) catch unreachable; return self.builder.emit(.{ .struct_init = .{ .fields = proto_owned } }, proto_ty); } @@ -8031,20 +8074,26 @@ pub const Lowering = struct { if (dst_info != .@"struct") return operand; const proto_name = self.module.types.getString(dst_info.@"struct".name); - // Determine concrete type name — resolve through pointer if needed + // Determine concrete type name and type — resolve through pointer if needed var concrete_ptr = operand; var concrete_type_name: ?[]const u8 = null; + var concrete_ty: TypeId = src_ty; + var heap_copy = false; if (!src_ty.isBuiltin()) { const src_info = self.module.types.get(src_ty); if (src_info == .pointer) { - // xx @acc — operand is already a pointer + // xx @acc — operand is already a pointer (user manages lifetime) const pointee = src_info.pointer.pointee; concrete_type_name = self.resolveConcreteTypeName(pointee); + concrete_ty = pointee; + heap_copy = false; } else if (src_info == .@"struct") { - // xx acc — operand is a value, need to take address + // xx acc — operand is a value, need to take address + heap-copy concrete_type_name = 
self.module.types.getString(src_info.@"struct".name); - // Alloca + store to get a pointer + concrete_ty = src_ty; + heap_copy = true; + // Alloca + store to get a pointer (will be heap-copied in buildProtocolValue) const slot = self.builder.alloca(src_ty); self.builder.store(slot, operand); concrete_ptr = slot; @@ -8054,10 +8103,11 @@ pub const Lowering = struct { // Also try from the operand node for struct literals: xx Accumulator.{ total = 0 } if (concrete_type_name == null) { concrete_type_name = self.inferConcreteTypeName(operand_node); + if (concrete_type_name != null) heap_copy = true; } if (concrete_type_name) |ctn| { - return self.buildProtocolValue(concrete_ptr, proto_name, ctn, dst_ty); + return self.buildProtocolValue(concrete_ptr, proto_name, ctn, dst_ty, concrete_ty, heap_copy); } return operand; } @@ -8343,17 +8393,23 @@ pub const Lowering = struct { if (dst_info == .@"struct") { const proto_name = self.module.types.getString(dst_info.@"struct".name); if (self.resolveConcreteTypeName(src_ty)) |ctn| { - // If src is a pointer, use directly; otherwise alloca+store + // If src is a pointer, use directly; otherwise alloca+store + heap-copy var concrete_ptr = val; + var concrete_ty = src_ty; + var heap_copy = false; if (!src_ty.isBuiltin()) { const si = self.module.types.get(src_ty); - if (si != .pointer) { + if (si == .pointer) { + concrete_ty = si.pointer.pointee; + heap_copy = false; + } else { const slot = self.builder.alloca(src_ty); self.builder.store(slot, val); concrete_ptr = slot; + heap_copy = true; } } - return self.buildProtocolValue(concrete_ptr, proto_name, ctn, dst_ty); + return self.buildProtocolValue(concrete_ptr, proto_name, ctn, dst_ty, concrete_ty, heap_copy); } } } diff --git a/src/ir/print.zig b/src/ir/print.zig index cb665da..8c348be 100644 --- a/src/ir/print.zig +++ b/src/ir/print.zig @@ -535,6 +535,7 @@ fn writeConstant(val: ConstantValue, writer: Writer) !void { .undef => try writer.writeAll("undef"), .zeroinit => try 
writer.writeAll("zeroinit"), .aggregate => try writer.writeAll("{...}"), + .vtable => try writer.writeAll("vtable{...}"), } }