From 82e7b04cca5d5d3044901aa339bb26d3144e5983 Mon Sep 17 00:00:00 2001 From: agra Date: Mon, 25 May 2026 15:01:58 +0300 Subject: [PATCH] =?UTF-8?q?mem:=20Phase=201.4=20=E2=80=94=20serialize=20ev?= =?UTF-8?q?ery=20interp=20Value=20variant=20for=20`#run`=20globals?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `valueToLLVMConst` in emit_llvm previously handled int / float / boolean and collapsed everything else into `LLVMConstNull(ty)`. A `#run` returning a struct, string, function pointer, or anything aggregate produced a zero-initialized global silently — the comptime result was computed by the interp, then thrown away when emit_llvm couldn't represent it. Replaced with a real walk: - int / float / boolean — as before. - null_val — `LLVMConstNull`. - void_val / undef — `LLVMGetUndef`. - func_ref — `func_map` lookup (already populated for the implicit-Context static initializer of `__sx_default_context`). - string — `emitConstStringGlobal`, returns a pointer to the byte array. - aggregate — recurse field-by-field. Struct: walk `LLVMStructGetTypeAtIndex` and emit `LLVMConstNamedStruct`. Array: walk `LLVMGetElementType` and emit `LLVMConstArray2`. The remaining variants (heap_ptr, byte_ptr, slot_ptr, closure, type_tag) bail loudly with a `std.debug.print` carrying the global name — per CLAUDE.md REJECTED PATTERNS, no more silent unimplemented arms. heap_ptr serialization requires threading the IR `TypeId` so the heap content can be walked recursively; deferred to Phase 1.4a alongside cycle detection. The call site at emit_llvm.zig:676 now passes `global.name` so the diagnostic locates the offending `#run` binding. Type-inference fix at the binding site: `NAME :: #run expr;` with no annotation used to default to `s64` via `resolveType(null) -> .s64`, so even a successful Phase 1.4 serialization would emit `{0, 0}` — the global's destination type was wrong. `lowerComptimeGlobal` now calls `inferExprType(expr)` when no annotation is given, so the inferred type matches the comptime function's return type. The broader `resolveType(null)` fallback is left in place for other callers — flagged in the MEM checkpoint as a follow-up audit. Regression: `examples/134-comptime-aggregate-global.sx` exercises `POINT :: #run make_point()` returning a `Point { x: s32, y: s32 }`. Both interp (`sx run`) and codegen (`sx build`) now print `POINT.x = 7 / POINT.y = 13` instead of `0 / 0`. 156/156 example tests pass; chess unchanged. --- current/CHECKPOINT-MEM.md | 95 ++++++++++++++----- examples/134-comptime-aggregate-global.sx | 31 ++++++ src/ir/emit_llvm.zig | 69 +++++++++++++- src/ir/lower.zig | 9 +- .../134-comptime-aggregate-global.exit | 1 + .../134-comptime-aggregate-global.txt | 2 + 6 files changed, 176 insertions(+), 31 deletions(-) create mode 100644 examples/134-comptime-aggregate-global.sx create mode 100644 tests/expected/134-comptime-aggregate-global.exit create mode 100644 tests/expected/134-comptime-aggregate-global.txt diff --git a/current/CHECKPOINT-MEM.md b/current/CHECKPOINT-MEM.md index 00911d9..9d9b322 100644 --- a/current/CHECKPOINT-MEM.md +++ b/current/CHECKPOINT-MEM.md @@ -5,18 +5,33 @@ Tracking checkpoint for the mem.sx Zig-aligned implementation ## Last completed step -- **`List(T)` mutations gain an optional `alloc: Allocator = - context.allocator` argument** (plan: - `~/.claude/plans/lets-see-options-for-merry-dijkstra.md`). One - language-level change (function-param defaults already supported) - closes a whole bug class where long-lived containers' growth - silently landed in a per-frame arena. The chess panel-text - regression that triggered this work is fixed end-to-end on macOS. - GlyphCache, DockInteraction, StateStore, Gles3Gpu, and MetalGPU - all carry `parent_allocator` and pass it explicitly to internal - list growth. 155/155 example tests pass — zero-diff against - snapshots since every existing callsite still resolves to - `context.allocator`. +- **Phase 1.4 — `valueToLLVMConst` upgraded to handle every interp + `Value` variant.** The serializer at `emit_llvm.zig:734` used to + collapse anything past int/float/boolean into `LLVMConstNull(ty)` — + a silent fallback that emitted `{0, 0}` for any `#run` returning a + struct, string, function pointer, or pointer. Replaced with a real + walk: int/float/boolean as before; null_val → LLVMConstNull; + void_val/undef → LLVMGetUndef; func_ref → func_map lookup; + string → emitConstStringGlobal; aggregate → recurse via + `LLVMStructGetTypeAtIndex` / `LLVMGetElementType`. The unsupported + cases (heap_ptr/byte_ptr/slot_ptr/closure/type_tag) now bail loudly + with a named diagnostic that includes the global name (per CLAUDE.md + REJECTED PATTERNS — no silent unimplemented arms). Heap_ptr is + deferred to Phase 1.4a (needs IR TypeId threaded down for cyclic / + recursive heap content). + + Also closes the type-inference half of the same bug: `NAME :: #run + expr;` with no annotation used to default to `s64` (silent fallback + in `resolveType(null)`). `lowerComptimeGlobal` now infers from the + expression's return type when no annotation is provided. The + silent fallback in `resolveType` itself is left in place for other + callers — separate audit, separate session. + + Regression test at + `examples/134-comptime-aggregate-global.sx`: a `POINT :: #run + make_point();` struct binding now prints the real fields instead + of zeros, on both interp and codegen paths. 156/156 example tests + + chess clean. ## Current state @@ -152,21 +167,34 @@ chain at comptime in the interp. No remaining shortcut. Phase 1.3 (closure env allocation through context) shipped in commit `8e21cc5`. Phase 1.4 (codegen serializer for all interp Value -variants) remains open. Phase 1.2 (free / malloc through context) was -considered and **skipped** — `context.allocator.alloc/dealloc` -already works directly; wrapper-only `malloc`/`free` would be lossy -renames. +variants) shipped this session. Phase 1.2 (free / malloc through +context) was considered and **skipped** — `context.allocator.alloc +/dealloc` already works directly; wrapper-only `malloc`/`free` would +be lossy renames. -The `List(T)` allocator-arg work documented above is **outside the -original MEM plan** but lives in the same problem space (long-lived -container growth silently capturing the wrong allocator). It -generalises the `parent_allocator` capture pattern that -`ChessGameState` / `UIPipeline` already used. +The `List(T)` allocator-arg work shipped earlier this session is +**outside the original MEM plan** but lives in the same problem +space (long-lived container growth silently capturing the wrong +allocator). -Suggested next move: verify on iOS sim + Android via -`tools/verify-step.sh` to confirm the GlyphCache fix + Metal/Gles3 -sweep behave on those platforms, then either commit the verify-step -goldens or move to Phase 1.4. +Open follow-ups, in roughly the order they make sense: + +- **Phase 1.4a** — Thread IR `TypeId` (not just LLVM `LLVMTypeRef`) + through `valueToLLVMConst` so `heap_ptr` values from `#run` can be + serialized. Requires walking the struct/slice/primitive children + recursively; cycle detection via `(heap_id, type_id)` visited set. + Practical trigger: a `#run` that builds a `Widget.{}` and + protocol-erases via `xx`, producing a `heap_ptr` to the boxed + payload. None exists in-tree yet — surface it via a focused + regression alongside the implementation. +- **`resolveType(null) -> .s64` audit.** The silent fallback at + `lower.zig:8387` is still in place for every caller other than + `lowerComptimeGlobal`. CLAUDE.md REJECTED PATTERNS forbids this + shape. Survey callers; either make the default an error + diagnostic or thread an inferred type per call site. +- **`tools/verify-step.sh` gate.** Run iOS sim + Android to confirm + this session's GlyphCache + Metal/Gles3 sweeps + Phase 1.4 didn't + regress non-macOS platforms. ## Phase 0.3 audit findings — chess allocator usage (closed) @@ -192,7 +220,22 @@ Allocator value naturally. ## Log -- **2026-05-25 (latest)** — `List(T)` mutation API gained an optional +- **2026-05-25 (latest)** — Phase 1.4 shipped. `valueToLLVMConst` + (`emit_llvm.zig:734`) replaced the primitive-only switch with a + full serializer covering null_val, void_val, undef, func_ref, + string, and aggregate (struct + array via + `LLVMStructGetTypeAtIndex` / `LLVMGetElementType`). Unsupported + variants (heap_ptr, byte_ptr, slot_ptr, closure, type_tag) bail + loudly via `std.debug.print` with the global name. The call site + at line 676 now passes `global.name` so the diagnostic locates the + offending `#run` site. `lowerComptimeGlobal` (`lower.zig:6384`) + infers the return type from the expression when the user omits + the type annotation — closes the silent-s64 default for `NAME :: + #run expr;` bindings. The broader `resolveType(null) -> .s64` + fallback is left in place for other callers — flagged for a + follow-up audit. Regression at + `examples/134-comptime-aggregate-global.sx`. 156/156 + chess green. +- **2026-05-25 (penultimate)** — `List(T)` mutation API gained an optional trailing `alloc: Allocator = context.allocator` argument (`library/modules/std.sx`). Default-arg substitution previously only fired for identifier callees; extended to the generic-method diff --git a/examples/134-comptime-aggregate-global.sx b/examples/134-comptime-aggregate-global.sx new file mode 100644 index 0000000..f61e8c4 --- /dev/null +++ b/examples/134-comptime-aggregate-global.sx @@ -0,0 +1,31 @@ +#import "modules/std.sx"; + +// MEM Phase 1.4 regression: an aggregate (struct) returned from a `#run` +// initializer must serialize correctly into the static binary, not +// silently collapse to a zero-init constant. +// +// Before Phase 1.4 the LLVM `valueToLLVMConst` only handled int/float/bool +// and dropped everything else into `LLVMConstNull` — so a global like +// `POINT :: #run make_point();` ended up emitting `{0, 0}` regardless of +// what the interp computed. Reading POINT.x would give 0, hiding the bug. +// +// Also exercises type inference at the `NAME :: #run expr;` binding site: +// without an explicit annotation, the global's type is taken from the +// comptime expression's return shape (here, `Point`). + +Point :: struct { + x: s32; + y: s32; +} + +make_point :: () -> Point { + return Point.{ x = 7, y = 13 }; +} + +POINT :: #run make_point(); + +main :: () -> s32 { + print("POINT.x = {}\n", POINT.x); + print("POINT.y = {}\n", POINT.y); + return 0; +} diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig index 025e369..2f8fa18 100644 --- a/src/ir/emit_llvm.zig +++ b/src/ir/emit_llvm.zig @@ -673,7 +673,7 @@ pub const LLVMEmitter = struct { std.debug.print("error: comptime init of '{s}' failed: {s} (op={s}{s}{s})\n", .{ gname, @errorName(err), op, sep, detail }); break :blk .void_val; }; - const init_val = self.valueToLLVMConst(result, llvm_ty); + const init_val = self.valueToLLVMConst(result, llvm_ty, self.ir_mod.types.getString(global.name)); c.LLVMSetInitializer(llvm_global, init_val); } else if (global.init_val) |iv| { const init_val = switch (iv) { @@ -731,13 +731,74 @@ pub const LLVMEmitter = struct { } } - fn valueToLLVMConst(self: *LLVMEmitter, val: Value, llvm_ty: c.LLVMTypeRef) c.LLVMValueRef { - _ = self; + /// Serialize an interp `Value` to an LLVM constant for use as a static + /// global initializer. `global_name` is included in any diagnostic the + /// path produces, so the user can locate the offending `#run` site. + /// Returns `LLVMGetUndef` on bail — the build continues so adjacent + /// constants can still emit, but the surfaced diagnostic surfaces the + /// problem clearly. + fn valueToLLVMConst( + self: *LLVMEmitter, + val: Value, + llvm_ty: c.LLVMTypeRef, + global_name: []const u8, + ) c.LLVMValueRef { return switch (val) { .int => |v| c.LLVMConstInt(llvm_ty, @bitCast(v), 1), .float => |v| c.LLVMConstReal(llvm_ty, v), .boolean => |v| c.LLVMConstInt(llvm_ty, @intFromBool(v), 0), - else => c.LLVMConstNull(llvm_ty), + .null_val => c.LLVMConstNull(llvm_ty), + .void_val, .undef => c.LLVMGetUndef(llvm_ty), + .func_ref => |fid| self.func_map.get(fid.index()) orelse c.LLVMConstNull(llvm_ty), + .string => |s| self.emitConstStringGlobal(s), + .aggregate => |fields| blk: { + const kind = c.LLVMGetTypeKind(llvm_ty); + if (kind == c.LLVMStructTypeKind) { + const field_count = c.LLVMCountStructElementTypes(llvm_ty); + if (field_count != @as(c_uint, @intCast(fields.len))) { + std.debug.print( + "error: comptime init of '{s}' produced aggregate with {} fields but the destination type expects {}\n", + .{ global_name, fields.len, field_count }, + ); + break :blk c.LLVMGetUndef(llvm_ty); + } + var field_vals = std.ArrayList(c.LLVMValueRef).empty; + defer field_vals.deinit(self.alloc); + for (fields, 0..) |f, i| { + const field_ty = c.LLVMStructGetTypeAtIndex(llvm_ty, @intCast(i)); + field_vals.append(self.alloc, self.valueToLLVMConst(f, field_ty, global_name)) catch unreachable; + } + break :blk c.LLVMConstNamedStruct(llvm_ty, field_vals.items.ptr, @intCast(field_vals.items.len)); + } + if (kind == c.LLVMArrayTypeKind) { + const elem_ty = c.LLVMGetElementType(llvm_ty); + var elem_vals = std.ArrayList(c.LLVMValueRef).empty; + defer elem_vals.deinit(self.alloc); + for (fields) |f| { + elem_vals.append(self.alloc, self.valueToLLVMConst(f, elem_ty, global_name)) catch unreachable; + } + break :blk c.LLVMConstArray2(elem_ty, elem_vals.items.ptr, @intCast(elem_vals.items.len)); + } + std.debug.print( + "error: comptime init of '{s}' produced an aggregate but the destination LLVM type is neither struct nor array (kind={})\n", + .{ global_name, kind }, + ); + break :blk c.LLVMGetUndef(llvm_ty); + }, + // The remaining Value variants cannot become static binary + // constants. Bail loudly with the global name so the user can + // identify the offending #run site. + // - heap_ptr / byte_ptr: pointer into interp/host memory; can't survive into a binary const without type-threaded serialization (Phase 1.4a follow-up). + // - slot_ptr: frame-local; meaningless outside the call that produced it. + // - closure: env is dynamic. + // - type_tag: compile-time-only Type value. + .heap_ptr, .byte_ptr, .slot_ptr, .closure, .type_tag => blk: { + std.debug.print( + "error: comptime init of '{s}' produced a {s} value, which cannot be serialized as a static constant\n", + .{ global_name, @tagName(val) }, + ); + break :blk c.LLVMGetUndef(llvm_ty); + }, }; } diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 380f3bf..56d6bf1 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -6382,7 +6382,14 @@ pub const Lowering = struct { /// Creates a comptime function wrapping the expression (for later /// interpretation), plus a global constant to hold the result. fn lowerComptimeGlobal(self: *Lowering, name: []const u8, expr: *const Node, type_ann: ?*const Node) void { - const ret_ty = self.resolveType(type_ann); + // When the user writes `NAME :: #run expr;` with no type annotation, + // infer the global's type from the comptime expression's return + // shape. `resolveType(null)` returns `.s64` for legacy reasons — + // good for primitive helpers, silently wrong for anything else. + const ret_ty: TypeId = if (type_ann) |n| + self.resolveTypeWithBindings(n) + else + self.inferExprType(expr); const func_id = self.createComptimeFunction(name, expr, ret_ty); // Add a global constant whose initializer will be filled by the interpreter. diff --git a/tests/expected/134-comptime-aggregate-global.exit b/tests/expected/134-comptime-aggregate-global.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/tests/expected/134-comptime-aggregate-global.exit @@ -0,0 +1 @@ +0 diff --git a/tests/expected/134-comptime-aggregate-global.txt b/tests/expected/134-comptime-aggregate-global.txt new file mode 100644 index 0000000..e326866 --- /dev/null +++ b/tests/expected/134-comptime-aggregate-global.txt @@ -0,0 +1,2 @@ +POINT.x = 7 +POINT.y = 13