From e93879816d822d9ba870a9e30f127cad0decea1c Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 04:04:40 +0300 Subject: [PATCH] fix(ir): materialize global aggregate struct-literal initializers (issue 0080) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A module-global array of struct literals (`pairs : [2]Pair = .[ .{...}, .{...} ]`) was emitted as `zeroinitializer`, silently dropping every declared field — reads returned 0 with no diagnostic. Global struct literals and struct-with-array already worked; the gap was struct literals used as ARRAY elements. Root cause: `Lowering.constExprValue` (the const-aggregate serializer for global initializers) had no `.struct_literal` arm. `constArrayLiteral` serialized each element through `constExprValue`, so a struct-literal element returned null, collapsing the whole array initializer to null; `globalInitValue` then emitted no payload and the LLVM backend zero-initialized the global — the same silent-zero class as 0071/0072, one level inside an array literal. Fix: make `constExprValue` type-aware — thread the destination element/field TypeId so a struct-literal leaf routes through `constStructLiteral` and a nested array-literal through `constArrayLiteral` with the correct element type. `constArrayLiteral` derives its element type from the array TypeId; `constStructLiteral` passes each field's type. A global aggregate initializer that still does not fully reduce to a compile-time constant is now rejected loudly (`diagnoseNonConstGlobal`) instead of silently zeroing. `emitConstAggregate` already recurses over nested aggregates, so `sx run` (JIT) and `sx build` (AOT) both materialize the declared values. Regression: examples/0137-types-global-aggregate-literal-init.sx (global [N]Struct literal, global struct literal, struct-with-array, nested array-of-struct-with-array; values read back with no prior store, plus a store on top). 
Fails on the pre-fix compiler (array-of-struct fields read 0), passes after. Marks issues 0079 (already resolved) and 0080 RESOLVED. --- ...137-types-global-aggregate-literal-init.sx | 49 +++++++ ...7-types-global-aggregate-literal-init.exit | 1 + ...types-global-aggregate-literal-init.stderr | 1 + ...types-global-aggregate-literal-init.stdout | 7 + ...l-array-struct-literal-initializer-zero.md | 121 ++++++++++++++++++ src/ir/lower.zig | 44 +++++-- 6 files changed, 212 insertions(+), 11 deletions(-) create mode 100644 examples/0137-types-global-aggregate-literal-init.sx create mode 100644 examples/expected/0137-types-global-aggregate-literal-init.exit create mode 100644 examples/expected/0137-types-global-aggregate-literal-init.stderr create mode 100644 examples/expected/0137-types-global-aggregate-literal-init.stdout create mode 100644 issues/0080-global-array-struct-literal-initializer-zero.md diff --git a/examples/0137-types-global-aggregate-literal-init.sx b/examples/0137-types-global-aggregate-literal-init.sx new file mode 100644 index 0000000..8987220 --- /dev/null +++ b/examples/0137-types-global-aggregate-literal-init.sx @@ -0,0 +1,49 @@ +// A module-global aggregate (array of struct literals, a struct literal, and +// nested array/struct shapes) materializes its DECLARED field values into the +// global's static initializer, so reading the fields without any prior store +// returns the literal values — not zero. +// Regression (issue 0080): a global `[N]Struct` initialized with struct literals +// was emitted as `zeroinitializer`, silently dropping every field, because the +// constant-aggregate serializer had no struct-literal arm and collapsed the +// whole initializer to null. The fix threads the element/field type so struct +// and nested-array leaves serialize correctly; a genuinely non-constant +// initializer is now rejected loudly instead of silently zeroed. 
+ +#import "modules/std.sx"; + +Pair :: struct { a: s64; b: s64; } +WithArr :: struct { id: s64; xs: [3]s64; } + +// global array of struct literals +pairs : [2]Pair = .[ .{ a = 1, b = 2 }, .{ a = 3, b = 4 } ]; +// global struct literal +solo : Pair = .{ a = 7, b = 9 }; +// global struct containing a fixed array (struct-with-array) +wa : WithArr = .{ id = 5, xs = .[ 11, 22, 33 ] }; +// nested: global array of structs each containing an array +nested : [2]WithArr = .[ .{ id = 1, xs = .[ 1, 2, 3 ] }, .{ id = 2, xs = .[ 4, 5, 6 ] } ]; + +main :: () { + // Read the declared initializer values back with NO prior store. + print("pairs={},{} {},{}\n", pairs[0].a, pairs[0].b, pairs[1].a, pairs[1].b); + print("solo={},{}\n", solo.a, solo.b); + print("wa={} xs={},{},{}\n", wa.id, wa.xs[0], wa.xs[1], wa.xs[2]); + print("nested0={} xs={},{},{}\n", nested[0].id, nested[0].xs[0], nested[0].xs[1], nested[0].xs[2]); + print("nested1={} xs={},{},{}\n", nested[1].id, nested[1].xs[0], nested[1].xs[1], nested[1].xs[2]); + + // A store on top of the materialized initializer still works (live storage). 
+ pairs[0].a = 100; + nested[1].xs[2] = 999; + print("after-store={} {}\n", pairs[0].a, nested[1].xs[2]); + + if pairs[0].b == 2 and pairs[1].a == 3 and pairs[1].b == 4 + and solo.a == 7 and solo.b == 9 + and wa.id == 5 and wa.xs[0] == 11 and wa.xs[2] == 33 + and nested[0].id == 1 and nested[0].xs[0] == 1 and nested[0].xs[2] == 3 + and nested[1].id == 2 and nested[1].xs[0] == 4 + and pairs[0].a == 100 and nested[1].xs[2] == 999 { + print("PASS\n"); + } else { + print("FAIL: global aggregate literal initializer zeroed\n"); + } +} diff --git a/examples/expected/0137-types-global-aggregate-literal-init.exit b/examples/expected/0137-types-global-aggregate-literal-init.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0137-types-global-aggregate-literal-init.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0137-types-global-aggregate-literal-init.stderr b/examples/expected/0137-types-global-aggregate-literal-init.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0137-types-global-aggregate-literal-init.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0137-types-global-aggregate-literal-init.stdout b/examples/expected/0137-types-global-aggregate-literal-init.stdout new file mode 100644 index 0000000..02dc394 --- /dev/null +++ b/examples/expected/0137-types-global-aggregate-literal-init.stdout @@ -0,0 +1,7 @@ +pairs=1,2 3,4 +solo=7,9 +wa=5 xs=11,22,33 +nested0=1 xs=1,2,3 +nested1=2 xs=4,5,6 +after-store=100 999 +PASS diff --git a/issues/0080-global-array-struct-literal-initializer-zero.md b/issues/0080-global-array-struct-literal-initializer-zero.md new file mode 100644 index 0000000..e5dddb7 --- /dev/null +++ b/issues/0080-global-array-struct-literal-initializer-zero.md @@ -0,0 +1,121 @@ +# 0080 - global array of struct literals silently zero-initializes + +> **RESOLVED.** +> **Root cause:** `Lowering.constExprValue` (`src/ir/lower.zig`) — the constant- +> aggregate serializer for global 
initializers — handled primitive and nested- +> array leaves but had **no `.struct_literal` arm**. A module-global `[N]Struct` +> initialized with struct literals reached `constArrayLiteral` → `constExprValue` +> per element; each struct-literal element returned `null`, collapsing the whole +> array initializer to `null`. `globalInitValue` then emitted no payload, so the +> LLVM backend zero-initialized the global (`@pairs = ... zeroinitializer`), +> silently dropping every declared field — the same silent-zero class as +> 0071/0072, one level inside an array literal. (A global *struct* literal and a +> *struct-with-array* already worked, because `constStructLiteral` existed and was +> reached directly; the gap was specifically struct literals *as array elements*.) +> **Fix:** make `constExprValue` type-aware — thread the destination element/field +> `TypeId` so a `.struct_literal` leaf routes through `constStructLiteral` and a +> nested `.array_literal` through `constArrayLiteral` with the correct element +> type. `constArrayLiteral` derives its element type from the array `TypeId`; +> `constStructLiteral` passes each field's type. A global aggregate initializer +> that still does not fully reduce to a compile-time constant is now **rejected +> loudly** (`diagnoseNonConstGlobal`) instead of falling through to a zeroed +> global. The downstream `emitConstAggregate` already recurses over nested +> aggregates, so const/AOT (`sx build`) and JIT (`sx run`) both materialize the +> declared values. +> **Regression:** `examples/0137-types-global-aggregate-literal-init.sx` (global +> `[N]Struct` literal, global struct literal, struct-with-array, nested array-of- +> struct-with-array; values read back with no prior store, plus a store on top). +> FAILS on the pre-fix compiler (array-of-struct fields read 0), PASSES after. 
+ +## Symptom + +A module-global fixed array whose elements are struct literals is emitted as +zero-initialized storage instead of preserving the literal fields. + +Observed: reading `pairs[0].b` and `pairs[1].a` prints `0`. +Expected: the global should contain the declared struct literal values +(`2` and `3`), or the compiler should reject the initializer loudly if this +constant shape is unsupported. + +## Reproduction + +```sx +#import "modules/std.sx"; + +Pair :: struct { + a: s64; + b: s64; +} + +pairs : [2]Pair = .[ .{ a = 1, b = 2 }, .{ a = 3, b = 4 } ]; + +main :: () -> s32 { + print("pairs[0]={},{}\n", pairs[0].a, pairs[0].b); + print("pairs[1]={},{}\n", pairs[1].a, pairs[1].b); + if pairs[0].a == 1 and pairs[0].b == 2 and pairs[1].a == 3 and pairs[1].b == 4 { + print("PASS\n"); + return 0; + } + print("FAIL: global array struct literal initializer zeroed\n"); + return 1; +} +``` + +On the current compiler this prints: + +```text +pairs[0]=0,0 +pairs[1]=0,0 +FAIL: global array struct literal initializer zeroed +``` + +`sx ir` on the repro above shows the global as: + +```llvm +@pairs = internal global [2 x { i64, i64 }] zeroinitializer +``` + +## Investigation prompt + +Fix issue 0080: a module-global array initialized with struct literal elements +silently becomes `zeroinitializer`. + +Suspected area: +- `src/ir/lower.zig`, `Lowering.globalInitValue`. +- `src/ir/lower.zig`, `Lowering.constArrayLiteral`. +- `src/ir/lower.zig`, `Lowering.constExprValue`. +- `src/ir/lower.zig`, `Lowering.constStructLiteral`. + +Likely root cause: `globalInitValue` handles a top-level `.array_literal` by +calling `constArrayLiteral`, and `constArrayLiteral` serializes each element via +`constExprValue`. `constExprValue` handles primitive literals and nested arrays, +but not `.struct_literal`, so an array whose element is a struct literal returns +`null`. 
That null initializer payload is later emitted as zero-initialized +storage, recreating the silent zero pattern from issues 0071/0072 one level +inside an otherwise-supported array literal. + +Likely fix: +- Thread the expected element `TypeId` into `constArrayLiteral`, or otherwise + make `constExprValue` type-aware for struct literals. +- Serialize each struct element through `constStructLiteral` with the array's + element type. +- If any element shape is still unsupported, emit a diagnostic naming the global + instead of returning `null` and allowing zero-initialization. + +Verification: +- Run the repro above and expect: + +```text +pairs[0]=1,2 +pairs[1]=3,4 +PASS +``` + +- Add a pinned regression in the `01xx` types block. +- Run: + +```sh +zig build +zig build test +bash tests/run_examples.sh +``` diff --git a/src/ir/lower.zig b/src/ir/lower.zig index e45b7e2..793d520 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -926,15 +926,15 @@ pub const Lowering = struct { .bool_literal => |bl| .{ .boolean = bl.value }, .float_literal => |fl| .{ .float = fl.value }, .string_literal => |sl| .{ .string = self.module.types.internString(sl.raw) }, - .array_literal => |al| self.constArrayLiteral(al.elements), - .struct_literal => |sl| self.constStructLiteral(&sl, var_ty), + .array_literal => |al| self.constArrayLiteral(al.elements, var_ty) orelse self.diagnoseNonConstGlobal(vd, v), + .struct_literal => |sl| self.constStructLiteral(&sl, var_ty) orelse self.diagnoseNonConstGlobal(vd, v), .identifier => |id| blk: { // A global initialized from a module constant copies the // constant's recorded value (typed module consts land in // `module_const_map` via `registerTypedModuleConst`, run in the // same pass-2 before this). 
if (self.program_index.module_const_map.get(id.name)) |ci| { - if (self.constExprValue(ci.value)) |cv| break :blk cv; + if (self.constExprValue(ci.value, var_ty)) |cv| break :blk cv; } if (self.diagnostics) |d| d.addFmt(.err, v.span, "global '{s}' must be initialized by a compile-time constant; '{s}' is not a usable constant here", .{ vd.name, id.name }); @@ -957,6 +957,16 @@ pub const Lowering = struct { }; } + /// A global aggregate initializer (array/struct literal) that does not fully + /// reduce to a compile-time constant is rejected loudly. Without this the + /// `null` payload would fall through to a zero-initialized global, silently + /// dropping the declared fields (issues 0071/0072/0080). + fn diagnoseNonConstGlobal(self: *Lowering, vd: *const ast.VarDecl, v: *const Node) ?inst_mod.ConstantValue { + if (self.diagnostics) |d| + d.addFmt(.err, v.span, "global '{s}' must be initialized by a compile-time constant", .{vd.name}); + return null; + } + /// Resolve identifier-RHS type aliases whose target is declared LATER in the /// file. The forward scan above only registers an alias (`A :: B`) when `B` /// is already in `type_alias_map` / the `TypeTable`; a forward target isn't @@ -993,19 +1003,30 @@ pub const Lowering = struct { } } - /// Try to convert an array literal's elements into a compile-time ConstantValue.aggregate. - /// Returns null if any element is not a compile-time constant. - fn constArrayLiteral(self: *Lowering, elements: []const *const Node) ?inst_mod.ConstantValue { + /// Try to convert an array literal's elements into a compile-time + /// ConstantValue.aggregate. `array_ty` is the array's resolved TypeId; its + /// element type drives type-aware serialization of struct-literal and + /// nested-array elements. Returns null if `array_ty` is not an array type or + /// any element is not a compile-time constant. 
+ fn constArrayLiteral(self: *Lowering, elements: []const *const Node, array_ty: TypeId) ?inst_mod.ConstantValue { + if (array_ty.isBuiltin()) return null; + const elem_ty: TypeId = switch (self.module.types.get(array_ty)) { + .array => |a| a.element, + else => return null, + }; const vals = self.alloc.alloc(inst_mod.ConstantValue, elements.len) catch return null; for (elements, 0..) |elem, i| { - vals[i] = self.constExprValue(elem) orelse return null; + vals[i] = self.constExprValue(elem, elem_ty) orelse return null; } return .{ .aggregate = vals }; } /// Try to convert a single AST expression into a compile-time ConstantValue. - /// Returns null if the expression is not constant-foldable here. - fn constExprValue(self: *Lowering, expr: *const Node) ?inst_mod.ConstantValue { + /// `expected_ty` is the destination element/field type — it lets aggregate + /// leaves (struct literals, nested arrays) serialize with the correct shape + /// rather than collapsing to null (issue 0080). Returns null if the + /// expression is not constant-foldable here. + fn constExprValue(self: *Lowering, expr: *const Node, expected_ty: TypeId) ?inst_mod.ConstantValue { return switch (expr.data) { .int_literal => |il| .{ .int = il.value }, .bool_literal => |bl| .{ .boolean = bl.value }, @@ -1020,7 +1041,8 @@ pub const Lowering = struct { }, else => null, }, - .array_literal => |al| self.constArrayLiteral(al.elements), + .array_literal => |al| self.constArrayLiteral(al.elements, expected_ty), + .struct_literal => |sl| self.constStructLiteral(&sl, expected_ty), else => null, }; } @@ -1055,7 +1077,7 @@ pub const Lowering = struct { break :blk null; }; if (init_expr) |e| { - vals[fi] = self.constExprValue(e) orelse return null; + vals[fi] = self.constExprValue(e, sf.ty) orelse return null; } else { vals[fi] = .zeroinit; }