From 8ac6c573e8626c78ce2ea2d7f59becf461def029 Mon Sep 17 00:00:00 2001 From: agra Date: Fri, 26 Jun 2026 12:28:09 +0300 Subject: [PATCH] fix: comptime field reflection on tuples/arrays/vectors (issue 0195) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `field_count` / `field_name` were broken on every non-struct/enum aggregate: `field_count(Tuple(i64, bool))` silently returned 0 (a missing `.tuple` arm in the count switches), and `field_name(tuple/array/vector, i)` SEGFAULTED — the LLVM backend built a zero-length `[0 x string]` name array for those kinds while sizing the runtime GEP at the (often non-zero) member count, so the indexed load ran past the array. Root cause was three+ parallel switches that each had to know how to count an aggregate's members, and disagreed: `field_count` lowering and `memberCount` had struct/union/tagged_union/enum/array/vector but no `.tuple`; the backend's `field_name_get` build + GEP sizing had neither `.tuple` nor `.array`/`.vector`. Fix: - add the `.tuple` arm to `field_count` lowering (src/ir/lower/call.zig) and `TypeTable.memberCount` (src/ir/types.zig; this also backs the COMPILER-API `type_field_count` VM reader). - unify the LLVM backend onto the single source of truth: both `getOrBuildFieldNameArray` (reflection.zig) and `emitFieldNameGet`'s GEP sizing (ops.zig) now derive from `memberCount` / `memberName`, so the name-array length and the GEP array type can never diverge again — for any kind. A member with no name (positional-tuple / array / vector element) reflects as "" (one slot per member, always in-bounds); named-tuple elements recover their labels. The array/vector clone was surfaced by adversarial review of the tuple-only fix. Regression: examples/comptime/0646-comptime-field-reflect-tuple-array.sx exercises field_count/field_name/field_type over struct, enum, positional + named tuple, array, and vector. Full suite green (818/0). Unblocks the `race` synthesis, which must reflect a named tuple's labels + element types. --- ...0646-comptime-field-reflect-tuple-array.sx | 47 ++++++ ...46-comptime-field-reflect-tuple-array.exit | 1 + ...-comptime-field-reflect-tuple-array.stderr | 1 + ...-comptime-field-reflect-tuple-array.stdout | 6 + issues/0195-tuple-field-reflection-broken.md | 143 ++++++++++++++++++ src/backend/llvm/ops.zig | 15 +- src/backend/llvm/reflection.zig | 31 ++-- src/ir/lower/call.zig | 1 + src/ir/types.zig | 1 + 9 files changed, 220 insertions(+), 26 deletions(-) create mode 100644 examples/comptime/0646-comptime-field-reflect-tuple-array.sx create mode 100644 examples/comptime/expected/0646-comptime-field-reflect-tuple-array.exit create mode 100644 examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stderr create mode 100644 examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stdout create mode 100644 issues/0195-tuple-field-reflection-broken.md diff --git a/examples/comptime/0646-comptime-field-reflect-tuple-array.sx b/examples/comptime/0646-comptime-field-reflect-tuple-array.sx new file mode 100644 index 00000000..e0b63b9a --- /dev/null +++ b/examples/comptime/0646-comptime-field-reflect-tuple-array.sx @@ -0,0 +1,47 @@ +// Comptime field reflection (`field_count` / `field_name` / `field_type`) over +// ALL aggregate kinds — struct, enum, tuple (positional + named), array, vector. +// +// Regression (issue 0195): `field_count` / `field_name` were broken on tuples +// and arrays/vectors. `field_count` silently returned 0 (a missing `.tuple` arm +// in the count switches), and `field_name` SEGFAULTED — the LLVM backend built a +// zero-length name array for those kinds while sizing the GEP at the (sometimes +// non-zero) count, so `field_name(T, i)` indexed past a `[0 x string]` global. +// Fixed by driving BOTH the name-array build and the GEP sizing from the one +// source of truth (`memberCount`/`memberName`), so they can never diverge again. +// A member with no name (positional-tuple / array / vector element) reflects as +// the empty string "" — one slot per member, always in-bounds. +#import "modules/std.sx"; + +S :: struct { a: i64; b: bool; } +E :: enum { X; Y; Z; } + +main :: () -> i32 { + // struct: named fields + print("struct: fc={} fn=({},{}) ft0={}\n", + field_count(S), field_name(S, 0), field_name(S, 1), type_name(field_type(S, 0))); + + // enum: variant names + print("enum: fc={} fn=({},{},{})\n", + field_count(E), field_name(E, 0), field_name(E, 1), field_name(E, 2)); + + // positional tuple: element types, no names → "" + print("postuple: fc={} ft=({},{}) fn0=[{}]\n", + field_count(Tuple(i64, bool)), + type_name(field_type(Tuple(i64, bool), 0)), type_name(field_type(Tuple(i64, bool), 1)), + field_name(Tuple(i64, bool), 0)); + + // named tuple: element labels recovered + print("namtuple: fc={} fn=({},{})\n", + field_count(Tuple(a: i64, b: bool)), + field_name(Tuple(a: i64, b: bool), 0), field_name(Tuple(a: i64, b: bool), 1)); + + // array: length-many elements, type known, no names → "" + print("array: fc={} ft0={} fn0=[{}]\n", + field_count([4]i64), type_name(field_type([4]i64, 0)), field_name([4]i64, 0)); + + // vector: same shape as array + print("vector: fc={} ft0={} fn0=[{}]\n", + field_count(Vector(4, f32)), type_name(field_type(Vector(4, f32), 0)), field_name(Vector(4, f32), 0)); + + return 0; +} diff --git a/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.exit b/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.exit @@ -0,0 +1 @@ +0 diff --git a/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stderr b/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stderr new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stderr @@ -0,0 +1 @@ + diff --git a/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stdout b/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stdout new file mode 100644 index 00000000..64af1fff --- /dev/null +++ b/examples/comptime/expected/0646-comptime-field-reflect-tuple-array.stdout @@ -0,0 +1,6 @@ +struct: fc=2 fn=(a,b) ft0=i64 +enum: fc=3 fn=(X,Y,Z) +postuple: fc=2 ft=(i64,bool) fn0=[] +namtuple: fc=2 fn=(a,b) +array: fc=4 ft0=i64 fn0=[] +vector: fc=4 ft0=f32 fn0=[] diff --git a/issues/0195-tuple-field-reflection-broken.md b/issues/0195-tuple-field-reflection-broken.md new file mode 100644 index 00000000..9d67585d --- /dev/null +++ b/issues/0195-tuple-field-reflection-broken.md @@ -0,0 +1,143 @@ +# Issue 0195 — `field_count` / `field_name` broken on tuple types (silent 0 + segfault) + +> **RESOLVED.** Fixed across both the lowering count switches and the LLVM backend. Root cause as +> diagnosed below: `field_count`/`memberCount` had no `.tuple` arm (silent 0), and the backend's +> `field_name_get` emission built a **zero-length** name array for any non-struct/enum kind while +> sizing its GEP at the real count → out-of-bounds GEP → segfault. +> +> **Scope broadened during adversarial review:** the same defect was live for `.array` / `.vector` +> (`field_count([4]i64)` returned 4 but `field_name([4]i64, 0)` segfaulted — an exact clone). Rather +> than patch each kind in each of the (then three) parallel count switches, the backend was unified to +> derive BOTH the name-array build (`getOrBuildFieldNameArray`, `src/backend/llvm/reflection.zig`) AND +> the GEP sizing (`emitFieldNameGet`, `src/backend/llvm/ops.zig`) from the **single source of truth** +> `TypeTable.memberCount` / `memberName` — so the array length and the GEP type can never disagree +> again, for any kind. Members with no name (positional-tuple / array / vector elements) reflect as +> `""` (one slot per member, always in-bounds); named-tuple elements recover their labels. +> +> **Fix sites:** `src/ir/lower/call.zig` (`field_count` `.tuple` arm) · `src/ir/types.zig` +> (`memberCount` `.tuple` arm) · `src/backend/llvm/reflection.zig` + `src/backend/llvm/ops.zig` +> (unified to `memberCount`/`memberName`). The COMPILER-API VM readers (`type_field_count` / +> `type_field_name`, `src/ir/comptime_vm.zig`) ride the same `memberCount`/`memberName` and now report +> tuples correctly (positional `type_field_name` still fails loud via `failMsg`, not a crash). +> Delivered via the worker-fix override; adversarially reviewed (the review surfaced the array/vector +> clone). Regression: `examples/comptime/0646-comptime-field-reflect-tuple-array.sx` (struct / enum / +> positional + named tuple / array / vector). Full suite green. +> +> Original writeup below. + +--- + +Status: **(historical — see RESOLVED banner).** Hit while building `race` (the A1 async deliverable), +whose comptime tuple→tagged-union synthesis must reflect the input named tuple's labels + element +types. The reflection builtins are inconsistent across tuple types: `field_type` works, but +`field_count` silently returns 0 and `field_name` segfaults. + +## Symptom + +The comptime reflection `#builtin`s (`field_count` / `field_name` / `field_type`, declared in +`library/modules/std/core.sx`) behave correctly on structs/enums but are broken on **tuple** types — +even though `field_type`'s own doc (meta.sx) says it returns "the i-th field / variant-payload / +**element** type", i.e. tuples are meant to be covered: + +| builtin | on `struct {a:i64; b:bool}` | on `Tuple(i64, bool)` | expected on tuple | +|---|---|---|---| +| `field_count(T)` | `2` ✓ | **`0`** ✗ (silent wrong default) | `2` | +| `field_type(T, i)` | `i64`/`bool` ✓ | `i64`/`bool` ✓ | (correct) | +| `field_name(T, i)` | `a`/`b` ✓ | **SEGFAULT** ✗ | the label for a named tuple (`a`/`b`), or empty/`null` for a positional tuple | + +`field_count` returning 0 is the classic forbidden silent-default (CLAUDE.md "Silent unimplemented +arms"): callers that trust the count then index out of range — which is almost certainly why +`field_name(tuple, 0)` segfaults (it is reached with a count the caller believes is 0, or the +field-name path itself lacks the tuple case). + +## Root cause (located) + +`field_type` works because `TypeTable.memberType` **has** a `.tuple` arm +(`src/ir/types.zig:585` — `.tuple => |t| if (i < t.fields.len) t.fields[i] else null`). + +`field_count` is broken because its lowering has a hardcoded switch with **`else => 0`** and **no +`.tuple` arm**: + +```zig +// src/ir/lower/call.zig:2187-2200 (field_count(T) → const_int(N)) +const count: i64 = switch (info) { + .@"struct" => |s| @intCast(s.fields.len), + .@"union" => |u| @intCast(u.fields.len), + .tagged_union => |u| @intCast(u.fields.len), + .@"enum" => |e| @intCast(e.variants.len), + .array => |a| @intCast(a.length), + .vector => |v| @intCast(v.length), + else => 0, // ← tuple falls here → silently 0 +}; +``` + +`TypeTable.memberCount` (`src/ir/types.zig:525-536`) has the **same gap** — it lists struct / union / +tagged_union / enum / array / vector then `else => null`, with no `.tuple` arm. (`memberCount` backs +the `abi(.compiler)` `type_field_count` VM reader, so the COMPILER-API reflection path is silently +wrong on tuples too, not just the `#builtin`.) + +`field_name` lowering is at `src/ir/lower/call.zig:2299` (emits a `field_name_get` instruction). +`memberName` (`src/ir/types.zig:561-569`) DOES have a `.tuple` arm (returns `t.names[i]` if the tuple +is named, else null), so the segfault is in the `field_name_get` runtime/emit path for tuples (or a +downstream consequence of the bogus count) — to be confirmed during the fix. + +## Reproduction + +```sx +#import "modules/std.sx"; + +main :: () -> i32 { + // tuple field_count silently returns 0 (should be 2): + print("tuple field_count = {}\n", field_count(Tuple(i64, bool))); // prints 0 + + // tuple field_type works fine: + print("tuple field_type 0 = {}\n", type_name(field_type(Tuple(i64, bool), 0))); // i64 + + // tuple field_name SEGFAULTS (comment the line above out is not needed; this alone crashes): + print("tuple field_name 0 = {}\n", field_name(Tuple(a: i64, b: bool), 0)); // SIGSEGV + + return 0; +} +``` + +Baseline (works): the same three builtins on `S :: struct { a: i64; b: bool; }` print `2`, `a`/`b`, +`i64`/`bool` correctly. `type_info(Tuple(...))` also already reflects a tuple correctly (see +`examples/comptime/0623-comptime-metatype-tuple.sx`), so the type table fully knows the tuple's +elements — only `field_count` / `field_name` drop the tuple case. + +## Investigation prompt (ready to paste) + +> The comptime reflection builtins `field_count` / `field_name` are broken on tuple types: +> `field_count(Tuple(i64, bool))` returns 0 instead of 2, and `field_name(Tuple(a: i64, b: bool), 0)` +> segfaults. `field_type` already works on tuples. Make `field_count` / `field_name` cover tuples the +> same way `field_type` does, including named-tuple labels. +> +> Fixes, in order: +> 1. **`src/ir/lower/call.zig:2187-2200`** (`field_count` lowering): the `switch` over the type info +> has `else => 0` and no `.tuple` arm. Add `.tuple => |t| @intCast(t.fields.len)`. (Replacing the +> `else => 0` silent default with a loud `else => @panic`/diagnostic for genuinely-unsupported kinds +> would also surface the next such gap, per CLAUDE.md's anti-silent-default rule.) +> 2. **`src/ir/types.zig:525-536`** (`TypeTable.memberCount`): same missing `.tuple` arm — add +> `.tuple => |t| @intCast(t.fields.len)`. This backs `type_field_count` (the COMPILER-API VM +> reader), so it is silently 0 on tuples too. Verify with a comptime `type_field_count` probe. +> 3. **`field_name` segfault**: `src/ir/lower/call.zig:2299` emits a `field_name_get` instruction; +> `TypeTable.memberName` (`src/ir/types.zig:561-569`) already handles tuples (named → label, else +> null). Find where the `field_name_get` path for a tuple faults — likely it indexes assuming a +> struct/enum layout, or is reached with the bogus 0 count from bug (1). A positional tuple has no +> names → decide the contract (return empty string `""`? a diagnostic?) and make it not crash. +> A named tuple must return the label (`a`/`b`). +> +> Verification: the reproduction above prints `tuple field_count = 2`, `tuple field_type 0 = i64`, +> `tuple field_name 0 = a` (named) — no segfault. Add a regression example under +> `examples/comptime/` exercising `field_count` / `field_name` / `field_type` on both a positional and +> a named tuple. Then the blocked `race` synthesis (reflect a named tuple of task handles → mint a +> tagged-union with the tuple's labels as variant names) can proceed. + +## Why this blocks `race` + +`race((a: fa, b: fb))` must, at comptime, read the input named tuple's field **labels** (`a`, `b`) to +name the synthesized `RaceResult` union's variants, and each element **type** to set the variant +payloads. `field_type` already gives the types, but without a working `field_count` (how many arms) +and `field_name` (the labels) the named-tuple synthesis cannot be written. The type-construction side +(`declare`/`define`/`make_enum`) and struct/enum reflection are all proven working +(`examples/comptime/0619-0623`); tuple field reflection is the one missing piece. diff --git a/src/backend/llvm/ops.zig b/src/backend/llvm/ops.zig index 243dd9e4..0a6ea658 100644 --- a/src/backend/llvm/ops.zig +++ b/src/backend/llvm/ops.zig @@ -2477,15 +2477,12 @@ pub const Ops = struct { const global = self.e.reflection().getOrBuildFieldNameArray(fr.struct_type); const idx = self.e.resolveRef(fr.index); const string_ty = self.e.getStringStructType(); - // Get struct field count for array type - const field_info = self.e.ir_mod.types.get(fr.struct_type); - const field_count: u32 = switch (field_info) { - .@"struct" => |s| @intCast(s.fields.len), - .@"union" => |u| @intCast(u.fields.len), - .tagged_union => |u| @intCast(u.fields.len), - .@"enum" => |e| @intCast(e.variants.len), - else => 0, - }; + // Size the GEP's array type from the SAME single source of truth + // (`memberCount`) that `getOrBuildFieldNameArray` uses to build the name + // array, so the two can never disagree (a mismatch was issue 0195: the + // array was built zero-length for tuples/arrays while this count said N → + // an out-of-bounds GEP → segfault). + const field_count: u32 = @intCast(self.e.ir_mod.types.memberCount(fr.struct_type) orelse 0); const array_ty = c.LLVMArrayType(string_ty, field_count); const zero = c.LLVMConstInt(self.e.cached_i64, 0, 0); var indices = [2]c.LLVMValueRef{ zero, idx }; diff --git a/src/backend/llvm/reflection.zig b/src/backend/llvm/reflection.zig index cb08d21c..560b204c 100644 --- a/src/backend/llvm/reflection.zig +++ b/src/backend/llvm/reflection.zig @@ -95,25 +95,22 @@ pub const Reflection = struct { pub fn getOrBuildFieldNameArray(self: Reflection, struct_type: TypeId) c.LLVMValueRef { if (self.e.field_name_arrays.get(struct_type.index())) |g| return g; - const info = self.e.ir_mod.types.get(struct_type); - - // Collect name StringIds from struct fields, union fields, or enum variants + // Collect one name StringId per member, driven by the SINGLE source of + // truth `memberCount`/`memberName` (types.zig) — NOT a per-kind switch + // here. This guarantees the array length always matches `emitFieldNameGet`'s + // GEP sizing (which also derives from `memberCount`), so a kind covered by + // one but not the other can never reappear (that mismatch was issue 0195: + // tuples/arrays counted N members but built a zero-length name array → an + // out-of-bounds GEP → segfault). A member with no name (positional tuple + // element, array/vector element) yields `.empty` → "", keeping one slot + // per member so `field_name(T, i)` is always in-bounds. + const n_members: i64 = self.e.ir_mod.types.memberCount(struct_type) orelse 0; var name_ids = std.ArrayList(StringId).empty; defer name_ids.deinit(self.e.alloc); - switch (info) { - .@"struct" => |s| { - for (s.fields) |f| name_ids.append(self.e.alloc, f.name) catch unreachable; - }, - .@"union" => |u| { - for (u.fields) |f| name_ids.append(self.e.alloc, f.name) catch unreachable; - }, - .tagged_union => |u| { - for (u.fields) |f| name_ids.append(self.e.alloc, f.name) catch unreachable; - }, - .@"enum" => |e| { - for (e.variants) |v| name_ids.append(self.e.alloc, v) catch unreachable; - }, - else => {}, + var mi: i64 = 0; + while (mi < n_members) : (mi += 1) { + const nid: StringId = self.e.ir_mod.types.memberName(struct_type, mi) orelse .empty; + name_ids.append(self.e.alloc, nid) catch unreachable; } const string_ty = self.e.getStringStructType(); diff --git a/src/ir/lower/call.zig b/src/ir/lower/call.zig index 69ef0c7c..4eba58f9 100644 --- a/src/ir/lower/call.zig +++ b/src/ir/lower/call.zig @@ -2193,6 +2193,7 @@ pub fn tryLowerReflectionCall(self: *Lowering, name: []const u8, c: *const ast.C .@"union" => |u| @intCast(u.fields.len), .tagged_union => |u| @intCast(u.fields.len), .@"enum" => |e| @intCast(e.variants.len), + .tuple => |t| @intCast(t.fields.len), .array => |a| @intCast(a.length), .vector => |v| @intCast(v.length), else => 0, diff --git a/src/ir/types.zig b/src/ir/types.zig index d85f319e..8a66dba2 100644 --- a/src/ir/types.zig +++ b/src/ir/types.zig @@ -529,6 +529,7 @@ pub const TypeTable = struct { .@"union" => |u| @intCast(u.fields.len), .tagged_union => |u| @intCast(u.fields.len), .@"enum" => |e| @intCast(e.variants.len), + .tuple => |t| @intCast(t.fields.len), .array => |a| @intCast(a.length), .vector => |v| @intCast(v.length), else => null,