From 2f0905b40723a1bc5e5730f5ed1c0373d4c90171 Mon Sep 17 00:00:00 2001 From: agra Date: Tue, 16 Jun 2026 22:24:31 +0300 Subject: [PATCH] fix(0139): reject by-value self-referential types loudly (was a segfault) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A nominal aggregate that contains itself (or a mutual peer) BY VALUE has no finite layout and infinite-recursed typeSizeBytes into a stack overflow — for SOURCE enums/structs as well as comptime-constructed types. New `checkInfiniteSize` pass (lower/decl.zig, Pass 1g — after type registration, before body lowering): walks the by-VALUE containment graph (pointer/slice/optional payloads break the cycle, so `*Self` stays valid); on a back-edge it emits a loud diagnostic — "type 'X' is infinitely sized (it contains itself by value); use a pointer ('*X') to break the cycle" — and poisons the offending field to `.unresolved` so sizing can't recurse before the build halts on the error. Covers source + declare/define types, direct + mutual recursion. examples/1178 locks the diagnostic; issue 0139 marked RESOLVED. This also completes METATYPE PLAN F5's by-value-self-reference rejection. Full suite green (675). --- ...iagnostics-infinite-size-self-reference.sx | 16 +++ ...gnostics-infinite-size-self-reference.exit | 1 + ...ostics-infinite-size-self-reference.stderr | 1 + ...ostics-infinite-size-self-reference.stdout | 1 + .../0139-byvalue-self-reference-segfault.md | 12 ++ src/ir/lower.zig | 4 + src/ir/lower/decl.zig | 103 ++++++++++++++++++ 7 files changed, 138 insertions(+) create mode 100644 examples/1178-diagnostics-infinite-size-self-reference.sx create mode 100644 examples/expected/1178-diagnostics-infinite-size-self-reference.exit create mode 100644 examples/expected/1178-diagnostics-infinite-size-self-reference.stderr create mode 100644 examples/expected/1178-diagnostics-infinite-size-self-reference.stdout diff --git a/examples/1178-diagnostics-infinite-size-self-reference.sx b/examples/1178-diagnostics-infinite-size-self-reference.sx new file mode 100644 index 00000000..8b98e2e6 --- /dev/null +++ b/examples/1178-diagnostics-infinite-size-self-reference.sx @@ -0,0 +1,16 @@ +// Diagnostic: a type that contains ITSELF by value has no finite size and must +// be rejected loudly (not infinite-loop the size computation into a crash). A +// pointer payload (`*Tree`) would break the cycle and is the fix the message +// suggests. Covers both source decls and comptime-constructed types — this is +// the source form (regression for issue 0139). +#import "modules/std.sx"; + +Tree :: enum { + node: Tree; // by-VALUE self-reference → infinitely sized + leaf; +} + +main :: () -> i32 { + t : Tree = .leaf; + return 0; +} diff --git a/examples/expected/1178-diagnostics-infinite-size-self-reference.exit b/examples/expected/1178-diagnostics-infinite-size-self-reference.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/expected/1178-diagnostics-infinite-size-self-reference.exit @@ -0,0 +1 @@ +1 diff --git a/examples/expected/1178-diagnostics-infinite-size-self-reference.stderr b/examples/expected/1178-diagnostics-infinite-size-self-reference.stderr new file mode 100644 index 00000000..ceba6432 --- /dev/null +++ b/examples/expected/1178-diagnostics-infinite-size-self-reference.stderr @@ -0,0 +1 @@ +error: type 'Tree' is infinitely sized (it contains itself by value); use a pointer ('*Tree') to break the cycle diff --git a/examples/expected/1178-diagnostics-infinite-size-self-reference.stdout b/examples/expected/1178-diagnostics-infinite-size-self-reference.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/expected/1178-diagnostics-infinite-size-self-reference.stdout @@ -0,0 +1 @@ + diff --git a/issues/0139-byvalue-self-reference-segfault.md b/issues/0139-byvalue-self-reference-segfault.md index 17fd0b10..080d0a72 100644 --- a/issues/0139-byvalue-self-reference-segfault.md +++ b/issues/0139-byvalue-self-reference-segfault.md @@ -1,5 +1,17 @@ # 0139 — by-value self-referential type segfaults (`typeSizeBytes` infinite recursion) +> **RESOLVED.** Root cause: `typeSizeBytes` (and the layout path) recursed into +> each by-value aggregate field with no cycle guard, so a by-value self/mutual +> reference looped to a stack overflow. Fix: a new `checkInfiniteSize` pass +> (`src/ir/lower/decl.zig`, Pass 1g — after type registration, before body +> lowering) walks the by-VALUE containment graph; on a back-edge it emits a loud +> diagnostic (`type 'X' is infinitely sized (it contains itself by value); use a +> pointer ('*X') to break the cycle`) and poisons the offending field to +> `.unresolved`, breaking the recursion before any sizing runs. A pointer / slice +> / optional payload breaks the cycle, so `*Self` recursion stays valid. Covers +> both source decls and comptime-constructed (`declare`/`define`) types. +> Regression test: `examples/1178-diagnostics-infinite-size-self-reference.sx`. + **Symptom** — a type whose field/variant payload is ITSELF *by value* (not behind a pointer) crashes the compiler with a stack-overflow segfault instead of a loud "infinite size" diagnostic. Observed: `Segmentation fault` inside diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 8575f63f..821686b9 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -1642,6 +1642,10 @@ pub const Lowering = struct { pub const ensureTerminator = lower_control_flow.ensureTerminator; // --- moved to lower/decl.zig (lower_decl) --- + pub const checkInfiniteSize = lower_decl.checkInfiniteSize; + pub const dfsByValueCycle = lower_decl.dfsByValueCycle; + pub const poisonAggregateField = lower_decl.poisonAggregateField; + pub const diagInfiniteSize = lower_decl.diagInfiniteSize; pub const SelectedFunc = lower_decl.SelectedFunc; pub const BareCallee = lower_decl.BareCallee; pub const VisibleStructAuthor = lower_decl.VisibleStructAuthor; diff --git a/src/ir/lower/decl.zig b/src/ir/lower/decl.zig index e9b345d5..3a4198d2 100644 --- a/src/ir/lower/decl.zig +++ b/src/ir/lower/decl.zig @@ -38,6 +38,103 @@ const topLevelTypeDecl = Lowering.topLevelTypeDecl; const isFloat = Lowering.isFloat; const isPackFn = Lowering.isPackFn; +/// Reject infinitely-sized types: a nominal aggregate (struct / enum-with-payload +/// / union) that contains ITSELF — or a mutual peer — BY VALUE has no finite +/// layout, and would otherwise infinite-recurse `typeSizeBytes` into a stack +/// overflow. Walk the by-VALUE containment graph (a pointer / slice / optional +/// payload is finite-size and breaks the cycle, so `*Self` recursion is fine); +/// on a back-edge, emit a loud diagnostic and POISON the offending field to +/// `.unresolved`, breaking the cycle so later sizing can't crash before the +/// build halts on the error. Covers both source decls and comptime-constructed +/// (`declare`/`define`) types. +pub fn checkInfiniteSize(self: *Lowering) void { + const n = self.module.types.infos.items.len; + if (n == 0) return; + const color = self.alloc.alloc(u8, n) catch return; // 0=white 1=gray 2=black + defer self.alloc.free(color); + @memset(color, 0); + var i: usize = 0; + while (i < n) : (i += 1) { + if (color[i] == 0) self.dfsByValueCycle(TypeId.fromIndex(@intCast(i)), color); + } +} + +pub fn dfsByValueCycle(self: *Lowering, tid: TypeId, color: []u8) void { + const idx = tid.index(); + if (idx >= color.len) return; + color[idx] = 1; // gray (on the current containment path) + if (byValueAggregateFields(&self.module.types, tid)) |fields| { + for (fields, 0..) |f, k| { + if (!isByValueAggregate(&self.module.types, f.ty)) continue; // pointer/slice/etc. break the cycle + const fidx = f.ty.index(); + if (fidx >= color.len) continue; + if (color[fidx] == 1) { + // Back-edge: `f.ty` is on the current path → infinitely sized. + self.diagInfiniteSize(f.ty); + self.poisonAggregateField(tid, k); + } else if (color[fidx] == 0) { + self.dfsByValueCycle(f.ty, color); + } + } + } + color[idx] = 2; // black (fully explored) +} + +/// The by-value fields of a nominal aggregate, or null for any other type. +fn byValueAggregateFields(table: *const types.TypeTable, tid: TypeId) ?[]const types.TypeInfo.StructInfo.Field { + if (tid.isBuiltin()) return null; + return switch (table.get(tid)) { + .@"struct" => |s| s.fields, + .tagged_union => |u| u.fields, + .@"union" => |u| u.fields, + else => null, + }; +} + +/// True iff a field of type `ty` contributes its FULL size by value (a nominal +/// aggregate), so a cycle through it is infinite. Pointers / slices / optionals / +/// functions are finite-size and break the cycle. +fn isByValueAggregate(table: *const types.TypeTable, ty: TypeId) bool { + if (ty.isBuiltin()) return false; + return switch (table.get(ty)) { + .@"struct", .tagged_union, .@"union" => true, + else => false, + }; +} + +/// Break a by-value cycle: replace field `k` of nominal `tid` with `.unresolved`. +/// The name + nominal id are untouched, so the intern key is stable +/// (`updatePreservingKey`). The diagnostic is the user-facing signal; this just +/// stops `typeSizeBytes` recursing before the build halts on the error. +pub fn poisonAggregateField(self: *Lowering, tid: TypeId, k: usize) void { + const table = &self.module.types; + const info = table.get(tid); + var new_info = info; + const src_fields = switch (info) { + .@"struct" => |s| s.fields, + .tagged_union => |u| u.fields, + .@"union" => |u| u.fields, + else => return, + }; + const nf = self.alloc.dupe(types.TypeInfo.StructInfo.Field, src_fields) catch return; + if (k >= nf.len) return; + nf[k].ty = .unresolved; + switch (new_info) { + .@"struct" => |*s| s.fields = nf, + .tagged_union => |*u| u.fields = nf, + .@"union" => |*u| u.fields = nf, + else => return, + } + table.updatePreservingKey(tid, new_info); +} + +pub fn diagInfiniteSize(self: *Lowering, ty: TypeId) void { + if (self.diagnostics) |d| { + const nm = self.module.types.typeName(ty); + d.addFmt(.err, null, "type '{s}' is infinitely sized (it contains itself by value); use a pointer ('*{s}') to break the cycle", .{ nm, nm }); + } +} + /// Names that must keep external LLVM linkage because the OS loader (not /// sx code) is the caller. Without this they'd default to internal and /// either DCE away or stay hidden from the dynamic symbol table. @@ -116,6 +213,12 @@ pub fn lowerRoot(self: *Lowering, root: *const Node) void { }; checker.run(decls); } + // Pass 1g: reject infinitely-sized types — a nominal aggregate that contains + // ITSELF (or a mutual peer) BY VALUE has no finite layout and would otherwise + // infinite-loop `typeSizeBytes` into a stack overflow during body lowering. + // Runs after every type is registered (source AND comptime-constructed) and + // before body lowering, which is the first consumer of type sizes. + self.checkInfiniteSize(); // Pass 2: lower main (and comptime side-effects) self.lowerMainAndComptime(decls); // Pass 3: lower deferred functions (any_to_string etc.) now that all types are registered