From b322dcfe615652190d512a20b607f0ccc1f905af Mon Sep 17 00:00:00 2001 From: agra Date: Sat, 27 Jun 2026 17:28:27 +0300 Subject: [PATCH] fix: type-safe stores + Any unbox/eq; finish multi-return deferrals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Type-checking gaps (segfault/corruption → compile errors): - 0197: reject a store into an annotated slot whose value has no modeled coercion AND a different byte width (a 16-byte string into a 4-byte i32 overran the slot and segfaulted). New checkAssignable / noneReinterpretIsUnsafe (coerce.zig, width via the LLVM-accurate typeSizeBytes) wired into every store site: var/const-decl, single + multi assignment (identifier/field/index/ element/deref), named-return defaults. Same-width reinterpretations (*T→[*]T, i64→isize, fn-ref) and explicit xx/cast stay allowed; cascades suppressed via externalErrorsExist. Examples 1205, 1206. - 0198: an implicit `Any → T` unbox is now a compile error (it blindly reinterpreted the boxed payload — silent garbage for a wrong scalar, a segfault for an aggregate). xx and compiler-generated match/pack unboxes are unaffected. Example 1207. - 0199: `Any == <concrete>` (one operand Any) aborted the LLVM verifier — the comparison arm now fires when either operand is Any, boxing the concrete side first. Example 0654. Multi-return deferrals (PLAN-MULTIRET #6 + named-order + D3 + generic): - Reorder named return elements by name instead of requiring slot order; error on unknown/duplicate/missing (value-only AND full-failable-tuple forms). Examples 0210, 0214. - Reject a bare-paren (A, B) multi-return signature in generic-arg position (return-position-only). Example 0215. - Multi-return closure types / lambda literals work via the reused tuple machinery (destructure, single-bind+field, lambda arg). Example 0216. 
- Generic multi-return: positional works (0217); 0200: the named-slot implicit-return form now works for generic free fns + struct methods — monomorphizeFunction now calls bindNamedReturnSlots. Example 0218. readme.md documents the annotated-store coercion rule; CHECKPOINT-MULTIRET.md updated. Full corpus green (850/0). --- current/CHECKPOINT-MULTIRET.md | 71 +++++++- .../comptime/0654-comptime-any-eq-concrete.sx | 27 +++ .../0654-comptime-any-eq-concrete.exit | 1 + .../0654-comptime-any-eq-concrete.stderr | 1 + .../0654-comptime-any-eq-concrete.stdout | 7 + ...iagnostics-annotated-init-type-mismatch.sx | 27 +++ .../1206-diagnostics-store-width-mismatch.sx | 34 ++++ ...diagnostics-any-implicit-unbox-rejected.sx | 22 +++ ...gnostics-annotated-init-type-mismatch.exit | 1 + ...ostics-annotated-init-type-mismatch.stderr | 17 ++ ...ostics-annotated-init-type-mismatch.stdout | 1 + ...1206-diagnostics-store-width-mismatch.exit | 1 + ...06-diagnostics-store-width-mismatch.stderr | 23 +++ ...06-diagnostics-store-width-mismatch.stdout | 1 + ...agnostics-any-implicit-unbox-rejected.exit | 1 + ...nostics-any-implicit-unbox-rejected.stderr | 11 ++ ...nostics-any-implicit-unbox-rejected.stdout | 1 + .../0210-types-multi-return-name-order.sx | 21 ++- .../0214-types-multi-return-name-invalid.sx | 17 ++ .../0215-types-multi-return-as-generic-arg.sx | 11 ++ .../types/0216-types-multi-return-closure.sx | 26 +++ .../types/0217-types-multi-return-generic.sx | 24 +++ .../0218-types-multi-return-generic-named.sx | 40 +++++ .../0210-types-multi-return-name-order.exit | 2 +- .../0210-types-multi-return-name-order.stderr | 10 -- .../0210-types-multi-return-name-order.stdout | 3 +- .../0214-types-multi-return-name-invalid.exit | 1 + ...214-types-multi-return-name-invalid.stderr | 11 ++ ...214-types-multi-return-name-invalid.stdout | 1 + ...215-types-multi-return-as-generic-arg.exit | 1 + ...5-types-multi-return-as-generic-arg.stderr | 5 + ...5-types-multi-return-as-generic-arg.stdout | 1 + 
.../0216-types-multi-return-closure.exit | 1 + .../0216-types-multi-return-closure.stderr | 1 + .../0216-types-multi-return-closure.stdout | 3 + .../0217-types-multi-return-generic.exit | 1 + .../0217-types-multi-return-generic.stderr | 1 + .../0217-types-multi-return-generic.stdout | 3 + ...0218-types-multi-return-generic-named.exit | 1 + ...18-types-multi-return-generic-named.stderr | 1 + ...18-types-multi-return-generic-named.stdout | 4 + ...tated-assignment-type-mismatch-no-check.md | 27 +++ issues/0198-unbox-any-no-tag-check.md | 98 +++++++++++ .../0199-any-eq-concrete-llvm-verify-fail.md | 61 +++++++ ...ed-generic-multi-return-implicit-return.md | 78 +++++++++ readme.md | 8 + src/ir/lower.zig | 10 ++ src/ir/lower/coerce.zig | 117 ++++++++++++- src/ir/lower/expr.zig | 30 ++-- src/ir/lower/generic.zig | 28 +++ src/ir/lower/stmt.zig | 162 ++++++++++++++++-- 51 files changed, 1000 insertions(+), 56 deletions(-) create mode 100644 examples/comptime/0654-comptime-any-eq-concrete.sx create mode 100644 examples/comptime/expected/0654-comptime-any-eq-concrete.exit create mode 100644 examples/comptime/expected/0654-comptime-any-eq-concrete.stderr create mode 100644 examples/comptime/expected/0654-comptime-any-eq-concrete.stdout create mode 100644 examples/diagnostics/1205-diagnostics-annotated-init-type-mismatch.sx create mode 100644 examples/diagnostics/1206-diagnostics-store-width-mismatch.sx create mode 100644 examples/diagnostics/1207-diagnostics-any-implicit-unbox-rejected.sx create mode 100644 examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.exit create mode 100644 examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stderr create mode 100644 examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stdout create mode 100644 examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.exit create mode 100644 examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stderr create 
mode 100644 examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stdout create mode 100644 examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.exit create mode 100644 examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stderr create mode 100644 examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stdout create mode 100644 examples/types/0214-types-multi-return-name-invalid.sx create mode 100644 examples/types/0215-types-multi-return-as-generic-arg.sx create mode 100644 examples/types/0216-types-multi-return-closure.sx create mode 100644 examples/types/0217-types-multi-return-generic.sx create mode 100644 examples/types/0218-types-multi-return-generic-named.sx create mode 100644 examples/types/expected/0214-types-multi-return-name-invalid.exit create mode 100644 examples/types/expected/0214-types-multi-return-name-invalid.stderr create mode 100644 examples/types/expected/0214-types-multi-return-name-invalid.stdout create mode 100644 examples/types/expected/0215-types-multi-return-as-generic-arg.exit create mode 100644 examples/types/expected/0215-types-multi-return-as-generic-arg.stderr create mode 100644 examples/types/expected/0215-types-multi-return-as-generic-arg.stdout create mode 100644 examples/types/expected/0216-types-multi-return-closure.exit create mode 100644 examples/types/expected/0216-types-multi-return-closure.stderr create mode 100644 examples/types/expected/0216-types-multi-return-closure.stdout create mode 100644 examples/types/expected/0217-types-multi-return-generic.exit create mode 100644 examples/types/expected/0217-types-multi-return-generic.stderr create mode 100644 examples/types/expected/0217-types-multi-return-generic.stdout create mode 100644 examples/types/expected/0218-types-multi-return-generic-named.exit create mode 100644 examples/types/expected/0218-types-multi-return-generic-named.stderr create mode 100644 
examples/types/expected/0218-types-multi-return-generic-named.stdout create mode 100644 issues/0198-unbox-any-no-tag-check.md create mode 100644 issues/0199-any-eq-concrete-llvm-verify-fail.md create mode 100644 issues/0200-named-generic-multi-return-implicit-return.md diff --git a/current/CHECKPOINT-MULTIRET.md b/current/CHECKPOINT-MULTIRET.md index 5212bb8f..b9d411de 100644 --- a/current/CHECKPOINT-MULTIRET.md +++ b/current/CHECKPOINT-MULTIRET.md @@ -76,20 +76,73 @@ An adversarial review found 8 issues; fixed the soundness + silent-wrong ones: bodies count; defer correctly does NOT, as it runs after the implicit return). ## Known limitations / next -- **#6 (design gap, NOT UB)**: a `ReturnTypeExpr` is still silently accepted in - struct-field / var-annotation / generic-arg / closure-RETURN positions (resolves - to a coherent tuple). Only the PARAM position is rejected. Rejecting the rest - needs checks at several value-resolution sites; deferred (no soundness impact). -- **Reordering named return elements by name** (vs requiring slot order) — future. -- **PRE-EXISTING**: annotated-assignment type mismatch (`x: i32 = "hi"`) segfaults - — a general type-checking gap surfaced by the review; may warrant an issue. -- Multi-return CLOSURE-TYPE values / lambda literals deferred (D3). +- ~~**#6 (design gap)**: `ReturnTypeExpr` silently accepted in non-return positions~~ + — **DONE** (2026-06-27): generic-type-arg position now rejected + (`rejectMultiReturnValueType` at both `instantiateGenericStruct` arg-resolution + sites, generic.zig). Param / field / variable already rejected. Type-alias + `T :: (A,B)` is value-parsed → already rejected. Closure-RETURN `(A,B)` is a + legitimate return position → see D3 below (works as a multi-return closure). + Lock: 0215 (negative generic-arg). 
+- ~~**Reordering named return elements by name** (vs requiring slot order)~~ — + **DONE** (2026-06-27): `reorderNamedReturn` (stmt.zig) permutes a fully-named + multi-return list to slot order by name (value-only AND full-failable-tuple + forms); errors on unknown / duplicate / missing-slot names; positional & mixed + lists pass through unchanged. `validateMultiReturn`'s old slot-order check was + removed. Adversarial review caught a silent mis-permute in the full-failable- + tuple named form (now reordered/validated, not positionally dropped). Lock: + 0210 (positive reorder, incl. failable) + 0214 (negative: unknown / duplicate). +- ~~**PRE-EXISTING**: annotated-assignment type mismatch (`x: i32 = "hi"`) segfaults~~ + — **RESOLVED** as issue 0197 (2026-06-27): width-mismatch guard + (`checkAssignable` / `noneReinterpretIsUnsafe`, coerce.zig) at every + annotated-slot store site; the named-return-default guard now shares it. Locked + by `examples/diagnostics/1205` + `1206`. +- ~~Multi-return CLOSURE-TYPE values / lambda literals deferred (D3).~~ — + **RESOLVED** (2026-06-27): they ALREADY WORK via the reused tuple machinery. A + `Closure() -> (A, B)` value's call result destructures (`a, b := cb()`), + single-binds + field-accesses (`c := cb(); c.0`), and a `() => { return v1, v2; }` + lambda literal satisfies a multi-return closure param — verified identical to + the function-decl surface. NO `ClosureInfo.multi_return` marker needed (the + destructure-only rule was reversed, so there's nothing extra to enforce). Lock: + 0216. +- **Generic multi-return (Task 2d): DONE.** POSITIONAL works — `(a: $T, b: $U) -> (T, U)` + (inferred) and `($T: Type, …) -> (T, U)` (explicit); lock 0217. NAMED-slot + implicit-return form now works too (issue **0200 RESOLVED** — + `monomorphizeFunction` now calls `bindNamedReturnSlots`; covers free fns + + generic struct methods, defaults, failable); lock 0218. 
- Docs: readme.md / specs.md not yet updated for multi-return (docs-track rule). ## Known issues -- (none yet) +- ~~**issue 0198**: implicit `Any → T` unbox unchecked (segfault / silent garbage)~~ + — **RESOLVED** (2026-06-27): implicit `Any → T` is now a compile error + (`coerceMode` `.unbox_any` arm, mode == .implicit); `xx` + match dispatch + unaffected. Locked by `examples/diagnostics/1207`. +- ~~**issue 0199**: `Any == <concrete>` aborts the LLVM verifier~~ — **RESOLVED** + (2026-06-27): the `Any`-shaped `==`/`!=` arm (expr.zig) now fires when EITHER + operand is `.any`, boxing the concrete side first. Lock 0654. +- ~~**issue 0200**: NAMED generic multi-return implicit-return "produces no value"~~ + — **RESOLVED** (2026-06-27): `monomorphizeFunction` now calls + `bindNamedReturnSlots` (it previously bound params but skipped named-return + slots). Covers generic free fns + struct methods, defaults, failable. Lock 0218. ## Log +- **2026-06-27 session** (handover: issue 0197 → finish multi-return → Io Phase 3): + - **issue 0197 RESOLVED** — width-mismatch guard at every annotated-slot store + site (var/const-decl, single + multi assignment for identifier/field/index/ + element/deref, named-return defaults). Examples 1205 + 1206. Adversarial review + caught & fixed: a bare-fn-ref false-positive (size-discriminator via + `typeSizeBytes`, not the wrong fn-ref typing) and an aggregate-overrun + false-negative (sx-padded `sizeOf` → LLVM-accurate `typeSizeBytes`); cascade + suppression via `externalErrorsExist` (guard tallies its own diagnostics). + - **issue 0198 RESOLVED** — implicit `Any → T` unbox is now a compile error + (reviewer-confirmed sound). Example 1207. **issue 0199 FILED** (Any==concrete + LLVM-verify abort, loud, open). + - **multi-return Task 2 DONE** (2a reorder 0210/0214; 2b reject in generic-arg + 0215; 2c D3 closures already work 0216; 2d positional generic works 0217 + + named-generic gap filed as 0200). Multi-return feature surface complete. 
+ - **REMAINING** (next session): **Task 3 Io-unification Phase 3** (the + capture-typing blocker below + true cancellation — needs fresh context + both + macOS & aarch64-linux validation per PLAN-IO-UNIFY.md). (0198/0199/0200 all + resolved this session; no open multi-return/type-check issues remain.) - Pivoted here from the Io-unification Phase 3 (true cancellation), which is PAUSED at its blocker: capturing a failable closure into a nested closure loses its failability (`worker() catch` → operand type 'unresolved'; repro diff --git a/examples/comptime/0654-comptime-any-eq-concrete.sx b/examples/comptime/0654-comptime-any-eq-concrete.sx new file mode 100644 index 00000000..4905555f --- /dev/null +++ b/examples/comptime/0654-comptime-any-eq-concrete.sx @@ -0,0 +1,27 @@ +// Comparing an `Any` against a concrete value (a MIXED `Any == <concrete>`, in +// either operand order) compares the boxed value words — the same value-identity +// the both-`Any` comparison uses. Boxing the concrete side first keeps the +// operands shape-compatible. +// +// Regression (issue 0199): a mixed `Any == <concrete>` fell through to a plain +// `icmp` on a 16-byte `{tag, value}` aggregate vs a scalar, aborting the LLVM +// verifier ("Both operands to ICmp are not of the same type"). The both-`Any` +// form already worked; this extends it to one-sided `Any` comparisons. 
+ +#import "modules/std.sx"; + +main :: () -> i64 { + x : Any = 5; + print("{}\n", x == 5); // true + print("{}\n", x == 6); // false + print("{}\n", x != 6); // true + print("{}\n", 5 == x); // true (concrete on the left) + + b : Any = true; + print("{}\n", b == true); // true + print("{}\n", b == false); // false + + y : Any = 5; + print("{}\n", x == y); // true (both Any — unchanged) + return 0; +} diff --git a/examples/comptime/expected/0654-comptime-any-eq-concrete.exit b/examples/comptime/expected/0654-comptime-any-eq-concrete.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/comptime/expected/0654-comptime-any-eq-concrete.exit @@ -0,0 +1 @@ +0 diff --git a/examples/comptime/expected/0654-comptime-any-eq-concrete.stderr b/examples/comptime/expected/0654-comptime-any-eq-concrete.stderr new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/comptime/expected/0654-comptime-any-eq-concrete.stderr @@ -0,0 +1 @@ + diff --git a/examples/comptime/expected/0654-comptime-any-eq-concrete.stdout b/examples/comptime/expected/0654-comptime-any-eq-concrete.stdout new file mode 100644 index 00000000..68f2ff65 --- /dev/null +++ b/examples/comptime/expected/0654-comptime-any-eq-concrete.stdout @@ -0,0 +1,7 @@ +true +false +true +true +true +false +true diff --git a/examples/diagnostics/1205-diagnostics-annotated-init-type-mismatch.sx b/examples/diagnostics/1205-diagnostics-annotated-init-type-mismatch.sx new file mode 100644 index 00000000..149fb00c --- /dev/null +++ b/examples/diagnostics/1205-diagnostics-annotated-init-type-mismatch.sx @@ -0,0 +1,27 @@ +// Initializing (or reassigning) an explicitly-annotated slot with a value +// whose type has NO coercion to the annotation is a type error, diagnosed at +// lowering with a located message. 
+// +// Regression (issue 0197): `x : i32 = "hi"` was accepted with no diagnostic — +// the incompatible value passed through a `.none` coercion plan UNCHANGED, so a +// 16-byte `string` was stored into a 4-byte `i32` slot, bit-mangling the slot +// and SIGSEGV'ing at run time (`sx ir` lowered fine; only the run crashed). The +// guard (`checkAssignable`) now rejects an un-coercible initializer at every +// store-into-annotated-slot site — var-decl, body-local const-decl, and +// reassignment — emitting a diagnostic and aborting the build cleanly (exit 1). +// +// The explicit `xx` / `cast(T)` escape hatch is unaffected: a deliberate +// reinterpretation (pointer↔int, etc.) still passes through. + +#import "modules/std.sx"; + +main :: () -> i64 { + x : i32 = "hi"; // error: cannot initialize 'x' (string ↛ i32) + + y : i32 = 0; + y = "nope"; // error: cannot reassign 'y' (string ↛ i32) + + C : i32 : "also"; // error: cannot initialize 'C' (string ↛ i32) + + return 0; +} diff --git a/examples/diagnostics/1206-diagnostics-store-width-mismatch.sx b/examples/diagnostics/1206-diagnostics-store-width-mismatch.sx new file mode 100644 index 00000000..2875c3b1 --- /dev/null +++ b/examples/diagnostics/1206-diagnostics-store-width-mismatch.sx @@ -0,0 +1,34 @@ +// A store into ANY annotated slot whose value type has no coercion to the slot +// AND a different byte width is a type error — the raw `.none` passthrough would +// overrun the slot and corrupt adjacent memory (issue 0197). The guard covers +// every store site, not just plain var-decls: a struct field, an array element, +// a pointer deref, and a multi-assignment target. +// +// Regression (issue 0197): `struct{a:i32; b:i32}` is 8 bytes, but a 16-byte +// `string` stored raw into one of its fields (or into an i32 array element, or +// through an `*i32`) overran the slot and SIGSEGV'd / clobbered neighbors. 
The +// discriminator is BYTE WIDTH (via `typeSizeBytes`), so a same-width +// reinterpretation (`*T → [*]T`, a bare fn-ref into a function slot) still +// passes — only a genuine width mismatch is rejected. + +#import "modules/std.sx"; + +S :: struct { a: i32; b: i32; } + +main :: () -> i64 { + s : S = ---; + s.a = 1; s.b = 2; + s.a = "field"; // error: struct field, string ↛ i32 + + arr := i32.[1, 2, 3]; + arr[0] = "elem"; // error: array element, string ↛ i32 + + n : i32 = 0; + p : *i32 = @n; + p.* = "deref"; // error: pointer deref, string ↛ i32 + + u : i32 = 0; v : i32 = 0; + u, v = "multi", 9; // error: multi-assign target, string ↛ i32 + + return 0; +} diff --git a/examples/diagnostics/1207-diagnostics-any-implicit-unbox-rejected.sx b/examples/diagnostics/1207-diagnostics-any-implicit-unbox-rejected.sx new file mode 100644 index 00000000..7359b80b --- /dev/null +++ b/examples/diagnostics/1207-diagnostics-any-implicit-unbox-rejected.sx @@ -0,0 +1,22 @@ +// An `Any` does not IMPLICITLY unbox to a concrete type. A blind unbox +// reinterprets the boxed payload word as the target with NO runtime tag check, +// so a wrong target silently yields garbage (a scalar) or dereferences the +// payload as a pointer and segfaults (an aggregate). sx rejects the implicit +// unbox at compile time — like the no-implicit-optional-unwrap rule — and +// directs the user to `match` on the value's type or an explicit `xx`. +// +// Regression (issue 0198): `s : S = some_any` segfaulted and `f : f64 = some_any` +// silently produced 0.0; both are now compile errors. The fix is in `coerceMode` +// (`.unbox_any` arm, mode == .implicit). The `xx` escape hatch and the +// compiler-generated type-dispatch / pack-extraction unboxes are unaffected. 
+ +#import "modules/std.sx"; + +S :: struct { a: i64; } + +main :: () -> i64 { + x : Any = 5; + n : i64 = x; // error: 'Any' does not implicitly unbox to 'i64' + s : S = x; // error: 'Any' does not implicitly unbox to 'S' + return 0; +} diff --git a/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.exit b/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.exit @@ -0,0 +1 @@ +1 diff --git a/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stderr b/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stderr new file mode 100644 index 00000000..240a025a --- /dev/null +++ b/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stderr @@ -0,0 +1,17 @@ +error: cannot initialize 'x' of type 'i32' with a value of type 'string' + --> examples/diagnostics/1205-diagnostics-annotated-init-type-mismatch.sx:19:15 + | +19 | x : i32 = "hi"; // error: cannot initialize 'x' (string ↛ i32) + | ^^^^ + +error: cannot reassign 'y' of type 'i32' with a value of type 'string' + --> examples/diagnostics/1205-diagnostics-annotated-init-type-mismatch.sx:22:9 + | +22 | y = "nope"; // error: cannot reassign 'y' (string ↛ i32) + | ^^^^^^ + +error: cannot initialize 'C' of type 'i32' with a value of type 'string' + --> examples/diagnostics/1205-diagnostics-annotated-init-type-mismatch.sx:24:15 + | +24 | C : i32 : "also"; // error: cannot initialize 'C' (string ↛ i32) + | ^^^^^^ diff --git a/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stdout b/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/diagnostics/expected/1205-diagnostics-annotated-init-type-mismatch.stdout @@ -0,0 +1 @@ + diff --git 
a/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.exit b/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.exit @@ -0,0 +1 @@ +1 diff --git a/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stderr b/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stderr new file mode 100644 index 00000000..dcb996f3 --- /dev/null +++ b/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stderr @@ -0,0 +1,23 @@ +error: cannot assign 'a' of type 'i32' with a value of type 'string' + --> examples/diagnostics/1206-diagnostics-store-width-mismatch.sx:21:11 + | +21 | s.a = "field"; // error: struct field, string ↛ i32 + | ^^^^^^^ + +error: cannot assign 'element' of type 'i64' with a value of type 'string' + --> examples/diagnostics/1206-diagnostics-store-width-mismatch.sx:24:14 + | +24 | arr[0] = "elem"; // error: array element, string ↛ i32 + | ^^^^^^ + +error: cannot assign 'target' of type 'i32' with a value of type 'string' + --> examples/diagnostics/1206-diagnostics-store-width-mismatch.sx:28:11 + | +28 | p.* = "deref"; // error: pointer deref, string ↛ i32 + | ^^^^^^^ + +error: cannot assign 'u' of type 'i32' with a value of type 'string' + --> examples/diagnostics/1206-diagnostics-store-width-mismatch.sx:31:12 + | +31 | u, v = "multi", 9; // error: multi-assign target, string ↛ i32 + | ^^^^^^^ diff --git a/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stdout b/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/diagnostics/expected/1206-diagnostics-store-width-mismatch.stdout @@ -0,0 +1 @@ + diff --git a/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.exit 
b/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.exit @@ -0,0 +1 @@ +1 diff --git a/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stderr b/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stderr new file mode 100644 index 00000000..0f16356b --- /dev/null +++ b/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stderr @@ -0,0 +1,11 @@ +error: an 'Any' does not implicitly unbox to 'i64': the boxed type is not checked, so a wrong target reinterprets the payload (a wrong scalar silently yields garbage; an aggregate dereferences it and crashes). Dispatch on the value's type with `match`, or force it with `xx` if you know the boxed type. + --> examples/diagnostics/1207-diagnostics-any-implicit-unbox-rejected.sx:19:5 + | +19 | n : i64 = x; // error: 'Any' does not implicitly unbox to 'i64' + | ^^^^^^^^^^^^ + +error: an 'Any' does not implicitly unbox to 'S': the boxed type is not checked, so a wrong target reinterprets the payload (a wrong scalar silently yields garbage; an aggregate dereferences it and crashes). Dispatch on the value's type with `match`, or force it with `xx` if you know the boxed type. 
+ --> examples/diagnostics/1207-diagnostics-any-implicit-unbox-rejected.sx:20:5 + | +20 | s : S = x; // error: 'Any' does not implicitly unbox to 'S' + | ^^^^^^^^^^ diff --git a/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stdout b/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/diagnostics/expected/1207-diagnostics-any-implicit-unbox-rejected.stdout @@ -0,0 +1 @@ + diff --git a/examples/types/0210-types-multi-return-name-order.sx b/examples/types/0210-types-multi-return-name-order.sx index 648b0ffc..20659e1f 100644 --- a/examples/types/0210-types-multi-return-name-order.sx +++ b/examples/types/0210-types-multi-return-name-order.sx @@ -1,8 +1,19 @@ -// Negative: named return elements must be given in SLOT ORDER. A mismatched -// name would otherwise be matched positionally and silently produce the wrong -// result, so it is rejected. (Here `b` is given where slot `a` is expected.) +// Named return elements may be given in ANY order — they are matched to the +// return slots BY NAME and permuted to slot order before lowering. Here `b` is +// given before `a`; the result still destructures as (a, b). #import "modules/std.sx"; pair :: (n: i32) -> (a: i32, b: i32) { - return b = n, a = n + 1; // out of slot order + return b = n, a = n + 1; // out of slot order — reordered by name +} +// Works through the value-carrying-failable channel too (error slot is implicit). +ErrX :: error { Bad } +fpair :: (n: i32) -> (a: i32, b: i32, !) 
{ + return b = n, a = n + 1; +} +main :: () -> i64 { + x, y := pair(5); // a = 6, b = 5 + print("{} {}\n", x, y); + r := fpair(10) catch { return 9; }; + print("{} {}\n", r.a, r.b); // a = 11, b = 10 + return 0; } -main :: () -> i64 { x, y := pair(5); print("{} {}\n", x, y); return 0; } diff --git a/examples/types/0214-types-multi-return-name-invalid.sx b/examples/types/0214-types-multi-return-name-invalid.sx new file mode 100644 index 00000000..a6f42eef --- /dev/null +++ b/examples/types/0214-types-multi-return-name-invalid.sx @@ -0,0 +1,17 @@ +// Negative: named return elements are matched to slots BY NAME (any order), so +// a name that matches NO slot, or a slot named MORE THAN ONCE, is a hard error +// (rather than a silent positional mismatch). Missing/extra arity is caught +// separately. Here `c` names no slot and `a` would be duplicated. +#import "modules/std.sx"; +bad_unknown :: (n: i32) -> (a: i32, b: i32) { + return a = n, c = n + 1; // error: 'c' names no return slot +} +bad_dup :: (n: i32) -> (a: i32, b: i32) { + return a = n, a = n + 1; // error: 'a' given more than once +} +main :: () -> i64 { + x, y := bad_unknown(5); + p, q := bad_dup(5); + print("{} {} {} {}\n", x, y, p, q); + return 0; +} diff --git a/examples/types/0215-types-multi-return-as-generic-arg.sx b/examples/types/0215-types-multi-return-as-generic-arg.sx new file mode 100644 index 00000000..42d82595 --- /dev/null +++ b/examples/types/0215-types-multi-return-as-generic-arg.sx @@ -0,0 +1,11 @@ +// Negative: a bare-paren `(A, B)` is a MULTI-RETURN signature — valid ONLY as a +// function/closure return type, never as a value type. As a generic type +// argument it is rejected (a tuple-valued argument uses `Tuple(A, B)`). This +// completes the return-position-only gating (param / field / variable positions +// were already rejected; 0213 covers those). 
+#import "modules/std.sx"; + +main :: () -> i64 { + xs : List((i32, bool)) = ---; // error: multi-return signature, not a type + return 0; +} diff --git a/examples/types/0216-types-multi-return-closure.sx b/examples/types/0216-types-multi-return-closure.sx new file mode 100644 index 00000000..c68a8cdd --- /dev/null +++ b/examples/types/0216-types-multi-return-closure.sx @@ -0,0 +1,26 @@ +// Multi-return CLOSURE types and lambda literals work via the same tuple +// machinery as function multi-returns (D3): a `Closure() -> (A, B)` value's call +// result destructures (`a, b := cb()`), single-binds with field access +// (`c := cb(); c.0`), and a `() => { return v1, v2; }` lambda literal satisfies a +// multi-return closure parameter. No dedicated ClosureInfo marker is needed — +// the return slots ride as the reused `.tuple` TypeId, consistent with the +// function-decl multi-return surface. +#import "modules/std.sx"; + +apply :: (cb: Closure() -> (i32, bool)) -> i32 { + a, b := cb(); // destructure a multi-return closure result + return if b { a } else { 0 }; +} + +main :: () -> i64 { + cb : Closure() -> (i32, bool) = () => { return 7, true; }; + x, y := cb(); + print("{} {}\n", x, y); // 7 true + + c := cb(); // single-bind + positional field access + print("{} {}\n", c.0, c.1); // 7 true + + r := apply(() => { return 9, true; }); // lambda literal as the closure arg + print("{}\n", r); // 9 + return 0; +} diff --git a/examples/types/0217-types-multi-return-generic.sx b/examples/types/0217-types-multi-return-generic.sx new file mode 100644 index 00000000..7c9525df --- /dev/null +++ b/examples/types/0217-types-multi-return-generic.sx @@ -0,0 +1,24 @@ +// Generic multi-return: a positional multi-return whose slots are generic type +// params resolves with the inferred (or explicit) bindings — `-> (T, U)` with +// `a: $T, b: $U` infers from the args; an explicit `$T: Type` form also works. 
+// (The NAMED-slot implicit-return form with generics was tracked as issue +// 0200 and is exercised by 0218; this file covers the positional `return` form.) +#import "modules/std.sx"; + +// inferred type params from the value args +pair :: (a: $T, b: $U) -> (T, U) { return a, b; } + +// explicit comptime type params +mk :: ($T: Type, $U: Type, a: T, b: U) -> (T, U) { return a, b; } + +main :: () -> i64 { + x, y := pair(7, true); + print("{} {}\n", x, y); // 7 true + + s, n := pair("hi", 42); + print("{} {}\n", s, n); // hi 42 + + p, q := mk(i32, bool, 3, false); + print("{} {}\n", p, q); // 3 false + return 0; +} diff --git a/examples/types/0218-types-multi-return-generic-named.sx b/examples/types/0218-types-multi-return-generic-named.sx new file mode 100644 index 00000000..ceacccfc --- /dev/null +++ b/examples/types/0218-types-multi-return-generic-named.sx @@ -0,0 +1,40 @@ +// A NAMED multi-return whose slots are generic type params, using the implicit +// return (assign the named slot locals, no explicit `return`), works for both a +// generic free function and a generic struct method. +// +// Regression (issue 0200): the generic monomorph path (`monomorphizeFunction`) +// bound params but never called `bindNamedReturnSlots`, so `named_return_names` +// stayed null and the implicit-return synthesis didn't fire — the body wrongly +// reported "produces no value". Now the binder runs on the generic path too +// (mirroring `lowerFunctionBodyInto`), incl. with defaults and the failable +// error channel. +#import "modules/std.sx"; + +ErrX :: error { Bad } + +// generic free function, named slots, implicit return, mixed inference + default +split :: (a: $T, b: $U) -> (first: T, second: U) { first = a; second = b; } +withd :: (a: $T) -> (x: T, y: i32 = 99) { x = a; } +fallible :: (a: $T, b: $U) -> (x: T, y: U, !) 
{ x = a; y = b; } + +// generic struct method, named slots, implicit return +Box :: struct ($T: Type) { + v: T; + pair :: (self: *Box(T)) -> (a: T, b: T) { a = self.v; b = self.v + 1; } +} + +main :: () -> i64 { + x, y := split(7, true); + print("{} {}\n", x, y); // 7 true + + p, q := withd(5); + print("{} {}\n", p, q); // 5 99 + + r := fallible(3, false) catch { return 9; }; + print("{} {}\n", r.x, r.y); // 3 false + + bx := Box(i32).{ v = 10 }; + m, n := bx.pair(); + print("{} {}\n", m, n); // 10 11 + return 0; +} diff --git a/examples/types/expected/0210-types-multi-return-name-order.exit b/examples/types/expected/0210-types-multi-return-name-order.exit index d00491fd..573541ac 100644 --- a/examples/types/expected/0210-types-multi-return-name-order.exit +++ b/examples/types/expected/0210-types-multi-return-name-order.exit @@ -1 +1 @@ -1 +0 diff --git a/examples/types/expected/0210-types-multi-return-name-order.stderr b/examples/types/expected/0210-types-multi-return-name-order.stderr index 28ba9199..8b137891 100644 --- a/examples/types/expected/0210-types-multi-return-name-order.stderr +++ b/examples/types/expected/0210-types-multi-return-name-order.stderr @@ -1,11 +1 @@ -error: named return element 'b' does not match the slot 'a' at position 0 — name the elements in slot order - --> examples/types/0210-types-multi-return-name-order.sx:6:5 - | - 6 | return b = n, a = n + 1; // out of slot order - | ^^^^^^^^^^^^^^^^^^^^^^^^ -error: named return element 'a' does not match the slot 'b' at position 1 — name the elements in slot order - --> examples/types/0210-types-multi-return-name-order.sx:6:5 - | - 6 | return b = n, a = n + 1; // out of slot order - | ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/types/expected/0210-types-multi-return-name-order.stdout b/examples/types/expected/0210-types-multi-return-name-order.stdout index 8b137891..c6806cbc 100644 --- a/examples/types/expected/0210-types-multi-return-name-order.stdout +++ 
b/examples/types/expected/0210-types-multi-return-name-order.stdout @@ -1 +1,2 @@ - +6 5 +11 10 diff --git a/examples/types/expected/0214-types-multi-return-name-invalid.exit b/examples/types/expected/0214-types-multi-return-name-invalid.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/types/expected/0214-types-multi-return-name-invalid.exit @@ -0,0 +1 @@ +1 diff --git a/examples/types/expected/0214-types-multi-return-name-invalid.stderr b/examples/types/expected/0214-types-multi-return-name-invalid.stderr new file mode 100644 index 00000000..423a6869 --- /dev/null +++ b/examples/types/expected/0214-types-multi-return-name-invalid.stderr @@ -0,0 +1,11 @@ +error: named return element 'c' does not name any return slot + --> examples/types/0214-types-multi-return-name-invalid.sx:7:5 + | + 7 | return a = n, c = n + 1; // error: 'c' names no return slot + | ^^^^^^^^^^^^^^^^^^^^^^^^ + +error: named return element 'a' is given more than once + --> examples/types/0214-types-multi-return-name-invalid.sx:10:5 + | +10 | return a = n, a = n + 1; // error: 'a' given more than once + | ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/types/expected/0214-types-multi-return-name-invalid.stdout b/examples/types/expected/0214-types-multi-return-name-invalid.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/types/expected/0214-types-multi-return-name-invalid.stdout @@ -0,0 +1 @@ + diff --git a/examples/types/expected/0215-types-multi-return-as-generic-arg.exit b/examples/types/expected/0215-types-multi-return-as-generic-arg.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/types/expected/0215-types-multi-return-as-generic-arg.exit @@ -0,0 +1 @@ +1 diff --git a/examples/types/expected/0215-types-multi-return-as-generic-arg.stderr b/examples/types/expected/0215-types-multi-return-as-generic-arg.stderr new file mode 100644 index 00000000..03660b38 --- /dev/null +++ 
b/examples/types/expected/0215-types-multi-return-as-generic-arg.stderr @@ -0,0 +1,5 @@ +error: a bare-paren `(A, B)` is a multi-return signature, valid only as a return type; a tuple-valued generic type argument uses `Tuple(…)` + --> examples/types/0215-types-multi-return-as-generic-arg.sx:9:15 + | + 9 | xs : List((i32, bool)) = ---; // error: multi-return signature, not a type + | ^^^^^^^^^^^ diff --git a/examples/types/expected/0215-types-multi-return-as-generic-arg.stdout b/examples/types/expected/0215-types-multi-return-as-generic-arg.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/types/expected/0215-types-multi-return-as-generic-arg.stdout @@ -0,0 +1 @@ + diff --git a/examples/types/expected/0216-types-multi-return-closure.exit b/examples/types/expected/0216-types-multi-return-closure.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/types/expected/0216-types-multi-return-closure.exit @@ -0,0 +1 @@ +0 diff --git a/examples/types/expected/0216-types-multi-return-closure.stderr b/examples/types/expected/0216-types-multi-return-closure.stderr new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/types/expected/0216-types-multi-return-closure.stderr @@ -0,0 +1 @@ + diff --git a/examples/types/expected/0216-types-multi-return-closure.stdout b/examples/types/expected/0216-types-multi-return-closure.stdout new file mode 100644 index 00000000..4d0ec3d4 --- /dev/null +++ b/examples/types/expected/0216-types-multi-return-closure.stdout @@ -0,0 +1,3 @@ +7 true +7 true +9 diff --git a/examples/types/expected/0217-types-multi-return-generic.exit b/examples/types/expected/0217-types-multi-return-generic.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/types/expected/0217-types-multi-return-generic.exit @@ -0,0 +1 @@ +0 diff --git a/examples/types/expected/0217-types-multi-return-generic.stderr b/examples/types/expected/0217-types-multi-return-generic.stderr 
new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/types/expected/0217-types-multi-return-generic.stderr @@ -0,0 +1 @@ + diff --git a/examples/types/expected/0217-types-multi-return-generic.stdout b/examples/types/expected/0217-types-multi-return-generic.stdout new file mode 100644 index 00000000..4bf6aeaf --- /dev/null +++ b/examples/types/expected/0217-types-multi-return-generic.stdout @@ -0,0 +1,3 @@ +7 true +hi 42 +3 false diff --git a/examples/types/expected/0218-types-multi-return-generic-named.exit b/examples/types/expected/0218-types-multi-return-generic-named.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/types/expected/0218-types-multi-return-generic-named.exit @@ -0,0 +1 @@ +0 diff --git a/examples/types/expected/0218-types-multi-return-generic-named.stderr b/examples/types/expected/0218-types-multi-return-generic-named.stderr new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/types/expected/0218-types-multi-return-generic-named.stderr @@ -0,0 +1 @@ + diff --git a/examples/types/expected/0218-types-multi-return-generic-named.stdout b/examples/types/expected/0218-types-multi-return-generic-named.stdout new file mode 100644 index 00000000..c8aa3277 --- /dev/null +++ b/examples/types/expected/0218-types-multi-return-generic-named.stdout @@ -0,0 +1,4 @@ +7 true +5 99 +3 false +10 11 diff --git a/issues/0197-annotated-assignment-type-mismatch-no-check.md b/issues/0197-annotated-assignment-type-mismatch-no-check.md index e8236cae..8cb29df1 100644 --- a/issues/0197-annotated-assignment-type-mismatch-no-check.md +++ b/issues/0197-annotated-assignment-type-mismatch-no-check.md @@ -1,3 +1,30 @@ +> **RESOLVED** (2026-06-27). Root cause: a value whose type has NO modeled +> coercion to the destination slot (`classify == .none`) was passed through the +> `coerceMode` `.no_op, .none => return val` arm UNCHANGED — a raw reinterpreting +> store. 
When the value's byte width differed from the slot's (a 16-byte `string` +> into a 4-byte `i32`), the store overran the slot and corrupted memory / SIGSEGV'd. +> +> Fix: a shared guard `checkAssignable` / `noneReinterpretIsUnsafe` +> (`src/ir/lower/coerce.zig`) rejects a `.none` store ONLY when the byte WIDTHS +> differ (`typeSizeBytes`, the LLVM-accurate ABI size — NOT the field-padded +> `sizeOf`). A same-width `.none` is a legitimate bit-compatible reinterpretation +> (`*T → [*]T`, `i64 → isize`, a bare fn-ref into a function slot) and stays +> allowed; an explicit `xx`/`cast` always passes (the escape hatch). Cascades are +> suppressed via `externalErrorsExist()` (the guard tallies its own diagnostics, +> so a pre-lowering error — an unknown annotation type — or a failed initializer +> doesn't trigger a pile-on, while independent mismatches each still report). +> Wired into EVERY annotated-slot store site: var-decl, body-local const-decl, +> scalar reassignment (local + global), struct/tuple field, array/slice/pointer +> element, pointer deref, multi-assignment targets, and named-return defaults. +> (`destructure-decl` infers target types from the RHS, so it has no annotation +> to mismatch.) Regression tests: `examples/diagnostics/1205` (var/const/reassign) +> + `examples/diagnostics/1206` (field/element/deref/multi-assign width overrun). +> +> NOTE: a sibling runtime-safety gap surfaced during the fix's adversarial +> review — unboxing an `Any` to a mismatched type is unchecked (silent-wrong / +> segfault). That is a DIFFERENT code path (`unbox_any`, not the `.none` +> passthrough) and is filed separately as **issue 0198**. 
+ # 0197 — annotated assignment with an incompatible type is unchecked (segfaults) **Symptom** — A variable / constant declared with an explicit type annotation and diff --git a/issues/0198-unbox-any-no-tag-check.md b/issues/0198-unbox-any-no-tag-check.md new file mode 100644 index 00000000..bd461466 --- /dev/null +++ b/issues/0198-unbox-any-no-tag-check.md @@ -0,0 +1,98 @@ +> **RESOLVED** (2026-06-27). Fix: an IMPLICIT `Any → T` unbox is now a COMPILE +> ERROR (`coerceMode`'s `.unbox_any` arm, `mode == .implicit`, in +> `src/ir/lower/coerce.zig`). sx prevents this unsafe class at compile time — +> like the no-implicit-optional-unwrap rule — rather than with a runtime trap +> (the LLVM backend has no runtime-abort infra by design; compiled code relies on +> compile-time flow analysis). The escape hatches are unaffected: an explicit +> `xx some_any` (handled by `lowerXX`'s own unbox arm) and the compiler-generated +> type-dispatch / variadic-pack-extraction unboxes (which emit `.unbox_any` +> directly, not via `coerceMode`) all still work, as do `print`/`type_name`/`{}` +> formatting of an `Any`. So both 0198 cases are fixed: `s : S = some_any` (was a +> segfault) and `f : f64 = some_any` (was a silent `0.0`) now emit a clean +> compile error. Adversarial review found no false-positive (every legitimate +> `Any` pattern still works) and no surviving silent/segfault path. Regression +> test: `examples/diagnostics/1207-diagnostics-any-implicit-unbox-rejected.sx`. +> +> A SEPARATE pre-existing bug surfaced during the review — `Any == <concrete>` +> (one operand `Any`) aborts the LLVM verifier — filed as **issue 0199**. + +# 0198 — unboxing an `Any` to a mismatched type is unchecked (silent-wrong / segfaults) + +**Symptom** — Extracting a concrete value from an `Any` (the implicit +`Any → T` unbox, `classify == .unbox_any`) does NO runtime tag check: if the +boxed type does not match the unbox target `T`, the boxed bits are reinterpreted +blindly. 
For a scalar mismatch this silently produces garbage; for an aggregate +target it treats the boxed scalar as a pointer and dereferences it, **segfaulting**. + +- Observed: + - `Any(boxed i64 5) → i64` → `5` (correct). + - `Any(boxed i64 5) → f64` → `0.000000` (silent garbage — raw bit reinterpret, no diagnostic). + - `Any(boxed i64 5) → struct{a:i32; b:i32}` → **Segmentation fault** (the i64 `5` + is treated as a struct pointer and dereferenced). +- Expected: a runtime trap / clean diagnostic on a tag mismatch (the `Any` box + carries a type tag in field 0 — `{i64 tag, i64 value}` — so a checked unbox is + feasible), OR at minimum no memory-unsafe dereference. + +This is DISTINCT from issue 0197 (the compile-time `.none` annotated-assignment +gap, now fixed): here the static types `Any → T` are a *legal* unbox, so the +mismatch is only knowable at runtime via the tag. It was surfaced by the +adversarial review of the 0197 fix — the 0197 size guard correctly does NOT +fire here because `classify(Any, T) == .unbox_any`, not `.none`. + +## Reproduction + +```sx +#import "modules/std.sx"; + +S :: struct { a: i32; b: i32; } + +main :: () -> i64 { + x : Any = 5; // boxes an i64 + s : S = x; // Any → S unbox: NO tag check + print("unreached\n"); + return 0; +} +``` + +`./zig-out/bin/sx run repro.sx` → `Segmentation fault`. `sx ir` lowers fine. + +A non-crashing but silently-wrong variant: change `s : S = x;` to +`f : f64 = x;` — prints `0.000000` with no diagnostic. 
+ +## Investigation prompt + +The unbox is lowered as `Op.unbox_any` (coerce.zig, the `.unbox_any` arm of +`coerceMode` / `lowerXX`) and emitted by `emitUnboxAny` +(`src/backend/llvm/ops.zig:2462`): + +```zig +pub fn emitUnboxAny(self: Ops, instruction: *const Inst, un: UnaryOp) void { + const any_val = self.e.resolveRef(un.operand); + const any_kind = c.LLVMGetTypeKind(c.LLVMTypeOf(any_val)); + if (any_kind == c.LLVMStructTypeKind) { + const raw = c.LLVMBuildExtractValue(self.e.builder, any_val, 1, "ua.raw"); // field 1 = boxed value (i64) + const target_ty = self.e.toLLVMType(instruction.ty); + self.e.mapRef(self.e.coerceFromI64(raw, target_ty)); // ← no tag check; struct target derefs the scalar + } else { + self.e.mapRef(c.LLVMGetUndef(self.e.toLLVMType(instruction.ty))); + } +} +``` + +The `Any` box is `{ i64 type_tag, i64 value }`. Field 0 is the type tag (the +boxing site stores the source `TypeId`). The fix likely needs `emitUnboxAny` to +compare field 0 against `instruction.ty`'s tag and, on mismatch, trap with a +located runtime diagnostic (mirror the optional-unwrap / bounds-check trap +pattern) rather than `coerceFromI64`-ing arbitrary bits. For an aggregate target +the current `coerceFromI64` path is itself wrong (a >8-byte boxed value is +heap-stored as a pointer in field 1; a fits-in-8 scalar is stored inline) — the +unbox must distinguish the two by the boxed type, which the tag enables. + +Decision needed: does sx want `Any` unbox to be CHECKED (trap on mismatch, the +safe default) or remain an unchecked escape hatch (then `xx`/an explicit +checked-cast builtin should be the only spelling, and the implicit +`T x = some_any` unbox should at least not dereference a scalar as a pointer)? +See `specs.md` for the intended `Any` semantics before choosing. + +Verification: run the repro; expect a clean trap/diagnostic (or a checked-cast +requirement), NOT a segfault, and the `f64` variant to not silently yield `0.0`. 
diff --git a/issues/0199-any-eq-concrete-llvm-verify-fail.md b/issues/0199-any-eq-concrete-llvm-verify-fail.md new file mode 100644 index 00000000..1350be8c --- /dev/null +++ b/issues/0199-any-eq-concrete-llvm-verify-fail.md @@ -0,0 +1,61 @@ +> **RESOLVED** (2026-06-27). Fix: the `Any`-shaped `==`/`!=` arm in +> `src/ir/lower/expr.zig` now fires when EITHER operand is `.any` (was both). A +> concrete operand is boxed to `Any` (`builder.boxAny`) first, so both sides are +> 16-byte boxes; then both unbox to their `.i64` value words and compare — the +> same value-identity the both-`Any` path uses (tags not compared). An +> already-errored `.unresolved` / `.void` operand falls through (no cascade). +> Verified: `x == 5`, `x == 6`, `x != 6`, `5 == x` (reversed), bool `Any`, and the +> both-`Any` form all work; no verifier abort. Regression test: +> `examples/comptime/0654-comptime-any-eq-concrete.sx`. (Aggregate-`Any` +> comparison still uses value-word identity — the same limitation the both-`Any` +> path always had; orthogonal to this verifier fix.) + +# 0199 — `Any == <concrete>` (one operand `Any`) fails LLVM verification + +**Symptom** — An equality / inequality comparison where exactly ONE operand is +`Any` and the other is a concrete type is not handled: it falls through to a +plain `icmp` on a 16-byte `{tag, value}` aggregate vs a scalar and aborts the +LLVM verifier. + +- Observed: `x : Any = 5; if x == 5 { ... }` → + `error: Both operands to ICmp are not of the same type! {i64,i64} vs i64`, + `LLVM verification failed`, exit 1 (loud — not a segfault / silent miscompile). +- Expected: either box the concrete operand to `Any` (then compare as `Any == + Any`, the path that already works) consulting the tag, OR a clean located + compile diagnostic (e.g. "compare an 'Any' against a value of its boxed type, + or `xx` the Any first"). Not an LLVM verifier abort. + +Distinct from issue 0198 (the implicit `Any → T` unbox). Surfaced by the +adversarial review of the 0198 fix. 
`Any == Any` works correctly. + +## Reproduction + +```sx +#import "modules/std.sx"; + +main :: () -> i64 { + x : Any = 5; + if x == 5 { return 1; } // error: ICmp operand type mismatch {i64,i64} vs i64 + return 0; +} +``` + +`./zig-out/bin/sx run repro.sx` → `LLVM verification failed`, exit 1. + +## Investigation prompt + +The `Any` equality path is in `src/ir/lower/expr.zig` (~3201-3215), gated on +`lhs_ty == .any and rhs_ty == .any` — it `unbox_any`s both sides to `.i64` and +`cmp_eq`s the value words. When only ONE side is `.any`, that guard is false and +the comparison falls through to the generic numeric/`icmp` path, which emits an +`icmp` between the 16-byte `Any` aggregate and the scalar → verifier abort. + +The fix likely adds a mixed-operand arm: when exactly one operand is `.any` and +the other is a concrete type `T`, box the concrete operand to `Any` +(`self.builder.boxAny(concrete, T)`) and reuse the existing `Any == Any` +value-word comparison — OR, if comparing only the payload word is unsound across +types (a `5:i64` and a `5.0:f64` would compare equal by bits), gate on the tag +too / emit a diagnostic. Decide whether `Any == concrete` should compare by +(tag AND value) or be disallowed; mirror whatever `Any == Any` semantics are +documented. Verify: the repro compiles and `x == 5` is true, OR a clean +diagnostic is emitted — never an LLVM verifier abort. diff --git a/issues/0200-named-generic-multi-return-implicit-return.md b/issues/0200-named-generic-multi-return-implicit-return.md new file mode 100644 index 00000000..65542fe4 --- /dev/null +++ b/issues/0200-named-generic-multi-return-implicit-return.md @@ -0,0 +1,78 @@ +> **RESOLVED** (2026-06-27). 
Root cause exactly as hypothesized: the generic +> monomorph path `monomorphizeFunction` (`src/ir/lower/generic.zig`) bound params +> and lowered the body via `lowerValueBody`, but NEVER called +> `bindNamedReturnSlots` — so `named_return_names` stayed null and the +> implicit-return synthesis (`lowerValueBody`, stmt.zig) didn't fire. (The +> non-generic decl path `lowerFunctionBodyInto` already called it.) Fix: call +> `bindNamedReturnSlots(fd, ret_ty, &scope)` in `monomorphizeFunction` after +> param-binding, with the same `named_return_names`/`named_return_defaults` +> save/restore. Covers generic free functions AND generic struct methods (the +> instance-method path shares the monomorph), with defaults and the failable +> error channel. Regression test: `examples/types/0218-types-multi-return-generic-named.sx`. + +# 0200 — named-return locals don't synthesize the implicit return in a GENERIC multi-return function + +**Symptom** — A generic function with a NAMED multi-return (`-> (first: $T, second: $U)`) +that relies on the implicit return (assigns the named slot locals, no explicit +`return`) fails to compile: the named-return-locals synthesis does not fire for +the monomorphized instance, so it reports "body produces no value". + +- Observed: `pair :: (a: $T, b: $U) -> (first: T, second: U) { first = a; second = b; }` + → `error: function returns '(first: i64, second: bool)' but its body produces + no value — end it with a trailing expression (no ';') or an explicit 'return'`. +- Expected: the named slot locals (`first`, `second`) are bound and the implicit + return is synthesized from them, exactly as for a NON-generic named + multi-return. + +Note the diagnostic shows the return type RESOLVED to concrete types +(`(first: i64, second: bool)`) — so binding/return-type resolution ran; only the +named-return-LOCALS path (`bindNamedReturnSlots` → `self.named_return_names`) did +not take effect for the generic instance. 
+ +WORKS (so this is narrow): the POSITIONAL generic multi-return with an explicit +return is fine — `(a: $T, b: $U) -> (T, U) { return a, b; }` and explicit-type +`pair(i32, bool, 7, true)` both run correctly. Only the named-slot IMPLICIT-return +form × generic monomorph is broken. Workaround: use an explicit `return a, b`. + +## Reproduction + +```sx +#import "modules/std.sx"; + +pair :: (a: $T, b: $U) -> (first: T, second: U) { + first = a; + second = b; // implicit return from named slots — never synthesized +} + +main :: () -> i64 { + x, y := pair(7, true); + print("{} {}\n", x, y); + return 0; +} +``` + +`./zig-out/bin/sx run repro.sx` → the "produces no value" error, exit 1. + +## Investigation prompt + +The implicit-return-from-named-slots synthesis (`lowerValueBody` in +`src/ir/lower/stmt.zig` ~line 172: `if (self.named_return_names) |names| { … }`) +only fires when `self.named_return_names` is set by `bindNamedReturnSlots` +(`src/ir/lower/stmt.zig` ~258). That binder is called from `lowerFunctionBodyInto` +(`src/ir/lower/decl.zig:2729`). `bindNamedReturnSlots` early-returns unless +`fd.return_type.?.data == .return_type_expr`. + +The likely cause: the generic-FREE-function monomorph lowers the instance with a +SUBSTITUTED return-type node (the `$T`/`$U` resolved into a concrete +`tuple_type_expr` or a resolved TypeId), so `fd.return_type.data` is no longer +`.return_type_expr` → `bindNamedReturnSlots` early-returns → `named_return_names` +stays null → the implicit return isn't synthesized. Confirm by checking the +generic free-function instantiation path (search `instantiateGeneric` / +`lazyLowerFunction` / the monomorph that rewrites `fd` for free functions): does +it preserve the original `ReturnTypeExpr` AST node (binding via `type_bindings`), +or rewrite it? The fix likely keys `bindNamedReturnSlots` off the ORIGINAL +template `fd.return_type` (which carries `field_names`), or threads the +field-names through the monomorph. 
Generic STRUCT methods may have the same gap — +test `Box(T)` with a named multi-return method. + +Verify: the repro prints `7 true`, exit 0. Add a positive generics example. diff --git a/readme.md b/readme.md index a24df750..8a180d0a 100644 --- a/readme.md +++ b/readme.md @@ -71,6 +71,14 @@ Options: A fixed array `[N]T` coerces to a slice `[]T` (its length is known); a `[*]T` many-pointer carries no length, so slice it explicitly with `ptr[0..len]`. +Storing a value into a typed slot (a `:`-annotated binding, a field, an array +element, a deref, an assignment target) requires a coercion to exist. A value +with no coercion to the slot type *and* a different byte width — e.g. +`x : i32 = "hi"` — is a **compile error** rather than a silent reinterpreting +store. Same-width reinterpretations (`*T → [*]T`, `i64 → isize`) are allowed, and +an explicit `xx` / `cast(T)` is always the escape hatch for a deliberate +reinterpretation. + **Numeric limits.** A field access on a builtin integer type folds to a compile-time constant: `i64.max`, `u8.min`, `[u8.max]T` (a 255-element array). Floats expose `.min` / `.max` plus `.epsilon`, `.min_positive`, `.true_min`, diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 769520ba..667431c1 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -246,6 +246,13 @@ pub const Lowering = struct { resolved_root: ?*const Node = null, // full AST root (for building comptime modules) comptime_param_nodes: ?std.StringHashMap(*const Node) = null, // active comptime substitutions target_type: ?TypeId = null, // target type for struct/enum literals without explicit names + // Count of diagnostics emitted by the annotated-store assignability guard + // (`checkAssignable` / the named-return-default guard, issue 0197). 
Lets the + // guard skip when ANY OTHER error already exists (`errorCount() > this`) — + // suppressing cascades onto a pre-lowering error (an unknown annotation + // type) or a failed initializer, while still reporting multiple INDEPENDENT + // mismatches (each of those is one of the guard's OWN errors, not external). + assignability_error_count: usize = 0, lowered_functions: std.StringHashMap(void), // tracks which functions have been fully lowered /// Identity map: authoring `*const ast.FnDecl` → the FuncId `declareFunction` /// created for it. The name-keyed function table (`resolveFuncByName`) returns @@ -2044,6 +2051,9 @@ pub const Lowering = struct { pub const lowerCoercedDefault = lower_coerce.lowerCoercedDefault; pub const coerceToType = lower_coerce.coerceToType; pub const coerceExplicit = lower_coerce.coerceExplicit; + pub const checkAssignable = lower_coerce.checkAssignable; + pub const noneReinterpretIsUnsafe = lower_coerce.noneReinterpretIsUnsafe; + pub const externalErrorsExist = lower_coerce.externalErrorsExist; pub const coerceMode = lower_coerce.coerceMode; pub const diagNonIntegralNarrow = lower_coerce.diagNonIntegralNarrow; pub const promoteCVariadicArgs = lower_coerce.promoteCVariadicArgs; diff --git a/src/ir/lower/coerce.zig b/src/ir/lower/coerce.zig index 3b713677..c0d8fcde 100644 --- a/src/ir/lower/coerce.zig +++ b/src/ir/lower/coerce.zig @@ -603,13 +603,126 @@ pub fn coerceExplicit(self: *Lowering, val: Ref, src_ty: TypeId, dst_ty: TypeId) return self.coerceMode(val, src_ty, dst_ty, .explicit); } +/// Is `node` an explicit cast — `xx expr` or `cast(T) expr`? Such a value is +/// the user's deliberate opt-in to a reinterpretation that has no standard +/// coercion (e.g. pointer↔int, function↔fn-pointer): the `.none` passthrough +/// in `coerceMode` is the intended escape hatch there, so the assignability +/// guard must NOT fire for it. 
+fn initIsExplicitCast(node: *const Node) bool { + return switch (node.data) { + .unary_op => |u| u.op == .xx, + .call => |c| c.callee.data == .identifier and std.mem.eql(u8, c.callee.data.identifier.name, "cast"), + else => false, + }; +} + +/// Guard a store into an explicitly-annotated slot against a silent bit-mangle. +/// When the initializer/RHS type `src_ty` has NO modeled coercion to the +/// destination slot type `dst_ty`, the classifier yields `.none` and +/// `coerceMode`'s `.no_op, .none => return val` arm passes the value through +/// UNCHANGED — a raw reinterpreting store. That is only DANGEROUS when the +/// value's byte width differs from the slot's: a 16-byte `string` written into +/// a 4-byte `i32` slot overruns it, corrupting memory and segfaulting at run +/// time (issue 0197). A SAME-width `.none` is a bit-compatible reinterpretation +/// sx's passthrough has always performed for legitimate pairs that the +/// classifier doesn't model — `*T → [*]T`, `i64 → isize`, `*void ← *T`, a bare +/// fn-ref into a function slot — so it must stay allowed. +/// +/// Reject ONLY a width mismatch: emit a diagnostic and return false so the +/// caller stores a safe default instead of the overrunning value. Returns true +/// when the store is sound (a no-op, a modeled conversion, a same-width +/// reinterpretation, or a deliberate `xx`/`cast`). `init_node` is the +/// initializer expression (null when none); `verb`/`name` shape the message. +pub fn checkAssignable(self: *Lowering, src_ty: TypeId, dst_ty: TypeId, span: ast.Span, verb: []const u8, name: []const u8, init_node: ?*const Node) bool { + if (src_ty == dst_ty) return true; + // Suppress a cascade onto an error that is NOT this guard's own: a + // pre-lowering "unknown type" (the annotation resolved to a poison stub) or + // a failed initializer leaves an unreliable type here. 
`errorCount()` minus + // the guard's own tally is >0 exactly when some other diagnostic fired — an + // errored build never runs, so the bit-mangle can't reach run time anyway. + // Independent mismatches in a clean file are each the guard's OWN error, so + // they are NOT suppressed (the tally cancels them out). + if (self.externalErrorsExist()) return true; + // An unresolved operand was already diagnosed at its origin. + if (src_ty == .unresolved or dst_ty == .unresolved) return true; + if (src_ty == .void or dst_ty == .void) return true; + // An explicit `xx`/`cast` is the user opting into a reinterpretation that + // has no standard coercion — leave the escape hatch intact, width be damned. + if (init_node) |n| if (initIsExplicitCast(n)) return true; + if (!self.noneReinterpretIsUnsafe(src_ty, dst_ty)) return true; + if (self.diagnostics) |d| { + d.addFmt(.err, span, "cannot {s} '{s}' of type '{s}' with a value of type '{s}'", .{ verb, name, self.formatTypeName(dst_ty), self.formatTypeName(src_ty) }); + self.assignability_error_count += 1; + } + return false; +} + +/// True when a diagnostic OTHER than this guard's own assignability errors has +/// already been emitted — the signal to suppress a cascade (see +/// `checkAssignable`). The guard tracks its own emissions in +/// `assignability_error_count`, so `errorCount() > that` means "an external +/// error exists", independent of how many mismatches the guard itself reported. +pub fn externalErrorsExist(self: *Lowering) bool { + const d = self.diagnostics orelse return false; + return d.errorCount() > self.assignability_error_count; +} + +/// The core unsafe-store predicate shared by `checkAssignable` and the +/// named-return-default guard: a store of `src_ty` into a `dst_ty` slot has NO +/// modeled coercion (`coerceMode` would pass it through UNCHANGED) AND the two +/// differ in byte width — so the raw store overruns / under-fills the slot, +/// corrupting memory (issue 0197). 
A same-width `.none` is a legitimate +/// bit-compatible reinterpretation (`*T → [*]T`, `i64 → isize`, `*void ← *T`), +/// which stays allowed. Callers should have already cleared the cheap +/// cascade/escape-hatch cases (unresolved operands, explicit `xx`/`cast`). +pub fn noneReinterpretIsUnsafe(self: *Lowering, src_ty: TypeId, dst_ty: TypeId) bool { + if (src_ty == dst_ty) return false; + if (self.coercionResolver().classify(src_ty, dst_ty) != .none) return false; + return !sameStoreWidth(self, src_ty, dst_ty); +} + +/// ABI/store width of `a` and `b` are equal — the safety test for an unmodeled +/// (`.none`) reinterpreting store (see `noneReinterpretIsUnsafe`). Uses +/// `typeSizeBytes` (the LLVM-accurate ABI size, with natural field alignment), +/// NOT `sizeOf` (which pads every aggregate field to ≥8 and would report +/// `struct{i32,i32}` as 16 — coincidentally matching a 16-byte `string` and +/// letting the raw store overrun the real 8-byte slot). Comptime-only `pack` +/// types have no runtime layout; a pack reaching a store site is a separate, +/// already-diagnosed misuse, so treat it as "same width" to avoid a spurious +/// second error. +fn sameStoreWidth(self: *Lowering, a: TypeId, b: TypeId) bool { + if (self.module.types.get(a) == .pack or self.module.types.get(b) == .pack) return true; + return self.module.types.typeSizeBytes(a) == self.module.types.typeSizeBytes(b); +} + pub fn coerceMode(self: *Lowering, val: Ref, src_ty: TypeId, dst_ty: TypeId, mode: CoerceMode) Ref { // PLANNING: classify the built-in coercion (conversions.zig). // EMISSION: each arm below reproduces the original lowering. switch (self.coercionResolver().classify(src_ty, dst_ty)) { .no_op, .none => return val, - // Unbox Any → concrete type - .unbox_any => return self.builder.emit(.{ .unbox_any = .{ .operand = val } }, dst_ty), + // Unbox Any → concrete type. 
An IMPLICIT unbox (`s : S = some_any`) is + // rejected (issue 0198): the unbox blindly reinterprets the boxed payload + // word as `dst_ty` with NO runtime tag check, so a wrong target silently + // yields garbage (`f64 = any_holding_i64` → 0.0) or — for an aggregate + // target — dereferences the payload word as a pointer and segfaults. sx + // prevents this class at compile time (like the no-implicit-optional-unwrap + // rule) rather than with a runtime trap: dispatch on the value's type + // (`match` / `type_name`), or force it with an explicit `xx` if the boxed + // type is known. An EXPLICIT `xx` (mode == .explicit, and `lowerXX`'s own + // unbox arm) stays the acknowledged escape hatch; compiler-generated + // type-dispatch / pack-extraction unboxes emit `.unbox_any` DIRECTLY (not + // through this arm), so they are unaffected. + .unbox_any => { + if (mode == .implicit) { + if (self.diagnostics) |d| { + const cs = self.builder.current_span; + d.addFmt(.err, ast.Span{ .start = cs.start, .end = cs.end }, "an 'Any' does not implicitly unbox to '{s}': the boxed type is not checked, so a wrong target reinterprets the payload (a wrong scalar silently yields garbage; an aggregate dereferences it and crashes). Dispatch on the value's type with `match`, or force it with `xx` if you know the boxed type.", .{self.formatTypeName(dst_ty)}); + } + // Diagnosed — `hasErrors()` aborts the build before run time; the + // emitted op is never executed. + } + return self.builder.emit(.{ .unbox_any = .{ .operand = val } }, dst_ty); + }, // Box concrete → Any .box_any => return self.builder.boxAny(val, src_ty), // Closure VALUE → bare function-pointer slot: not soundly representable. diff --git a/src/ir/lower/expr.zig b/src/ir/lower/expr.zig index 5b87fd40..e47fb5a7 100644 --- a/src/ir/lower/expr.zig +++ b/src/ir/lower/expr.zig @@ -3191,19 +3191,29 @@ pub fn lowerBinaryOp(self: *Lowering, bop: *const ast.BinaryOp) Ref { } } - // Any-shaped `==` (e.g. 
`t == i64` where `t: Type`): both - // operands are 16-byte `{tag, value}` aggregates. LLVM - // doesn't accept `icmp` on aggregates directly. Decompose - // via `unbox_any` (which extracts the value field at - // `.i64`) and compare the i64s. Tag fields are stable - // across compilations of the same source so value-only - // identity is enough. + // `Any`-shaped `==` (e.g. `t == i64` where `t: Type`, or `av == 5`): an + // `Any` is a 16-byte `{tag, value}` aggregate, which LLVM won't `icmp` + // directly. Decompose via `unbox_any` (extracts the value word at `.i64`) and + // compare the i64s — tags are stable across a compilation, so value-only + // identity is enough. When only ONE operand is `Any` (a MIXED + // `Any == <concrete>`), box the concrete side to `Any` first; otherwise it + // fell through to a plain `icmp` on the 16-byte aggregate vs a scalar and + // aborted the LLVM verifier (issue 0199). if (bop.op == .eq or bop.op == .neq) { const lhs_ty = self.inferExprType(bop.lhs); const rhs_ty = self.inferExprType(bop.rhs); - if (lhs_ty == .any and rhs_ty == .any) { - const lhs = self.lowerExpr(bop.lhs); - const rhs = self.lowerExpr(bop.rhs); + const lhs_any = lhs_ty == .any; + const rhs_any = rhs_ty == .any; + // Need a boxable type on any non-Any side; an already-errored + // `.unresolved` / `.void` operand falls through (no spurious cascade).
+ if ((lhs_any or rhs_any) and + lhs_ty != .unresolved and rhs_ty != .unresolved and + lhs_ty != .void and rhs_ty != .void) + { + var lhs = self.lowerExpr(bop.lhs); + var rhs = self.lowerExpr(bop.rhs); + if (!lhs_any) lhs = self.builder.boxAny(lhs, lhs_ty); + if (!rhs_any) rhs = self.builder.boxAny(rhs, rhs_ty); const lhs_val = self.builder.emit(.{ .unbox_any = .{ .operand = lhs } }, .i64); const rhs_val = self.builder.emit(.{ .unbox_any = .{ .operand = rhs } }, .i64); if (bop.op == .eq) { diff --git a/src/ir/lower/generic.zig b/src/ir/lower/generic.zig index 97c828ea..a20c4f91 100644 --- a/src/ir/lower/generic.zig +++ b/src/ir/lower/generic.zig @@ -152,6 +152,23 @@ pub fn monomorphizeFunction(self: *Lowering, fd: *const ast.FnDecl, mangled_name } } + // Named multi-return (`-> (x: A, y: B)`): bind the slots as in-scope locals + // for the body to assign; `lowerValueBody` then synthesizes the implicit + // return from them. The decl path (`lowerFunctionBodyInto`) does this too — + // without it a GENERIC named multi-return never sets `named_return_names`, so + // the implicit return isn't synthesized and the body wrongly reports + // "produces no value" (issue 0200). Save/restore the state so a monomorph + // doesn't leak its named-return slots to the enclosing lowering. + const saved_nrn_mono = self.named_return_names; + const saved_nrd_mono = self.named_return_defaults; + self.named_return_names = null; + self.named_return_defaults = null; + defer { + self.named_return_names = saved_nrn_mono; + self.named_return_defaults = saved_nrd_mono; + } + if (fd.abi != .naked) self.bindNamedReturnSlots(fd, ret_ty, &scope); + // Handle builtin function bodies (e.g. 
#builtin sqrt monomorphized to sqrt__f32) if (fd.body.data == .builtin_expr) { // Emit builtin call with param 0, then return @@ -396,6 +413,12 @@ pub fn isTypeReturningCallNode(self: *Lowering, node: *const Node) bool { } pub fn resolveTypeArg(self: *Lowering, node: *const Node) TypeId { + // A bare-paren `(A, B)` is a MULTI-RETURN signature — valid only as a + // function/closure return type, never as a generic type argument (a + // tuple-valued arg uses `Tuple(…)`). Without this it silently resolved to a + // reused tuple TypeId (`List((A, B))` ≡ `List(Tuple(A, B))`), eroding the + // "multi-return is not a tuple, return-position-only" rule. + if (self.rejectMultiReturnValueType(node, "generic type argument")) return .unresolved; // Pack-index access in a type-arg slot (e.g. `type_name($args[0])` // or `type_eq($args[i], i64)`). Same shape as the // `resolveTypeWithBindings` arm — looks up the bound pack types @@ -1820,6 +1843,8 @@ pub fn instantiateGenericStruct(self: *Lowering, tmpl: *const StructTemplate, ar continue; } } + // Multi-return signature is return-only, not a type-pack arg. + if (self.rejectMultiReturnValueType(a, "generic type argument")) return .unresolved; const ty = self.resolveTypeWithBindings(a); pack_tys.append(self.alloc, ty) catch {}; name_parts.appendSlice(self.alloc, "__") catch {}; @@ -1832,6 +1857,9 @@ pub fn instantiateGenericStruct(self: *Lowering, tmpl: *const StructTemplate, ar name_parts.appendSlice(self.alloc, "__") catch {}; if (tp.is_type_param) { + // A bare-paren `(A, B)` multi-return signature is return-position-only, + // never a generic type argument (`List((A,B))` — use `Tuple(…)`). 
+ if (self.rejectMultiReturnValueType(args[i], "generic type argument")) return .unresolved; const ty = self.resolveTypeWithBindings(args[i]); tb.put(tp.name, ty) catch {}; const tname = self.formatTypeName(ty); diff --git a/src/ir/lower/stmt.zig b/src/ir/lower/stmt.zig index df6ac22f..134aa428 100644 --- a/src/ir/lower/stmt.zig +++ b/src/ir/lower/stmt.zig @@ -291,13 +291,16 @@ pub fn bindNamedReturnSlots(self: *Lowering, fd: *const ast.FnDecl, ret_ty: Type const dval = self.lowerExpr(dn); self.target_type = saved_target; const dval_ty = self.builder.getRefType(dval); - // Reject a default whose type has NO coercion to the slot type (e.g. - // `sum: i32 = "hi"`) — a `.none` plan would pass the value through - // unchanged and bit-mangle / segfault. (The same hole exists for any - // annotated assignment `x: i32 = "hi"` — a broader pre-existing gap.) - if (dval_ty != .unresolved and self.coercionResolver().classify(dval_ty, fty) == .none and dval_ty != fty) { + // Reject a default whose type has NO coercion to the slot type and a + // mismatched byte width (e.g. `sum: i32 = "hi"`) — a `.none` plan + // would pass the value through unchanged and overrun / under-fill the + // slot, corrupting memory (the same guard as plain annotated + // assignment, issue 0197). A same-width `.none` (`p: *void = typed_ptr`) + // is a legitimate reinterpretation and stays allowed. 
+ if (!self.externalErrorsExist() and dval_ty != .unresolved and self.noneReinterpretIsUnsafe(dval_ty, fty)) { if (self.diagnostics) |d| { d.addFmt(.err, dn.span, "named return '{s}' has a default of type '{s}' that does not match its declared type '{s}'", .{ nm, self.formatTypeName(dval_ty), self.formatTypeName(fty) }); + self.assignability_error_count += 1; } self.builder.store(slot, self.buildDefaultValue(fty)); } else { @@ -577,6 +580,17 @@ pub fn lowerVarDecl(self: *Lowering, vd: *const ast.VarDecl) void { { const ref_ty = self.builder.getRefType(ref); if (ref_ty != ty and ref_ty != .void and ty != .void) { + // An initializer with NO coercion to the annotated slot type + // (`x : i32 = "hi"`) would otherwise pass through unchanged and + // bit-mangle the slot (issue 0197). Diagnose and store a safe + // default so the build aborts cleanly instead of segfaulting. + if (!self.checkAssignable(ref_ty, ty, val.span, "initialize", vd.name, val)) { + self.builder.store(slot, self.buildDefaultValue(ty)); + if (self.scope) |scope| { + scope.put(vd.name, .{ .ref = slot, .ty = ty, .is_alloca = true }); + } + return; + } ref = self.coerceToType(ref, ref_ty, ty); } } @@ -685,6 +699,13 @@ pub fn lowerConstDecl(self: *Lowering, cd: *const ast.ConstDecl) void { else self.builder.getRefType(ref); + // An annotated constant whose initializer cannot coerce to the declared type + // would be bound under a type its bytes don't match (issue 0197) — diagnose + // rather than let a later read reinterpret the wrong-shape value. 
+ if (cd.type_annotation != null) { + _ = self.checkAssignable(self.builder.getRefType(ref), ty, cd.value.span, "initialize", cd.name, cd.value); + } + if (self.scope) |scope| { scope.put(cd.name, .{ .ref = ref, .ty = ty, .is_alloca = false }); } @@ -726,17 +747,10 @@ pub fn validateMultiReturn(self: *Lowering, value_node: *const Node, ret_ty: Typ diags.addFmt(.err, value_node.span, "this function returns {d} values, but {d} {s} given", .{ value_count, els.len, if (els.len == 1) @as([]const u8, "is") else @as([]const u8, "are") }); return; } - // Named elements must line up with the slots positionally. - if (ti.tuple.names) |slot_names| { - for (els, 0..) |e, idx| { - const en = e.name orelse continue; - if (idx >= slot_names.len) continue; - const sn = self.module.types.getString(slot_names[idx]); - if (sn.len != 0 and !std.mem.eql(u8, en, sn)) { - diags.addFmt(.err, value_node.span, "named return element '{s}' does not match the slot '{s}' at position {d} — name the elements in slot order", .{ en, sn, idx }); - } - } - } + // Named elements no longer need to be in slot order — `reorderNamedReturn` + // (called from `lowerReturn` before lowering) permutes them to match the + // slots and diagnoses unknown / duplicate / missing names. Arity is + // checked above; nothing more to validate here. } else { // A bare value (not a comma list) where ≥2 are required is valid only if // it already PRODUCES the whole multi-value tuple — forwarding another @@ -751,6 +765,87 @@ pub fn validateMultiReturn(self: *Lowering, value_node: *const Node, ret_ty: Typ } } +/// Permute a FULLY-NAMED multi-return tuple literal (`return b = …, a = …`) so +/// its elements line up with the function's return slots BY NAME, returning a +/// fresh reordered `tuple_literal`. Positional / mixed lists, non-tuple returns, +/// and arity mismatches (diagnosed in `validateMultiReturn`) pass through +/// unchanged. 
Diagnoses a name that matches no slot, a duplicate, or a missing +/// value slot — returning the original node after diagnosing (the build aborts +/// via `hasErrors`, so the unpermuted node never reaches run time). +fn reorderNamedReturn(self: *Lowering, value_node: *const Node, ret_ty: TypeId) *const Node { + if (value_node.data != .tuple_literal) return value_node; + if (ret_ty.isBuiltin()) return value_node; + const ti = self.module.types.get(ret_ty); + if (ti != .tuple) return value_node; + const slot_names = ti.tuple.names orelse return value_node; + const els = value_node.data.tuple_literal.elements; + if (els.len == 0) return value_node; + // Reorder only a FULLY-named list; positional/mixed keeps positional order. + for (els) |e| if (e.name == null) return value_node; + const is_failable = self.errorChannelOf(ret_ty) != null; + const fields_len = ti.tuple.fields.len; + const value_count = if (is_failable) fields_len - 1 else fields_len; + // Two accepted shapes (anything else is an arity error diagnosed by + // `validateMultiReturn` — pass through): the VALUE-ONLY list (one element per + // value slot, the ergonomic `return a = …, b = …` form) and the FULL-TUPLE + // list (a trailing element for the error slot too, `els.len == fields_len`). + // BOTH must be reordered/validated — otherwise a fully-named full-tuple + // failable return silently lands values positionally (regression found in + // review). `match_count` slots participate; the error slot (when present) + // joins by its own slot name. + const match_count = els.len; + if (match_count != value_count and match_count != fields_len) return value_node; + if (match_count > slot_names.len) return value_node; + + // Validate element names FIRST (clearer diagnostics than a downstream + // "missing slot"): every name must match a participating slot, no duplicates. + for (els, 0..) 
|e, ei| { + const en = e.name.?; + var matches_slot = false; + var s: usize = 0; + while (s < match_count) : (s += 1) { + const sn = self.module.types.getString(slot_names[s]); + if (sn.len != 0 and std.mem.eql(u8, en, sn)) { + matches_slot = true; + break; + } + } + if (!matches_slot) { + if (self.diagnostics) |d| d.addFmt(.err, value_node.span, "named return element '{s}' does not name any return slot", .{en}); + return value_node; + } + for (els[ei + 1 ..]) |e2| { + if (std.mem.eql(u8, en, e2.name.?)) { + if (self.diagnostics) |d| d.addFmt(.err, value_node.span, "named return element '{s}' is given more than once", .{en}); + return value_node; + } + } + } + // All names are distinct participating-slot names and arity matches, so the + // mapping is a bijection: every slot has exactly one matching element. + const reordered = self.alloc.alloc(ast.TupleElement, match_count) catch return value_node; + var slot: usize = 0; + while (slot < match_count) : (slot += 1) { + const sn = self.module.types.getString(slot_names[slot]); + var filled = false; + for (els) |e| { + if (std.mem.eql(u8, e.name.?, sn)) { + reordered[slot] = e; + filled = true; + break; + } + } + // Validation above guarantees a bijection, so every slot is filled. If a + // slot is somehow unmatched (e.g. an empty/unnamed slot in a full-tuple + // form), bail rather than lower an uninitialized element. + if (!filled) return value_node; + } + + const node = self.alloc.create(Node) catch return value_node; + node.* = .{ .span = value_node.span, .data = .{ .tuple_literal = .{ .elements = reordered } } }; + return node; +} + pub fn lowerReturn(self: *Lowering, rs: *const ast.ReturnStmt) void { if (rs.value) |val| { if (val.data == .identifier and self.isPackName(val.data.identifier.name)) { @@ -789,8 +884,12 @@ pub fn lowerReturn(self: *Lowering, rs: *const ast.ReturnStmt) void { // comptime-body return path too (iri.ret_ty is the failable tuple there). 
const target_for_value = self.failableReturnTarget(ret_ty_for_target, rs.value); if (target_for_value != .void) self.target_type = target_for_value; - // Evaluate return value first (before defers) - const ret_val = if (rs.value) |val| self.lowerExpr(val) else null; + // Evaluate return value first (before defers). A fully-named multi-return + // list is permuted to slot order by name (`return b = …, a = …`) before + // lowering — `reorderNamedReturn` is a no-op for positional / non-tuple + // returns and for the inline-comptime case (ret_ty_for_target carries the + // right tuple either way). + const ret_val = if (rs.value) |val| self.lowerExpr(reorderNamedReturn(self, val, ret_ty_for_target)) else null; self.target_type = old_target; // Inlined-comptime-body return: store into the slot the inliner @@ -1167,6 +1266,10 @@ pub fn lowerAssignment(self: *Lowering, asgn: *const ast.Assignment) void { var store_val = val; const val_ty = self.builder.getRefType(val); if (val_ty != binding.ty and val_ty != .void and binding.ty != .void) { + // A reassignment with no coercion to the slot type + // (`x = "hi"` for `x: i32`) would pass through and + // bit-mangle the slot (issue 0197) — diagnose instead. + if (!self.checkAssignable(val_ty, binding.ty, asgn.value.span, "reassign", id.name, asgn.value)) return; store_val = self.coerceToType(val, val_ty, binding.ty); } self.builder.store(binding.ref, store_val); @@ -1186,6 +1289,10 @@ pub fn lowerAssignment(self: *Lowering, asgn: *const ast.Assignment) void { if (self.resolveGlobalRef(id.name, asgn.target.span)) |gi| { if (asgn.op == .assign) { const val_ty = self.builder.getRefType(val); + if (val_ty != gi.ty and val_ty != .void and gi.ty != .void) { + // No coercion to the global's type — bit-mangle guard (issue 0197). 
+ if (!self.checkAssignable(val_ty, gi.ty, asgn.value.span, "reassign", id.name, asgn.value)) return; + } const store_val = if (val_ty != gi.ty and val_ty != .void and gi.ty != .void) self.coerceToType(val, val_ty, gi.ty) else @@ -1267,6 +1374,11 @@ pub fn lowerAssignment(self: *Lowering, asgn: *const ast.Assignment) void { // *field_ty (the store handler unwraps one pointer level); // fl.ty is the value type to coerce the rhs to. const src_ty = self.builder.getRefType(val); + // Guard a width-mismatched `.none` store into the field slot + // (`w.s = "hi"` for a struct field `s`) — it would overrun the + // slot and corrupt neighbors (issue 0197). Plain `=` only; + // compound ops load-op-store through the field type. + if (asgn.op == .assign and !self.checkAssignable(src_ty, fl.ty, asgn.value.span, "assign", fa.field, asgn.value)) return; const coerced = self.coerceToType(val, src_ty, fl.ty); self.storeOrCompound(fl.ptr, coerced, asgn.op, fl.ty); } else { @@ -1295,6 +1407,7 @@ pub fn lowerAssignment(self: *Lowering, asgn: *const ast.Assignment) void { const fld_ty = tinfo.fields[fi]; const base = self.getExprAlloca(ie.object) orelse self.lowerExprAsPtr(ie.object); const gep = self.builder.structGepTyped(base, fi, self.module.types.ptrTo(fld_ty), obj_ty); + if (asgn.op == .assign and !self.checkAssignable(self.builder.getRefType(val), fld_ty, asgn.value.span, "assign", "element", asgn.value)) return; const coerced = self.coerceToType(val, self.builder.getRefType(val), fld_ty); self.storeOrCompound(gep, coerced, asgn.op, fld_ty); return; @@ -1310,6 +1423,10 @@ pub fn lowerAssignment(self: *Lowering, asgn: *const ast.Assignment) void { const idx = self.lowerExpr(ie.index); const elem_ty = self.ptrToArrayElem(obj_ty) orelse self.getElementType(obj_ty); const ptr_ty = self.module.types.ptrTo(elem_ty); + // Guard a width-mismatched `.none` store into an element slot + // (`arr[0] = "hi"` for an i32 array) — it would overrun the element + // and corrupt neighbors (issue 
0197). Plain `=` only. + if (asgn.op == .assign and !self.checkAssignable(self.builder.getRefType(val), elem_ty, asgn.value.span, "assign", "element", asgn.value)) return; // For fixed-size array assignment targets, use the alloca pointer directly // so that the store modifies the original variable (not a loaded copy). const is_array = !obj_ty.isBuiltin() and self.module.types.get(obj_ty) == .array; @@ -1342,6 +1459,9 @@ pub fn lowerAssignment(self: *Lowering, asgn: *const ast.Assignment) void { break :blk ptr_ty; }; const val_ty = self.builder.getRefType(val); + // Guard a width-mismatched `.none` store through the pointer + // (`p.* = "hi"` for a `*i32`) — overruns the pointee (issue 0197). + if (!self.checkAssignable(val_ty, pointee_ty, asgn.value.span, "assign", "target", asgn.value)) return; const store_val = if (val_ty != pointee_ty and val_ty != .void and pointee_ty != .void) self.coerceToType(val, val_ty, pointee_ty) else @@ -1961,6 +2081,8 @@ pub fn lowerMultiAssign(self: *Lowering, ma: *const ast.MultiAssign) void { if (scope.lookup(id.name)) |binding| { if (binding.is_alloca) { const val_ty = self.builder.getRefType(val); + // Width-mismatched `.none` store guard (issue 0197). 
+ if (!self.checkAssignable(val_ty, binding.ty, ma.values[i].span, "assign", id.name, ma.values[i])) continue; const store_val = if (val_ty != binding.ty and val_ty != .void and binding.ty != .void) self.coerceToType(val, val_ty, binding.ty) else @@ -1986,6 +2108,7 @@ pub fn lowerMultiAssign(self: *Lowering, ma: *const ast.MultiAssign) void { const base = self.getExprAlloca(ie.object) orelse self.lowerExprAsPtr(ie.object); const gep = self.builder.structGepTyped(base, fi, self.module.types.ptrTo(fld_ty), obj_ty); const v_ty = self.builder.getRefType(val); + if (!self.checkAssignable(v_ty, fld_ty, ma.values[i].span, "assign", "element", ma.values[i])) continue; const sv = if (v_ty != fld_ty and v_ty != .void and fld_ty != .void) self.coerceToType(val, v_ty, fld_ty) else val; self.builder.store(gep, sv); continue; @@ -2005,6 +2128,7 @@ pub fn lowerMultiAssign(self: *Lowering, ma: *const ast.MultiAssign) void { const elem_ty = self.ptrToArrayElem(obj_ty) orelse self.getElementType(obj_ty); const ptr_ty = self.module.types.ptrTo(elem_ty); const val_ty = self.builder.getRefType(val); + if (!self.checkAssignable(val_ty, elem_ty, ma.values[i].span, "assign", "element", ma.values[i])) continue; const store_val = if (val_ty != elem_ty and val_ty != .void and elem_ty != .void) self.coerceToType(val, val_ty, elem_ty) else @@ -2037,6 +2161,7 @@ pub fn lowerMultiAssign(self: *Lowering, ma: *const ast.MultiAssign) void { // (or panicked at LLVM emission). 
if (self.fieldLvaluePtr(obj_ptr, obj_ty, fa.field)) |r| { const val_ty = self.builder.getRefType(val); + if (!self.checkAssignable(val_ty, r.ty, ma.values[i].span, "assign", fa.field, ma.values[i])) continue; const store_val = if (val_ty != r.ty and val_ty != .void and r.ty != .void) self.coerceToType(val, val_ty, r.ty) else @@ -2057,6 +2182,7 @@ pub fn lowerMultiAssign(self: *Lowering, ma: *const ast.MultiAssign) void { break :blk ptr_ty; }; const val_ty = self.builder.getRefType(val); + if (!self.checkAssignable(val_ty, pointee_ty, ma.values[i].span, "assign", "target", ma.values[i])) continue; const store_val = if (val_ty != pointee_ty and val_ty != .void and pointee_ty != .void) self.coerceToType(val, val_ty, pointee_ty) else