diff --git a/examples/diagnostics/1191-diagnostics-union-literal-overlap.sx b/examples/diagnostics/1191-diagnostics-union-literal-overlap.sx new file mode 100644 index 00000000..d7ea06de --- /dev/null +++ b/examples/diagnostics/1191-diagnostics-union-literal-overlap.sx @@ -0,0 +1,13 @@ +// A union struct-literal may set only ONE arm — a single direct member, or +// several promoted members of the same anonymous-struct arm. Naming two +// members that overlay the same storage is a compile error (a later store +// would otherwise silently clobber an earlier one). This guards that +// diagnostic. (Companion: examples/types/0194 covers the valid forms.) +#import "modules/std.sx"; + +Overlay :: union { f: f32; i: i32; } + +main :: () { + o : Overlay = .{ f = 3.14, i = 7 }; // ERROR: f and i overlay the same bytes + print("{}\n", o.i); +} diff --git a/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.exit b/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.exit @@ -0,0 +1 @@ +1 diff --git a/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.stderr b/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.stderr new file mode 100644 index 00000000..9726559a --- /dev/null +++ b/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.stderr @@ -0,0 +1,5 @@ +error: a union literal may set only one member, or several members of the same anonymous-struct arm — 'Overlay's members overlay the same storage + --> examples/diagnostics/1191-diagnostics-union-literal-overlap.sx:11:19 + | +11 | o : Overlay = .{ f = 3.14, i = 7 }; // ERROR: f and i overlay the same bytes + | ^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.stdout b/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/diagnostics/expected/1191-diagnostics-union-literal-overlap.stdout @@ -0,0 +1 @@ + diff --git a/examples/types/0194-types-union-literal-init.sx b/examples/types/0194-types-union-literal-init.sx new file mode 100644 index 00000000..9e6c3d59 --- /dev/null +++ b/examples/types/0194-types-union-literal-init.sx @@ -0,0 +1,36 @@ +// Union struct-literal initialization — a union may be built with a struct +// literal that sets a SINGLE arm: one direct member, or several promoted +// members of the same anonymous-struct arm. Equivalent to the `--- `+per-field +// form, and the two views stay consistent (type-punning still works). +// +// Regression (issue 0158): a union literal previously fell through the generic +// struct-literal path (getStructFields returns empty for a union), building a +// malformed structInit whose overlapping zero-fill clobbered the named member — +// `b : Overlay = .{ f = 3.14 }` silently read back 0.0. Now it lowers like the +// `--- `+member-write form: each named member is written into a union-sized slot. +#import "modules/std.sx"; + +Overlay :: union { f: f32; i: i32; } +Vec2 :: union { data: [2]f32; struct { x, y: f32; }; } + +main :: () { + // Single-member literal — the bug case. Reads back the value, and a + // type-punning read of the other member sees the same bytes. + b : Overlay = .{ f = 3.14 }; + print("b.f={} b.i={}\n", b.f, b.i); + + // Equivalence: the `--- `+member-write form produces identical bytes. + a : Overlay = ---; + a.f = 3.14; + print("a.f={} a.i={}\n", a.f, a.i); + + // Promoted members of the anonymous-struct arm: both belong to the same + // arm (offsets 0 and 4), so naming both is valid; the `data` view overlays. + v : Vec2 = .{ x = 1.0, y = 2.0 }; + print("v.x={} v.y={} d0={} d1={}\n", v.x, v.y, v.data[0], v.data[1]); + + // Empty literal → undefined union (same as `---`); set a member after. + e : Overlay = .{}; + e.i = 9; + print("e.i={}\n", e.i); +} diff --git a/examples/types/expected/0194-types-union-literal-init.exit b/examples/types/expected/0194-types-union-literal-init.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/types/expected/0194-types-union-literal-init.exit @@ -0,0 +1 @@ +0 diff --git a/examples/types/expected/0194-types-union-literal-init.stderr b/examples/types/expected/0194-types-union-literal-init.stderr new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/types/expected/0194-types-union-literal-init.stderr @@ -0,0 +1 @@ + diff --git a/examples/types/expected/0194-types-union-literal-init.stdout b/examples/types/expected/0194-types-union-literal-init.stdout new file mode 100644 index 00000000..cc6fb13f --- /dev/null +++ b/examples/types/expected/0194-types-union-literal-init.stdout @@ -0,0 +1,4 @@ +b.f=3.140000 b.i=1078523331 +a.f=3.140000 a.i=1078523331 +v.x=1.000000 v.y=2.000000 d0=1.000000 d1=2.000000 +e.i=9 diff --git a/issues/0158-union-struct-literal-silently-miscompiles.md b/issues/0158-union-struct-literal-silently-miscompiles.md new file mode 100644 index 00000000..128b3b37 --- /dev/null +++ b/issues/0158-union-struct-literal-silently-miscompiles.md @@ -0,0 +1,95 @@ +# issue 0158 — a union struct-literal `.{ member = v }` silently miscompiles (wrong value / segfault) instead of being rejected + +> **RESOLVED.** Root cause: a plain `union` literal fell through the generic +> struct-literal path (`getStructFields` returns empty for a union → +> `lowerStructLiteral` built a malformed `structInit` whose overlapping +> zero-fill clobbered the named member). Fix: chose to MAKE IT WORK (vs reject) — +> `lowerStructLiteral` now detects a plain-union target and dispatches to a new +> `lowerUnionLiteral` (`src/ir/lower/stmt.zig`) that writes each named member +> into a union-sized slot via the same lvalue resolver the `u.member = v` +> assignment path uses, then loads the union value back. Validity: the named +> members must share ONE arm (a single direct member, or several promoted +> members of the same anonymous-struct variant) — naming overlapping members, or +> members from different arms, is rejected with a diagnostic (no silent +> last-wins); a positional union literal is rejected as ambiguous; `.{}` yields +> an undefined union. specs.md §Union/Initialization updated. Regression: +> `examples/types/0194-types-union-literal-init.sx` (valid forms) + +> `examples/diagnostics/1191-diagnostics-union-literal-overlap.sx` (rejection). + +## Symptom + +A union initialized with a **struct literal** is silently accepted by the +compiler and produces the **wrong value** — with no diagnostic. + +specs.md (§Union Types → Initialization) is explicit: + +> Unions must be initialized with `---` (undefined) and then assigned per-field. + +So a `.{ member = v }` literal is not a valid union initializer. But instead of +rejecting it, the compiler miscompiles it: + +``` +uninit form (correct): 3.140000 ← a : Overlay = ---; a.f = 3.14; +literal form (wrong): 0.000000 ← b : Overlay = .{ f = 3.14 }; (should be 3.14, or an error) +``` + +Observed: the named member's value is dropped (reads back `0.0`). A +type-punning read after the literal (`print("{}", b.i)`) additionally +**segfaults**, indicating the literal store corrupts/zeroes the slot rather than +writing the named member — the same silent-frame-corruption class as issue 0154. + +Expected: either (preferred) a clean compile-time diagnostic — "a union must be +initialized with `--- ` then assigned per-field (see specs.md); struct-literal +init is not supported for unions" — or correct lowering that stores `v` into the +named member. A silently-wrong value (and a conditional segfault) is the +forbidden silent-corruption outcome. + +## Reproduction + +```sx +#import "modules/std.sx"; +Overlay :: union { f: f32; i: i32; } +main :: () -> i64 { + a : Overlay = ---; // spec-mandated form — correct + a.f = 3.14; + print("correct: {}\n", a.f); // 3.140000 + + b : Overlay = .{ f = 3.14 }; // union struct-literal — silently MISCOMPILES + print("wrong: {}\n", b.f); // 0.000000 ← bug + return 0; +} +``` + +(repro: `issues/0158-union-struct-literal-silently-miscompiles.sx`) + +## Investigation prompt + +> The sx compiler silently miscompiles a union initialized with a struct literal +> (`b : Overlay = .{ f = 3.14 }` reads back `0.0` instead of `3.14`; a +> type-punning read afterwards segfaults). Per specs.md (§Union Types → +> Initialization) unions MUST be initialized with `--- ` then assigned per-field, +> so a struct literal is not a valid union initializer — but it is currently +> accepted and miscompiled rather than diagnosed. Repro: +> `issues/0158-union-struct-literal-silently-miscompiles.sx`. +> +> Trace the struct-literal lowering path (`src/ir/lower/` — the `.struct_literal` +> arm in expr/stmt lowering, and `lowerAssignment` in `src/ir/lower/stmt.zig` +> where a `name : T = .{...}` decl is lowered). At the point the literal's target +> type is known, check whether it resolves to a **union** TypeId +> (`module.types.get(ty) == .union_type` or equivalent). Decide the intended +> behavior: +> - **Preferred (matches the spec):** emit a diagnostic via +> `self.diagnostics.addFmt(.err, span, "a union must be initialized with `--- ` +> then assigned per-field; struct-literal init is not supported for unions +> (see specs.md)", .{})` and do not lower the bad store. This makes the spec +> rule enforced instead of silently violated. +> - **Alternative (if union literals are wanted later):** lower a single-member +> union literal correctly — store the one named member at offset 0 with the +> member's type/size (NOT a whole-union-sized zero/aggregate store, which is +> what currently drops the value and corrupts the slot — cf. issue 0154's +> oversized-store class). Reject a literal naming ≥2 overlapping members. +> +> Verify: `sx run` the repro — expect either a clean compile error (preferred) or +> `wrong: 3.140000`, never a silent `0.0` and never a segfault. If diagnosing, +> add a `1xxx-diagnostics-union-struct-literal-rejected` example; if lowering, +> promote the repro to a regression under `examples/types/`. diff --git a/specs.md b/specs.md index e310aeec..13a4527b 100644 --- a/specs.md +++ b/specs.md @@ -452,12 +452,26 @@ Vec2 :: union { Access promoted members directly: `v.x`, `v.y` — these are zero-cost GEPs into the same underlying memory as `v.data[0]`, `v.data[1]`. #### Initialization -Unions must be initialized with `---` (undefined) and then assigned per-field: +A union may be initialized either with `---` (undefined) then assigned +per-field, or with a struct literal that sets a **single arm**: ```sx -o :Overlay = ---; +o :Overlay = ---; // undefined, then set a member o.f = 3.14; -print("{}\n", o.i); // reinterpret bits as i32 +print("{}\n", o.i); // reinterpret bits as i32 + +p :Overlay = .{ f = 3.14 }; // equivalent single-member literal ``` +Because a union's members overlay the same storage, a literal may name **only +one member** — or, for a union with an anonymous-struct arm, several members of +the *same* arm (they do not overlap each other): +```sx +v :Vec2 = .{ x = 1.0, y = 2.0 }; // OK — both belong to the { x, y } arm +``` +Naming two overlapping members (`.{ f = 3.14, i = 7 }`) or members from +different arms (`.{ data = ..., x = ... }`) is a compile error — the literal +would otherwise silently let a later store clobber an earlier one. An empty +`.{}` yields an undefined union (same as `---`). A positional union literal +(`.{ 3.14 }`) is rejected as ambiguous. #### Restrictions - Pattern matching (`if x == { case ... }`) is not supported on unions. diff --git a/src/ir/lower.zig b/src/ir/lower.zig index 13ffb6dd..ba82f29a 100644 --- a/src/ir/lower.zig +++ b/src/ir/lower.zig @@ -1661,6 +1661,7 @@ pub const Lowering = struct { pub const lowerAssignment = lower_stmt.lowerAssignment; pub const fieldLvalueResolve = lower_stmt.fieldLvalueResolve; pub const fieldLvaluePtr = lower_stmt.fieldLvaluePtr; + pub const lowerUnionLiteral = lower_stmt.lowerUnionLiteral; pub const diagTaggedUnionVariantWrite = lower_stmt.diagTaggedUnionVariantWrite; pub const lowerExprAsPtr = lower_stmt.lowerExprAsPtr; pub const storeOrCompound = lower_stmt.storeOrCompound; diff --git a/src/ir/lower/expr.zig b/src/ir/lower/expr.zig index 7008f6dd..96fd955d 100644 --- a/src/ir/lower/expr.zig +++ b/src/ir/lower/expr.zig @@ -86,6 +86,15 @@ pub fn lowerStructLiteral(self: *Lowering, sl: *const ast.StructLiteral, span: a self.resolveTypeWithBindings(te) else self.target_type orelse .unresolved; + // Plain (untagged) union target: build by writing each named member into a + // union-sized slot. `getStructFields` returns empty for a union, so the + // generic struct path below would emit a malformed `structInit` whose + // overlapping zero-fill clobbers the named member (issue 0158). Tagged + // unions were already handled above. + if (!ty.isBuiltin() and self.module.types.get(ty) == .@"union") { + return self.lowerUnionLiteral(sl, ty, span); + } + // Get struct field types for coercion and ordering const struct_fields = self.getStructFields(ty); diff --git a/src/ir/lower/stmt.zig b/src/ir/lower/stmt.zig index ce8d1647..8f7a3da8 100644 --- a/src/ir/lower/stmt.zig +++ b/src/ir/lower/stmt.zig @@ -1004,6 +1004,77 @@ pub fn fieldLvaluePtr(self: *Lowering, obj_ptr: Ref, obj_ty: TypeId, field: []co } } +/// Lower a plain (untagged) `union` struct-literal `.{ member = value, ... }`. +/// The generic struct-literal path can't build a union — `getStructFields` +/// returns empty for a union, so a union literal would fall through to a +/// malformed `structInit` whose overlapping zero-fill clobbers the named member +/// (issue 0158). Instead, mirror the spec's `--- `+per-field form: write each +/// named member into an (otherwise-undefined) union-sized slot via the SAME +/// lvalue resolver the assignment path uses, then load the union value back. +/// +/// Validity: union members overlay one storage slot, so the named members must +/// all belong to ONE arm — either a single direct member (`.{ f = 3.14 }`) or +/// several promoted members of the SAME anonymous-struct variant +/// (`.{ x = 1.0, y = 2.0 }`). Naming two direct members, or members from +/// different arms, would silently let a later store clobber an earlier one — +/// reject it loudly (no silent last-wins). `tagged_union`s never reach here +/// (handled earlier in `lowerStructLiteral`). +pub fn lowerUnionLiteral(self: *Lowering, sl: *const ast.StructLiteral, ty: TypeId, span: ast.Span) Ref { + // Empty `.{}` → an undefined union value (matches the spec's `--- ` form; + // `zeroValue` of a union is `constUndef`). + if (sl.field_inits.len == 0) return self.zeroValue(ty); + + // Validate every member is named and all share one arm. + const Arm = struct { promoted: bool, index: u32 }; + var arm: ?Arm = null; + for (sl.field_inits) |fi| { + const fname = fi.name orelse { + if (self.diagnostics) |d| + d.addFmt(.err, span, "a union literal must name its member(s): `.{{ member = value }}` (positional union init is ambiguous)", .{}); + return self.zeroValue(ty); + }; + const res = self.fieldLvalueResolve(ty, fname) orelse { + _ = self.emitFieldError(ty, fname, span); + return self.zeroValue(ty); + }; + const cur: Arm = switch (res) { + .union_direct => |u| .{ .promoted = false, .index = u.index }, + .union_promoted => |u| .{ .promoted = true, .index = u.variant_index }, + // A union name never resolves to `.indexed`, but be safe rather + // than silently mis-store. + .indexed => { + _ = self.emitFieldError(ty, fname, span); + return self.zeroValue(ty); + }, + }; + if (arm) |a| { + // Allowed only when BOTH are promoted members of the SAME variant. + if (!a.promoted or !cur.promoted or a.index != cur.index) { + if (self.diagnostics) |d| + d.addFmt(.err, span, "a union literal may set only one member, or several members of the same anonymous-struct arm — '{s}'s members overlay the same storage", .{self.formatTypeName(ty)}); + return self.zeroValue(ty); + } + } else { + arm = cur; + } + } + + // Construct: write each member at its lvalue into an undefined union slot. + const slot = self.builder.alloca(ty); + for (sl.field_inits) |fi| { + const fname = fi.name.?; // validated above + const member_ty = (self.fieldLvalueResolve(ty, fname) orelse unreachable).valueType(); + const saved_tt = self.target_type; + self.target_type = member_ty; + const val = self.lowerExpr(fi.value); + self.target_type = saved_tt; + const fl = self.fieldLvaluePtr(slot, ty, fname) orelse unreachable; + const coerced = self.coerceToType(val, self.builder.getRefType(val), fl.ty); + self.builder.store(fl.ptr, coerced); + } + return self.builder.load(slot, ty); +} + /// True (and emits the diagnostic) when `obj.field` names a DIRECT variant of a /// tagged union — a store target that would set the payload but NOT the tag /// (issue 0136): a tagged union is laid out `{ tag, payload }`, the write path