fix: union struct-literal init (issue 0158)
A plain union initialized with a struct literal (b : Overlay = .{ f = 3.14 })
silently miscompiled — it fell through the generic struct-literal path
(getStructFields returns empty for a union), building a malformed structInit
whose overlapping zero-fill clobbered the named member, so it read back 0.0
(and a type-pun read segfaulted).
lowerStructLiteral now detects a plain-union target and dispatches to a new
lowerUnionLiteral, which writes each named member into a union-sized slot via
the same lvalue resolver the u.member = v assignment path uses, then loads the
union value back. Validity: the named members must share one arm — a single
direct member, or several promoted members of the same anonymous-struct variant.
Overlapping members, members from different arms, and positional union literals
are rejected with a diagnostic (no silent last-wins); an empty .{} yields an
undefined union (matching the --- form).
specs.md updated. Regressions: examples/types/0194 (valid forms) +
examples/diagnostics/1191 (overlap rejection).
This commit is contained in:
@@ -0,0 +1,13 @@
|
|||||||
|
// A union struct-literal may set only ONE arm — a single direct member, or
|
||||||
|
// several promoted members of the same anonymous-struct arm. Naming two
|
||||||
|
// members that overlay the same storage is a compile error (a later store
|
||||||
|
// would otherwise silently clobber an earlier one). This guards that
|
||||||
|
// diagnostic. (Companion: examples/types/0194 covers the valid forms.)
|
||||||
|
#import "modules/std.sx";
|
||||||
|
|
||||||
|
Overlay :: union { f: f32; i: i32; }
|
||||||
|
|
||||||
|
main :: () {
|
||||||
|
o : Overlay = .{ f = 3.14, i = 7 }; // ERROR: f and i overlay the same bytes
|
||||||
|
print("{}\n", o.i);
|
||||||
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
1
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
error: a union literal may set only one member, or several members of the same anonymous-struct arm — 'Overlay's members overlay the same storage
|
||||||
|
--> examples/diagnostics/1191-diagnostics-union-literal-overlap.sx:11:19
|
||||||
|
|
|
||||||
|
11 | o : Overlay = .{ f = 3.14, i = 7 }; // ERROR: f and i overlay the same bytes
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
36
examples/types/0194-types-union-literal-init.sx
Normal file
36
examples/types/0194-types-union-literal-init.sx
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
// Union struct-literal initialization — a union may be built with a struct
|
||||||
|
// literal that sets a SINGLE arm: one direct member, or several promoted
|
||||||
|
// members of the same anonymous-struct arm. Equivalent to the `--- `+per-field
|
||||||
|
// form, and the two views stay consistent (type-punning still works).
|
||||||
|
//
|
||||||
|
// Regression (issue 0158): a union literal previously fell through the generic
|
||||||
|
// struct-literal path (getStructFields returns empty for a union), building a
|
||||||
|
// malformed structInit whose overlapping zero-fill clobbered the named member —
|
||||||
|
// `b : Overlay = .{ f = 3.14 }` silently read back 0.0. Now it lowers like the
|
||||||
|
// `--- `+member-write form: each named member is written into a union-sized slot.
|
||||||
|
#import "modules/std.sx";
|
||||||
|
|
||||||
|
Overlay :: union { f: f32; i: i32; }
|
||||||
|
Vec2 :: union { data: [2]f32; struct { x, y: f32; }; }
|
||||||
|
|
||||||
|
main :: () {
|
||||||
|
// Single-member literal — the bug case. Reads back the value, and a
|
||||||
|
// type-punning read of the other member sees the same bytes.
|
||||||
|
b : Overlay = .{ f = 3.14 };
|
||||||
|
print("b.f={} b.i={}\n", b.f, b.i);
|
||||||
|
|
||||||
|
// Equivalence: the `--- `+member-write form produces identical bytes.
|
||||||
|
a : Overlay = ---;
|
||||||
|
a.f = 3.14;
|
||||||
|
print("a.f={} a.i={}\n", a.f, a.i);
|
||||||
|
|
||||||
|
// Promoted members of the anonymous-struct arm: both belong to the same
|
||||||
|
// arm (offsets 0 and 4), so naming both is valid; the `data` view overlays.
|
||||||
|
v : Vec2 = .{ x = 1.0, y = 2.0 };
|
||||||
|
print("v.x={} v.y={} d0={} d1={}\n", v.x, v.y, v.data[0], v.data[1]);
|
||||||
|
|
||||||
|
// Empty literal → undefined union (same as `---`); set a member after.
|
||||||
|
e : Overlay = .{};
|
||||||
|
e.i = 9;
|
||||||
|
print("e.i={}\n", e.i);
|
||||||
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
0
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
b.f=3.140000 b.i=1078523331
|
||||||
|
a.f=3.140000 a.i=1078523331
|
||||||
|
v.x=1.000000 v.y=2.000000 d0=1.000000 d1=2.000000
|
||||||
|
e.i=9
|
||||||
95
issues/0158-union-struct-literal-silently-miscompiles.md
Normal file
95
issues/0158-union-struct-literal-silently-miscompiles.md
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
# issue 0158 — a union struct-literal `.{ member = v }` silently miscompiles (wrong value / segfault) instead of being rejected
|
||||||
|
|
||||||
|
> **RESOLVED.** Root cause: a plain `union` literal fell through the generic
|
||||||
|
> struct-literal path (`getStructFields` returns empty for a union →
|
||||||
|
> `lowerStructLiteral` built a malformed `structInit` whose overlapping
|
||||||
|
> zero-fill clobbered the named member). Fix: chose to MAKE IT WORK (vs reject) —
|
||||||
|
> `lowerStructLiteral` now detects a plain-union target and dispatches to a new
|
||||||
|
> `lowerUnionLiteral` (`src/ir/lower/stmt.zig`) that writes each named member
|
||||||
|
> into a union-sized slot via the same lvalue resolver the `u.member = v`
|
||||||
|
> assignment path uses, then loads the union value back. Validity: the named
|
||||||
|
> members must share ONE arm (a single direct member, or several promoted
|
||||||
|
> members of the same anonymous-struct variant) — naming overlapping members, or
|
||||||
|
> members from different arms, is rejected with a diagnostic (no silent
|
||||||
|
> last-wins); a positional union literal is rejected as ambiguous; `.{}` yields
|
||||||
|
> an undefined union. specs.md §Union/Initialization updated. Regression:
|
||||||
|
> `examples/types/0194-types-union-literal-init.sx` (valid forms) +
|
||||||
|
> `examples/diagnostics/1191-diagnostics-union-literal-overlap.sx` (rejection).
|
||||||
|
|
||||||
|
## Symptom
|
||||||
|
|
||||||
|
A union initialized with a **struct literal** is silently accepted by the
|
||||||
|
compiler and produces the **wrong value** — with no diagnostic.
|
||||||
|
|
||||||
|
specs.md (§Union Types → Initialization) is explicit:
|
||||||
|
|
||||||
|
> Unions must be initialized with `---` (undefined) and then assigned per-field.
|
||||||
|
|
||||||
|
So a `.{ member = v }` literal is not a valid union initializer. But instead of
|
||||||
|
rejecting it, the compiler miscompiles it:
|
||||||
|
|
||||||
|
```
|
||||||
|
uninit form (correct): 3.140000 ← a : Overlay = ---; a.f = 3.14;
|
||||||
|
literal form (wrong): 0.000000 ← b : Overlay = .{ f = 3.14 }; (should be 3.14, or an error)
|
||||||
|
```
|
||||||
|
|
||||||
|
Observed: the named member's value is dropped (reads back `0.0`). A
|
||||||
|
type-punning read after the literal (`print("{}", b.i)`) additionally
|
||||||
|
**segfaults**, indicating the literal store corrupts/zeroes the slot rather than
|
||||||
|
writing the named member — the same silent-frame-corruption class as issue 0154.
|
||||||
|
|
||||||
|
Expected: either (preferred) a clean compile-time diagnostic — "a union must be
|
||||||
|
initialized with `--- ` then assigned per-field (see specs.md); struct-literal
|
||||||
|
init is not supported for unions" — or correct lowering that stores `v` into the
|
||||||
|
named member. A silently-wrong value (and a conditional segfault) is the
|
||||||
|
forbidden silent-corruption outcome.
|
||||||
|
|
||||||
|
## Reproduction
|
||||||
|
|
||||||
|
```sx
|
||||||
|
#import "modules/std.sx";
|
||||||
|
Overlay :: union { f: f32; i: i32; }
|
||||||
|
main :: () -> i64 {
|
||||||
|
a : Overlay = ---; // spec-mandated form — correct
|
||||||
|
a.f = 3.14;
|
||||||
|
print("correct: {}\n", a.f); // 3.140000
|
||||||
|
|
||||||
|
b : Overlay = .{ f = 3.14 }; // union struct-literal — silently MISCOMPILES
|
||||||
|
print("wrong: {}\n", b.f); // 0.000000 ← bug
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
(repro: `issues/0158-union-struct-literal-silently-miscompiles.sx`)
|
||||||
|
|
||||||
|
## Investigation prompt
|
||||||
|
|
||||||
|
> The sx compiler silently miscompiles a union initialized with a struct literal
|
||||||
|
> (`b : Overlay = .{ f = 3.14 }` reads back `0.0` instead of `3.14`; a
|
||||||
|
> type-punning read afterwards segfaults). Per specs.md (§Union Types →
|
||||||
|
> Initialization) unions MUST be initialized with `--- ` then assigned per-field,
|
||||||
|
> so a struct literal is not a valid union initializer — but it is currently
|
||||||
|
> accepted and miscompiled rather than diagnosed. Repro:
|
||||||
|
> `issues/0158-union-struct-literal-silently-miscompiles.sx`.
|
||||||
|
>
|
||||||
|
> Trace the struct-literal lowering path (`src/ir/lower/` — the `.struct_literal`
|
||||||
|
> arm in expr/stmt lowering, and `lowerAssignment` in `src/ir/lower/stmt.zig`
|
||||||
|
> where a `name : T = .{...}` decl is lowered). At the point the literal's target
|
||||||
|
> type is known, check whether it resolves to a **union** TypeId
|
||||||
|
> (`module.types.get(ty) == .union_type` or equivalent). Decide the intended
|
||||||
|
> behavior:
|
||||||
|
> - **Preferred (matches the spec):** emit a diagnostic via
|
||||||
|
> `self.diagnostics.addFmt(.err, span, "a union must be initialized with `--- `
|
||||||
|
> then assigned per-field; struct-literal init is not supported for unions
|
||||||
|
> (see specs.md)", .{})` and do not lower the bad store. This makes the spec
|
||||||
|
> rule enforced instead of silently violated.
|
||||||
|
> - **Alternative (if union literals are wanted later):** lower a single-member
|
||||||
|
> union literal correctly — store the one named member at offset 0 with the
|
||||||
|
> member's type/size (NOT a whole-union-sized zero/aggregate store, which is
|
||||||
|
> what currently drops the value and corrupts the slot — cf. issue 0154's
|
||||||
|
> oversized-store class). Reject a literal naming ≥2 overlapping members.
|
||||||
|
>
|
||||||
|
> Verify: `sx run` the repro — expect either a clean compile error (preferred) or
|
||||||
|
> `wrong: 3.140000`, never a silent `0.0` and never a segfault. If diagnosing,
|
||||||
|
> add a `1xxx-diagnostics-union-struct-literal-rejected` example; if lowering,
|
||||||
|
> promote the repro to a regression under `examples/types/`.
|
||||||
20
specs.md
20
specs.md
@@ -452,12 +452,26 @@ Vec2 :: union {
|
|||||||
Access promoted members directly: `v.x`, `v.y` — these are zero-cost GEPs into the same underlying memory as `v.data[0]`, `v.data[1]`.
|
Access promoted members directly: `v.x`, `v.y` — these are zero-cost GEPs into the same underlying memory as `v.data[0]`, `v.data[1]`.
|
||||||
|
|
||||||
#### Initialization
|
#### Initialization
|
||||||
Unions must be initialized with `---` (undefined) and then assigned per-field:
|
A union may be initialized either with `---` (undefined) then assigned
|
||||||
|
per-field, or with a struct literal that sets a **single arm**:
|
||||||
```sx
|
```sx
|
||||||
o :Overlay = ---;
|
o :Overlay = ---; // undefined, then set a member
|
||||||
o.f = 3.14;
|
o.f = 3.14;
|
||||||
print("{}\n", o.i); // reinterpret bits as i32
|
print("{}\n", o.i); // reinterpret bits as i32
|
||||||
|
|
||||||
|
p :Overlay = .{ f = 3.14 }; // equivalent single-member literal
|
||||||
```
|
```
|
||||||
|
Because a union's members overlay the same storage, a literal may name **only
|
||||||
|
one member** — or, for a union with an anonymous-struct arm, several members of
|
||||||
|
the *same* arm (they do not overlap each other):
|
||||||
|
```sx
|
||||||
|
v :Vec2 = .{ x = 1.0, y = 2.0 }; // OK — both belong to the { x, y } arm
|
||||||
|
```
|
||||||
|
Naming two overlapping members (`.{ f = 3.14, i = 7 }`) or members from
|
||||||
|
different arms (`.{ data = ..., x = ... }`) is a compile error — the literal
|
||||||
|
would otherwise silently let a later store clobber an earlier one. An empty
|
||||||
|
`.{}` yields an undefined union (same as `---`). A positional union literal
|
||||||
|
(`.{ 3.14 }`) is rejected as ambiguous.
|
||||||
|
|
||||||
#### Restrictions
|
#### Restrictions
|
||||||
- Pattern matching (`if x == { case ... }`) is not supported on unions.
|
- Pattern matching (`if x == { case ... }`) is not supported on unions.
|
||||||
|
|||||||
@@ -1661,6 +1661,7 @@ pub const Lowering = struct {
|
|||||||
pub const lowerAssignment = lower_stmt.lowerAssignment;
|
pub const lowerAssignment = lower_stmt.lowerAssignment;
|
||||||
pub const fieldLvalueResolve = lower_stmt.fieldLvalueResolve;
|
pub const fieldLvalueResolve = lower_stmt.fieldLvalueResolve;
|
||||||
pub const fieldLvaluePtr = lower_stmt.fieldLvaluePtr;
|
pub const fieldLvaluePtr = lower_stmt.fieldLvaluePtr;
|
||||||
|
pub const lowerUnionLiteral = lower_stmt.lowerUnionLiteral;
|
||||||
pub const diagTaggedUnionVariantWrite = lower_stmt.diagTaggedUnionVariantWrite;
|
pub const diagTaggedUnionVariantWrite = lower_stmt.diagTaggedUnionVariantWrite;
|
||||||
pub const lowerExprAsPtr = lower_stmt.lowerExprAsPtr;
|
pub const lowerExprAsPtr = lower_stmt.lowerExprAsPtr;
|
||||||
pub const storeOrCompound = lower_stmt.storeOrCompound;
|
pub const storeOrCompound = lower_stmt.storeOrCompound;
|
||||||
|
|||||||
@@ -86,6 +86,15 @@ pub fn lowerStructLiteral(self: *Lowering, sl: *const ast.StructLiteral, span: a
|
|||||||
self.resolveTypeWithBindings(te)
|
self.resolveTypeWithBindings(te)
|
||||||
else self.target_type orelse .unresolved;
|
else self.target_type orelse .unresolved;
|
||||||
|
|
||||||
|
// Plain (untagged) union target: build by writing each named member into a
|
||||||
|
// union-sized slot. `getStructFields` returns empty for a union, so the
|
||||||
|
// generic struct path below would emit a malformed `structInit` whose
|
||||||
|
// overlapping zero-fill clobbers the named member (issue 0158). Tagged
|
||||||
|
// unions were already handled above.
|
||||||
|
if (!ty.isBuiltin() and self.module.types.get(ty) == .@"union") {
|
||||||
|
return self.lowerUnionLiteral(sl, ty, span);
|
||||||
|
}
|
||||||
|
|
||||||
// Get struct field types for coercion and ordering
|
// Get struct field types for coercion and ordering
|
||||||
const struct_fields = self.getStructFields(ty);
|
const struct_fields = self.getStructFields(ty);
|
||||||
|
|
||||||
|
|||||||
@@ -1004,6 +1004,77 @@ pub fn fieldLvaluePtr(self: *Lowering, obj_ptr: Ref, obj_ty: TypeId, field: []co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lower a plain (untagged) `union` struct-literal `.{ member = value, ... }`.
|
||||||
|
/// The generic struct-literal path can't build a union — `getStructFields`
|
||||||
|
/// returns empty for a union, so a union literal would fall through to a
|
||||||
|
/// malformed `structInit` whose overlapping zero-fill clobbers the named member
|
||||||
|
/// (issue 0158). Instead, mirror the spec's `--- `+per-field form: write each
|
||||||
|
/// named member into an (otherwise-undefined) union-sized slot via the SAME
|
||||||
|
/// lvalue resolver the assignment path uses, then load the union value back.
|
||||||
|
///
|
||||||
|
/// Validity: union members overlay one storage slot, so the named members must
|
||||||
|
/// all belong to ONE arm — either a single direct member (`.{ f = 3.14 }`) or
|
||||||
|
/// several promoted members of the SAME anonymous-struct variant
|
||||||
|
/// (`.{ x = 1.0, y = 2.0 }`). Naming two direct members, or members from
|
||||||
|
/// different arms, would silently let a later store clobber an earlier one —
|
||||||
|
/// reject it loudly (no silent last-wins). `tagged_union`s never reach here
|
||||||
|
/// (handled earlier in `lowerStructLiteral`).
|
||||||
|
pub fn lowerUnionLiteral(self: *Lowering, sl: *const ast.StructLiteral, ty: TypeId, span: ast.Span) Ref {
|
||||||
|
// Empty `.{}` → an undefined union value (matches the spec's `--- ` form;
|
||||||
|
// `zeroValue` of a union is `constUndef`).
|
||||||
|
if (sl.field_inits.len == 0) return self.zeroValue(ty);
|
||||||
|
|
||||||
|
// Validate every member is named and all share one arm.
|
||||||
|
const Arm = struct { promoted: bool, index: u32 };
|
||||||
|
var arm: ?Arm = null;
|
||||||
|
for (sl.field_inits) |fi| {
|
||||||
|
const fname = fi.name orelse {
|
||||||
|
if (self.diagnostics) |d|
|
||||||
|
d.addFmt(.err, span, "a union literal must name its member(s): `.{{ member = value }}` (positional union init is ambiguous)", .{});
|
||||||
|
return self.zeroValue(ty);
|
||||||
|
};
|
||||||
|
const res = self.fieldLvalueResolve(ty, fname) orelse {
|
||||||
|
_ = self.emitFieldError(ty, fname, span);
|
||||||
|
return self.zeroValue(ty);
|
||||||
|
};
|
||||||
|
const cur: Arm = switch (res) {
|
||||||
|
.union_direct => |u| .{ .promoted = false, .index = u.index },
|
||||||
|
.union_promoted => |u| .{ .promoted = true, .index = u.variant_index },
|
||||||
|
// A union name never resolves to `.indexed`, but be safe rather
|
||||||
|
// than silently mis-store.
|
||||||
|
.indexed => {
|
||||||
|
_ = self.emitFieldError(ty, fname, span);
|
||||||
|
return self.zeroValue(ty);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
if (arm) |a| {
|
||||||
|
// Allowed only when BOTH are promoted members of the SAME variant.
|
||||||
|
if (!a.promoted or !cur.promoted or a.index != cur.index) {
|
||||||
|
if (self.diagnostics) |d|
|
||||||
|
d.addFmt(.err, span, "a union literal may set only one member, or several members of the same anonymous-struct arm — '{s}'s members overlay the same storage", .{self.formatTypeName(ty)});
|
||||||
|
return self.zeroValue(ty);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
arm = cur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct: write each member at its lvalue into an undefined union slot.
|
||||||
|
const slot = self.builder.alloca(ty);
|
||||||
|
for (sl.field_inits) |fi| {
|
||||||
|
const fname = fi.name.?; // validated above
|
||||||
|
const member_ty = (self.fieldLvalueResolve(ty, fname) orelse unreachable).valueType();
|
||||||
|
const saved_tt = self.target_type;
|
||||||
|
self.target_type = member_ty;
|
||||||
|
const val = self.lowerExpr(fi.value);
|
||||||
|
self.target_type = saved_tt;
|
||||||
|
const fl = self.fieldLvaluePtr(slot, ty, fname) orelse unreachable;
|
||||||
|
const coerced = self.coerceToType(val, self.builder.getRefType(val), fl.ty);
|
||||||
|
self.builder.store(fl.ptr, coerced);
|
||||||
|
}
|
||||||
|
return self.builder.load(slot, ty);
|
||||||
|
}
|
||||||
|
|
||||||
/// True (and emits the diagnostic) when `obj.field` names a DIRECT variant of a
|
/// True (and emits the diagnostic) when `obj.field` names a DIRECT variant of a
|
||||||
/// tagged union — a store target that would set the payload but NOT the tag
|
/// tagged union — a store target that would set the payload but NOT the tag
|
||||||
/// (issue 0136): a tagged union is laid out `{ tag, payload }`, the write path
|
/// (issue 0136): a tagged union is laid out `{ tag, payload }`, the write path
|
||||||
|
|||||||
Reference in New Issue
Block a user