fix: reject direct assignment to a tagged-union variant member
A tagged union (enum-with-payload) is laid out { tag, payload }, but a
direct member write `s.rect = payload` lowered to a payload-only store
(union_gep into field 1) with no tag store — the discriminant went stale,
so a later match/== took the wrong arm with no diagnostic (issue 0136).
The read path already distinguishes tagged unions (enum_payload/enum_tag);
the write path treated them like plain unions.
A variant is set via construction (`s = .variant(payload)`, which writes
both tag and payload). A direct member write can't safely set the tag (the
active variant isn't known at the write site), so it is now rejected with a
diagnostic pointing to construction. A new diagTaggedUnionVariantWrite guard
— reusing the shared fieldLvalueResolve matcher, applied at both store sites
(lowerAssignment, lowerMultiAssign) — fires only for a whole-variant write
on a tagged union. Plain `union` writes and nested sub-field writes
(`s.rect.w = ...`) are unaffected.
Resolves issue 0136. Tests: examples/0185 (rejected), 0186 (nested write +
construction still work). specs.md / readme.md updated.
This commit is contained in:
20
examples/0185-types-tagged-union-member-assign-rejected.sx
Normal file
20
examples/0185-types-tagged-union-member-assign-rejected.sx
Normal file
@@ -0,0 +1,20 @@
|
||||
// A direct write to a tagged-union (enum-with-payload) variant member is
|
||||
// rejected: a tagged union is laid out `{ tag, payload }`, and a member write
|
||||
// would set the payload but leave the tag stale. The variant is set via
|
||||
// construction (`s = .rect(...)`), which writes both tag and payload.
|
||||
//
|
||||
// Regression (issue 0136): `s.rect = .{...}` used to silently store the payload
|
||||
// only, desyncing the tag so a later `match` took the wrong arm. It now errors.
|
||||
|
||||
#import "modules/std.sx";
|
||||
|
||||
Shape :: enum {
|
||||
circle: f32;
|
||||
rect: struct { w, h: f32; };
|
||||
}
|
||||
|
||||
main :: () {
|
||||
s : Shape = .circle(1.0);
|
||||
s.rect = .{ w = 4.0, h = 2.0 }; // rejected — use `s = .rect(.{...})` instead
|
||||
print("unreachable: {}\n", s.rect.w);
|
||||
}
|
||||
22
examples/0186-types-tagged-union-nested-field-write.sx
Normal file
22
examples/0186-types-tagged-union-nested-field-write.sx
Normal file
@@ -0,0 +1,22 @@
|
||||
// A write to a sub-field of a tagged-union variant's payload (`s.rect.w = ...`)
|
||||
// is NOT rejected: the immediate object is the payload struct, so it mutates a
|
||||
// field of the already-active variant in place and leaves the tag alone. This
|
||||
// pins the scope of issue 0136's guard — only a WHOLE-variant member write
|
||||
// (`s.rect = ...`) is rejected; nested sub-field writes keep working.
|
||||
|
||||
#import "modules/std.sx";
|
||||
|
||||
Shape :: enum {
|
||||
circle: f32;
|
||||
rect: struct { w, h: f32; };
|
||||
}
|
||||
|
||||
main :: () {
|
||||
s : Shape = .rect(.{ w = 1.0, h = 2.0 });
|
||||
s.rect.w = 9.0; // nested sub-field write — allowed
|
||||
r := s.rect;
|
||||
print("w={} h={}\n", r.w, r.h); // 9 2
|
||||
|
||||
s = .circle(3.5); // construction reassign — allowed
|
||||
print("c={}\n", s.circle); // 3.5
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
1
|
||||
@@ -0,0 +1,5 @@
|
||||
error: cannot assign to tagged-union variant 'rect' directly — a member write sets the payload but leaves the tag stale; construct the variant instead (e.g. `x = .rect(...)`)
|
||||
--> examples/0185-types-tagged-union-member-assign-rejected.sx:18:5
|
||||
|
|
||||
18 | s.rect = .{ w = 4.0, h = 2.0 }; // rejected — use `s = .rect(.{...})` instead
|
||||
| ^^^^^^
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
0
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
w=9.000000 h=2.000000
|
||||
c=3.500000
|
||||
139
issues/0136-tagged-union-member-write-does-not-set-tag.md
Normal file
139
issues/0136-tagged-union-member-write-does-not-set-tag.md
Normal file
@@ -0,0 +1,139 @@
|
||||
# 0136 — direct write to a tagged-union member updates the payload but not the tag
|
||||
|
||||
> **RESOLVED (2026-06-13).** Root cause: the read path distinguishes a tagged
|
||||
> union (`enum_payload`/`enum_tag`, field 1/field 0) but the write path treated
|
||||
> it like a plain union (`fieldLvalueResolve` → `.union_direct` → `union_gep`),
|
||||
> storing the payload (field 1) with no tag store — so the discriminant went
|
||||
> stale. Fix (chosen option 1 — reject; the spec only blesses construction /
|
||||
> read / match for tagged unions, and no corpus relied on member writes): a new
|
||||
> `diagTaggedUnionVariantWrite` guard (`src/ir/lower/stmt.zig`, reusing the
|
||||
> shared `fieldLvalueResolve` matcher, registered in `src/ir/lower.zig`) rejects
|
||||
> a direct whole-variant member assignment at both store sites (`lowerAssignment`
|
||||
> and `lowerMultiAssign`) with a diagnostic pointing to `s = .variant(...)`.
|
||||
> Plain `union` writes and nested sub-field writes (`s.rect.w = ...`) are
|
||||
> unaffected (they don't resolve to `.union_direct` on a tagged union).
|
||||
> Regression tests: `examples/0185-types-tagged-union-member-assign-rejected.sx`
|
||||
> (rejected), `examples/0186-types-tagged-union-nested-field-write.sx` (nested
|
||||
> write + construction still work). Spec/readme updated (enum section).
|
||||
|
||||
## Symptom
|
||||
|
||||
One-line: `s.rect = .{ ... }` on a tagged union (`enum`-with-payload) stores
|
||||
into the payload area but leaves the discriminant (tag) untouched, so a later
|
||||
`match`/`==` reads the STALE tag while the payload holds the new variant — a
|
||||
silent tag/payload desync with no diagnostic.
|
||||
|
||||
- **Observed:** after `s : Shape = .circle(1.0); s.rect = .{ w=4, h=2 };`, a
|
||||
`match` on `s` takes the `.circle` arm (tag never updated) even though the
|
||||
payload now holds the rect. The wrong-variant payload read (`s.rect`) returns
|
||||
the written bytes, masking the inconsistency.
|
||||
- **Expected:** either a compile error directing the user to construction
|
||||
(`s = .rect(...)`), or the member write sets the tag too so `s` becomes the
|
||||
`.rect` variant and `match` sees `.rect`.
|
||||
|
||||
Same-variant write is fine (`s.circle = 9.0` while the tag is already
|
||||
`circle`): the payload updates and the tag already matched. Only a write whose
|
||||
variant differs from the current tag desyncs — and the compiler can't know the
|
||||
runtime tag at the write site, so the danger is inherent to the operation.
|
||||
|
||||
## Reproduction
|
||||
|
||||
`issues/0136-tagged-union-member-write-does-not-set-tag.sx` (standalone, only
|
||||
`modules/std.sx`):
|
||||
|
||||
```sx
|
||||
#import "modules/std.sx";
|
||||
|
||||
Shape :: enum {
|
||||
circle: f32;
|
||||
rect: struct { w, h: f32; };
|
||||
}
|
||||
|
||||
main :: () {
|
||||
s : Shape = .circle(1.0); // tag = circle, payload = 1.0
|
||||
s.rect = .{ w = 4.0, h = 2.0 }; // writes rect payload; tag stays circle
|
||||
if s == {
|
||||
case .circle: print("tag=circle (STALE — wrote rect)\n");
|
||||
case .rect: print("tag=rect (correct)\n");
|
||||
}
|
||||
r := s.rect;
|
||||
print("rect.w={} rect.h={}\n", r.w, r.h);
|
||||
}
|
||||
```
|
||||
|
||||
Run: `./zig-out/bin/sx run issues/0136-tagged-union-member-write-does-not-set-tag.sx`
|
||||
→ today prints `tag=circle (STALE — wrote rect)` then `rect.w=4 rect.h=2`.
|
||||
|
||||
## Root cause (traced)
|
||||
|
||||
A tagged union is laid out `{ tag (field 0), payload (field 1) }`
|
||||
(`src/ir/types.zig` sizeOf: `tag_sz + max_field`). The READ and WRITE paths
|
||||
treat it asymmetrically:
|
||||
|
||||
- **Read** distinguishes a tagged union: `lowerFieldAccess`
|
||||
(`src/ir/lower/expr.zig`) emits `enum_payload` → `emitEnumPayload`
|
||||
(`src/backend/llvm/ops.zig:1392`) GEPs **field 1** (payload); `.tag` /
|
||||
`==` use `enum_tag` (field 0).
|
||||
- **Write** treats a tagged union like a plain union: `fieldLvalueResolve`
|
||||
(`src/ir/lower/stmt.zig`) maps a tagged-union member to `.union_direct` →
|
||||
`fieldLvaluePtr` emits `union_gep` → `emitUnionGep`
|
||||
(`src/backend/llvm/ops.zig:1430`) GEPs **field 1** (payload) and stores there.
|
||||
|
||||
So the payload OFFSET is correct (both use field 1 — not an out-of-bounds /
|
||||
clobber bug), but the write never emits the tag (field 0) store that
|
||||
construction does. Construction `s = .rect(...)` lowers via `enum_init`
|
||||
(`src/ir/inst.zig:170`), which writes BOTH tag and payload; the member-write
|
||||
path emits only the payload store.
|
||||
|
||||
## Investigation prompt
|
||||
|
||||
> A direct write to a tagged-union member (`s.rect = .{...}`) updates the
|
||||
> payload but not the discriminant, silently desyncing tag and payload. Repro:
|
||||
> `issues/0136-tagged-union-member-write-does-not-set-tag.sx` (today prints the
|
||||
> STALE tag; the fix should make it either a compile error or set the tag).
|
||||
>
|
||||
> Root: `fieldLvalueResolve` (`src/ir/lower/stmt.zig`) maps a tagged-union
|
||||
> member to `.union_direct`, so `fieldLvaluePtr` emits a bare `union_gep`
|
||||
> (payload pointer, field 1) and `lowerAssignment` stores only the payload. The
|
||||
> tag (field 0) is never written. Construction (`enum_init`) writes both.
|
||||
>
|
||||
> Two reasonable fixes (pick one — both beat the silent desync):
|
||||
> 1. **Reject** direct assignment to a tagged-union variant member with a
|
||||
> diagnostic ("set a tagged-union variant via `s = .rect(...)`; direct
|
||||
> member assignment can't set the discriminant"). Simplest and safe — the
|
||||
> construction syntax already covers the intent, and the compiler can't
|
||||
> know the runtime tag to validate a same-variant write anyway. Detect in
|
||||
> `lowerAssignment`'s `.field_access` arm when the object type is a
|
||||
> `tagged_union` and the field is a variant.
|
||||
> 2. **Set the tag too**: make `s.rect = payload` equivalent to
|
||||
> `s = .rect(payload)` — emit a tag store (the variant discriminant, which
|
||||
> `fieldLvalueResolve` already knows as the variant index) alongside the
|
||||
> payload store. More ergonomic but more plumbing (the assignment path
|
||||
> must emit two stores, and compound ops like `s.rect.w += 1` need
|
||||
> thought). Mirror how `enum_init` sets the tag.
|
||||
> Do NOT leave the silent payload-only write. Note the read/write asymmetry
|
||||
> (read uses `enum_payload`, write uses `union_gep`) is the structural root;
|
||||
> whichever fix, keep plain `union` (no tag) working — only `tagged_union`
|
||||
> needs the new behavior.
|
||||
>
|
||||
> Verification: the repro errors (option 1) or `match` sees `.rect` (option 2);
|
||||
> then `zig build && zig build test` green. Add coverage under
|
||||
> `examples/01xx-types-...` (a tagged-union member write: rejected, or
|
||||
> tag-setting round-trips through `match`).
|
||||
|
||||
## Notes
|
||||
|
||||
- PRE-EXISTING and orthogonal to issues 0133 / 0135. Surfaced while reviewing
|
||||
the 0133 fix (which unified the lvalue field resolver): the read path
|
||||
special-cases `tagged_union` but the write path does not. The 0133 fix itself
|
||||
is about PLAIN `union` (no tag) and did not introduce or change this — it
|
||||
preserved the existing `union_direct → union_gep` routing for tagged-union
|
||||
members.
|
||||
- Related read-side gap (probably same fix family, not verified here): reading
|
||||
the WRONG variant (`s.rect` while tag is `circle`) also returns raw payload
|
||||
bytes with no tag check (`emitEnumPayload` doesn't check the discriminant).
|
||||
A variant-safe accessor / checked read is a separate consideration.
|
||||
- Sites: write — `fieldLvalueResolve`/`fieldLvaluePtr` (`src/ir/lower/stmt.zig`),
|
||||
`emitUnionGep` (`src/backend/llvm/ops.zig:1430`); read — `emitEnumPayload`
|
||||
(`src/backend/llvm/ops.zig:1392`); construction — `enum_init`
|
||||
(`src/ir/inst.zig:170`); layout — `src/ir/types.zig` (tagged_union sizeOf).
|
||||
@@ -254,6 +254,11 @@ Perms :: enum flags { read; write; execute; }
|
||||
rw := Perms.read | Perms.write;
|
||||
```
|
||||
|
||||
Set a variant by construction (`s = .circle(2.0)`), which writes the tag and
|
||||
payload together. Direct member assignment to a variant (`s.circle = 2.0`) is
|
||||
rejected — it would set the payload but not the tag. Mutating a sub-field of the
|
||||
active variant in place (`s.rect.w = 9.0`) is fine.
|
||||
|
||||
### Optionals
|
||||
|
||||
```sx
|
||||
|
||||
8
specs.md
8
specs.md
@@ -394,6 +394,14 @@ s = Shape.rect(42); // explicit prefix
|
||||
r := s.circle; // load payload as f32 (undefined behavior if wrong variant active)
|
||||
```
|
||||
|
||||
#### Setting a Variant
|
||||
A variant is set by construction — `s = .rect(payload)` — which writes both the
|
||||
tag and the payload together. Direct member assignment to a variant
|
||||
(`s.rect = payload`) is **rejected at compile time**: it would store the payload
|
||||
but not the tag, leaving the two desynced so a later `match` takes the wrong arm.
|
||||
Mutating a sub-field of the *active* variant's payload in place is allowed
|
||||
(`s.rect.w = 9.0`).
|
||||
|
||||
#### Pattern Matching
|
||||
```sx
|
||||
if s == {
|
||||
|
||||
@@ -1609,6 +1609,7 @@ pub const Lowering = struct {
|
||||
pub const lowerAssignment = lower_stmt.lowerAssignment;
|
||||
pub const fieldLvalueResolve = lower_stmt.fieldLvalueResolve;
|
||||
pub const fieldLvaluePtr = lower_stmt.fieldLvaluePtr;
|
||||
pub const diagTaggedUnionVariantWrite = lower_stmt.diagTaggedUnionVariantWrite;
|
||||
pub const lowerExprAsPtr = lower_stmt.lowerExprAsPtr;
|
||||
pub const storeOrCompound = lower_stmt.storeOrCompound;
|
||||
pub const emitCompoundOp = lower_stmt.emitCompoundOp;
|
||||
|
||||
@@ -741,6 +741,10 @@ pub fn lowerAssignment(self: *Lowering, asgn: *const ast.Assignment) void {
|
||||
}
|
||||
}
|
||||
|
||||
// Reject a direct write to a tagged-union variant (issue 0136): it
|
||||
// sets the payload but not the tag. Construct via `x = .variant(...)`.
|
||||
if (self.diagTaggedUnionVariantWrite(obj_ty, fa.field, asgn.target.span)) return;
|
||||
|
||||
// Special .len/.ptr handling only for slices, strings, arrays — NOT structs
|
||||
const is_special_container = obj_ty == .string or (if (!obj_ty.isBuiltin()) blk: {
|
||||
const obj_info = self.module.types.get(obj_ty);
|
||||
@@ -993,6 +997,33 @@ pub fn fieldLvaluePtr(self: *Lowering, obj_ptr: Ref, obj_ty: TypeId, field: []co
|
||||
}
|
||||
}
|
||||
|
||||
/// True (and emits the diagnostic) when `obj.field` names a DIRECT variant of a
|
||||
/// tagged union — a store target that would set the payload but NOT the tag
|
||||
/// (issue 0136): a tagged union is laid out `{ tag, payload }`, the write path
|
||||
/// emits a `union_gep` into the payload only, so the discriminant goes stale and
|
||||
/// a later `match`/`==` takes the wrong arm. The variant is set via construction
|
||||
/// (`x = .variant(...)`, which writes both), so a direct member write is rejected.
|
||||
///
|
||||
/// Returns false (keeps working) for: plain `union` (no tag); promoted / nested
|
||||
/// sub-field writes (`s.rect.w = ...`, where the immediate object is the payload
|
||||
/// struct, resolving to `.indexed`/`.union_promoted`, not `.union_direct`); and
|
||||
/// non-aggregates. Derefs one pointer level so a `*TaggedUnion` receiver is
|
||||
/// caught too. Uses the shared `fieldLvalueResolve` matcher, so the guard can't
|
||||
/// drift from the store path's notion of which member a name resolves to.
|
||||
pub fn diagTaggedUnionVariantWrite(self: *Lowering, obj_ty: TypeId, field: []const u8, span: ast.Span) bool {
|
||||
var ty = obj_ty;
|
||||
if (!ty.isBuiltin()) {
|
||||
const info = self.module.types.get(ty);
|
||||
if (info == .pointer) ty = info.pointer.pointee;
|
||||
}
|
||||
if (ty.isBuiltin() or self.module.types.get(ty) != .tagged_union) return false;
|
||||
const res = self.fieldLvalueResolve(ty, field) orelse return false;
|
||||
if (res != .union_direct) return false;
|
||||
if (self.diagnostics) |d|
|
||||
d.addFmt(.err, span, "cannot assign to tagged-union variant '{s}' directly — a member write sets the payload but leaves the tag stale; construct the variant instead (e.g. `x = .{s}(...)`)", .{ field, field });
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Get the pointer (alloca ref) for an lvalue expression, without loading.
|
||||
pub fn lowerExprAsPtr(self: *Lowering, node: *const Node) Ref {
|
||||
switch (node.data) {
|
||||
@@ -1287,6 +1318,8 @@ pub fn lowerMultiAssign(self: *Lowering, ma: *const ast.MultiAssign) void {
|
||||
.field_access => |fa| {
|
||||
const obj_ptr = self.lowerExprAsPtr(fa.object);
|
||||
const obj_ty = self.inferExprType(fa.object);
|
||||
// Reject a direct write to a tagged-union variant (issue 0136).
|
||||
if (self.diagTaggedUnionVariantWrite(obj_ty, fa.field, target.span)) continue;
|
||||
// Resolve the target field via the shared lvalue resolver —
|
||||
// the same one address-of uses — so a missing field emits a
|
||||
// diagnostic instead of defaulting to field 0 / field_ty
|
||||
|
||||
Reference in New Issue
Block a user