fix(ir): reflection builtins on an Any read its runtime tag, not payload [F0.8]

`type_name` / `type_is_unsigned` on an `Any` argument unconditionally read
the Any's payload as a TypeId index. That is correct only when the Any holds
a Type value (`{ .any, tid }`); for an Any holding a runtime *value*
(`av : Any = 6`, tag s64, payload 6) it returned `types[6]` — `type_name(av)`
gave "u8" and `type_is_unsigned(av)` gave true.

Both backends now branch on the Any's runtime type-tag: tag == `.any` → the
box is a Type value, use the payload as the TypeId; otherwise the tag IS the
held value's type. So `type_name(av)` → "s64", `type_is_unsigned(av)` → false,
while `type_name(type_of(x))` still names the held type. The `{}` formatter is
unchanged (it already passed `type_of(val)`, a proper Type value).

- src/ir/interp.zig: shared `Value.reflectTypeId` tag-branching resolver; the
  `type_name` / `type_is_unsigned` interp arms route through it.
- src/backend/llvm/ops.zig: shared `Ops.reflectArgTypeId` emits
  extractvalue-tag / icmp-eq-.any / select for the runtime path; both
  reflection arms route through it. The two backends agree.
- examples/0164-types-reflection-any-tag.sx: regression pinning type_name /
  type_is_unsigned / print on an Any holding a value vs a Type.
- src/ir/interp.test.zig: unit test for `reflectTypeId`.
- 22 .ir snapshots: the new select appears in every std-importing program's
  IR (any_to_string embeds these builtins) — benign, verified structurally
  identical apart from the three new instructions.
- issues/0090, specs.md: documented the Any-tag rule.
This commit is contained in:
agra
2026-06-05 12:09:52 +03:00
parent b053c64149
commit 5f64ee4426
31 changed files with 3379 additions and 3105 deletions

View File

@@ -35,6 +35,7 @@ const BlockParam = ir_inst.BlockParam;
const FieldReflect = ir_inst.FieldReflect;
const TypeId = ir_types.TypeId;
const StringId = ir_types.StringId;
const Ref = ir_inst.Ref;
/// Instruction-emission handlers for `emitInst`: every opcode group — the
/// constant, arithmetic, bitwise, comparison, logical, memory, globals,
@@ -972,6 +973,35 @@ pub const Ops = struct {
}
// ── Call extensions ───────────────────────────────────────
/// Compute, as a runtime `i64`, the `TypeId` that a dynamic
/// `type_name` / `type_is_unsigned` call has to look up for `arg_ref`.
///
/// Guiding rule: reflection over an `Any` consults the box's runtime
/// TYPE-TAG and never blindly trusts its payload. Per representation
/// reported by `reflectArgRepr`:
///   * `.bare`       — the argument is a `Type` value already lowered to
///                     a bare i64 `TypeId` index (an unboxed direct call
///                     site); the resolved value is returned untouched.
///   * `.boxed`      — the argument is an `Any` aggregate `{ tag, value }`.
///                     A tag equal to `.any` marks a boxed *Type value*
///                     (the `{ .any, tid }` shape that `const_type` /
///                     `type_of` produce), so the payload is the TypeId.
///                     Any other tag means the box holds a *runtime
///                     value* and the tag itself is that value's type.
///                     Hence `type_name(av)` with `av : Any = 6` yields
///                     `s64`, while `type_name(type_of(x))` keeps naming
///                     the held type.
///   * `.unresolved` — hard tripwire: a type-resolution failure made it
///                     to emission without a diagnostic; panics with a
///                     message prefixed by `label`.
///
/// `label` is the builtin's name and is only used in the panic text.
fn reflectArgTypeId(self: Ops, arg_ref: Ref, comptime label: []const u8) c.LLVMValueRef {
    // Resolve the operand first, before the representation is inspected.
    const operand = self.e.resolveRef(arg_ref);

    switch (self.e.reflectArgRepr(arg_ref)) {
        .bare => return operand,
        .unresolved => @panic(label ++ ": reflection arg IR-type unresolved — a type-resolution failure reached LLVM emission without a diagnostic"),
        .boxed => {},
    }

    // Boxed path: pull both fields out of the `{ tag, value }` aggregate
    // and pick between them with a select keyed on `tag == .any`.
    const builder = self.e.builder;
    const box_tag = c.LLVMBuildExtractValue(builder, operand, 0, "refl.tag");
    const box_payload = c.LLVMBuildExtractValue(builder, operand, 1, "refl.val");
    const any_index = c.LLVMConstInt(self.e.cached_i64, @intCast(TypeId.any.index()), 0);
    const is_type_value = c.LLVMBuildICmp(builder, c.LLVMIntEQ, box_tag, any_index, "refl.istype");
    return c.LLVMBuildSelect(builder, is_type_value, box_payload, box_tag, "refl.tid");
}
pub fn emitCallBuiltin(self: Ops, instruction: *const Inst, bi: BuiltinCall) void {
// Builtins that map to libc functions or LLVM intrinsics
switch (bi.builtin) {
@@ -1010,26 +1040,12 @@ pub const Ops = struct {
self.e.advanceRefCounter();
},
.type_name => {
// Dynamic `type_name(t)` at runtime: extract
// the TypeId from the arg (an Any-boxed Type
// value: tag=`.s64.index()`, value=tid), GEP
// into the compiler-emitted `__sx_type_names`
// global, load the string. The arg's LLVM
// shape is the `{i64, i64}` Any aggregate
// (because the IR-side arg type is `.any`
// when boxed); for unboxed direct call sites
// (the arg IR type is `.s64` from
// `const_type`), the value IS the TypeId
// index directly.
const arg_ref = bi.args[0];
const arg_val = self.e.resolveRef(arg_ref);
const tid_idx = switch (self.e.reflectArgRepr(arg_ref)) {
.unresolved => @panic("type_name: reflection arg IR-type unresolved — a type-resolution failure reached LLVM emission without a diagnostic"),
// Boxed: extract value field from the Any aggregate.
.boxed => c.LLVMBuildExtractValue(self.e.builder, arg_val, 1, "tn.tid"),
// Bare i64 (TypeId index).
.bare => arg_val,
};
// Dynamic `type_name(t)` at runtime: resolve the TypeId
// the arg denotes (reading an `Any`'s runtime type-tag,
// not its payload — see `reflectArgTypeId`), GEP into the
// compiler-emitted `__sx_type_names` global, load the
// string.
const tid_idx = self.reflectArgTypeId(bi.args[0], "type_name");
const arr_global = self.e.reflection().getOrBuildTypeNameArray();
const arr_len = self.e.type_name_array_len;
const string_ty = self.e.getStringStructType();
@@ -1065,17 +1081,12 @@ pub const Ops = struct {
self.e.mapRef(eq_res);
},
.type_is_unsigned => {
// Dynamic `type_is_unsigned(t)`: extract the TypeId from
// the arg (Any-boxed Type → value field, or bare i64
// index), GEP into the `__sx_type_is_unsigned` table, load
// the i1. Mirrors the `type_name` runtime lookup.
const arg_ref = bi.args[0];
const arg_val = self.e.resolveRef(arg_ref);
const tid_idx = switch (self.e.reflectArgRepr(arg_ref)) {
.unresolved => @panic("type_is_unsigned: reflection arg IR-type unresolved — a type-resolution failure reached LLVM emission without a diagnostic"),
.boxed => c.LLVMBuildExtractValue(self.e.builder, arg_val, 1, "tiu.tid"),
.bare => arg_val,
};
// Dynamic `type_is_unsigned(t)`: resolve the TypeId the arg
// denotes (reading an `Any`'s runtime type-tag, not its
// payload — see `reflectArgTypeId`), GEP into the
// `__sx_type_is_unsigned` table, load the i1. Mirrors the
// `type_name` runtime lookup.
const tid_idx = self.reflectArgTypeId(bi.args[0], "type_is_unsigned");
const arr_global = self.e.reflection().getOrBuildTypeIsUnsignedArray();
const arr_len = self.e.type_is_unsigned_array_len;
const arr_ty = c.LLVMArrayType(self.e.cached_i1, arr_len);

View File

@@ -804,3 +804,38 @@ test "comptime: type_eq builtin on type_tag values" {
const result = try interp.call(FuncId.fromIndex(0), &.{});
try std.testing.expectEqual(true, result.asBool().?);
}
// ── Test: reflectTypeId consults an Any's runtime TYPE-TAG, not its payload ──
// For an Any that holds a value, a reflection builtin has to answer with the
// value's type (the tag); the payload is only a TypeId when the Any holds a
// Type value (tag == `.any`). Regression for issue 0090 (attempt 3): the box
// `av : Any = 6` (`{ tag = s64, value = 6 }`) must come back as `s64`, NOT
// as `types[6]` (`u8`).
test "reflect: reflectTypeId branches on the Any tag" {
    const expectEqual = std.testing.expectEqual;
    const any_tag_index: i64 = @intCast(TypeId.any.index());

    // A native first-class Type value resolves to the TypeId it carries.
    const native_type: Value = .{ .type_tag = .u64 };
    try expectEqual(@as(?TypeId, .u64), native_type.reflectTypeId());

    // Box around a VALUE, `{ tag = s64, value = 6 }`: the tag wins, so the
    // answer is s64 and never `types[6]` (u8) — the bug this fix closes.
    var boxed_s64 = [_]Value{ .{ .int = @intCast(TypeId.s64.index()) }, .{ .int = 6 } };
    const any_s64: Value = .{ .aggregate = &boxed_s64 };
    try expectEqual(@as(?TypeId, .s64), any_s64.reflectTypeId());

    // Box around a VALUE of an unsigned type, `{ tag = u32, value = 7 }` → u32.
    var boxed_u32 = [_]Value{ .{ .int = @intCast(TypeId.u32.index()) }, .{ .int = 7 } };
    const any_u32: Value = .{ .aggregate = &boxed_u32 };
    try expectEqual(@as(?TypeId, .u32), any_u32.reflectTypeId());

    // Box around a TYPE value (what `type_of(x)` / `const_type` build),
    // `{ tag = .any, value = u64 }` → u64 taken from the payload. First with
    // the payload as a plain int (the runtime box shape) ...
    var boxed_type_as_int = [_]Value{ .{ .int = any_tag_index }, .{ .int = @intCast(TypeId.u64.index()) } };
    const any_type_as_int: Value = .{ .aggregate = &boxed_type_as_int };
    try expectEqual(@as(?TypeId, .u64), any_type_as_int.reflectTypeId());

    // ... then with the payload as a `.type_tag` (the comptime box shape);
    // the result must be the same.
    var boxed_type_as_tag = [_]Value{ .{ .int = any_tag_index }, .{ .type_tag = .u64 } };
    const any_type_as_tag: Value = .{ .aggregate = &boxed_type_as_tag };
    try expectEqual(@as(?TypeId, .u64), any_type_as_tag.reflectTypeId());

    // A value of neither shape yields null (the caller bails loudly rather
    // than guessing a TypeId).
    const plain_int: Value = .{ .int = 6 };
    try expectEqual(@as(?TypeId, null), plain_int.reflectTypeId());
}

View File

@@ -90,6 +90,34 @@ pub const Value = union(enum) {
};
}
/// Work out which `TypeId` a dynamic `type_name` / `type_is_unsigned`
/// should act on, under the rule that reflection over an `Any` reads the
/// box's runtime TYPE-TAG rather than its raw payload.
///
/// Accepted shapes:
///   * anything `asTypeId` recognises (a native `.type_tag(tid)` Value,
///     i.e. a first-class Type value) → that `tid`;
///   * an aggregate with at least two fields, read as an `Any`
///     `{ tag, value }`:
///       - tag == `.any`: the box carries a *Type value* (the
///         `box_any(.., .any)` / `const_type` shape), so the TypeId comes
///         from the payload — either as a `.type_tag` or as a plain int;
///       - any other tag: the box carries a *runtime value* and the tag
///         IS its type. This is why `type_name(av)` for `av : Any = 6`
///         reports `s64` while `type_name(type_of(x))` still names the
///         type.
///
/// Returns null for every other shape, including an aggregate whose tag
/// field is not an int or whose `.any`-tagged payload is neither a
/// TypeId nor an int (the caller bails loudly).
///
/// NOTE(review): both `@intCast` conversions below are unchecked against
/// the valid TypeId index range — confirm no caller can hand in a
/// negative or oversized tag/payload.
pub fn reflectTypeId(self: Value) ?TypeId {
    if (self.asTypeId()) |direct| return direct;

    const box = switch (self) {
        .aggregate => |fields| fields,
        else => return null,
    };
    if (box.len < 2) return null;

    const tag_index = box[0].asInt() orelse return null;
    const any_index: i64 = @intCast(TypeId.any.index());

    // Non-`.any` tag: the box holds a runtime value, the tag is its type.
    if (tag_index != any_index) return TypeId.fromIndex(@intCast(tag_index));

    // `.any` tag: the box holds a Type value, the payload names the type.
    const payload = box[1];
    if (payload.asTypeId()) |held| return held;
    const raw_index = payload.asInt() orelse return null;
    return TypeId.fromIndex(@intCast(raw_index));
}
/// Get the string content, whether from a literal or a heap-backed string aggregate.
pub fn asString(self: Value, interp: *const Interpreter) ?[]const u8 {
return switch (self) {
@@ -1870,22 +1898,14 @@ pub const Interpreter = struct {
.type_name => {
if (bi.args.len < 1) return bailDetail("comptime type_name: missing argument");
const arg = frame.getRef(bi.args[0]);
// Accept either a bare `.type_tag` Value (the
// comptime-native form) or an Any-boxed Type
// (`.aggregate { tag: int, value: .type_tag }`)
// — the latter shape is what `box_any` produces
// when const_type values flow through a `.any`-typed
// slice or struct field.
const tid = blk: {
if (arg.asTypeId()) |t| break :blk t;
if (arg == .aggregate) {
const fields = arg.aggregate;
if (fields.len >= 2) {
if (fields[1].asTypeId()) |t| break :blk t;
}
}
return bailDetail("comptime type_name: argument is not a Type value (expected `.type_tag` or Any-boxed Type)");
};
// A bare `.type_tag` Value (the comptime-native form), an
// Any-boxed Type (`{ .any, tid }`), or an Any holding a
// runtime value (`{ tag, value }`, where the tag IS the
// value's type). `reflectTypeId` reads the runtime tag so
// `type_name(av)` for `av : Any = 6` names `s64`, not the
// type whose index equals the payload.
const tid = arg.reflectTypeId() orelse
return bailDetail("comptime type_name: argument is not a Type value or boxed value (expected `.type_tag` or Any aggregate)");
const name = self.module.types.typeName(tid);
// Copy the slice into the interp's allocator so it
// outlives any TypeTable churn during the rest of the
@@ -1903,22 +1923,14 @@ pub const Interpreter = struct {
.type_is_unsigned => {
if (bi.args.len < 1) return bailDetail("comptime type_is_unsigned: missing argument");
const arg = frame.getRef(bi.args[0]);
// Accept a bare `.type_tag`, an Any-boxed Type (`{tag,
// .type_tag}`), or the `type_of(x)` shape (`{.int(any),
// .int(typeid)}`) — the last is what `any_to_string`'s
// `case int:` passes, where the inner TypeId is carried
// as a plain integer rather than a `.type_tag`.
const tid = blk: {
if (arg.asTypeId()) |t| break :blk t;
if (arg == .aggregate) {
const fields = arg.aggregate;
if (fields.len >= 2) {
if (fields[1].asTypeId()) |t| break :blk t;
if (fields[1].asInt()) |iv| break :blk TypeId.fromIndex(@intCast(iv));
}
}
return bailDetail("comptime type_is_unsigned: argument is not a Type value (expected `.type_tag`, Any-boxed Type, or `type_of(x)`)");
};
// A bare `.type_tag`, an Any-boxed Type (`{ .any, tid }`,
// the `type_of(x)` shape), or an Any holding a runtime value
// (`{ tag, value }`, where the tag IS the value's type).
// `reflectTypeId` reads the runtime tag so
// `type_is_unsigned(av)` for `av : Any = 6` answers about
// `s64`, not the type whose index equals the payload.
const tid = arg.reflectTypeId() orelse
return bailDetail("comptime type_is_unsigned: argument is not a Type value or boxed value (expected `.type_tag` or Any aggregate)");
return .{ .value = .{ .boolean = self.module.types.isUnsignedInt(tid) } };
},
.has_impl => {