ERR/E3: error-tag {} interpolation via an always-linked tag-name table

`{}` on an error-set value printed `<?>` (any_to_string had no error_set
category). Now it renders the tag name (`BadDigit`), reusing the existing
any_to_string dispatch.

Pieces:
- New `error_tag_name_get` IR op (UnaryOp): tag id -> name. Lowered from a new
  `error_tag_name(e) -> string #builtin` (std.sx). Handled across inst.zig
  (op def), print.zig, interp.zig (comptime: tags.getName), and emit_llvm.zig.
- emit_llvm getOrBuildTagNameArray: an always-linked `[N x {ptr,i64}]` global
  of tag names indexed by global tag id (the TagRegistry namespace; slot 0 =
  ""). error_tag_name_get zero-extends the u32 tag value and GEPs into it. Built once;
  not trace-gated, so it works in release too (per the spec's "tag-name table
  always shipped").
- resolveTypeCategoryTags gains an `error_set` category so the
  `case error_set:` arm in any_to_string matches; that arm coerces the Any to
  u32 (`xx val`) and calls error_tag_name. (cast(type) didn't recover the tag
  id for error-set values; the u32 coercion does.)

examples/240-error-tag-interpolation.sx: bound tags + a catch-bound tag print
their names. Regenerated ffi-objc-call-06-sret-return.ir — pure block-renumber
drift from adding one if-arm to the shared any_to_string (verified
semantically identical after collapsing block numbers).

Gates: zig build, zig build test, bash tests/run_examples.sh (277 passed; lone
failure is the user's uncommitted 213-canonical-map pack WIP).
This commit is contained in:
agra
2026-06-01 07:47:32 +03:00
parent 6e32e6c63c
commit a3ff503f47
10 changed files with 684 additions and 569 deletions

View File

@@ -0,0 +1,29 @@
// Error-tag `{}` interpolation (ERR step E3 — tag-name table). Formatting an
// error-set value with `{}` renders the tag NAME (`BadDigit`), not the raw id,
// reusing the `any_to_string` dispatch (new `error_set` category → the
// `error_tag_name` builtin → the always-linked tag-name table indexed by global
// tag id). Works for a bound tag, a re-raised/caught tag, and inside text.
#import "modules/std.sx";
E :: error { BadDigit, Empty, Overflow }
// Fallible helper used to produce error tags for the interpolation checks in
// `main`: raises `error.BadDigit` for a negative `n`, `error.Empty` for zero,
// and otherwise returns `n * 2`.
parse :: (n: s32) -> (s32, !E) {
if n < 0 { raise error.BadDigit; }
if n == 0 { raise error.Empty; }
return n * 2;
}
// Entry point: prints two directly bound tags, then a tag bound by a `catch`
// handler. The trailing comment on each `print` shows the expected output line.
main :: () -> s32 {
a : E = error.BadDigit;
b : E = error.Overflow;
print("a={} b={}\n", a, b); // a=BadDigit b=Overflow
// A tag bound by `catch` interpolates too (diverging handler).
v := parse(0) catch e {
print("parse failed with {}\n", e); // parse failed with Empty
return 0;
};
print("v={}\n", v); // not reached (parse(0) raises Empty)
return 0;
}

View File

@@ -22,6 +22,7 @@ field_value :: (s: $T, idx: s64) -> Any #builtin;
is_flags :: ($T: Type) -> bool #builtin; is_flags :: ($T: Type) -> bool #builtin;
field_value_int :: ($T: Type, idx: s64) -> s64 #builtin; field_value_int :: ($T: Type, idx: s64) -> s64 #builtin;
field_index :: ($T: Type, val: T) -> s64 #builtin; field_index :: ($T: Type, val: T) -> s64 #builtin;
error_tag_name :: (e: $T) -> string #builtin;
string :: []u8 #builtin; string :: []u8 #builtin;
#import "allocators.sx"; #import "allocators.sx";
@@ -314,6 +315,7 @@ any_to_string :: (val: Any) -> string {
case float: result = float_to_string(xx val); case float: result = float_to_string(xx val);
case struct: result = struct_to_string(cast(type) val); case struct: result = struct_to_string(cast(type) val);
case enum: result = enum_to_string(cast(type) val); case enum: result = enum_to_string(cast(type) val);
case error_set: { tagid : u32 = xx val; result = error_tag_name(tagid); }
case vector: result = vector_to_string(cast(type) val); case vector: result = vector_to_string(cast(type) val);
case array: result = array_to_string(cast(type) val); case array: result = array_to_string(cast(type) val);
case slice: result = slice_to_string(cast(type) val); case slice: result = slice_to_string(cast(type) val);

View File

@@ -143,6 +143,8 @@ pub const LLVMEmitter = struct {
// Cached field name arrays for reflection (TypeId → LLVM global) // Cached field name arrays for reflection (TypeId → LLVM global)
field_name_arrays: std.AutoHashMap(u32, c.LLVMValueRef), field_name_arrays: std.AutoHashMap(u32, c.LLVMValueRef),
// The always-linked tag-name table (global tag id → name); built once.
tag_name_array: ?c.LLVMValueRef = null,
// Lazy global `[N x string]` indexed by TypeId.index(), holding // Lazy global `[N x string]` indexed by TypeId.index(), holding
// each type's display name. Built on the first dynamic // each type's display name. Built on the first dynamic
@@ -3304,6 +3306,23 @@ pub const LLVMEmitter = struct {
// Switch on index, each case: extractvalue field k → box as Any // Switch on index, each case: extractvalue field k → box as Any
self.emitFieldValueGet(fr, func_idx); self.emitFieldValueGet(fr, func_idx);
}, },
.error_tag_name_get => |u| {
    // Tag id → name: load the `{ptr, i64}` string entry at index
    // `tag id` from the always-linked tag-name table (see
    // getOrBuildTagNameArray). No bounds branch is emitted: a genuine
    // error-set value carries an id from the same registry the table
    // is built from, so it is always a valid index.
    const global = self.getOrBuildTagNameArray();
    const tag_raw = self.resolveRef(u.operand);
    // Widen to i64 with an unsigned int-cast instead of a bare zext.
    // For the usual u32 tag this still emits `zext`, but `zext` is
    // only valid for operands narrower than the destination; the
    // builtin is generic (`e: $T`), so a 64-bit operand must not
    // produce an invalid instruction.
    const idx = c.LLVMBuildIntCast2(self.builder, tag_raw, self.cached_i64, 0, "etn.idx");
    const string_ty = self.getStringStructType();
    // Element count of the table type; must describe the same registry
    // snapshot the global was built from.
    const n: u32 = @intCast(self.ir_mod.types.tags.names.items.len);
    const array_ty = c.LLVMArrayType(string_ty, n);
    const zero = c.LLVMConstInt(self.cached_i64, 0, 0);
    // GEP [0, idx]: step through the global pointer, then select entry idx.
    var indices = [2]c.LLVMValueRef{ zero, idx };
    const gep = c.LLVMBuildInBoundsGEP2(self.builder, array_ty, global, &indices, 2, "etn.gep");
    const result = c.LLVMBuildLoad2(self.builder, string_ty, gep, "etn.load");
    self.mapRef(result);
},
// ── Switch branch ──────────────────────────────────────── // ── Switch branch ────────────────────────────────────────
.switch_br => |sw| { .switch_br => |sw| {
@@ -4662,6 +4681,44 @@ pub const LLVMEmitter = struct {
return global; return global;
} }
/// Returns the module's tag-name table, creating it on first use and caching
/// it in `self.tag_name_array` afterwards.
///
/// The table is a private constant `[N x {ptr, i64}]` global holding one
/// string entry per name in the `TagRegistry` name list, in registry order,
/// so a global tag id is directly an index into it (slot 0 is the reserved ""
/// no-error name). `error_tag_name_get` GEPs into it at the runtime tag id.
/// It is always emitted (not trace-gated) so `{}` interpolation of an error
/// tag works even in release builds.
fn getOrBuildTagNameArray(self: *LLVMEmitter) c.LLVMValueRef {
    if (self.tag_name_array) |cached| return cached;

    const entry_ty = self.getStringStructType();
    const tag_names = self.ir_mod.types.tags.names.items;

    var entries = std.ArrayList(c.LLVMValueRef).empty;
    defer entries.deinit(self.alloc);

    for (tag_names) |tag_name| {
        // Each name gets its own private, NUL-terminated byte-array global.
        const byte_count = tag_name.len + 1;
        const terminated = self.alloc.dupeZ(u8, tag_name) catch unreachable;
        defer self.alloc.free(terminated);

        const bytes_ty = c.LLVMArrayType(self.cached_i8, @intCast(byte_count));
        const bytes_global = c.LLVMAddGlobal(self.llvm_module, bytes_ty, "tag.str");
        // The terminator is already part of `terminated`, so LLVM is told
        // not to append another one (last argument = 1).
        const bytes_init = c.LLVMConstStringInContext(self.context, terminated.ptr, @intCast(byte_count), 1);
        c.LLVMSetInitializer(bytes_global, bytes_init);
        c.LLVMSetGlobalConstant(bytes_global, 1);
        c.LLVMSetLinkage(bytes_global, c.LLVMPrivateLinkage);

        // String entry: { pointer to bytes, length without the terminator }.
        var entry_fields = [2]c.LLVMValueRef{
            bytes_global,
            c.LLVMConstInt(self.cached_i64, tag_name.len, 0),
        };
        const entry = c.LLVMConstStructInContext(self.context, &entry_fields, 2, 0);
        entries.append(self.alloc, entry) catch unreachable;
    }

    const count: u32 = @intCast(tag_names.len);
    const table_ty = c.LLVMArrayType(entry_ty, count);
    const table = c.LLVMAddGlobal(self.llvm_module, table_ty, "tag_names");
    c.LLVMSetInitializer(table, c.LLVMConstArray(entry_ty, entries.items.ptr, count));
    c.LLVMSetGlobalConstant(table, 1);
    c.LLVMSetLinkage(table, c.LLVMPrivateLinkage);

    self.tag_name_array = table;
    return table;
}
/// Emit field_value_get: switch on runtime index, each case extracts a field and boxes it as Any. /// Emit field_value_get: switch on runtime index, each case extracts a field and boxes it as Any.
fn emitFieldValueGet(self: *LLVMEmitter, fr: ir_inst.FieldReflect, func_idx: u32) void { fn emitFieldValueGet(self: *LLVMEmitter, fr: ir_inst.FieldReflect, func_idx: u32) void {
const base_val = self.resolveRef(fr.base); const base_val = self.resolveRef(fr.base);

View File

@@ -219,6 +219,7 @@ pub const Op = union(enum) {
// ── Reflection ───────────────────────────────────────────────── // ── Reflection ─────────────────────────────────────────────────
field_name_get: FieldReflect, // field_name(T, i) → string (runtime index) field_name_get: FieldReflect, // field_name(T, i) → string (runtime index)
field_value_get: FieldReflect, // field_value(s, i) → Any (runtime struct + index) field_value_get: FieldReflect, // field_value(s, i) → Any (runtime struct + index)
error_tag_name_get: UnaryOp, // error_tag_name(e) → string (runtime tag id → name, via the always-linked tag-name table)
// ── Terminators ───────────────────────────────────────────────── // ── Terminators ─────────────────────────────────────────────────
br: Branch, br: Branch,

View File

@@ -1227,6 +1227,14 @@ pub const Interpreter = struct {
const name = self.module.types.getString(fields[idx].name); const name = self.module.types.getString(fields[idx].name);
return .{ .value = .{ .string = name } }; return .{ .value = .{ .string = name } };
}, },
.error_tag_name_get => |u| {
    // Comptime tag id → name: resolve the operand to an integer tag id
    // and look its name up in the module's tag registry.
    const tag_val = frame.getRef(u.operand);
    const raw = switch (tag_val) {
        .int => |i| i,
        else => return bailDetail("comptime error_tag_name(e): operand is not an integer tag id"),
    };
    // Validate before narrowing: an unchecked @intCast on a negative or
    // oversized value would trip a safety panic inside the compiler
    // instead of producing a diagnostic.
    if (raw < 0 or raw > 0xFFFF_FFFF) {
        return bailDetail("comptime error_tag_name(e): tag id is out of range");
    }
    const id: u32 = @intCast(raw);
    // Reject ids that do not name a registered tag rather than handing
    // an out-of-table index to the lookup.
    if (id >= self.module.types.tags.names.items.len) {
        return bailDetail("comptime error_tag_name(e): tag id is out of range");
    }
    return .{ .value = .{ .string = self.module.types.tags.getName(id) } };
},
.field_value_get => |fr| { .field_value_get => |fr| {
const base_val = frame.getRef(fr.base); const base_val = frame.getRef(fr.base);
const idx_val = frame.getRef(fr.index); const idx_val = frame.getRef(fr.index);

View File

@@ -10271,6 +10271,13 @@ pub const Lowering = struct {
.struct_type = ty, .struct_type = ty,
} }, .string); } }, .string);
} }
if (std.mem.eql(u8, name, "error_tag_name")) {
    // error_tag_name(e): the error-set value is itself the u32 tag id, so
    // the lowered operand is passed directly to `error_tag_name_get`, which
    // resolves it against the always-linked tag-name table.
    if (c.args.len == 0) {
        // A call with no argument lowers to the empty string constant.
        const empty_name = self.module.types.internString("");
        return self.builder.constString(empty_name);
    }
    const tag_operand = self.lowerExpr(c.args[0]);
    return self.builder.emit(.{ .error_tag_name_get = .{ .operand = tag_operand } }, .string);
}
if (std.mem.eql(u8, name, "field_value")) { if (std.mem.eql(u8, name, "field_value")) {
// field_value(s, i) → field_value_get instruction (structs/unions) // field_value(s, i) → field_value_get instruction (structs/unions)
// → index_get + box_any (slices/arrays) // → index_get + box_any (slices/arrays)
@@ -10892,7 +10899,7 @@ pub const Lowering = struct {
} }
// Dynamic categories: scan TypeTable for matching types // Dynamic categories: scan TypeTable for matching types
const Category = enum { @"struct", @"enum", @"union", slice, array, pointer, vector, optional }; const Category = enum { @"struct", @"enum", @"union", slice, array, pointer, vector, optional, error_set };
const cat: ?Category = if (std.mem.eql(u8, name, "struct")) const cat: ?Category = if (std.mem.eql(u8, name, "struct"))
.@"struct" .@"struct"
else if (std.mem.eql(u8, name, "enum") or std.mem.eql(u8, name, "union")) else if (std.mem.eql(u8, name, "enum") or std.mem.eql(u8, name, "union"))
@@ -10907,6 +10914,8 @@ pub const Lowering = struct {
.vector .vector
else if (std.mem.eql(u8, name, "optional")) else if (std.mem.eql(u8, name, "optional"))
.optional .optional
else if (std.mem.eql(u8, name, "error_set"))
.error_set
else else
null; null;
@@ -10921,6 +10930,7 @@ pub const Lowering = struct {
.pointer => info == .pointer or info == .many_pointer, .pointer => info == .pointer or info == .many_pointer,
.vector => info == .vector, .vector => info == .vector,
.optional => info == .optional, .optional => info == .optional,
.error_set => info == .error_set,
}; };
if (matches) { if (matches) {
tags.append(self.alloc, @intCast(idx)) catch {}; tags.append(self.alloc, @intCast(idx)) catch {};
@@ -13797,6 +13807,7 @@ pub const Lowering = struct {
if (std.mem.eql(u8, bare_name, "field_count")) return .s64; if (std.mem.eql(u8, bare_name, "field_count")) return .s64;
if (std.mem.eql(u8, bare_name, "field_index")) return .s64; if (std.mem.eql(u8, bare_name, "field_index")) return .s64;
if (std.mem.eql(u8, bare_name, "field_name")) return .string; if (std.mem.eql(u8, bare_name, "field_name")) return .string;
if (std.mem.eql(u8, bare_name, "error_tag_name")) return .string;
if (std.mem.eql(u8, bare_name, "is_flags")) return .bool; if (std.mem.eql(u8, bare_name, "is_flags")) return .bool;
if (std.mem.eql(u8, bare_name, "type_of")) return .any; if (std.mem.eql(u8, bare_name, "type_of")) return .any;
if (std.mem.eql(u8, bare_name, "field_value")) return .any; if (std.mem.eql(u8, bare_name, "field_value")) return .any;

View File

@@ -359,6 +359,7 @@ fn printInst(instruction: *const Inst, ref_idx: u32, tt: *const TypeTable, write
// ── Reflection ────────────────────────────────────────── // ── Reflection ──────────────────────────────────────────
.field_name_get => |fr| try writer.print("field_name_get T{d}[%{d}] : ", .{ fr.struct_type.index(), fr.index.index() }), .field_name_get => |fr| try writer.print("field_name_get T{d}[%{d}] : ", .{ fr.struct_type.index(), fr.index.index() }),
.field_value_get => |fr| try writer.print("field_value_get %{d}, T{d}[%{d}] : ", .{ fr.base.index(), fr.struct_type.index(), fr.index.index() }), .field_value_get => |fr| try writer.print("field_value_get %{d}, T{d}[%{d}] : ", .{ fr.base.index(), fr.struct_type.index(), fr.index.index() }),
.error_tag_name_get => |u| try writer.print("error_tag_name_get %{d} : ", .{u.operand.index()}),
// ── Terminators ───────────────────────────────────────── // ── Terminators ─────────────────────────────────────────
.br => |b| { .br => |b| {

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1,2 @@
a=BadDigit b=Overflow
parse failed with Empty

File diff suppressed because it is too large Load Diff