fix(std): render integer formatter extremes — s64::MIN and unsigned all-ones [F0.8]
Resolves issue 0090. The `{}` integer formatter mis-rendered both ends of
the 64-bit range:
- `int_to_string` computed the magnitude as `0 - n`, which overflows for
`s64::MIN` (its magnitude is unrepresentable as a positive s64) — the
value stayed negative, the digit loop ran zero times, so only `-`
printed. It now extracts digits straight from `n` (each digit is
`|n % 10|`, and `n / 10` truncates toward zero), never negating MIN.
- `any_to_string`'s `case int:` formatted every integer as s64, so a u64
all-ones value printed as `-1`. There was no `uint` type-category to
distinguish signedness. Added an additive `type_is_unsigned(T)`
reflection builtin (static fold + dynamic interp/LLVM paths, mirroring
`type_name`), backed by the new `TypeTable.isUnsignedInt` predicate, and
a `uint_to_string` formatter (unsigned decimal via long-division over
four 16-bit limbs). `case int:` routes through `type_is_unsigned(type)`.
The 16-bit-limb split is factored into a shared `decompose_u16x4`, now
reused by `int_to_hex_string` (no second unsigned-math routine).
Regression: examples/0046-basic-int-formatter-extremes pins both extremes
plus a width spread; unit tests cover `isUnsignedInt`. Docs (specs.md
representation note, readme std API) updated for unsigned/extreme `{}`
behavior. IR snapshots refreshed for the two new std functions.
This commit is contained in:
@@ -1064,6 +1064,27 @@ pub const Ops = struct {
|
||||
const eq_res = c.LLVMBuildICmp(self.e.builder, c.LLVMIntEQ, a, b, "te.eq");
|
||||
self.e.mapRef(eq_res);
|
||||
},
|
||||
.type_is_unsigned => {
    // Runtime form of `type_is_unsigned(t)`. The TypeId index is taken
    // from the argument — field 1 of an Any-boxed Type, or the value
    // itself when it arrives bare — and selects one `i1` element of the
    // `__sx_type_is_unsigned` global, which is loaded as the result.
    const type_arg = bi.args[0];
    const type_val = self.e.resolveRef(type_arg);
    const type_index = switch (self.e.reflectArgRepr(type_arg)) {
        .bare => type_val,
        .boxed => c.LLVMBuildExtractValue(self.e.builder, type_val, 1, "tiu.tid"),
        .unresolved => @panic("type_is_unsigned: reflection arg IR-type unresolved — a type-resolution failure reached LLVM emission without a diagnostic"),
    };

    // Fetch the table before reading its length: building the table is
    // what records `type_is_unsigned_array_len`.
    const table = self.e.reflection().getOrBuildTypeIsUnsignedArray();
    const table_ty = c.LLVMArrayType(self.e.cached_i1, self.e.type_is_unsigned_array_len);

    // GEP path: a constant 0 first, then the per-type element index.
    var gep_path = [2]c.LLVMValueRef{ c.LLVMConstInt(self.e.cached_i64, 0, 0), type_index };
    const slot = c.LLVMBuildInBoundsGEP2(self.e.builder, table_ty, table, &gep_path, 2, "tiu.gep");
    self.e.mapRef(c.LLVMBuildLoad2(self.e.builder, self.e.cached_i1, slot, "tiu.load"));
},
|
||||
.has_impl => {
|
||||
// Runtime has_impl needs a protocol-map
|
||||
// snapshot — not wired yet. Silent false for
|
||||
|
||||
@@ -60,6 +60,36 @@ pub const Reflection = struct {
|
||||
return global;
|
||||
}
|
||||
|
||||
/// Return the `__sx_type_is_unsigned` global, creating it on first use.
///
/// The global is a private, constant `[N x i1]` with one element per entry
/// of the module's type table, indexed by `TypeId.index()`. Element `i` is 1
/// exactly when `TypeTable.isUnsignedInt` reports the type at index `i` as
/// an unsigned integer, so signedness is decided in one place only.
///
/// The created global and its element count are cached on the emitter
/// (`type_is_unsigned_array` / `type_is_unsigned_array_len`); later calls
/// return the cached value without rebuilding.
///
/// Panics with a descriptive message if the scratch buffer for the
/// initializer cannot be allocated (the signature has no error channel).
pub fn getOrBuildTypeIsUnsignedArray(self: Reflection) c.LLVMValueRef {
    if (self.e.type_is_unsigned_array) |cached| return cached;

    const type_count: u32 = @intCast(self.e.ir_mod.types.infos.items.len);

    // One exact-size scratch buffer for the initializer elements. An
    // allocation failure is reported explicitly instead of being declared
    // `unreachable`, which would be undefined behaviour in release builds.
    const bits = self.e.alloc.alloc(c.LLVMValueRef, type_count) catch
        @panic("getOrBuildTypeIsUnsignedArray: out of memory while building the signedness table");
    defer self.e.alloc.free(bits);

    for (bits, 0..) |*slot, index| {
        const tid = TypeId.fromIndex(@intCast(index));
        const is_unsigned = self.e.ir_mod.types.isUnsignedInt(tid);
        slot.* = c.LLVMConstInt(self.e.cached_i1, @intFromBool(is_unsigned), 0);
    }

    const table_ty = c.LLVMArrayType(self.e.cached_i1, type_count);
    const table_init = c.LLVMConstArray(self.e.cached_i1, bits.ptr, type_count);
    const global = c.LLVMAddGlobal(self.e.llvm_module, table_ty, "__sx_type_is_unsigned");
    c.LLVMSetInitializer(global, table_init);
    c.LLVMSetGlobalConstant(global, 1);
    c.LLVMSetLinkage(global, c.LLVMPrivateLinkage);

    // Cache both the global and its length; the runtime lookup needs the
    // length to rebuild the array type for its GEP.
    self.e.type_is_unsigned_array = global;
    self.e.type_is_unsigned_array_len = type_count;
    return global;
}
|
||||
|
||||
/// Build (or return cached) a global constant array of {ptr, i64} string values
|
||||
/// for the field names of a struct type.
|
||||
pub fn getOrBuildFieldNameArray(self: Reflection, struct_type: TypeId) c.LLVMValueRef {
|
||||
|
||||
@@ -146,6 +146,7 @@ pub const CallResolver = struct {
|
||||
if (std.mem.eql(u8, bare_name, "__trace_resolve_frame"))
|
||||
return refl(bare_name, self.l.module.types.findByName(self.l.module.types.internString("Frame")) orelse .unresolved);
|
||||
if (std.mem.eql(u8, bare_name, "is_flags")) return refl(bare_name, .bool);
|
||||
if (std.mem.eql(u8, bare_name, "type_is_unsigned")) return refl(bare_name, .bool);
|
||||
if (std.mem.eql(u8, bare_name, "type_of")) return refl(bare_name, .any);
|
||||
if (std.mem.eql(u8, bare_name, "field_value")) return refl(bare_name, .any);
|
||||
// Generic function — infer return type via type bindings.
|
||||
|
||||
@@ -182,6 +182,12 @@ pub const LLVMEmitter = struct {
|
||||
type_name_array: ?c.LLVMValueRef = null,
|
||||
type_name_array_len: u32 = 0,
|
||||
|
||||
// Lazy global `[N x i1]` indexed by TypeId.index(): true where the
|
||||
// type is an unsigned integer. Built on the first dynamic
|
||||
// `type_is_unsigned(t)` call site (the `{}` formatter's int branch).
|
||||
type_is_unsigned_array: ?c.LLVMValueRef = null,
|
||||
type_is_unsigned_array_len: u32 = 0,
|
||||
|
||||
// Target configuration (stored for ABI decisions during emission)
|
||||
target_config: TargetConfig,
|
||||
|
||||
|
||||
@@ -404,6 +404,7 @@ pub const BuiltinId = enum(u16) {
|
||||
// implements them; emit_llvm bails (Type is comptime-only).
|
||||
type_name,
|
||||
type_eq,
|
||||
type_is_unsigned,
|
||||
has_impl,
|
||||
};
|
||||
|
||||
|
||||
@@ -1900,6 +1900,27 @@ pub const Interpreter = struct {
|
||||
const b = frame.getRef(bi.args[1]).asTypeId() orelse return bailDetail("comptime type_eq: second argument is not a Type value");
|
||||
return .{ .value = .{ .boolean = a == b } };
|
||||
},
|
||||
.type_is_unsigned => {
    // Comptime `type_is_unsigned(t)`: resolve the argument to a TypeId
    // and answer from `isUnsignedInt`. Bails with a diagnostic when the
    // argument is missing, is not a Type value, or carries an index
    // outside the type table.
    if (bi.args.len < 1) return bailDetail("comptime type_is_unsigned: missing argument");
    const arg = frame.getRef(bi.args[0]);
    // Accepted shapes: a bare `.type_tag`, an Any-boxed Type (`{tag,
    // .type_tag}`), or the `type_of(x)` shape (`{.int(any),
    // .int(typeid)}`). The last is what `any_to_string`'s `case int:`
    // passes; there the inner TypeId is a plain integer rather than a
    // `.type_tag`.
    const tid = blk: {
        if (arg.asTypeId()) |t| break :blk t;
        if (arg == .aggregate) {
            const fields = arg.aggregate;
            if (fields.len >= 2) {
                if (fields[1].asTypeId()) |t| break :blk t;
                if (fields[1].asInt()) |iv| {
                    // A plain integer is not yet known to be a valid
                    // TypeId index. Range-check it against the type
                    // table so a negative or oversized value becomes a
                    // diagnostic instead of a failed cast or an
                    // out-of-bounds table access.
                    const type_count = self.module.types.infos.items.len;
                    if (iv < 0 or @as(u64, @intCast(iv)) >= type_count) {
                        return bailDetail("comptime type_is_unsigned: TypeId index is out of range for the type table");
                    }
                    break :blk TypeId.fromIndex(@intCast(iv));
                }
            }
        }
        return bailDetail("comptime type_is_unsigned: argument is not a Type value (expected `.type_tag`, Any-boxed Type, or `type_of(x)`)");
    };
    return .{ .value = .{ .boolean = self.module.types.isUnsignedInt(tid) } };
},
|
||||
.has_impl => {
|
||||
// has_impl at interp time needs access to the host's
|
||||
// protocol-registration maps (protocol_thunk_map +
|
||||
|
||||
@@ -10561,6 +10561,24 @@ pub const Lowering = struct {
|
||||
const b = self.resolveTypeArg(c.args[1]);
|
||||
return self.builder.constBool(a == b);
|
||||
}
|
||||
if (std.mem.eql(u8, name, "type_is_unsigned")) {
    // type_is_unsigned(T) → bool, with a static/dynamic split:
    //   * static argument (a spelled type or generic binding): resolved
    //     now and folded to a `const_bool`;
    //   * dynamic argument (e.g. the runtime `type_of(x)` value queried
    //     by `any_to_string`): lowered to a `callBuiltin` and answered by
    //     the interpreter or the LLVM backend.
    // The dynamic path never goes through `resolveTypeArg`, so that
    // function's silent `.s64` default cannot misreport a runtime Type
    // value.
    if (c.args.len < 1) return self.builder.constBool(false);
    const type_arg = c.args[0];

    if (!self.isStaticTypeArg(type_arg)) {
        const lowered = self.lowerExpr(type_arg);
        // If the argument slice cannot be allocated the call degrades to
        // a constant `false`.
        const owned_args = self.alloc.dupe(Ref, &.{lowered}) catch return self.builder.constBool(false);
        return self.builder.callBuiltin(.type_is_unsigned, owned_args, .bool);
    }

    const resolved = self.resolveTypeArg(type_arg);
    return self.builder.constBool(self.module.types.isUnsignedInt(resolved));
}
|
||||
if (std.mem.eql(u8, name, "has_impl")) {
|
||||
// has_impl(P, T) → const_bool. Returns true when type T has
|
||||
// a reachable impl for protocol P. P is either:
|
||||
|
||||
@@ -253,3 +253,37 @@ test "errorSetType: tags stored sorted by global id" {
|
||||
try std.testing.expectEqual(@as(usize, 3), stored.len);
|
||||
try std.testing.expect(stored[0] <= stored[1] and stored[1] <= stored[2]);
|
||||
}
|
||||
|
||||
test "isUnsignedInt: builtin signedness classification" {
    var table = TypeTable.init(std.testing.allocator);
    defer table.deinit();

    // Builtins the formatter must route to unsigned decimal.
    const unsigned_builtins = [_]TypeId{ .u8, .u16, .u32, .u64, .usize };
    // Signed integers and non-integer builtins: none may be reported as
    // unsigned.
    const other_builtins = [_]TypeId{
        .s8,   .s16, .s32, .s64,    .isize,
        .bool, .f32, .f64, .string, .void,
        .any,  .unresolved,
    };

    for (unsigned_builtins) |ty| {
        try std.testing.expect(table.isUnsignedInt(ty));
    }
    for (other_builtins) |ty| {
        try std.testing.expect(!table.isUnsignedInt(ty));
    }
}
|
||||
|
||||
test "isUnsignedInt: user-defined arbitrary-width ints" {
    var table = TypeTable.init(std.testing.allocator);
    defer table.deinit();

    // Two interned integers of the same width that differ only in
    // signedness, plus a pointer type as the non-integer case.
    const unsigned_24 = table.intern(.{ .unsigned = 24 });
    const signed_24 = table.intern(.{ .signed = 24 });
    const pointer_to_u32 = table.ptrTo(.u32);

    try std.testing.expect(table.isUnsignedInt(unsigned_24));
    try std.testing.expect(!table.isUnsignedInt(signed_24));
    // A pointer is not an unsigned integer, even when its pointee is one.
    try std.testing.expect(!table.isUnsignedInt(pointer_to_u32));
}
|
||||
|
||||
@@ -580,6 +580,20 @@ pub const TypeTable = struct {
|
||||
return 8;
|
||||
}
|
||||
|
||||
/// Report whether `ty` is an unsigned integer type.
///
/// Returns true for the builtin unsigned integers (u8/u16/u32/u64/usize)
/// and for any non-builtin type whose table entry is `.unsigned`. Returns
/// false for `bool`, the builtin signed integers, and every other builtin.
/// This is the signedness query behind the `type_is_unsigned` reflection
/// builtin and the `{}` formatter, which uses it to render a u64 as unsigned
/// decimal instead of its s64 reinterpretation.
pub fn isUnsignedInt(self: *const TypeTable, ty: TypeId) bool {
    return switch (ty) {
        .u8, .u16, .u32, .u64, .usize => true,
        .bool, .s8, .s16, .s32, .s64, .isize => false,
        // Remaining builtins are never unsigned integers; only a
        // non-builtin type is looked up in the table.
        else => !ty.isBuiltin() and self.get(ty) == .unsigned,
    };
}
|
||||
|
||||
pub fn typeSizeBytes(self: *const TypeTable, ty: TypeId) usize {
|
||||
const ptr_size: usize = self.pointer_size;
|
||||
if (ty == .void) return 0;
|
||||
|
||||
Reference in New Issue
Block a user