fix(std): render integer formatter extremes — s64::MIN and unsigned all-ones [F0.8]

Resolves issue 0090. The `{}` integer formatter mis-rendered both ends of the 64-bit range:

- `int_to_string` computed the magnitude as `0 - n`, which overflows for `s64::MIN` (its magnitude is unrepresentable as a positive s64) — the value stayed negative, the digit loop ran zero times, so only `-` printed. It now extracts digits straight from `n` (per-digit `|n % 10|`, with `n / 10` truncating toward zero), never negating MIN.
- `any_to_string`'s `case int:` formatted every integer as s64, so a u64 all-ones value printed as `-1`. There was no `uint` type-category to distinguish signedness. Added an additive `type_is_unsigned(T)` reflection builtin (static fold + dynamic interp/LLVM paths, mirroring `type_name`), backed by the new `TypeTable.isUnsignedInt` predicate, and a `uint_to_string` formatter (unsigned decimal via long-division over four 16-bit limbs). `case int:` now routes through `type_is_unsigned(type)`.

The 16-bit-limb split is factored into a shared `decompose_u16x4`, now reused by `int_to_hex_string` (no second unsigned-math routine).

Regression: examples/0046-basic-int-formatter-extremes pins both extremes plus a width spread; unit tests cover `isUnsignedInt`. Docs (specs.md representation note, readme std API) updated for unsigned/extreme `{}` behavior. IR snapshots refreshed for the two new std functions.
2026-06-05 09:05:37 +03:00
parent 5ef74a15f3
commit 64f77e9779
54 changed files with 36282 additions and 30161 deletions
--- a/library/modules/std.sx
+++ b/library/modules/std.sx
@@ -20,6 +20,7 @@ field_count :: ($T: Type) -> s64 #builtin;
 field_name :: ($T: Type, idx: s64) -> string #builtin;
 field_value :: (s: $T, idx: s64) -> Any #builtin;
 is_flags :: ($T: Type) -> bool #builtin;
+type_is_unsigned :: ($T: Type) -> bool #builtin;
 field_value_int :: ($T: Type, idx: s64) -> s64 #builtin;
 field_index :: ($T: Type, val: T) -> s64 #builtin;
 error_tag_name :: (e: $T) -> string #builtin;
@@ -67,17 +68,50 @@ alloc_slice :: ($T: Type, count: s64) -> []T {
 int_to_string :: (n: s64) -> string {
    if n == 0 { return "0"; }
    neg := n < 0;
-    v := if neg then 0 - n else n;
-    // Single pass: fill digits backwards into temp string, then substr
+    // Fill digits backwards from `n` itself, never negating it: `0 - n`
+    // overflows for s64::MIN (its magnitude is unrepresentable as a
+    // positive s64). sx `%` truncates toward zero, so `n % 10` keeps n's
+    // sign; negating that remainder (-9..0) is safe and yields the digit.
    tmp := cstring(20);
    i := 19;
-    while v > 0 {
-        tmp[i] = (v % 10) + 48;
+    v := n;
+    while v != 0 {
+        d := v % 10;
+        if d < 0 { d = 0 - d; }
+        tmp[i] = d + 48;
        v = v / 10;
        i -= 1;
    }
    if neg { tmp[i] = 45; i -= 1; }
-    substr(tmp, i + 1, 20 - i - 1)
+    substr(tmp, i + 1, 19 - i)
+}
+
+// Unsigned decimal rendering of `n`'s 64 bits, read as a two's-complement
+// bit pattern, so the full u64 range (0 .. 18446744073709551615) prints
+// correctly. Used by `any_to_string` for unsigned integer values, which
+// an s64-based formatter would misread (e.g. u64 all-ones as -1).
+uint_to_string :: (n: s64) -> string {
+    if n == 0 { return "0"; }
+    // Long division by 10 over the four unsigned 16-bit limbs, most
+    // significant first; each outer pass yields one digit (the final
+    // remainder), least significant first. The per-step accumulator stays
+    // within s64 (max 9*65536 + 65535), so signed `/` and `%` are exact.
+    g := decompose_u16x4(n);
+    tmp := cstring(20);
+    i := 19;
+    while g[0] != 0 or g[1] != 0 or g[2] != 0 or g[3] != 0 {
+        rem := 0;
+        k := 0;
+        while k < 4 {
+            acc := rem * 65536 + g[k];
+            g[k] = acc / 10;
+            rem = acc % 10;
+            k += 1;
+        }
+        tmp[i] = rem + 48;
+        i -= 1;
+    }
+    substr(tmp, i + 1, 19 - i)
 }

 bool_to_string :: (b: bool) -> string {
@@ -121,10 +155,13 @@ hex_group :: (buf: string, offset: s64, val: s64) {
    }
 }

-int_to_hex_string :: (n: s64) -> string {
-    if n == 0 { return "0"; }
-
-    // Split into four 16-bit groups for correct unsigned treatment
+// Split the 64 bits of `n` into four unsigned 16-bit limbs, most
+// significant first: [g3, g2, g1, g0]. A negative input is treated as
+// its two's-complement unsigned bit pattern — each limb is corrected
+// back into 0..65535 — so callers get correct unsigned arithmetic out
+// of a signed-only integer type. Shared by the hex and unsigned-decimal
+// formatters.
+decompose_u16x4 :: (n: s64) -> [4]s64 {
    g0 := n % 65536;
    if g0 < 0 { g0 = g0 + 65536; }
    r1 := (n - g0) / 65536;
@@ -136,12 +173,23 @@ int_to_hex_string :: (n: s64) -> string {
    r3 := (r2 - g2) / 65536;
    g3 := r3 % 65536;
    if g3 < 0 { g3 = g3 + 65536; }
+    limbs : [4]s64 = ---;
+    limbs[0] = g3;
+    limbs[1] = g2;
+    limbs[2] = g1;
+    limbs[3] = g0;
+    limbs
+}

+int_to_hex_string :: (n: s64) -> string {
+    if n == 0 { return "0"; }
+
+    g := decompose_u16x4(n);
    buf := cstring(16);
-    hex_group(buf, 0, g3);
-    hex_group(buf, 4, g2);
-    hex_group(buf, 8, g1);
-    hex_group(buf, 12, g0);
+    hex_group(buf, 0, g[0]);
+    hex_group(buf, 4, g[1]);
+    hex_group(buf, 8, g[2]);
+    hex_group(buf, 12, g[3]);

    // Skip leading zeros (keep at least 1 digit)
    start := 0;
@@ -319,7 +367,10 @@ any_to_string :: (val: Any) -> string {
    type := type_of(val);
    if type == {
        case void: result = "";
-        case int: result = int_to_string(xx val);
+        case int: {
+            if type_is_unsigned(type) { result = uint_to_string(xx val); }
+            else { result = int_to_string(xx val); }
+        }
        case string: { s : string = xx val; result = s; }
        case bool: result = bool_to_string(xx val);
        case float: result = float_to_string(xx val);