cstring is ONE pointer to a null-terminated u8 buffer, C's char*: thin (8 bytes, no length; cstring_len walks to the terminator), crossing #foreign boundaries verbatim in both directions, with ?cstring as the nullable case lowering to the same bare pointer (null = absent). Conversion discipline mirrors Odin: a string LITERAL coerces implicitly (its bytes are terminated constants); any other string is rejected with a diagnostic naming to_cstring (it may be an unterminated view); and cstring never coerces to string implicitly — from_cstring(c) is the explicit zero-copy view, pricing the strlen. Plumbing: TypeId/TypeInfo builtin slot 18 (first_user 19), name classifiers, size/align/name tables, LLVM ptr lowering, the ?T pointer niche, the xx pointer ladder, the literal-gated coercion plan (isConstString + data_ptr), and the reserved-spelling set. std gains cstring_len/from_cstring/to_cstring (fmt.sx, re-exported); the old cstring(size) allocator helper is renamed alloc_string everywhere; getenv migrates to (name: cstring) -> ?cstring as the canonical user and env() drops its manual strlen/memcpy. Pinned: examples/1222 (FFI both directions, literal coercion, ?cstring null paths, round trip) and examples/1173 (both coercion diagnostics); FAIL pre-feature. The alloc_string rename + getenv signature shift the .ir snapshots — regenerated. zig build test 426/426; run_examples 604/604. Spec: reserved spelling + cstring section + C-interop rows.
423 lines
13 KiB
Plaintext
423 lines
13 KiB
Plaintext
// Formatting + string helpers: the `*_to_string` family, `any_to_string`,
|
|
// the comptime `format` / `print` pair, and the slice/string allocation
|
|
// helpers they build on. Consumers never import this file directly —
|
|
// std.sx re-exports every public name here.
|
|
#import "modules/std/core.sx";
|
|
|
|
// --- Slice & string allocation ---
|
|
|
|
// Allocate a zero-filled string of `size` bytes from the context
// allocator. One byte beyond `len` is allocated and zeroed too, so the
// buffer always ends in a null terminator.
alloc_string :: (size: i64) -> string {
    total := size + 1;
    bytes := context.allocator.alloc_bytes(total);
    memset(bytes, 0, total);

    fresh : string = ---;
    fresh.len = size;
    fresh.ptr = xx bytes;
    fresh
}
|
|
|
|
// Allocate a zero-filled slice of `count` elements of type `T` from the
// context allocator.
alloc_slice :: ($T: Type, count: i64) -> []T {
    byte_count := count * size_of(T);
    bytes := context.allocator.alloc_bytes(byte_count);
    memset(bytes, 0, byte_count);

    fresh : []T = ---;
    fresh.len = count;
    fresh.ptr = xx bytes;
    fresh
}
|
|
|
|
// Signed decimal rendering of `n`.
//
// Digits are peeled off `n` itself and never off its negation: `0 - n`
// overflows for i64::MIN. sx `%` truncates toward zero, so each
// remainder carries n's sign and is made positive before use.
int_to_string :: (n: i64) -> string {
    if n == 0 { return "0"; }

    // 20 bytes fit the longest result: 19 digits plus a leading '-'.
    // The buffer is filled from the right.
    scratch := alloc_string(20);
    pos := 19;
    rest := n;
    while rest != 0 {
        digit := rest % 10;
        if digit < 0 { digit = 0 - digit; }
        scratch[pos] = digit + 48;      // 48 = '0'
        rest = rest / 10;
        pos -= 1;
    }
    if n < 0 {
        scratch[pos] = 45;              // 45 = '-'
        pos -= 1;
    }
    substr(scratch, pos + 1, 19 - pos)
}
|
|
|
|
// Decimal rendering of the 64 bits of `n` read as an unsigned value, so
// the whole u64 range (0 .. 18446744073709551615) prints correctly.
// `any_to_string` routes unsigned integers here; a signed formatter
// would show, for example, an all-ones u64 as -1.
uint_to_string :: (n: i64) -> string {
    if n == 0 { return "0"; }

    // Schoolbook division by 10 over four 16-bit limbs (most significant
    // first). The remainder of one limb is carried into the next, and
    // the working value never exceeds 9*65536 + 65535, so plain signed
    // `/` and `%` give exact results.
    limbs := decompose_u16x4(n);
    digits := alloc_string(20);
    pos := 19;
    while limbs[0] != 0 or limbs[1] != 0 or limbs[2] != 0 or limbs[3] != 0 {
        carry := 0;
        idx := 0;
        while idx < 4 {
            acc := carry * 65536 + limbs[idx];
            carry = acc % 10;
            limbs[idx] = acc / 10;
            idx += 1;
        }
        // The final carry is the next decimal digit, least significant first.
        digits[pos] = carry + 48;
        pos -= 1;
    }
    substr(digits, pos + 1, 19 - pos)
}
|
|
|
|
// "true" or "false" for `b`.
bool_to_string :: (b: bool) -> string {
    if b { return "true"; }
    "false"
}
|
|
|
|
// Fixed-notation rendering of `f` with exactly six fractional digits,
// e.g. 2.3 -> "2.300000" and -0.5 -> "-0.500000".
//
// The fraction is ROUNDED to nearest at the sixth digit. Plain
// truncation misprints every value whose binary representation sits just
// below the decimal one: 2.3 is stored as 2.2999999999999998..., which
// truncates to "2.299999".
//
// NOTE(review): the integer part goes through an i64 cast, so NaN,
// infinities and magnitudes beyond the i64 range are not handled here.
float_to_string :: (f: f64) -> string {
    neg := f < 0.0;
    v := if neg then 0.0 - f else f;
    int_part := cast(i64) v;

    // Scale the fraction to six digits and add 0.5 so the truncating
    // cast rounds to nearest.
    frac := cast(i64) ((v - cast(f64) int_part) * 1000000.0 + 0.5);
    if frac < 0 { frac = 0 - frac; }
    // Rounding can carry out of the fraction (0.9999996 -> 1.000000).
    if frac >= 1000000 {
        frac = frac - 1000000;
        int_part += 1;
    }

    istr := int_to_string(int_part);
    fstr := int_to_string(frac);
    il := istr.len;
    fl := fstr.len;

    // Layout: optional '-', integer digits, '.', six fraction digits.
    prefix := if neg then 1 else 0;
    total := prefix + il + 1 + 6;
    buf := alloc_string(total);
    pos := 0;
    if neg { buf[0] = 45; pos = 1; }    // 45 = '-'
    memcpy(@buf[pos], istr.ptr, il);
    pos = pos + il;
    buf[pos] = 46;                      // 46 = '.'
    pos += 1;

    // Left-pad the fraction with '0' up to six digits.
    pad := 6 - fl;
    memset(@buf[pos], 48, pad);
    pos = pos + pad;
    memcpy(@buf[pos], fstr.ptr, fl);
    buf
}
|
|
|
|
// Write the four lowercase hex digits of `val` into `buf[offset ..
// offset + 3]`, most significant digit first. Callers in this file pass
// one 16-bit limb (0..65535) per call.
hex_group :: (buf: string, offset: i64, val: i64) {
    rest := val;
    pos := offset + 3;
    // Fill right to left: the lowest nibble lands in the last slot.
    while pos >= offset {
        nibble := rest % 16;
        if nibble < 10 { buf[pos] = nibble + 48; }      // '0'..'9'
        else { buf[pos] = nibble - 10 + 97; }           // 'a'..'f'
        rest = rest / 16;
        pos -= 1;
    }
}
|
|
|
|
// Break the 64 bits of `n` into four unsigned 16-bit limbs, returned
// most significant first. A negative `n` is read as its two's-complement
// bit pattern: each truncated remainder that comes out negative is
// shifted back into 0..65535. This gives the hex and unsigned-decimal
// formatters unsigned arithmetic on top of a signed integer type.
decompose_u16x4 :: (n: i64) -> [4]i64 {
    limbs : [4]i64 = ---;
    rest := n;
    slot := 3;
    // Peel limbs from least significant (slot 3) to most (slot 0).
    while slot >= 0 {
        limb := rest % 65536;
        if limb < 0 { limb = limb + 65536; }
        limbs[slot] = limb;
        // Subtracting the limb first makes the division exact.
        rest = (rest - limb) / 65536;
        slot -= 1;
    }
    limbs
}
|
|
|
|
// Lowercase hexadecimal rendering of the 64 bits of `n`, with no prefix
// and no leading zeros ("0" for zero). Negative values print as their
// two's-complement bit pattern.
int_to_hex_string :: (n: i64) -> string {
    if n == 0 { return "0"; }

    // Four 16-bit limbs, four hex digits each, most significant first.
    limbs := decompose_u16x4(n);
    digits := alloc_string(16);
    k := 0;
    while k < 4 {
        hex_group(digits, k * 4, limbs[k]);
        k += 1;
    }

    // Drop leading '0' characters but always keep the last digit.
    first := 0;
    while first < 15 {
        if digits[first] != 48 { break; }
        first += 1;
    }
    substr(digits, first, 16 - first)
}
|
|
|
|
// A newly allocated string holding the bytes of `a` followed by the
// bytes of `b`. Neither input is modified.
concat :: (a: string, b: string) -> string {
    joined := alloc_string(a.len + b.len);
    memcpy(joined.ptr, a.ptr, a.len);
    memcpy(@joined[a.len], b.ptr, b.len);
    joined
}
|
|
|
|
// A newly allocated copy of the `len` bytes of `s` beginning at index
// `start`. No range checking is done here; callers must pass a range
// that lies inside `s`.
substr :: (s: string, start: i64, len: i64) -> string {
    piece := alloc_string(len);
    memcpy(piece.ptr, @s[start], len);
    piece
}
|
|
|
|
// ── cstring: the C-boundary string ────────────────────────────────────
|
|
// `cstring` is ONE pointer to a null-terminated u8 buffer — C's `char *`.
|
|
// It carries no length (`cstring_len` walks to the terminator) and
|
|
// crosses `#foreign` boundaries verbatim in both directions; `?cstring`
|
|
// is the nullable case (null pointer = absent). String LITERALS coerce
|
|
// to `cstring` implicitly — their bytes are terminated constants; every
|
|
// other `string` must materialize through `to_cstring`.
|
|
|
|
// Number of bytes in `c` before its null terminator (C's strlen). The
// scan is linear in the length, since a cstring stores no length.
cstring_len :: (c: cstring) -> i64 {
    bytes : [*]u8 = xx c;
    count := 0;
    while bytes[count] != 0 { count += 1; }
    count
}
|
|
|
|
// Wrap `c` in a string VIEW without copying: the result points at the
// same bytes and its length is `cstring_len(c)`. Because the buffer is
// shared, take a `substr` copy when the string must outlive the source.
from_cstring :: (c: cstring) -> string {
    bytes : [*]u8 = xx c;
    view : string = ---;
    view.ptr = bytes;
    view.len = cstring_len(c);
    view
}
|
|
|
|
// Copy `s` into a freshly allocated, null-terminated buffer and return
// it as a `cstring`. The terminator comes from `alloc_string`, which
// zeroes one byte past the requested length.
to_cstring :: (s: string) -> cstring {
    n := s.len;
    owned := alloc_string(n);
    memcpy(owned.ptr, s.ptr, n);
    xx owned.ptr
}
|
|
|
|
// Concatenate path components using '/' (POSIX) as the separator.
// Empty components are ignored. Where two components meet, exactly one
// separator is kept: a '/' is inserted when neither side has one, and
// one is dropped when both sides have one. Intended for bundle paths —
// Apple .app and Android APK both expect POSIX-style paths.
path_join :: (..parts: []string) -> string {
    joined := "";
    idx := 0;
    while idx < parts.len {
        part := parts[idx];
        if part.len > 0 {
            // The boundary only needs fixing once something is already joined.
            if joined.len > 0 {
                if joined[joined.len - 1] == 47 {       // 47 = '/'
                    // Separator on both sides: drop the component's.
                    if part[0] == 47 {
                        part = substr(part, 1, part.len - 1);
                    }
                } else if part[0] != 47 {
                    // Separator on neither side: supply one.
                    joined = concat(joined, "/");
                }
            }
            joined = concat(joined, part);
        }
        idx += 1;
    }
    joined
}
|
|
|
|
// Render a struct value as `TypeName{field: value, field: value}`, with
// each field value formatted through `any_to_string`.
struct_to_string :: (s: $T) -> string {
    text := concat(type_name(T), "{");
    idx := 0;
    while idx < field_count(T) {
        if i_needs_separator := idx > 0 { text = concat(text, ", "); }
        text = concat(concat(text, field_name(T, idx)), ": ");
        text = concat(text, any_to_string(field_value(s, idx)));
        idx += 1;
    }
    concat(text, "}")
}
|
|
|
|
// Render a vector value as `[a, b, c]`, formatting each component
// through `any_to_string`.
vector_to_string :: (v: $T) -> string {
    text := "[";
    idx := 0;
    while idx < field_count(T) {
        if idx > 0 { text = concat(text, ", "); }
        component := any_to_string(field_value(v, idx));
        text = concat(text, component);
        idx += 1;
    }
    concat(text, "]")
}
|
|
|
|
// Render a fixed-size array value as `[a, b, c]`, formatting each
// element through `any_to_string`.
array_to_string :: (a: $T) -> string {
    text := "[";
    idx := 0;
    while idx < field_count(T) {
        if idx > 0 { text = concat(text, ", "); }
        element := any_to_string(field_value(a, idx));
        text = concat(text, element);
        idx += 1;
    }
    concat(text, "]")
}
|
|
|
|
// Render a slice as `[a, b, c]`, formatting each of its `items.len`
// elements through `any_to_string`.
slice_to_string :: (items: []$T) -> string {
    text := "[";
    idx := 0;
    while idx < items.len {
        if idx > 0 { text = concat(text, ", "); }
        element := any_to_string(field_value(items, idx));
        text = concat(text, element);
        idx += 1;
    }
    concat(text, "]")
}
|
|
|
|
// Render a pointer as `TypeName@0x<hex address>`, or "null" when the
// address is zero.
pointer_to_string :: (p: $T) -> string {
    addr : i64 = xx p;
    if addr == 0 { return "null"; }
    location := concat("@0x", int_to_hex_string(addr));
    concat(type_name(T), location)
}
|
|
|
|
// Render a flags value as its set members joined by " | ", each written
// as `.Name` (e.g. `.Read | .Write`). A member is listed when its value
// shares at least one bit with `val`; if no member matches the result
// is "0".
flags_to_string :: (val: $T) -> string {
    bits := cast(i64) val;
    text := "";
    idx := 0;
    while idx < field_count(T) {
        mask := field_value_int(T, idx);
        if bits & mask {
            if text.len > 0 { text = concat(text, " | "); }
            text = concat(text, concat(".", field_name(T, idx)));
        }
        idx += 1;
    }
    if text.len == 0 { return "0"; }
    text
}
|
|
|
|
// Render an enum value. Flags enums are delegated to `flags_to_string`.
// Any other enum prints as `.Variant`, followed by `(payload)` when the
// active variant's payload formats to a non-empty string.
enum_to_string :: (u: $T) -> string {
    if is_flags(T) { return flags_to_string(u); }

    idx := field_index(T, u);
    label := concat(".", field_name(T, idx));
    payload_text := any_to_string(field_value(u, idx));
    if payload_text.len == 0 { return label; }
    concat(label, concat("(", concat(payload_text, ")")))
}
|
|
|
|
// Render an optional: "null" when it is empty, otherwise the unwrapped
// value formatted through `any_to_string`.
optional_to_string :: (o: $T) -> string {
    if o != null { return any_to_string(o!); }
    "null"
}
|
|
|
|
// Format any value as text by dispatching on its type category and
// forwarding to the matching `*_to_string` helper. A category with no
// case below yields the placeholder "<?>".
any_to_string :: (val: Any) -> string {
    text := "<?>";
    type := type_of(val);
    if type == {
        case void:
            text = "";
        case int: {
            // Unsigned integers need their own formatter so the top bit
            // is not read as a sign.
            if type_is_unsigned(type) { text = uint_to_string(xx val); }
            else { text = int_to_string(xx val); }
        }
        case string: {
            as_string : string = xx val;
            text = as_string;
        }
        case bool:
            text = bool_to_string(xx val);
        case float:
            text = float_to_string(xx val);
        case struct:
            text = struct_to_string(cast(type) val);
        case enum:
            text = enum_to_string(cast(type) val);
        case error_set: {
            tagid : u32 = xx val;
            text = error_tag_name(tagid);
        }
        case vector:
            text = vector_to_string(cast(type) val);
        case array:
            text = array_to_string(cast(type) val);
        case slice:
            text = slice_to_string(cast(type) val);
        case pointer:
            text = pointer_to_string(cast(type) val);
        case optional:
            text = optional_to_string(cast(type) val);
        case type:
            text = type_name(val);
    }
    text
}
|
|
|
|
// Helper for `build_format`: returns `code` with one generated statement
// appended that adds the literal slice of `fmt` starting at `start`,
// `len` bytes long, to `result`.
build_format_segment :: (code: string, start: i64, len: i64) -> string {
    stmt := concat(code, "result = concat(result, substr(fmt, ");
    stmt = concat(stmt, int_to_string(start));
    stmt = concat(stmt, ", ");
    stmt = concat(stmt, int_to_string(len));
    concat(stmt, ")); ")
}

// Translate the format string `fmt` into sx source text that builds the
// formatted output in a local named `result`. The generated code refers
// to `fmt` and `args` by name, so it must be inserted where both are in
// scope (see `format` / `print`).
//
// Recognized sequences:
//   `{}`  — replaced by `any_to_string(args[k])`, k counting from 0;
//   `{{`  — a literal `{`;
//   `}}`  — a literal `}`.
// Every other byte, including a lone `{` or `}`, is copied literally.
build_format :: (fmt: string) -> string {
    code := "result := \"\"; ";
    seg_start := 0;     // start of the literal run not yet emitted
    i := 0;
    arg_idx := 0;
    while i < fmt.len {
        // 1 for an ordinary byte or a lone brace (it stays in the
        // pending literal run); 2 when a two-byte sequence was consumed.
        consumed := 1;
        if i + 1 < fmt.len {
            cur := fmt[i];
            following := fmt[i + 1];
            if cur == 123 {                     // '{'
                if following == 125 {           // "{}": placeholder
                    if i > seg_start {
                        code = build_format_segment(code, seg_start, i - seg_start);
                    }
                    code = concat(code, "result = concat(result, any_to_string(args[");
                    code = concat(code, int_to_string(arg_idx));
                    code = concat(code, "])); ");
                    arg_idx += 1;
                    consumed = 2;
                } else if following == 123 {    // "{{": keep the first brace only
                    code = build_format_segment(code, seg_start, i - seg_start + 1);
                    consumed = 2;
                }
            } else if cur == 125 {              // '}'
                if following == 125 {           // "}}": keep the first brace only
                    code = build_format_segment(code, seg_start, i - seg_start + 1);
                    consumed = 2;
                }
            }
        }
        i += consumed;
        // After a two-byte sequence the next literal run starts fresh.
        if consumed == 2 { seg_start = i; }
    }
    // Flush whatever literal text follows the last sequence.
    if seg_start < fmt.len {
        code = build_format_segment(code, seg_start, fmt.len - seg_start);
    }
    code
}
|
|
|
|
// Build a string from the format string `fmt` and the arguments `args`.
// The body is generated: the source text returned by `build_format(fmt)`
// is inserted here, leaving the output in `result`, which is then
// returned. See `build_format` for the `{}` / `{{` / `}}` syntax.
format :: ($fmt: string, ..$args) -> string {
    #insert build_format(fmt);
    #insert "return result;";
}
|
|
|
|
// Format `fmt` with `args` exactly as `format` does, then pass the
// resulting string to `out` instead of returning it.
print :: ($fmt: string, ..$args) {
    #insert build_format(fmt);
    #insert "out(result);";
}
|