lang: introduce cstring — the C-boundary string (Odin model)

cstring is ONE pointer to a null-terminated u8 buffer, C's char*: thin
(8 bytes, no length; cstring_len walks to the terminator), crossing
#foreign boundaries verbatim in both directions, with ?cstring as the
nullable case lowering to the same bare pointer (null = absent).

Conversion discipline mirrors Odin: a string LITERAL coerces implicitly
(its bytes are terminated constants); any other string is rejected with
a diagnostic naming to_cstring (it may be an unterminated view); and
cstring never coerces to string implicitly — from_cstring(c) is the
explicit zero-copy view, which pays for the strlen up front.

Plumbing: TypeId/TypeInfo builtin slot 18 (first_user 19), name
classifiers, size/align/name tables, LLVM ptr lowering, the ?T pointer
niche, the xx pointer ladder, the literal-gated coercion plan
(isConstString + data_ptr), and the reserved-spelling set. std gains
cstring_len/from_cstring/to_cstring (fmt.sx, re-exported); the old
cstring(size) allocator helper is renamed alloc_string everywhere;
getenv migrates to (name: cstring) -> ?cstring as the canonical user
and env() drops its manual strlen/memcpy.

Pinned: examples/1222 (FFI both directions, literal coercion,
?cstring null paths, round trip) and examples/1173 (both coercion
diagnostics); both FAIL before this feature. The alloc_string rename + getenv
signature shift the .ir snapshots — regenerated. zig build test
426/426; run_examples 604/604.

Spec: reserved spelling + cstring section + C-interop rows.
This commit is contained in:
agra
2026-06-12 14:50:53 +03:00
parent d88bdd7242
commit 1d17b0abcf
58 changed files with 26437 additions and 25257 deletions

View File

@@ -64,13 +64,14 @@ pub const TypeLowering = struct {
.bool => self.e.cached_i1,
.error_set => self.e.cached_i32, // u32 tag id on the error channel
.string => self.e.getStringStructType(),
.cstring => self.e.cached_ptr,
.pointer, .many_pointer, .function => self.e.cached_ptr,
.closure => self.e.getClosureStructType(),
.slice => self.e.getStringStructType(), // same {ptr, i64} layout
.optional => |opt| {
// ?*T / ?fn → bare pointer (null = none)
const child_info = self.e.ir_mod.types.get(opt.child);
if (child_info == .pointer or child_info == .many_pointer or child_info == .function) {
if (child_info == .pointer or child_info == .many_pointer or child_info == .function or child_info == .cstring) {
return self.e.cached_ptr;
}
if (child_info == .closure) {

View File

@@ -43,11 +43,15 @@ pub const CoercionResolver = struct {
widen, // same kind, dst wider
narrow, // same kind, dst narrower
array_to_slice, // [N]T → []T (materialize backing storage + header)
string_to_cstring, // literal-only implicit; other strings need to_cstring
cstring_to_string_reject, // explicit from_cstring required (diagnostic)
none, // nothing applies — pass the value through
};
pub fn classify(self: CoercionResolver, src_ty: TypeId, dst_ty: TypeId) CoercionPlan {
if (src_ty == dst_ty) return .no_op;
if (src_ty == .string and dst_ty == .cstring) return .string_to_cstring;
if (src_ty == .cstring and dst_ty == .string) return .cstring_to_string_reject;
if (src_ty == .any and dst_ty != .any) return .unbox_any;
if (dst_ty == .any and src_ty != .any) return .box_any;
@@ -113,8 +117,8 @@ pub const CoercionResolver = struct {
const dst_float = Lowering.isFloat(dst_ty);
const src_int = self.l.isIntEx(src_ty);
const dst_int = self.l.isIntEx(dst_ty);
const src_ptr = !src_ty.isBuiltin() and self.l.module.types.get(src_ty) == .pointer;
const dst_ptr = !dst_ty.isBuiltin() and self.l.module.types.get(dst_ty) == .pointer;
const src_ptr = (!src_ty.isBuiltin() and self.l.module.types.get(src_ty) == .pointer) or src_ty == .cstring;
const dst_ptr = (!dst_ty.isBuiltin() and self.l.module.types.get(dst_ty) == .pointer) or dst_ty == .cstring;
if (src_int and dst_float) return .int_to_float;
if (src_float and dst_int) return .float_to_int;

View File

@@ -630,6 +630,30 @@ pub fn coerceMode(self: *Lowering, val: Ref, src_ty: TypeId, dst_ty: TypeId, mod
const unwrapped = self.builder.emit(.{ .optional_unwrap = .{ .operand = val } }, child_ty);
return self.coerceMode(unwrapped, child_ty, dst_ty, mode);
},
// string → cstring: ONLY a string LITERAL coerces implicitly — its
// bytes are a terminated constant (Odin's literal blessing). Any
// other string may be an unterminated view, so it must materialize
// through `to_cstring`.
.string_to_cstring => {
if (self.builder.isConstString(val)) {
return self.builder.emit(.{ .data_ptr = .{ .operand = val } }, .cstring);
}
if (self.diagnostics) |d| {
const cs = self.builder.current_span;
d.addFmt(.err, ast.Span{ .start = cs.start, .end = cs.end }, "only a string LITERAL coerces to 'cstring' implicitly; an arbitrary string may be an unterminated view — materialize it with to_cstring(s)", .{});
}
return val;
},
// cstring → string: the length is implicit (strlen), so the
// conversion is never silent — `from_cstring(c)` is the zero-copy
// view, `substr(from_cstring(c), 0, ...)` the owned copy.
.cstring_to_string_reject => {
if (self.diagnostics) |d| {
const cs = self.builder.current_span;
d.addFmt(.err, ast.Span{ .start = cs.start, .end = cs.end }, "'cstring' does not coerce to 'string' implicitly (the length is implicit); convert with from_cstring(c)", .{});
}
return val;
},
// void → Optional: produce null (void is the type of null_literal)
.void_to_optional => return self.builder.constNull(dst_ty),
// Concrete → Optional wrapping (coerce to the inner type first)

View File

@@ -358,6 +358,24 @@ pub const Builder = struct {
/// value and the span it was emitted with; else null. The implicit
/// float→int coercion rule reads this to fold an integral literal to its
/// int (and to locate a non-integral one for its diagnostic).
/// True iff `ref` is a `const_string` instruction — a string LITERAL
/// value (terminated constant data), the only string shape that may
/// implicitly coerce to `cstring`.
pub fn isConstString(self: *Builder, ref: Ref) bool {
if (self.func == null) return false;
const func = self.currentFunc();
const ref_idx = @intFromEnum(ref);
if (ref_idx < func.params.len) return false;
for (func.blocks.items) |*block| {
const first = block.first_ref;
if (ref_idx >= first and ref_idx < first + @as(u32, @intCast(block.insts.items.len))) {
const i = block.insts.items[ref_idx - first];
return i.op == .const_string;
}
}
return false;
}
pub fn constFloatInfo(self: *Builder, ref: Ref) ?ConstFloatInfo {
if (self.func == null) return null;
const func = self.currentFunc();

View File

@@ -55,6 +55,7 @@ pub const TypeResolver = struct {
if (std.mem.eql(u8, name, "f64")) return .f64;
if (std.mem.eql(u8, name, "bool")) return .bool;
if (std.mem.eql(u8, name, "string")) return .string;
if (std.mem.eql(u8, name, "cstring")) return .cstring;
if (std.mem.eql(u8, name, "void")) return .void;
if (std.mem.eql(u8, name, "Any")) return .any;
// `Type` values are runtime-representable as Any-shaped pairs

View File

@@ -33,9 +33,10 @@ pub const TypeId = enum(u32) {
isize = 15,
usize = 16,
void = 17,
_, // user-defined types start at 18
cstring = 18, // thin null-terminated char* (see TypeInfo.cstring)
_, // user-defined types start at 19
pub const first_user: u32 = 18;
pub const first_user: u32 = 19;
pub fn index(self: TypeId) u32 {
return @intFromEnum(self);
@@ -63,6 +64,7 @@ pub const TypeInfo = union(enum) {
void,
bool,
string, // [:0]u8 — fat pointer {ptr, len}
cstring, // thin null-terminated char* — ONE pointer, length implicit (strlen)
@"struct": StructInfo,
@"enum": EnumInfo,
@@ -382,6 +384,7 @@ pub const TypeTable = struct {
.isize, // 15: isize (pointer-sized signed)
.usize, // 16: usize (pointer-sized unsigned)
.void, // 17
.cstring, // 18: thin null-terminated char*
};
for (&builtins) |info| {
table.infos.append(alloc, info) catch unreachable;
@@ -587,13 +590,14 @@ pub const TypeTable = struct {
.f32 => 4,
.f64 => 8,
.string => 16, // {ptr, len}
.cstring => 8, // one pointer
.pointer, .many_pointer, .function => 8,
.closure => 16, // {fn_ptr, env}
.optional => |opt| blk: {
// Sentinel-shaped optionals (pointer/closure/protocol) cost
// no extra storage — null reuses the payload's null state.
const child_info = self.get(opt.child);
if (child_info == .pointer or child_info == .many_pointer or child_info == .function) break :blk 8;
if (child_info == .pointer or child_info == .many_pointer or child_info == .function or child_info == .cstring) break :blk 8;
if (child_info == .closure) break :blk 16;
if (child_info == .@"struct" and child_info.@"struct".is_protocol) break :blk self.sizeOf(opt.child);
// Discriminated form: payload + has_value flag (8-aligned).
@@ -682,6 +686,7 @@ pub const TypeTable = struct {
if (ty == .i32 or ty == .u32 or ty == .f32) return 4;
if (ty == .i64 or ty == .u64 or ty == .f64) return 8;
if (ty == .usize or ty == .isize) return ptr_size;
if (ty == .cstring) return ptr_size;
if (ty == .string) return 16; // {ptr, i64} — always 16 (i64 alignment pads on wasm32)
if (ty == .any) return 16; // {i64 tag, i64 value} — Any boxed layout
if (ty.isBuiltin()) return ptr_size; // default for unknown builtins
@@ -783,6 +788,7 @@ pub const TypeTable = struct {
if (ty == .i64 or ty == .u64 or ty == .f64) return 8;
if (ty == .usize or ty == .isize) return ptr_align;
if (ty == .string) return 8; // i64 drives alignment
if (ty == .cstring) return ptr_align;
if (ty == .any) return 8; // {i64, i64} aligns to 8
if (ty.isBuiltin()) return ptr_align;
const info = self.get(ty);
@@ -853,6 +859,7 @@ pub const TypeTable = struct {
.f32 => "f32",
.f64 => "f64",
.string => "string",
.cstring => "cstring",
.any => "Any",
.noreturn => "noreturn",
.isize => "isize",
@@ -999,7 +1006,7 @@ fn hashTypeInfo(h: *std.hash.Wyhash, info: TypeInfo) void {
switch (info) {
.signed => |w| h.update(&.{w}),
.unsigned => |w| h.update(&.{w}),
.f32, .f64, .void, .bool, .string, .any, .noreturn, .usize, .isize, .unresolved => {},
.f32, .f64, .void, .bool, .string, .cstring, .any, .noreturn, .usize, .isize, .unresolved => {},
.pointer => |p| h.update(std.mem.asBytes(&p.pointee)),
.many_pointer => |p| h.update(std.mem.asBytes(&p.element)),
.slice => |s| h.update(std.mem.asBytes(&s.element)),
@@ -1070,7 +1077,7 @@ fn typeInfoEql(a: TypeInfo, b: TypeInfo) bool {
return switch (a) {
.signed => |w| w == b.signed,
.unsigned => |w| w == b.unsigned,
.f32, .f64, .void, .bool, .string, .any, .noreturn, .usize, .isize, .unresolved => true,
.f32, .f64, .void, .bool, .string, .cstring, .any, .noreturn, .usize, .isize, .unresolved => true,
.pointer => |p| p.pointee == b.pointer.pointee,
.many_pointer => |p| p.element == b.many_pointer.element,
.slice => |s| s.element == b.slice.element,

View File

@@ -20,6 +20,7 @@ pub const Type = union(enum) {
void_type,
boolean,
string_type,
cstring_type,
enum_type: []const u8,
struct_type: []const u8,
union_type: []const u8,
@@ -116,6 +117,7 @@ pub const Type = union(enum) {
if (name.len == 0) return null;
return switch (name[0]) {
's' => if (std.mem.eql(u8, name, "string")) .string_type else null,
'c' => if (std.mem.eql(u8, name, "cstring")) .cstring_type else null,
'u' => {
if (std.mem.eql(u8, name, "usize")) return .usize_type;
if (name.len >= 2) {
@@ -199,6 +201,7 @@ pub const Type = union(enum) {
.f64 => "f64",
.boolean => "bool",
.string_type => "string",
.cstring_type => "cstring",
.void_type => "void",
.usize_type => "usize",
.isize_type => "isize",
@@ -286,6 +289,7 @@ pub const Type = union(enum) {
.f64 => "f64",
.boolean => "bool",
.string_type => "string",
.cstring_type => "cstring",
.void_type => "void",
.any_type => "Any",
.usize_type => "usize",