fix(0128): foreign cstring returns + conflicting same-symbol bindings

Two genuine defects behind the 0128 filing (whose original repros were
both poisoned by binding getenv, which std already declares -> *u8):

1. Re-declaring a C symbol was silent first-wins: every call through
   the later declaration was typed by the older signature. Foreign
   registration now dedupes — equal signatures share one FuncId,
   conflicting ones are diagnosed.

2. Foreign -> string / -> ?string returns read garbage: C returns one
   char*, but the LLVM signature declared the fat {ptr,i64} (len =
   register garbage), and ?string was mis-declared SRET (the hidden
   out-pointer landed in the callee's first arg register). cstrRetKind
   now classifies such returns, declares them as plain ptr (never
   sret), and the call site synthesizes {ptr, strlen} via a
   branch-guarded strlen (NULL -> {null,0} / optional null), wrapping
   {string, i1} for ?string.

?[:0]u8 itself resolves fine (it is ?string); the spelling works in
return, param, local, and alias positions.

Regression: examples/1221 (plain + optional non-null + NULL paths) and
examples/1172 (conflict diagnostic); both FAIL pre-fix. The extern
dedupe collapses duplicate libc decls, so affected .ir snapshots were
regenerated. zig build test 426/426; run_examples 602/602;
distribution suite 21/21.
This commit is contained in:
agra
2026-06-12 14:13:01 +03:00
parent a8fbded567
commit d88bdd7242
50 changed files with 24903 additions and 28907 deletions

View File

@@ -804,7 +804,10 @@ pub const Ops = struct {
};
const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c;
const callee_raw_ret = self.e.toLLVMType(callee_func.ret);
const callee_uses_sret = callee_needs_c_abi and self.e.needsByval(callee_func.ret, callee_raw_ret);
// Foreign string/?string returns receive one `char *` — never sret
// (must mirror declareFunction's signature classification).
const cstr_ret = self.e.cstrRetKind(callee_func);
const callee_uses_sret = callee_needs_c_abi and cstr_ret == .none and self.e.needsByval(callee_func.ret, callee_raw_ret);
// When the callee uses sret, prepend an alloca for the result.
// Index alignment: actual_args[0] = sret_slot; actual_args[i+1] = sx arg i.
@@ -859,6 +862,10 @@ pub const Ops = struct {
c.LLVMAddCallSiteAttribute(result, param1_idx, sret_attr);
// Load the actual struct value the callee wrote into the slot.
result = c.LLVMBuildLoad2(self.e.builder, callee_raw_ret, sret_slot, "sret.load");
} else if (!call_is_void_like and cstr_ret != .none) {
// The C side returned `char *`; build the fat sx string (and the
// optional wrapper) from it.
result = self.e.cstrReturnToSx(result, cstr_ret == .optional);
} else if (!call_is_void_like and callee_func.is_extern) {
// Coerce ABI return value (e.g. i64 / [2 x i64]) back to IR struct type if needed
const expected_ty = self.e.toLLVMType(instruction.ty);

View File

@@ -1242,13 +1242,19 @@ pub const LLVMEmitter = struct {
// main always returns i32 at the LLVM level (JIT expects it)
const raw_ret_ty = self.toLLVMType(func.ret);
const needs_c_abi = func.is_extern or func.call_conv == .c;
// A foreign `-> string` / `-> ?string` receives ONE `char *` from C;
// the fat sx value is synthesized at the call site (emitCall's
// cstrReturnToSx). Never sret — the C callee knows nothing about an
// out-pointer.
const cstr_ret = self.cstrRetKind(func);
// sret return: C-ABI functions returning a >16 B non-HFA struct
// use the indirect-return convention (caller allocates space,
// passes its pointer as a hidden first arg with `sret(<T>)`,
// function writes through and returns void). Distinct from
// small-struct register coercion (i64 / [2 x i64]) and HFA.
const uses_sret = needs_c_abi and !is_main and self.needsByval(func.ret, raw_ret_ty);
const uses_sret = needs_c_abi and !is_main and cstr_ret == .none and self.needsByval(func.ret, raw_ret_ty);
const ret_ty = if (is_main) self.cached_i32
else if (cstr_ret != .none) self.cached_ptr
else if (uses_sret) self.cached_void
else if (needs_c_abi) self.abiCoerceParamTypeEx(func.ret, raw_ret_ty, func.is_extern)
else raw_ret_ty;
@@ -2244,6 +2250,69 @@ pub const LLVMEmitter = struct {
/// Coerce a call argument to match the expected parameter type.
/// Handles int width mismatches (trunc/ext), float width, and int↔float.
/// How a FOREIGN function's declared sx return maps onto a C `char *`:
/// `-> string` (.plain) and `-> ?string` (.optional) both receive one
/// pointer from C; everything else is `.none`. Keep `declareFunction`'s
/// signature building and `emitCall`'s result synthesis keyed on the
/// SAME classification or the ABI splits.
pub const CstrRet = enum { none, plain, optional };
pub fn cstrRetKind(self: *LLVMEmitter, func: *const Function) CstrRet {
if (!func.is_extern) return .none;
if (func.ret == .string) return .plain;
if (!func.ret.isBuiltin()) {
const info = self.ir_mod.types.get(func.ret);
if (info == .optional and info.optional.child == .string) return .optional;
}
return .none;
}
/// Build the sx-level value for a foreign call that returned a `char *`:
/// `{ptr, strlen(ptr)}` for `string` (NULL → `{null, 0}`), wrapped in
/// `{string, i1}` with `has = ptr != null` for `?string`. The strlen call
/// is branch-guarded — `select` would evaluate `strlen(NULL)`.
pub fn cstrReturnToSx(self: *LLVMEmitter, p: c.LLVMValueRef, optional: bool) c.LLVMValueRef {
const strlen_fn = c.LLVMGetNamedFunction(self.llvm_module, "strlen") orelse blk: {
var pt = [_]c.LLVMTypeRef{self.cached_ptr};
const ft = c.LLVMFunctionType(self.cached_i64, &pt, 1, 0);
break :blk c.LLVMAddFunction(self.llvm_module, "strlen", ft);
};
const strlen_ty = c.LLVMGlobalGetValueType(strlen_fn);
const cur_fn = c.LLVMGetBasicBlockParent(c.LLVMGetInsertBlock(self.builder));
const entry_bb = c.LLVMGetInsertBlock(self.builder);
const len_bb = c.LLVMAppendBasicBlockInContext(self.context, cur_fn, "cstr.len");
const join_bb = c.LLVMAppendBasicBlockInContext(self.context, cur_fn, "cstr.join");
const is_null = c.LLVMBuildICmp(self.builder, c.LLVMIntEQ, p, c.LLVMConstNull(self.cached_ptr), "cstr.isnull");
_ = c.LLVMBuildCondBr(self.builder, is_null, join_bb, len_bb);
c.LLVMPositionBuilderAtEnd(self.builder, len_bb);
var sargs = [_]c.LLVMValueRef{p};
const n = c.LLVMBuildCall2(self.builder, strlen_ty, strlen_fn, &sargs, 1, "cstr.n");
_ = c.LLVMBuildBr(self.builder, join_bb);
c.LLVMPositionBuilderAtEnd(self.builder, join_bb);
const len_phi = c.LLVMBuildPhi(self.builder, self.cached_i64, "cstr.lenphi");
var ivals = [_]c.LLVMValueRef{ c.LLVMConstInt(self.cached_i64, 0, 0), n };
var ibbs = [_]c.LLVMBasicBlockRef{ entry_bb, len_bb };
c.LLVMAddIncoming(len_phi, &ivals, &ibbs, 2);
const str_ty = self.getStringStructType();
var s = c.LLVMGetUndef(str_ty);
s = c.LLVMBuildInsertValue(self.builder, s, p, 0, "cstr.sp");
s = c.LLVMBuildInsertValue(self.builder, s, len_phi, 1, "cstr.sv");
if (!optional) return s;
var ofields = [_]c.LLVMTypeRef{ str_ty, self.cached_i1 };
const opt_ty = c.LLVMStructTypeInContext(self.context, &ofields, 2, 0);
const has = c.LLVMBuildNot(self.builder, is_null, "cstr.has");
var o = c.LLVMGetUndef(opt_ty);
o = c.LLVMBuildInsertValue(self.builder, o, s, 0, "cstr.ov");
o = c.LLVMBuildInsertValue(self.builder, o, has, 1, "cstr.opt");
return o;
}
pub fn coerceArg(self: *LLVMEmitter, val: c.LLVMValueRef, param_ty: c.LLVMTypeRef) c.LLVMValueRef {
const val_ty = c.LLVMTypeOf(val);
if (val_ty == param_ty) return val;

View File

@@ -1888,6 +1888,7 @@ pub const Lowering = struct {
pub const findTaggedVariant = lower_expr.findTaggedVariant;
pub const emitBadVariant = lower_expr.emitBadVariant;
pub const emitBadEnumVariant = lower_expr.emitBadEnumVariant;
pub const dedupeForeignSymbol = lower_decl.dedupeForeignSymbol;
pub const resolveVariantValue = lower_expr.resolveVariantValue;
pub const resolveVariantIndex = lower_expr.resolveVariantIndex;
pub const lowerArrayLiteral = lower_expr.lowerArrayLiteral;

View File

@@ -2020,6 +2020,37 @@ fn returnGenericLeaf(node: *const Node) ?[]const u8 {
}
/// Declare a function as an extern stub (signature only, no body).
/// The same C SYMBOL declared more than once (two modules binding the same
/// libc function, or a rename colliding with an existing binding): an EQUAL
/// signature shares the first registration; a CONFLICTING one is diagnosed —
/// silently letting the first registration win mis-types every call through
/// the later declaration (a `-> string` view of a symbol registered `-> *u8`
/// reads the wrong shape; issue 0128). True = handled (shared or diagnosed),
/// caller must not declare again.
pub fn dedupeForeignSymbol(self: *Lowering, fd: *const ast.FnDecl, sym_name: StringId, params: []const Function.Param, ret_ty: TypeId) bool {
for (self.module.functions.items, 0..) |*func, i| {
if (func.name != sym_name or !func.is_extern) continue;
var same = func.ret == ret_ty and func.params.len == params.len;
if (same) {
for (func.params, params) |a, b| {
if (a.ty != b.ty) {
same = false;
break;
}
}
}
if (same) {
self.fn_decl_fids.put(fd, FuncId.fromIndex(@intCast(i))) catch {};
return true;
}
if (self.diagnostics) |d| {
d.addFmt(.err, fd.body.span, "foreign symbol '{s}' is already bound with a different signature; two views of one C symbol must declare identical types", .{self.module.types.getString(sym_name)});
}
return true;
}
return false;
}
pub fn declareFunction(self: *Lowering, fd: *const ast.FnDecl, name: []const u8) void {
// Skip generic templates — they're monomorphized on demand, not declared as extern
if (fd.type_params.len > 0) return;
@@ -2083,6 +2114,10 @@ pub fn declareFunction(self: *Lowering, fd: *const ast.FnDecl, name: []const u8)
const fe = fd.body.data.foreign_expr;
if (fe.c_name) |c_name| {
const c_name_id = self.module.types.internString(c_name);
if (self.dedupeForeignSymbol(fd, c_name_id, params.items, ret_ty)) {
self.foreign_name_map.put(name, c_name) catch {};
return;
}
const fid = self.builder.declareExtern(c_name_id, params.items, ret_ty);
const func = self.module.getFunctionMut(fid);
func.call_conv = cc;
@@ -2096,6 +2131,7 @@ pub fn declareFunction(self: *Lowering, fd: *const ast.FnDecl, name: []const u8)
}
const name_id = self.module.types.internString(name);
if (is_foreign and self.dedupeForeignSymbol(fd, name_id, params.items, ret_ty)) return;
const fid = self.builder.declareExtern(name_id, params.items, ret_ty);
const func = self.module.getFunctionMut(fid);
func.call_conv = cc;