fix(0128): foreign cstring returns + conflicting same-symbol bindings

Two genuine defects behind the 0128 filing (whose original repros were
both poisoned by binding getenv, which std already declares -> *u8):

1. Re-declaring a C symbol silently resolved to the first declaration:
   every call through the later declaration was typed by the older
   signature. Foreign registration now dedupes — equal signatures
   share one FuncId, conflicting ones are diagnosed.

2. Foreign -> string / -> ?string returns read garbage: C returns one
   char*, but the LLVM signature declared the fat {ptr,i64} (len =
   register garbage), and ?string was mis-declared SRET (the hidden
   out-pointer landed in the callee's first arg register). cstrRetKind
   now classifies such returns, declares them as plain ptr (never
   sret), and the call site synthesizes {ptr, strlen} via a
   branch-guarded strlen (NULL -> {null,0} / optional null), wrapping
   {string, i1} for ?string.

?[:0]u8 itself resolves fine (it is ?string); the spelling works in
return, param, local, and alias positions.

Regression: examples/1221 (plain + optional non-null + NULL paths) and
examples/1172 (conflict diagnostic); both FAIL pre-fix. The extern
dedupe collapses duplicate libc decls, so affected .ir snapshots were
regenerated. zig build test 426/426; run_examples 602/602;
distribution suite 21/21.
This commit is contained in:
agra
2026-06-12 14:13:01 +03:00
parent a8fbded567
commit d88bdd7242
50 changed files with 24903 additions and 28907 deletions

View File

@@ -1242,13 +1242,19 @@ pub const LLVMEmitter = struct {
// main always returns i32 at the LLVM level (JIT expects it)
const raw_ret_ty = self.toLLVMType(func.ret);
const needs_c_abi = func.is_extern or func.call_conv == .c;
// A foreign `-> string` / `-> ?string` receives ONE `char *` from C;
// the fat sx value is synthesized at the call site (emitCall's
// cstrReturnToSx). Never sret — the C callee knows nothing about an
// out-pointer.
const cstr_ret = self.cstrRetKind(func);
// sret return: C-ABI functions returning a >16 B non-HFA struct
// use the indirect-return convention (caller allocates space,
// passes its pointer as a hidden first arg with `sret(<T>)`,
// function writes through and returns void). Distinct from
// small-struct register coercion (i64 / [2 x i64]) and HFA.
const uses_sret = needs_c_abi and !is_main and self.needsByval(func.ret, raw_ret_ty);
const uses_sret = needs_c_abi and !is_main and cstr_ret == .none and self.needsByval(func.ret, raw_ret_ty);
const ret_ty = if (is_main) self.cached_i32
else if (cstr_ret != .none) self.cached_ptr
else if (uses_sret) self.cached_void
else if (needs_c_abi) self.abiCoerceParamTypeEx(func.ret, raw_ret_ty, func.is_extern)
else raw_ret_ty;
@@ -2244,6 +2250,69 @@ pub const LLVMEmitter = struct {
// NOTE(review): the two lines below appear to be the doc comment of
// `coerceArg` (declared further down); the declarations inserted here
// separated them from it. They are kept as plain comments so they no longer
// become part of `CstrRet`'s documentation, and should move back next to
// `coerceArg`.
//   Coerce a call argument to match the expected parameter type.
//   Handles int width mismatches (trunc/ext), float width, and int↔float.

/// How a FOREIGN function's declared sx return maps onto a C `char *`:
/// `-> string` (.plain) and `-> ?string` (.optional) both receive one
/// pointer from C; everything else is `.none`. Keep `declareFunction`'s
/// signature building and `emitCall`'s result synthesis keyed on the
/// SAME classification or the ABI splits.
pub const CstrRet = enum {
    /// Not a C-string return: a non-foreign function, or any return type
    /// other than `string` / `?string`.
    none,
    /// Foreign `-> string`: C hands back one `char *`.
    plain,
    /// Foreign `-> ?string`: C hands back one `char *`, NULL meaning "no value".
    optional,
};
/// Classify `func`'s declared return as a C-string return.
///
/// Returns `.plain` for an extern function returning `string`, `.optional`
/// for an extern function whose return type is an optional with a `string`
/// child, and `.none` for everything else (including every non-extern
/// function).
pub fn cstrRetKind(self: *LLVMEmitter, func: *const Function) CstrRet {
    // Only foreign declarations are classified; everything else is `.none`.
    if (!func.is_extern) return .none;

    const ret = func.ret;
    if (ret == .string) return .plain;

    // Builtin type ids are never looked up in the module's type table here.
    if (ret.isBuiltin()) return .none;

    const ret_info = self.ir_mod.types.get(ret);
    const is_optional_string = ret_info == .optional and ret_info.optional.child == .string;
    return if (is_optional_string) .optional else .none;
}
/// Build the sx-level value for a foreign call that returned a `char *`.
///
/// `p` is the raw pointer produced by the C callee. The result is
/// `{p, strlen(p)}` for `string` (NULL → `{null, 0}`); when `optional` is
/// true that string is wrapped in `{string, i1}` with `has = p != null`.
///
/// Appends two basic blocks (`cstr.len`, `cstr.join`) to the function that
/// owns the builder's current insertion block, terminates the current block
/// with a conditional branch, and leaves the builder positioned at the end
/// of `cstr.join`. The strlen call is branch-guarded — `select` would
/// evaluate `strlen(NULL)`.
pub fn cstrReturnToSx(self: *LLVMEmitter, p: c.LLVMValueRef, optional: bool) c.LLVMValueRef {
    // Reuse a `strlen` already present in the module; otherwise declare
    // `i64 strlen(ptr)`.
    const strlen_fn = c.LLVMGetNamedFunction(self.llvm_module, "strlen") orelse blk: {
        var param_tys = [_]c.LLVMTypeRef{self.cached_ptr};
        const fn_ty = c.LLVMFunctionType(self.cached_i64, &param_tys, 1, 0);
        break :blk c.LLVMAddFunction(self.llvm_module, "strlen", fn_ty);
    };
    const strlen_ty = c.LLVMGlobalGetValueType(strlen_fn);

    // The block we are in when called (not necessarily the function's entry
    // block): it receives the null test and is the NULL-path predecessor of
    // the join block.
    const origin_bb = c.LLVMGetInsertBlock(self.builder);
    const cur_fn = c.LLVMGetBasicBlockParent(origin_bb);
    const len_bb = c.LLVMAppendBasicBlockInContext(self.context, cur_fn, "cstr.len");
    const join_bb = c.LLVMAppendBasicBlockInContext(self.context, cur_fn, "cstr.join");

    // NULL skips straight to the join; anything else goes through strlen.
    const is_null = c.LLVMBuildICmp(self.builder, c.LLVMIntEQ, p, c.LLVMConstNull(self.cached_ptr), "cstr.isnull");
    _ = c.LLVMBuildCondBr(self.builder, is_null, join_bb, len_bb);

    // Non-NULL path: measure the C string.
    c.LLVMPositionBuilderAtEnd(self.builder, len_bb);
    var strlen_args = [_]c.LLVMValueRef{p};
    const raw_len = c.LLVMBuildCall2(self.builder, strlen_ty, strlen_fn, &strlen_args, 1, "cstr.n");
    // A `strlen` that was already declared in the module may return an
    // integer of another width; the phi below is i64, so normalize the
    // result (unsigned — it is a length) to keep both incoming values the
    // same type.
    const raw_len_ty = c.LLVMTypeOf(raw_len);
    const needs_cast = raw_len_ty != self.cached_i64 and
        c.LLVMGetTypeKind(raw_len_ty) == c.LLVMIntegerTypeKind;
    const measured_len = if (needs_cast)
        c.LLVMBuildIntCast2(self.builder, raw_len, self.cached_i64, 0, "cstr.n64")
    else
        raw_len;
    _ = c.LLVMBuildBr(self.builder, join_bb);

    // Join: length is 0 when we arrived from the NULL test, strlen otherwise.
    c.LLVMPositionBuilderAtEnd(self.builder, join_bb);
    const len_phi = c.LLVMBuildPhi(self.builder, self.cached_i64, "cstr.lenphi");
    var incoming_vals = [_]c.LLVMValueRef{ c.LLVMConstInt(self.cached_i64, 0, 0), measured_len };
    var incoming_bbs = [_]c.LLVMBasicBlockRef{ origin_bb, len_bb };
    c.LLVMAddIncoming(len_phi, &incoming_vals, &incoming_bbs, 2);

    // Assemble the fat string `{ptr, len}`.
    const str_ty = self.getStringStructType();
    var str_val = c.LLVMGetUndef(str_ty);
    str_val = c.LLVMBuildInsertValue(self.builder, str_val, p, 0, "cstr.sp");
    str_val = c.LLVMBuildInsertValue(self.builder, str_val, len_phi, 1, "cstr.sv");
    if (!optional) return str_val;

    // `?string`: wrap as `{string, i1}`, the flag being "pointer was non-null".
    var opt_fields = [_]c.LLVMTypeRef{ str_ty, self.cached_i1 };
    const opt_ty = c.LLVMStructTypeInContext(self.context, &opt_fields, 2, 0);
    const has = c.LLVMBuildNot(self.builder, is_null, "cstr.has");
    var opt_val = c.LLVMGetUndef(opt_ty);
    opt_val = c.LLVMBuildInsertValue(self.builder, opt_val, str_val, 0, "cstr.ov");
    opt_val = c.LLVMBuildInsertValue(self.builder, opt_val, has, 1, "cstr.opt");
    return opt_val;
}
pub fn coerceArg(self: *LLVMEmitter, val: c.LLVMValueRef, param_ty: c.LLVMTypeRef) c.LLVMValueRef {
const val_ty = c.LLVMTypeOf(val);
if (val_ty == param_ty) return val;