fix(0128): foreign cstring returns + conflicting same-symbol bindings
Two genuine defects behind the 0128 filing (whose original repros were
both poisoned by binding getenv, which std already declares -> *u8):
1. Re-declaring a C symbol was silent first-wins: every call through
the later declaration was typed by the older signature. Foreign
registration now dedupes — equal signatures share one FuncId,
conflicting ones are diagnosed.
2. Foreign -> string / -> ?string returns read garbage: C returns one
char*, but the LLVM signature declared the fat {ptr,i64} (len =
register garbage), and ?string was mis-declared SRET (the hidden
out-pointer landed in the callee's first arg register). cstrRetKind
now classifies such returns, declares them as plain ptr (never
sret), and the call site synthesizes {ptr, strlen} via a
branch-guarded strlen (NULL -> {null,0} / optional null), wrapping
{string, i1} for ?string.
?[:0]u8 itself resolves fine (it is ?string); the spelling works in
return, param, local, and alias positions.
Regression: examples/1221 (plain + optional non-null + NULL paths) and
examples/1172 (conflict diagnostic); both FAIL pre-fix. The extern
dedupe collapses duplicate libc decls, so affected .ir snapshots were
regenerated. zig build test 426/426; run_examples 602/602;
distribution suite 21/21.
This commit is contained in:
@@ -804,7 +804,10 @@ pub const Ops = struct {
|
||||
};
|
||||
const callee_needs_c_abi = callee_func.is_extern or callee_func.call_conv == .c;
|
||||
const callee_raw_ret = self.e.toLLVMType(callee_func.ret);
|
||||
const callee_uses_sret = callee_needs_c_abi and self.e.needsByval(callee_func.ret, callee_raw_ret);
|
||||
// Foreign string/?string returns receive one `char *` — never sret
|
||||
// (must mirror declareFunction's signature classification).
|
||||
const cstr_ret = self.e.cstrRetKind(callee_func);
|
||||
const callee_uses_sret = callee_needs_c_abi and cstr_ret == .none and self.e.needsByval(callee_func.ret, callee_raw_ret);
|
||||
|
||||
// When the callee uses sret, prepend an alloca for the result.
|
||||
// Index alignment: actual_args[0] = sret_slot; actual_args[i+1] = sx arg i.
|
||||
@@ -859,6 +862,10 @@ pub const Ops = struct {
|
||||
c.LLVMAddCallSiteAttribute(result, param1_idx, sret_attr);
|
||||
// Load the actual struct value the callee wrote into the slot.
|
||||
result = c.LLVMBuildLoad2(self.e.builder, callee_raw_ret, sret_slot, "sret.load");
|
||||
} else if (!call_is_void_like and cstr_ret != .none) {
|
||||
// The C side returned `char *`; build the fat sx string (and the
|
||||
// optional wrapper) from it.
|
||||
result = self.e.cstrReturnToSx(result, cstr_ret == .optional);
|
||||
} else if (!call_is_void_like and callee_func.is_extern) {
|
||||
// Coerce ABI return value (e.g. i64 / [2 x i64]) back to IR struct type if needed
|
||||
const expected_ty = self.e.toLLVMType(instruction.ty);
|
||||
|
||||
@@ -1242,13 +1242,19 @@ pub const LLVMEmitter = struct {
|
||||
// main always returns i32 at the LLVM level (JIT expects it)
|
||||
const raw_ret_ty = self.toLLVMType(func.ret);
|
||||
const needs_c_abi = func.is_extern or func.call_conv == .c;
|
||||
// A foreign `-> string` / `-> ?string` receives ONE `char *` from C;
|
||||
// the fat sx value is synthesized at the call site (emitCall's
|
||||
// cstrReturnToSx). Never sret — the C callee knows nothing about an
|
||||
// out-pointer.
|
||||
const cstr_ret = self.cstrRetKind(func);
|
||||
// sret return: C-ABI functions returning a >16 B non-HFA struct
|
||||
// use the indirect-return convention (caller allocates space,
|
||||
// passes its pointer as a hidden first arg with `sret(<T>)`,
|
||||
// function writes through and returns void). Distinct from
|
||||
// small-struct register coercion (i64 / [2 x i64]) and HFA.
|
||||
const uses_sret = needs_c_abi and !is_main and self.needsByval(func.ret, raw_ret_ty);
|
||||
const uses_sret = needs_c_abi and !is_main and cstr_ret == .none and self.needsByval(func.ret, raw_ret_ty);
|
||||
const ret_ty = if (is_main) self.cached_i32
|
||||
else if (cstr_ret != .none) self.cached_ptr
|
||||
else if (uses_sret) self.cached_void
|
||||
else if (needs_c_abi) self.abiCoerceParamTypeEx(func.ret, raw_ret_ty, func.is_extern)
|
||||
else raw_ret_ty;
|
||||
@@ -2244,6 +2250,69 @@ pub const LLVMEmitter = struct {
|
||||
|
||||
/// Coerce a call argument to match the expected parameter type.
|
||||
/// Handles int width mismatches (trunc/ext), float width, and int↔float.
|
||||
/// How a FOREIGN function's declared sx return maps onto a C `char *`:
|
||||
/// `-> string` (.plain) and `-> ?string` (.optional) both receive one
|
||||
/// pointer from C; everything else is `.none`. Keep `declareFunction`'s
|
||||
/// signature building and `emitCall`'s result synthesis keyed on the
|
||||
/// SAME classification or the ABI splits.
|
||||
pub const CstrRet = enum { none, plain, optional };
|
||||
|
||||
pub fn cstrRetKind(self: *LLVMEmitter, func: *const Function) CstrRet {
|
||||
if (!func.is_extern) return .none;
|
||||
if (func.ret == .string) return .plain;
|
||||
if (!func.ret.isBuiltin()) {
|
||||
const info = self.ir_mod.types.get(func.ret);
|
||||
if (info == .optional and info.optional.child == .string) return .optional;
|
||||
}
|
||||
return .none;
|
||||
}
|
||||
|
||||
/// Build the sx-level value for a foreign call that returned a `char *`:
|
||||
/// `{ptr, strlen(ptr)}` for `string` (NULL → `{null, 0}`), wrapped in
|
||||
/// `{string, i1}` with `has = ptr != null` for `?string`. The strlen call
|
||||
/// is branch-guarded — `select` would evaluate `strlen(NULL)`.
|
||||
pub fn cstrReturnToSx(self: *LLVMEmitter, p: c.LLVMValueRef, optional: bool) c.LLVMValueRef {
|
||||
const strlen_fn = c.LLVMGetNamedFunction(self.llvm_module, "strlen") orelse blk: {
|
||||
var pt = [_]c.LLVMTypeRef{self.cached_ptr};
|
||||
const ft = c.LLVMFunctionType(self.cached_i64, &pt, 1, 0);
|
||||
break :blk c.LLVMAddFunction(self.llvm_module, "strlen", ft);
|
||||
};
|
||||
const strlen_ty = c.LLVMGlobalGetValueType(strlen_fn);
|
||||
|
||||
const cur_fn = c.LLVMGetBasicBlockParent(c.LLVMGetInsertBlock(self.builder));
|
||||
const entry_bb = c.LLVMGetInsertBlock(self.builder);
|
||||
const len_bb = c.LLVMAppendBasicBlockInContext(self.context, cur_fn, "cstr.len");
|
||||
const join_bb = c.LLVMAppendBasicBlockInContext(self.context, cur_fn, "cstr.join");
|
||||
|
||||
const is_null = c.LLVMBuildICmp(self.builder, c.LLVMIntEQ, p, c.LLVMConstNull(self.cached_ptr), "cstr.isnull");
|
||||
_ = c.LLVMBuildCondBr(self.builder, is_null, join_bb, len_bb);
|
||||
|
||||
c.LLVMPositionBuilderAtEnd(self.builder, len_bb);
|
||||
var sargs = [_]c.LLVMValueRef{p};
|
||||
const n = c.LLVMBuildCall2(self.builder, strlen_ty, strlen_fn, &sargs, 1, "cstr.n");
|
||||
_ = c.LLVMBuildBr(self.builder, join_bb);
|
||||
|
||||
c.LLVMPositionBuilderAtEnd(self.builder, join_bb);
|
||||
const len_phi = c.LLVMBuildPhi(self.builder, self.cached_i64, "cstr.lenphi");
|
||||
var ivals = [_]c.LLVMValueRef{ c.LLVMConstInt(self.cached_i64, 0, 0), n };
|
||||
var ibbs = [_]c.LLVMBasicBlockRef{ entry_bb, len_bb };
|
||||
c.LLVMAddIncoming(len_phi, &ivals, &ibbs, 2);
|
||||
|
||||
const str_ty = self.getStringStructType();
|
||||
var s = c.LLVMGetUndef(str_ty);
|
||||
s = c.LLVMBuildInsertValue(self.builder, s, p, 0, "cstr.sp");
|
||||
s = c.LLVMBuildInsertValue(self.builder, s, len_phi, 1, "cstr.sv");
|
||||
if (!optional) return s;
|
||||
|
||||
var ofields = [_]c.LLVMTypeRef{ str_ty, self.cached_i1 };
|
||||
const opt_ty = c.LLVMStructTypeInContext(self.context, &ofields, 2, 0);
|
||||
const has = c.LLVMBuildNot(self.builder, is_null, "cstr.has");
|
||||
var o = c.LLVMGetUndef(opt_ty);
|
||||
o = c.LLVMBuildInsertValue(self.builder, o, s, 0, "cstr.ov");
|
||||
o = c.LLVMBuildInsertValue(self.builder, o, has, 1, "cstr.opt");
|
||||
return o;
|
||||
}
|
||||
|
||||
pub fn coerceArg(self: *LLVMEmitter, val: c.LLVMValueRef, param_ty: c.LLVMTypeRef) c.LLVMValueRef {
|
||||
const val_ty = c.LLVMTypeOf(val);
|
||||
if (val_ty == param_ty) return val;
|
||||
|
||||
@@ -1888,6 +1888,7 @@ pub const Lowering = struct {
|
||||
pub const findTaggedVariant = lower_expr.findTaggedVariant;
|
||||
pub const emitBadVariant = lower_expr.emitBadVariant;
|
||||
pub const emitBadEnumVariant = lower_expr.emitBadEnumVariant;
|
||||
pub const dedupeForeignSymbol = lower_decl.dedupeForeignSymbol;
|
||||
pub const resolveVariantValue = lower_expr.resolveVariantValue;
|
||||
pub const resolveVariantIndex = lower_expr.resolveVariantIndex;
|
||||
pub const lowerArrayLiteral = lower_expr.lowerArrayLiteral;
|
||||
|
||||
@@ -2020,6 +2020,37 @@ fn returnGenericLeaf(node: *const Node) ?[]const u8 {
|
||||
}
|
||||
|
||||
/// Declare a function as an extern stub (signature only, no body).
|
||||
/// The same C SYMBOL declared more than once (two modules binding the same
|
||||
/// libc function, or a rename colliding with an existing binding): an EQUAL
|
||||
/// signature shares the first registration; a CONFLICTING one is diagnosed —
|
||||
/// silently letting the first registration win mis-types every call through
|
||||
/// the later declaration (a `-> string` view of a symbol registered `-> *u8`
|
||||
/// reads the wrong shape; issue 0128). True = handled (shared or diagnosed),
|
||||
/// caller must not declare again.
|
||||
pub fn dedupeForeignSymbol(self: *Lowering, fd: *const ast.FnDecl, sym_name: StringId, params: []const Function.Param, ret_ty: TypeId) bool {
|
||||
for (self.module.functions.items, 0..) |*func, i| {
|
||||
if (func.name != sym_name or !func.is_extern) continue;
|
||||
var same = func.ret == ret_ty and func.params.len == params.len;
|
||||
if (same) {
|
||||
for (func.params, params) |a, b| {
|
||||
if (a.ty != b.ty) {
|
||||
same = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (same) {
|
||||
self.fn_decl_fids.put(fd, FuncId.fromIndex(@intCast(i))) catch {};
|
||||
return true;
|
||||
}
|
||||
if (self.diagnostics) |d| {
|
||||
d.addFmt(.err, fd.body.span, "foreign symbol '{s}' is already bound with a different signature; two views of one C symbol must declare identical types", .{self.module.types.getString(sym_name)});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
pub fn declareFunction(self: *Lowering, fd: *const ast.FnDecl, name: []const u8) void {
|
||||
// Skip generic templates — they're monomorphized on demand, not declared as extern
|
||||
if (fd.type_params.len > 0) return;
|
||||
@@ -2083,6 +2114,10 @@ pub fn declareFunction(self: *Lowering, fd: *const ast.FnDecl, name: []const u8)
|
||||
const fe = fd.body.data.foreign_expr;
|
||||
if (fe.c_name) |c_name| {
|
||||
const c_name_id = self.module.types.internString(c_name);
|
||||
if (self.dedupeForeignSymbol(fd, c_name_id, params.items, ret_ty)) {
|
||||
self.foreign_name_map.put(name, c_name) catch {};
|
||||
return;
|
||||
}
|
||||
const fid = self.builder.declareExtern(c_name_id, params.items, ret_ty);
|
||||
const func = self.module.getFunctionMut(fid);
|
||||
func.call_conv = cc;
|
||||
@@ -2096,6 +2131,7 @@ pub fn declareFunction(self: *Lowering, fd: *const ast.FnDecl, name: []const u8)
|
||||
}
|
||||
|
||||
const name_id = self.module.types.internString(name);
|
||||
if (is_foreign and self.dedupeForeignSymbol(fd, name_id, params.items, ret_ty)) return;
|
||||
const fid = self.builder.declareExtern(name_id, params.items, ret_ty);
|
||||
const func = self.module.getFunctionMut(fid);
|
||||
func.call_conv = cc;
|
||||
|
||||
Reference in New Issue
Block a user