fix(ir): halt cleanly when a global initializer can't be serialized

The global-init constant serializers in emit_llvm.zig printed a diagnostic
on an unserializable value and then RETURNED an undef/null placeholder and
CONTINUED emitting. For a comptime `#run` global that yields a function
reference (`fp :: #run pick();` where pick returns a function), the build
fell through to the JIT and segfaulted calling through the undef pointer
(exit 134) — a silent miscompile dressed up as a printed error.

Make every genuine bail in the serialization family set `comptime_failed`
(so core.generateCode aborts with a non-zero exit after emit()). Bails that
must hand back a constant go through a new `failGlobalInit` helper, which
sets the flag and returns an undef placeholder that never ships, because the
halt fires before object emission / JIT: this covers the comptime func_ref
leaf, the require_resolved aggregate func_ref leaf, and the remaining
heap-walk / aggregate-shape bails. The top-level + vtable func_ref globals
and the comptime-init catch set the flag directly at their bail sites (the
vtable path keeps a null slot so the struct shape stays intact).
Unresolved-function diagnostics now name the function instead of its
(stdlib-unstable) IR index.

The require_resolved=false Pass-0 placeholder is unchanged (func_map is
empty until Pass 1; the aggregate is re-emitted with require_resolved=true).

Regression: examples/1128-diagnostics-comptime-global-funcref-rejected.sx —
a `#run` global returning a function ref now exits 1 with the diagnostic
(was: exit 134 segfault). Fail-before/pass-after verified.
This commit is contained in:
agra
2026-06-04 05:25:19 +03:00
parent 263333bd26
commit d87bad2ec4
5 changed files with 78 additions and 19 deletions

View File

@@ -0,0 +1,26 @@
// A comptime `#run` global initializer that yields a function reference cannot
// be serialized to a static constant: at global-init time (Pass 0) functions
// are not yet declared, and the comptime serialization path has no later
// re-emit, so the func_ref can never resolve to a real function pointer. The
// compiler must reject this with a diagnostic AND a CLEAN non-zero exit — never
// print the error and then fall through into an undef initializer that crashes
// (pre-fix: the diagnostic printed, emission continued, and the JIT segfaulted
// calling through the undef pointer → exit 134).
// Regression (issue 0079 follow-up): every global-init serialization bail now
// sets the halt flag (this case via `failGlobalInit`), so the driver aborts
// after emit() instead of shipping the placeholder.
// Expected: "comptime init of 'fp' produced a reference to function 'add'…";
// exit 1, no segfault.
#import "modules/std.sx";
// Plain two-argument function; it is the function whose reference ends up in
// the global initializer below.
add :: (a: s32, b: s32) -> s32 { a + b }
// Returns `add` itself as a function value instead of calling it.
pick :: () -> (s32, s32) -> s32 { return add; }
// `#run pick()` makes this global's initializer a function reference — the
// case the compiler is expected to reject with the diagnostic quoted above.
fp :: #run pick();
// Calls through `fp`; with the fix the build should stop with exit 1 before
// this call can ever execute.
main :: () -> s32 {
print("{}\n", fp(3, 4));
return 0;
}

View File

@@ -0,0 +1 @@
error: comptime init of 'fp' produced a reference to function 'add', which cannot be serialized as a static constant (function declarations are not available at global-init time)

View File

@@ -106,8 +106,11 @@ pub const LLVMEmitter = struct {
// IR Module being emitted // IR Module being emitted
ir_mod: *const Module, ir_mod: *const Module,
// Set when a comptime `#run` raised an unhandled error (E5.2). The driver // Set when a comptime `#run` raised an unhandled error (E5.2), or when a
// (core.generateCode) aborts with a non-zero exit after emit() when set. // global initializer could not be serialized to a valid static constant.
// The driver (core.generateCode) aborts with a non-zero exit after emit()
// when set, so an invalid/placeholder initializer never reaches the object
// file or the JIT — the emit-time diagnostic is the surfaced error.
comptime_failed: bool = false, comptime_failed: bool = false,
// Allocator for temporary bookkeeping // Allocator for temporary bookkeeping
@@ -875,6 +878,7 @@ pub const LLVMEmitter = struct {
const sep: []const u8 = if (detail.len > 0) ": " else ""; const sep: []const u8 = if (detail.len > 0) ": " else "";
const gname = self.ir_mod.types.getString(global.name); const gname = self.ir_mod.types.getString(global.name);
std.debug.print("error: comptime init of '{s}' failed: {s} (op={s}{s}{s})\n", .{ gname, @errorName(err), op, sep, detail }); std.debug.print("error: comptime init of '{s}' failed: {s} (op={s}{s}{s})\n", .{ gname, @errorName(err), op, sep, detail });
self.comptime_failed = true;
break :blk .void_val; break :blk .void_val;
}; };
// A bare failable `NAME :: #run f();`: the comptime function // A bare failable `NAME :: #run f();`: the comptime function
@@ -936,7 +940,14 @@ pub const LLVMEmitter = struct {
defer field_vals.deinit(self.alloc); defer field_vals.deinit(self.alloc);
for (func_ids) |fid| { for (func_ids) |fid| {
const llvm_func = self.func_map.get(fid.index()) orelse { const llvm_func = self.func_map.get(fid.index()) orelse {
std.debug.print(
"error: vtable global '{s}' references function '{s}' which has no declaration\n",
.{ self.ir_mod.types.getString(global.name), self.ir_mod.types.getString(self.ir_mod.getFunction(fid).name) },
);
// Keep the struct shape so module construction can
// finish; comptime_failed halts before it ships.
field_vals.append(self.alloc, c.LLVMConstNull(self.cached_ptr)) catch unreachable; field_vals.append(self.alloc, c.LLVMConstNull(self.cached_ptr)) catch unreachable;
self.comptime_failed = true;
continue; continue;
}; };
field_vals.append(self.alloc, llvm_func) catch unreachable; field_vals.append(self.alloc, llvm_func) catch unreachable;
@@ -957,9 +968,10 @@ pub const LLVMEmitter = struct {
.func_ref => |fid| { .func_ref => |fid| {
const llvm_func = self.func_map.get(fid.index()) orelse { const llvm_func = self.func_map.get(fid.index()) orelse {
std.debug.print( std.debug.print(
"error: global '{s}' references function #{d} which has no declaration\n", "error: global '{s}' references function '{s}' which has no declaration\n",
.{ self.ir_mod.types.getString(global.name), fid.index() }, .{ self.ir_mod.types.getString(global.name), self.ir_mod.types.getString(self.ir_mod.getFunction(fid).name) },
); );
self.comptime_failed = true;
continue; continue;
}; };
c.LLVMSetInitializer(llvm_global, llvm_func); c.LLVMSetInitializer(llvm_global, llvm_func);
@@ -981,6 +993,17 @@ pub const LLVMEmitter = struct {
return ptr[0..len]; return ptr[0..len];
} }
/// Shared bail-out for global-initializer serialization failures.
///
/// Marks the emitter as failed by raising `comptime_failed` and hands back an
/// `undef` constant of type `llvm_ty`, so the caller still has a well-typed
/// value to plug into the in-process LLVM module while construction finishes.
/// Per the `comptime_failed` field contract, the driver is expected to abort
/// with a non-zero exit after `emit()`, so this placeholder is not meant to
/// reach object emission or the JIT.
fn failGlobalInit(self: *LLVMEmitter, llvm_ty: c.LLVMTypeRef) c.LLVMValueRef {
    // Build the stand-in first; it does not depend on emitter state.
    const placeholder = c.LLVMGetUndef(llvm_ty);
    // Raise the halt flag that the driver checks once emission returns.
    self.comptime_failed = true;
    return placeholder;
}
/// Serialize an interp `Value` to an LLVM constant for use as a static /// Serialize an interp `Value` to an LLVM constant for use as a static
/// global initializer. `ty` is the IR-level type of the destination; /// global initializer. `ty` is the IR-level type of the destination;
/// the LLVM type is derived from it. `interp` gives access to the /// the LLVM type is derived from it. `interp` gives access to the
@@ -988,8 +1011,10 @@ pub const LLVMEmitter = struct {
/// is included in any diagnostic the path produces so the user can /// is included in any diagnostic the path produces so the user can
/// locate the offending `#run` site. /// locate the offending `#run` site.
/// ///
/// Returns `LLVMGetUndef` on bail — the build continues so adjacent /// On bail, prints the diagnostic and routes through `failGlobalInit`
/// constants can still emit, but the diagnostic makes the problem clear. /// (sets `comptime_failed`, returns `undef`): the in-process module
/// finishes constructing, but the driver halts with a non-zero exit
/// before object emission / JIT, so the placeholder never ships.
fn valueToLLVMConst( fn valueToLLVMConst(
self: *LLVMEmitter, self: *LLVMEmitter,
val: Value, val: Value,
@@ -1015,7 +1040,7 @@ pub const LLVMEmitter = struct {
"error: comptime init of '{s}' produced a raw integer for a pointer field — needs IR-typed heap-walk serialization (Phase 1.4a heap-walk follow-up)\n", "error: comptime init of '{s}' produced a raw integer for a pointer field — needs IR-typed heap-walk serialization (Phase 1.4a heap-walk follow-up)\n",
.{global_name}, .{global_name},
); );
break :blk c.LLVMGetUndef(llvm_ty); break :blk self.failGlobalInit(llvm_ty);
} }
break :blk c.LLVMConstInt(llvm_ty, @bitCast(v), 1); break :blk c.LLVMConstInt(llvm_ty, @bitCast(v), 1);
}, },
@@ -1029,10 +1054,10 @@ pub const LLVMEmitter = struct {
// bail loudly rather than ship a silently-null function pointer. // bail loudly rather than ship a silently-null function pointer.
.func_ref => |fid| blk: { .func_ref => |fid| blk: {
std.debug.print( std.debug.print(
"error: comptime init of '{s}' produced a reference to function #{d}, which cannot be serialized as a static constant (function declarations are not available at global-init time)\n", "error: comptime init of '{s}' produced a reference to function '{s}', which cannot be serialized as a static constant (function declarations are not available at global-init time)\n",
.{ global_name, fid.index() }, .{ global_name, self.ir_mod.types.getString(self.ir_mod.getFunction(fid).name) },
); );
break :blk c.LLVMGetUndef(llvm_ty); break :blk self.failGlobalInit(llvm_ty);
}, },
.string => |s| self.emitConstStringGlobal(s), .string => |s| self.emitConstStringGlobal(s),
.aggregate => |fields| self.serializeAggregateValue(fields, ty, interp, global_name), .aggregate => |fields| self.serializeAggregateValue(fields, ty, interp, global_name),
@@ -1046,7 +1071,7 @@ pub const LLVMEmitter = struct {
"error: comptime init of '{s}' produced a {s} value, which cannot be serialized as a static constant\n", "error: comptime init of '{s}' produced a {s} value, which cannot be serialized as a static constant\n",
.{ global_name, @tagName(val) }, .{ global_name, @tagName(val) },
); );
break :blk c.LLVMGetUndef(llvm_ty); break :blk self.failGlobalInit(llvm_ty);
}, },
}; };
} }
@@ -1083,7 +1108,7 @@ pub const LLVMEmitter = struct {
"error: comptime init of '{s}' produced a fat-pointer aggregate whose len field is not an integer\n", "error: comptime init of '{s}' produced a fat-pointer aggregate whose len field is not an integer\n",
.{global_name}, .{global_name},
); );
return c.LLVMGetUndef(llvm_ty); return self.failGlobalInit(llvm_ty);
}; };
const len: usize = @intCast(len_i); const len: usize = @intCast(len_i);
@@ -1107,7 +1132,7 @@ pub const LLVMEmitter = struct {
"error: comptime init of '{s}' produced a fat-pointer aggregate whose data field ({s}) cannot be resolved to {} bytes — needs Phase 1.4a heap-walk for this shape\n", "error: comptime init of '{s}' produced a fat-pointer aggregate whose data field ({s}) cannot be resolved to {} bytes — needs Phase 1.4a heap-walk for this shape\n",
.{ global_name, @tagName(data), len }, .{ global_name, @tagName(data), len },
); );
return c.LLVMGetUndef(llvm_ty); return self.failGlobalInit(llvm_ty);
}; };
return self.emitConstStringGlobal(bytes); return self.emitConstStringGlobal(bytes);
@@ -1123,7 +1148,7 @@ pub const LLVMEmitter = struct {
"error: comptime init of '{s}' produced aggregate with {} fields but struct '{s}' expects {}\n", "error: comptime init of '{s}' produced aggregate with {} fields but struct '{s}' expects {}\n",
.{ global_name, fields.len, self.ir_mod.types.getString(info.@"struct".name), ir_fields.len }, .{ global_name, fields.len, self.ir_mod.types.getString(info.@"struct".name), ir_fields.len },
); );
return c.LLVMGetUndef(llvm_ty); return self.failGlobalInit(llvm_ty);
} }
var field_vals = std.ArrayList(c.LLVMValueRef).empty; var field_vals = std.ArrayList(c.LLVMValueRef).empty;
defer field_vals.deinit(self.alloc); defer field_vals.deinit(self.alloc);
@@ -1148,7 +1173,7 @@ pub const LLVMEmitter = struct {
"error: comptime init of '{s}' produced an aggregate but the destination type ({s}) is neither struct, array, string, nor slice\n", "error: comptime init of '{s}' produced an aggregate but the destination type ({s}) is neither struct, array, string, nor slice\n",
.{ global_name, self.ir_mod.types.typeName(ty) }, .{ global_name, self.ir_mod.types.typeName(ty) },
); );
return c.LLVMGetUndef(llvm_ty); return self.failGlobalInit(llvm_ty);
} }
// ── Function declaration ──────────────────────────────────────── // ── Function declaration ────────────────────────────────────────
@@ -2468,10 +2493,15 @@ pub const LLVMEmitter = struct {
.string => |sid| self.emitConstStringGlobal(self.ir_mod.types.getString(sid)), .string => |sid| self.emitConstStringGlobal(self.ir_mod.types.getString(sid)),
.aggregate => |inner| self.emitConstAggregate(inner, elem_ty, require_resolved), .aggregate => |inner| self.emitConstAggregate(inner, elem_ty, require_resolved),
.func_ref => |fid| self.func_map.get(fid.index()) orelse blk: { .func_ref => |fid| self.func_map.get(fid.index()) orelse blk: {
if (require_resolved) std.debug.print( if (require_resolved) {
"error: static initializer references function #{d} which has no declaration\n", std.debug.print(
.{fid.index()}, "error: static initializer references function '{s}' which has no declaration\n",
); .{self.ir_mod.types.getString(self.ir_mod.getFunction(fid).name)},
);
break :blk self.failGlobalInit(elem_ty);
}
// Pass 0 placeholder: func_map is empty until Pass 1, so the
// whole aggregate is re-emitted with require_resolved=true.
break :blk c.LLVMConstNull(elem_ty); break :blk c.LLVMConstNull(elem_ty);
}, },
// A null pointer field and a zero-initialized field both emit as // A null pointer field and a zero-initialized field both emit as