ERR/E3.0 (slice 2): emit DWARF line-info

Attach LLVM debug metadata so a captured return-address PC resolves to
file:line:col (the runtime half E3.3 needs) and sx binaries become
debuggable in lldb/gdb.

- llvm_api.zig: bind llvm-c/DebugInfo.h (DIBuilder C API was unbound).
- emit_llvm.zig: DIBuilder + one DICompileUnit/DIFile on the main file,
  a DISubprogram per function (LLVMSetSubprogram), and a DILocation per
  instruction from Inst.span (errors.SourceLoc.compute, scoped to the
  subprogram). Plus the "Debug Info Version"/"Dwarf Version" module
  flags and LLVMDIBuilderFinalize.
- Gated on opt none/less + a wired source map (setDebugContext from
  core.zig), mirroring lower.zig's tracesEnabled; release strips it.

Verified: sx ir/sx asm --opt none show correct DILocations + .loc
directives; the 290-example JIT suite (-O0 -> debug on) verifies and
runs unchanged. +2 DWARF unit tests.
This commit is contained in:
agra
2026-06-01 13:14:00 +03:00
parent b44a5d05ef
commit c32d694d57
4 changed files with 273 additions and 0 deletions

View File

@@ -146,6 +146,7 @@ pub const Compilation = struct {
const ir_mod_ptr = try self.allocator.create(ir.Module);
ir_mod_ptr.* = try self.lowerToIR();
var emitter = ir.LLVMEmitter.init(self.allocator, ir_mod_ptr, "sx_module", self.target_config);
emitter.setDebugContext(&self.import_sources, self.file_path);
emitter.emit();
// Keep the IR module alive past LLVM emission so post-link
// callbacks can re-enter the interpreter via `invokeByName`.

View File

@@ -951,3 +951,69 @@ test "emit: box_any and unbox_any" {
try std.testing.expect(std.mem.indexOf(u8, ir_str, "insertvalue") != null);
try std.testing.expect(std.mem.indexOf(u8, ir_str, "extractvalue") != null);
}
test "emit: ERR E3.0 — DWARF debug info (compile unit + subprogram + per-inst location)" {
    const gpa = std.testing.allocator;

    var module = Module.init(gpa);
    defer module.deinit();

    // IR under test: func main() -> s64 { return 42; }
    var b = Builder.init(&module);
    _ = b.beginFunction(str(&module, "main"), &.{}, .s64);
    const entry_block = b.appendBlock(str(&module, "entry"), &.{});
    b.switchToBlock(entry_block);

    // In the source text "a\nb\nXYZ", byte offset 4 is the 'X' that opens
    // line 3, so every instruction built from here on carries a span that
    // should resolve to line 3, col 1.
    b.current_span = .{ .start = 4, .end = 5 };
    const answer = b.constInt(42, .s64);
    b.ret(answer, .s64);
    b.finalize();

    // The source map is keyed on the main file. A wired debug context
    // together with opt level `none` switches DWARF emission on.
    const main_path = "probe.sx";
    const source_text: [:0]const u8 = "a\nb\nXYZ";
    var source_map = std.StringHashMap([:0]const u8).init(gpa);
    defer source_map.deinit();
    try source_map.put(main_path, source_text);

    var emitter = LLVMEmitter.init(gpa, &module, "test_dwarf", .{ .opt_level = .none });
    defer emitter.deinit();
    emitter.setDebugContext(&source_map, main_path);
    emitter.emit();
    try std.testing.expect(emitter.verify());

    // Everything the textual IR has to contain: both module flags, the
    // compile unit and its file, the subprogram describing `main`, and a
    // location on line 3 for the return instruction.
    const ir_text = emitter.dumpToString();
    const required = [_][]const u8{
        "\"Debug Info Version\"",
        "\"Dwarf Version\"",
        "DICompileUnit",
        "DIFile(filename: \"probe.sx\"",
        "DISubprogram(name: \"main\"",
        "DILocation(line: 3",
    };
    for (required) |needle| {
        try std.testing.expect(std.mem.indexOf(u8, ir_text, needle) != null);
    }
}
test "emit: ERR E3.0 — no DWARF without a debug context (unit-test default)" {
    const gpa = std.testing.allocator;

    var module = Module.init(gpa);
    defer module.deinit();

    // Same minimal IR as the positive test: func main() -> s64 { return 42; }
    var b = Builder.init(&module);
    _ = b.beginFunction(str(&module, "main"), &.{}, .s64);
    const entry_block = b.appendBlock(str(&module, "entry"), &.{});
    b.switchToBlock(entry_block);
    const answer = b.constInt(42, .s64);
    b.ret(answer, .s64);
    b.finalize();

    // setDebugContext is deliberately never called, so the emitter has no
    // source map and must leave debug metadata out even at opt level `none`.
    var emitter = LLVMEmitter.init(gpa, &module, "test_no_dwarf", .{ .opt_level = .none });
    defer emitter.deinit();
    emitter.emit();
    try std.testing.expect(emitter.verify());

    // Neither a compile unit nor any `!dbg` attachment may show up.
    const ir_text = emitter.dumpToString();
    const forbidden = [_][]const u8{ "DICompileUnit", "!dbg" };
    for (forbidden) |needle| {
        try std.testing.expect(std.mem.indexOf(u8, ir_text, needle) == null);
    }
}

View File

@@ -9,8 +9,10 @@ const TypeId = ir_types.TypeId;
const TypeInfo = ir_types.TypeInfo;
const TypeTable = ir_types.TypeTable;
const StringId = ir_types.StringId;
const errors = @import("../errors.zig");
const ir_inst = @import("inst.zig");
const Ref = ir_inst.Ref;
const Span = ir_inst.Span;
const BlockId = ir_inst.BlockId;
const FuncId = ir_inst.FuncId;
const GlobalId = ir_inst.GlobalId;
@@ -158,6 +160,31 @@ pub const LLVMEmitter = struct {
// Build configuration accumulated from #run blocks
build_config: interp_mod.BuildConfig,
// ── DWARF debug info (ERR E3.0) ──────────────────────────────────
// Emitted only when the build keeps error traces (opt_level
// none/less, matching lower.zig's `tracesEnabled`) AND a source map
// is wired in via `setDebugContext`. One `DICompileUnit` (on the
// main file) + a `DIFile` per source file + a `DISubprogram` per
// emitted function + a `DILocation` per instruction (resolved from
// `Inst.span`). Lets a captured return-address PC resolve to
// file:line:col for E3.3's runtime trace formatting, and makes sx
// binaries debuggable in lldb/gdb as a bonus.
di_builder: c.LLVMDIBuilderRef = null,
di_cu: c.LLVMMetadataRef = null,
di_files: std.StringHashMap(c.LLVMMetadataRef),
// The current function's DISubprogram — the scope for its
// DILocations. Null between functions (and in functions we don't
// describe, e.g. the synthetic Obj-C init constructors).
di_scope: c.LLVMMetadataRef = null,
// Source file of the function currently being emitted (span → line).
current_func_file: []const u8 = "",
// File path → source text (the diagnostics' `import_sources` map).
// Null in unit tests, so no debug info is emitted there.
import_sources: ?*const std.StringHashMap([:0]const u8) = null,
// Main file path — the compile unit's file and the span-resolution
// fallback for functions with no recorded source file.
main_file: []const u8 = "",
const PendingPhi = struct {
phi: c.LLVMValueRef,
block_id: BlockId, // the block this phi belongs to
@@ -238,6 +265,7 @@ pub const LLVMEmitter = struct {
.field_name_arrays = std.AutoHashMap(u32, c.LLVMValueRef).init(alloc),
.target_config = target_config,
.build_config = .{},
.di_files = std.StringHashMap(c.LLVMMetadataRef).init(alloc),
};
}
@@ -251,6 +279,8 @@ pub const LLVMEmitter = struct {
self.jni_slots.deinit();
self.global_map.deinit();
self.block_map.deinit();
self.di_files.deinit();
if (self.di_builder != null) c.LLVMDisposeDIBuilder(self.di_builder);
if (self.target_machine) |tm| c.LLVMDisposeTargetMachine(tm);
c.LLVMDisposeBuilder(self.builder);
c.LLVMDisposeModule(self.llvm_module);
@@ -260,6 +290,10 @@ pub const LLVMEmitter = struct {
// ── Top-level emit ──────────────────────────────────────────────
pub fn emit(self: *LLVMEmitter) void {
// Pass -1: Set up DWARF debug info (compile unit + module flags).
// Must precede any DISubprogram (created per function below).
self.initDebugInfo();
// Pass 0: Declare and initialize globals
self.emitGlobals();
@@ -304,6 +338,167 @@ pub const LLVMEmitter = struct {
// Pass 3: Verify typeSizeBytes matches LLVM's ABI sizes
self.verifySizes();
// Pass 4: Resolve DWARF temporary metadata. Must come after all
// DISubprograms / DILocations are created and before the module
// is verified or emitted.
self.finalizeDebugInfo();
}
// ── DWARF debug info (ERR E3.0) ──────────────────────────────────
/// Supply the file→source-text map and the main file path that debug-info
/// emission resolves instruction spans against. The driver calls this
/// after `init`; an emitter that never receives a source map emits no
/// debug info at all (see `debugEnabled`).
pub fn setDebugContext(self: *LLVMEmitter, import_sources: *const std.StringHashMap([:0]const u8), main_file: []const u8) void {
    self.main_file = main_file;
    self.import_sources = import_sources;
}
/// Whether DWARF should be emitted for this module. Two conditions must
/// both hold: a source map has been wired in, and the opt level is one of
/// the trace-keeping ones (`none` / `less`, the same gate as
/// `tracesEnabled` in lower.zig). Every other opt level skips debug info
/// entirely.
fn debugEnabled(self: *const LLVMEmitter) bool {
    if (self.import_sources == null) return false;
    return switch (self.target_config.opt_level) {
        .none, .less => true,
        else => false,
    };
}
/// Look up the source text for `file` in the diagnostics' file→source map
/// (the same map `#caller_location` uses). When `file` has no entry, the
/// main file's text is tried instead. Returns an empty string when no map
/// is wired or neither lookup hits, so line:col degrades to 1:1 rather
/// than crashing.
fn sourceForFile(self: *LLVMEmitter, file: []const u8) []const u8 {
    const sources = self.import_sources orelse return "";
    if (sources.get(file)) |text| return text;

    // Fallback: the main file's text, provided a main file was recorded.
    if (self.main_file.len == 0) return "";
    if (sources.get(self.main_file)) |text| return text;
    return "";
}
/// Return the `DIFile` describing `path`, creating it on first use and
/// caching it in `di_files` afterwards.
///
/// The path is split at its last '/' into the basename and directory that
/// `LLVMDIBuilderCreateFile` takes separately. A path without any '/' gets
/// an empty directory; a file sitting directly under the root
/// ("/main.sx") keeps "/" as its directory so the entry stays absolute.
///
/// `path` is stored as the cache key without being copied, so it must
/// outlive the emitter's `di_files` map.
fn diFileFor(self: *LLVMEmitter, path: []const u8) c.LLVMMetadataRef {
    if (self.di_files.get(path)) |cached| return cached;

    const slash = std.mem.lastIndexOfScalar(u8, path, '/');
    const base = if (slash) |s| path[s + 1 ..] else path;
    // `@max(s, 1)` keeps the leading '/' when it is the only separator;
    // slicing `path[0..0]` there would produce an empty directory and turn
    // an absolute path into a relative one.
    const dir: []const u8 = if (slash) |s| path[0..@max(s, 1)] else "";

    const file_md = c.LLVMDIBuilderCreateFile(self.di_builder, base.ptr, base.len, dir.ptr, dir.len);
    // Best-effort cache: a failed insert only costs a repeat
    // LLVMDIBuilderCreateFile call on the next lookup of this path.
    self.di_files.put(path, file_md) catch {};
    return file_md;
}
/// Set up module-level debug info: create the DIBuilder, add the
/// "Debug Info Version" and "Dwarf Version" module flags, and create the
/// single compile unit on the main file. Does nothing when debug info is
/// disabled (see `debugEnabled`).
fn initDebugInfo(self: *LLVMEmitter) void {
    if (!self.debugEnabled()) return;

    self.di_builder = c.LLVMCreateDIBuilder(self.llvm_module);

    // Both flags are i32 constants registered with "warning" merge behavior.
    const module_flags = [_]struct { name: []const u8, value: c_ulonglong }{
        .{ .name = "Debug Info Version", .value = c.LLVMDebugMetadataVersion() },
        .{ .name = "Dwarf Version", .value = 4 },
    };
    for (module_flags) |flag| {
        const as_const = c.LLVMConstInt(self.cached_i32, flag.value, 0);
        c.LLVMAddModuleFlag(
            self.llvm_module,
            c.LLVMModuleFlagBehaviorWarning,
            flag.name.ptr,
            flag.name.len,
            c.LLVMValueAsMetadata(as_const),
        );
    }

    // The compile unit lives on the main file; "sx" stands in when no main
    // file path was recorded.
    const cu_path = if (self.main_file.len > 0) self.main_file else "sx";
    const cu_file = self.diFileFor(cu_path);
    const producer = "sx";
    self.di_cu = c.LLVMDIBuilderCreateCompileUnit(
        self.di_builder,
        c.LLVMDWARFSourceLanguageC,
        cu_file,
        producer,
        producer.len,
        0, // isOptimized
        "", // flags
        0, // flags length
        0, // runtime version
        "", // split name
        0, // split name length
        c.LLVMDWARFEmissionFull,
        0, // DWOId
        0, // split debug inlining
        0, // debug info for profiling
        "", // sysroot
        0, // sysroot length
        "", // sdk
        0, // sdk length
    );
}
/// Describe `func` with a `DISubprogram`, attach it to `llvm_func`, and
/// make it the scope (`di_scope`) that the function's instruction
/// locations are created under.
///
/// The builder's current location and `di_scope` are always reset first,
/// even when debug info is off, so a function emitted without a subprogram
/// never inherits a location left over from the previous one.
fn beginFunctionDebug(self: *LLVMEmitter, func: *const Function, llvm_func: c.LLVMValueRef, name: []const u8) void {
    c.LLVMSetCurrentDebugLocation2(self.builder, null);
    self.di_scope = null;
    const di = self.di_builder orelse return;

    // Functions with no recorded source file are attributed to the main file.
    const file = func.source_file orelse self.main_file;
    self.current_func_file = file;
    const file_md = self.diFileFor(file);
    const fn_type = c.LLVMDIBuilderCreateSubroutineType(di, file_md, null, 0, c.LLVMDIFlagZero);

    // Declaration line: the line of the first instruction in the first
    // block, or 1 when there is no such instruction.
    const decl_line: c_uint = line: {
        const blocks = func.blocks.items;
        if (blocks.len == 0) break :line 1;
        const first_insts = blocks[0].insts.items;
        if (first_insts.len == 0) break :line 1;
        const text = self.sourceForFile(file);
        break :line errors.SourceLoc.compute(text, first_insts[0].span.start).line;
    };

    // Only externally linked functions are visible outside the unit.
    const local_to_unit: c.LLVMBool = @intFromBool(func.linkage != .external);

    const subprogram = c.LLVMDIBuilderCreateFunction(
        di,
        file_md, // scope
        name.ptr,
        name.len,
        name.ptr, // linkage name (same as the display name)
        name.len,
        file_md,
        decl_line,
        fn_type,
        local_to_unit,
        1, // is definition
        decl_line, // scope line
        c.LLVMDIFlagZero,
        0, // isOptimized
    );
    c.LLVMSetSubprogram(llvm_func, subprogram);
    self.di_scope = subprogram;
}
/// Close the current function's debug scope: drop the builder's current
/// location and forget the subprogram, so whatever is emitted next
/// (possibly a synthetic function) cannot pick up a DILocation that points
/// into this function.
fn endFunctionDebug(self: *LLVMEmitter) void {
    c.LLVMSetCurrentDebugLocation2(self.builder, null);
    self.di_scope = null;
}
/// Point the builder's current debug location at the line:col that `span`
/// starts on, within the current function's subprogram. Does nothing when
/// there is no active subprogram (`di_scope == null`), which is the case
/// whenever debug info is off.
fn setInstDebugLocation(self: *LLVMEmitter, span: Span) void {
    if (self.di_scope) |subprogram| {
        const text = self.sourceForFile(self.current_func_file);
        const pos = errors.SourceLoc.compute(text, span.start);
        c.LLVMSetCurrentDebugLocation2(
            self.builder,
            c.LLVMDIBuilderCreateDebugLocation(self.context, pos.line, pos.col, subprogram, null),
        );
    }
}
/// Finalize the DIBuilder once all debug metadata has been created.
/// Does nothing when no DIBuilder exists, i.e. debug info is off.
fn finalizeDebugInfo(self: *LLVMEmitter) void {
    if (self.di_builder) |di| c.LLVMDIBuilderFinalize(di);
}
/// Synthesize a module constructor that populates each interned
@@ -1528,6 +1723,10 @@ pub const LLVMEmitter = struct {
self.current_func_is_main = std.mem.eql(u8, name, "main");
self.current_func_idx = func_idx;
// DWARF: describe this function and make it the scope for the
// per-instruction locations set in emitInst (no-op if off).
self.beginFunctionDebug(func, llvm_func, name);
// Clear ref_map and pre-map parameter refs
self.ref_map.clearRetainingCapacity();
self.ref_counter = 0;
@@ -1586,6 +1785,9 @@ pub const LLVMEmitter = struct {
// Fixup PHI nodes: scan all blocks for branches that pass args
self.fixupPhiNodes(func, func_idx);
// DWARF: leave no stale location for the next function.
self.endFunctionDebug();
}
/// After emitting all blocks, fill in PHI incoming values from branch args.
@@ -1635,6 +1837,9 @@ pub const LLVMEmitter = struct {
// ── Instruction emission ────────────────────────────────────────
fn emitInst(self: *LLVMEmitter, instruction: *const Inst, func_idx: u32) void {
// DWARF: stamp every LLVM instruction this op emits with the sx
// source location (no-op when debug info is off).
self.setInstDebugLocation(instruction.span);
switch (instruction.op) {
// ── Constants ───────────────────────────────────────────
.const_int => |val| {

View File

@@ -1,6 +1,7 @@
pub const c = @cImport({
@cInclude("llvm-c/Core.h");
@cInclude("llvm-c/Analysis.h");
@cInclude("llvm-c/DebugInfo.h");
@cInclude("llvm-c/Target.h");
@cInclude("llvm-c/TargetMachine.h");