From c32d694d57efecd154fbc2fc944be49f1a5cd163 Mon Sep 17 00:00:00 2001 From: agra Date: Mon, 1 Jun 2026 13:14:00 +0300 Subject: [PATCH] ERR/E3.0 (slice 2): emit DWARF line-info Attach LLVM debug metadata so a captured return-address PC resolves to file:line:col (the runtime half E3.3 needs) and sx binaries become debuggable in lldb/gdb. - llvm_api.zig: bind llvm-c/DebugInfo.h (DIBuilder C API was unbound). - emit_llvm.zig: DIBuilder + one DICompileUnit/DIFile on the main file, a DISubprogram per function (LLVMSetSubprogram), and a DILocation per instruction from Inst.span (errors.SourceLoc.compute, scoped to the subprogram). Plus the "Debug Info Version"/"Dwarf Version" module flags and LLVMDIBuilderFinalize. - Gated on opt none/less + a wired source map (setDebugContext from core.zig), mirroring lower.zig's tracesEnabled; release strips it. Verified: sx ir/sx asm --opt none show correct DILocations + .loc directives; the 290-example JIT suite (-O0 -> debug on) verifies and runs unchanged. +2 DWARF unit tests. --- src/core.zig | 1 + src/ir/emit_llvm.test.zig | 66 ++++++++++++ src/ir/emit_llvm.zig | 205 ++++++++++++++++++++++++++++++++++++++ src/llvm_api.zig | 1 + 4 files changed, 273 insertions(+) diff --git a/src/core.zig b/src/core.zig index 69f4525..83d4641 100644 --- a/src/core.zig +++ b/src/core.zig @@ -146,6 +146,7 @@ pub const Compilation = struct { const ir_mod_ptr = try self.allocator.create(ir.Module); ir_mod_ptr.* = try self.lowerToIR(); var emitter = ir.LLVMEmitter.init(self.allocator, ir_mod_ptr, "sx_module", self.target_config); + emitter.setDebugContext(&self.import_sources, self.file_path); emitter.emit(); // Keep the IR module alive past LLVM emission so post-link // callbacks can re-enter the interpreter via `invokeByName`. 
diff --git a/src/ir/emit_llvm.test.zig b/src/ir/emit_llvm.test.zig
index b59234e..0384442 100644
--- a/src/ir/emit_llvm.test.zig
+++ b/src/ir/emit_llvm.test.zig
@@ -951,3 +951,69 @@ test "emit: box_any and unbox_any" {
     try std.testing.expect(std.mem.indexOf(u8, ir_str, "insertvalue") != null);
     try std.testing.expect(std.mem.indexOf(u8, ir_str, "extractvalue") != null);
 }
+
+test "emit: ERR E3.0 — DWARF debug info (compile unit + subprogram + per-inst location)" {
+    const alloc = std.testing.allocator;
+    var module = Module.init(alloc);
+    defer module.deinit();
+
+    var b = Builder.init(&module);
+
+    // Build `func main() -> s64 { return 42; }`. `current_span` is set before
+    // the const/ret are built so they map to line 3 of the source map below.
+    _ = b.beginFunction(str(&module, "main"), &.{}, .s64);
+    const entry = b.appendBlock(str(&module, "entry"), &.{});
+    b.switchToBlock(entry);
+    // "a\nb\nXYZ" — byte offset 4 ('X') is line 3, col 1.
+    b.current_span = .{ .start = 4, .end = 5 };
+    const c42 = b.constInt(42, .s64);
+    b.ret(c42, .s64);
+    b.finalize();
+
+    // Source map keyed on the main file. setDebugContext + opt none
+    // turns DWARF emission on (release opt levels skip it entirely).
+    var sources = std.StringHashMap([:0]const u8).init(alloc);
+    defer sources.deinit();
+    try sources.put("probe.sx", "a\nb\nXYZ");
+
+    var emitter = LLVMEmitter.init(alloc, &module, "test_dwarf", .{ .opt_level = .none });
+    defer emitter.deinit();
+    emitter.setDebugContext(&sources, "probe.sx");
+    emitter.emit();
+
+    try std.testing.expect(emitter.verify());
+
+    const ir_str = emitter.dumpToString();
+    // Substring checks on the textual IR: both module flags, a compile unit,
+    // the main file's DIFile, main's DISubprogram, and a line-3 DILocation.
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "\"Debug Info Version\"") != null);
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "\"Dwarf Version\"") != null);
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "DICompileUnit") != null);
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "DIFile(filename: \"probe.sx\"") != null);
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "DISubprogram(name: \"main\"") != null);
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "DILocation(line: 3") != null);
+}
+
+test "emit: ERR E3.0 — no DWARF without a debug context (unit-test default)" {
+    const alloc = std.testing.allocator;
+    var module = Module.init(alloc);
+    defer module.deinit();
+
+    var b = Builder.init(&module);
+    _ = b.beginFunction(str(&module, "main"), &.{}, .s64);
+    const entry = b.appendBlock(str(&module, "entry"), &.{});
+    b.switchToBlock(entry);
+    b.ret(b.constInt(42, .s64), .s64);
+    b.finalize();
+
+    // setDebugContext is never called, so `import_sources` stays null and
+    // `debugEnabled` is false even at opt none: no DI metadata, no `!dbg`.
+    var emitter = LLVMEmitter.init(alloc, &module, "test_no_dwarf", .{ .opt_level = .none });
+    defer emitter.deinit();
+    emitter.emit();
+
+    try std.testing.expect(emitter.verify());
+    const ir_str = emitter.dumpToString();
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "DICompileUnit") == null);
+    try std.testing.expect(std.mem.indexOf(u8, ir_str, "!dbg") == null);
+}
diff --git a/src/ir/emit_llvm.zig b/src/ir/emit_llvm.zig
index 652b223..e7d1c35 100644
--- a/src/ir/emit_llvm.zig
+++ b/src/ir/emit_llvm.zig
@@ -9,8 +9,10 @@ const TypeId = ir_types.TypeId;
 const TypeInfo = ir_types.TypeInfo;
 const TypeTable = ir_types.TypeTable;
 const StringId = ir_types.StringId;
+const errors = @import("../errors.zig");
 const ir_inst = @import("inst.zig");
 const Ref = ir_inst.Ref;
+const Span = ir_inst.Span;
 const BlockId = ir_inst.BlockId;
 const FuncId = ir_inst.FuncId;
 const GlobalId = ir_inst.GlobalId;
@@ -158,6 +160,31 @@ pub const LLVMEmitter = struct {
     // Build configuration accumulated from #run blocks
     build_config: interp_mod.BuildConfig,
 
+    // ── DWARF debug info (ERR E3.0) ──────────────────────────────────
+    // Emitted only when the build keeps error traces (opt_level
+    // none/less, matching lower.zig's `tracesEnabled`) AND a source map
+    // is wired in via `setDebugContext`. One `DICompileUnit` (on the
+    // main file) + a `DIFile` per source file + a `DISubprogram` per
+    // emitted function + a `DILocation` per instruction (resolved from
+    // `Inst.span`). Lets a captured return-address PC resolve to
+    // file:line:col for E3.3's runtime trace formatting, and makes sx
+    // binaries debuggable in lldb/gdb as a bonus.
+    di_builder: c.LLVMDIBuilderRef = null,
+    di_cu: c.LLVMMetadataRef = null,
+    di_files: std.StringHashMap(c.LLVMMetadataRef),
+    // The current function's DISubprogram — the scope for its
+    // DILocations. Null between functions (and in functions we don't
+    // describe, e.g. the synthetic Obj-C init constructors).
+ di_scope: c.LLVMMetadataRef = null, + // Source file of the function currently being emitted (span → line). + current_func_file: []const u8 = "", + // File path → source text (the diagnostics' `import_sources` map). + // Null in unit tests, so no debug info is emitted there. + import_sources: ?*const std.StringHashMap([:0]const u8) = null, + // Main file path — the compile unit's file and the span-resolution + // fallback for functions with no recorded source file. + main_file: []const u8 = "", + const PendingPhi = struct { phi: c.LLVMValueRef, block_id: BlockId, // the block this phi belongs to @@ -238,6 +265,7 @@ pub const LLVMEmitter = struct { .field_name_arrays = std.AutoHashMap(u32, c.LLVMValueRef).init(alloc), .target_config = target_config, .build_config = .{}, + .di_files = std.StringHashMap(c.LLVMMetadataRef).init(alloc), }; } @@ -251,6 +279,8 @@ pub const LLVMEmitter = struct { self.jni_slots.deinit(); self.global_map.deinit(); self.block_map.deinit(); + self.di_files.deinit(); + if (self.di_builder != null) c.LLVMDisposeDIBuilder(self.di_builder); if (self.target_machine) |tm| c.LLVMDisposeTargetMachine(tm); c.LLVMDisposeBuilder(self.builder); c.LLVMDisposeModule(self.llvm_module); @@ -260,6 +290,10 @@ pub const LLVMEmitter = struct { // ── Top-level emit ────────────────────────────────────────────── pub fn emit(self: *LLVMEmitter) void { + // Pass -1: Set up DWARF debug info (compile unit + module flags). + // Must precede any DISubprogram (created per function below). + self.initDebugInfo(); + // Pass 0: Declare and initialize globals self.emitGlobals(); @@ -304,6 +338,167 @@ pub const LLVMEmitter = struct { // Pass 3: Verify typeSizeBytes matches LLVM's ABI sizes self.verifySizes(); + + // Pass 4: Resolve DWARF temporary metadata. Must come after all + // DISubprograms / DILocations are created and before the module + // is verified or emitted. 
+        self.finalizeDebugInfo();
+    }
+
+    // ── DWARF debug info (ERR E3.0) ──────────────────────────────────
+
+    /// Wire the source map + main file so spans can resolve to
+    /// file:line:col. Called by the driver after `init`; absent in unit
+    /// tests, which keeps debug-info emission off there.
+    pub fn setDebugContext(self: *LLVMEmitter, import_sources: *const std.StringHashMap([:0]const u8), main_file: []const u8) void {
+        self.import_sources = import_sources;
+        self.main_file = main_file;
+    }
+
+    /// Debug info is emitted only when error traces are kept (opt_level
+    /// none/less, matching `tracesEnabled` in lower.zig) and a source
+    /// map is available. Release builds (default/aggressive) skip it, so
+    /// the DWARF is strippable cost-free.
+    fn debugEnabled(self: *const LLVMEmitter) bool {
+        if (self.import_sources == null) return false;
+        return self.target_config.opt_level == .none or self.target_config.opt_level == .less;
+    }
+
+    /// Source text for `file` from the diagnostics' file→source map. An
+    /// unmapped `file` falls back to the main file's text (NOTE(review): its
+    /// offsets then resolve against the wrong text); "" if nothing is found.
+    fn sourceForFile(self: *LLVMEmitter, file: []const u8) []const u8 {
+        const is = self.import_sources orelse return "";
+        if (is.get(file)) |s| return s;
+        if (self.main_file.len > 0) {
+            if (is.get(self.main_file)) |s| return s;
+        }
+        return "";
+    }
+
+    /// The `DIFile` for `path`, created once and cached in `di_files`.
+    /// Splits `path` into directory + basename via `std.fs.path`.
+    fn diFileFor(self: *LLVMEmitter, path: []const u8) c.LLVMMetadataRef {
+        if (self.di_files.get(path)) |f| return f;
+        // `dirname` keeps "/" for root-level files and is null for a bare name.
+        const dir = std.fs.path.dirname(path) orelse "";
+        const base = std.fs.path.basename(path);
+        const f = c.LLVMDIBuilderCreateFile(self.di_builder, base.ptr, base.len, dir.ptr, dir.len);
+        self.di_files.put(path, f) catch |err| std.log.warn("DIFile cache insert failed for '{s}': {s}", .{ path, @errorName(err) });
+        return f;
+    }
+
+    /// Create the DIBuilder, the module flags ("Debug Info Version" /
+    /// "Dwarf Version"), and the single compile unit on the main file.
+    fn initDebugInfo(self: *LLVMEmitter) void {
+        if (!self.debugEnabled()) return;
+        self.di_builder = c.LLVMCreateDIBuilder(self.llvm_module);
+
+        c.LLVMAddModuleFlag(
+            self.llvm_module,
+            c.LLVMModuleFlagBehaviorWarning,
+            "Debug Info Version",
+            "Debug Info Version".len,
+            c.LLVMValueAsMetadata(c.LLVMConstInt(self.cached_i32, c.LLVMDebugMetadataVersion(), 0)),
+        );
+        c.LLVMAddModuleFlag(
+            self.llvm_module,
+            c.LLVMModuleFlagBehaviorWarning,
+            "Dwarf Version",
+            "Dwarf Version".len,
+            c.LLVMValueAsMetadata(c.LLVMConstInt(self.cached_i32, 4, 0)),
+        );
+
+        const cu_file = self.diFileFor(if (self.main_file.len > 0) self.main_file else "sx");
+        self.di_cu = c.LLVMDIBuilderCreateCompileUnit(
+            self.di_builder,
+            c.LLVMDWARFSourceLanguageC,
+            cu_file,
+            "sx",
+            "sx".len,
+            0, // isOptimized
+            "",
+            0, // flags
+            0, // runtime version
+            "",
+            0, // split name
+            c.LLVMDWARFEmissionFull,
+            0, // DWOId
+            0, // split debug inlining
+            0, // debug info for profiling
+            "",
+            0, // sysroot
+            "",
+            0, // sdk
+        );
+    }
+
+    /// Create a `DISubprogram` for `func` and attach it to `llvm_func`,
+    /// making it the scope (`di_scope`) for the function's instruction
+    /// locations. Clears any stale builder location first so synthetic
+    /// functions emitted between sx functions carry none.
+    fn beginFunctionDebug(self: *LLVMEmitter, func: *const Function, llvm_func: c.LLVMValueRef, name: []const u8) void {
+        self.di_scope = null;
+        c.LLVMSetCurrentDebugLocation2(self.builder, null);
+        if (self.di_builder == null) return; // debug info off: leave no scope
+
+        const file = func.source_file orelse self.main_file;
+        self.current_func_file = file;
+        const di_file = self.diFileFor(file);
+        const subroutine_ty = c.LLVMDIBuilderCreateSubroutineType(self.di_builder, di_file, null, 0, c.LLVMDIFlagZero);
+
+        // Subprogram line = line of the entry block's first instruction;
+        // stays 1 when there are no blocks or the entry block is empty.
+        var line: c_uint = 1;
+        if (func.blocks.items.len > 0 and func.blocks.items[0].insts.items.len > 0) {
+            const sp = func.blocks.items[0].insts.items[0].span;
+            const src = self.sourceForFile(file);
+            line = errors.SourceLoc.compute(src, sp.start).line;
+        }
+
+        const is_local: c.LLVMBool = if (func.linkage == .external) 0 else 1;
+        const subprogram = c.LLVMDIBuilderCreateFunction(
+            self.di_builder,
+            di_file, // scope
+            name.ptr, // name
+            name.len,
+            name.ptr,
+            name.len, // linkage name
+            di_file, // file
+            line, // line number
+            subroutine_ty, // type
+            is_local, // local to unit
+            1, // is definition
+            line, // scope line
+            c.LLVMDIFlagZero, // flags
+            0, // isOptimized
+        );
+        c.LLVMSetSubprogram(llvm_func, subprogram);
+        self.di_scope = subprogram;
+    }
+
+    /// End the current function's debug scope and clear the builder's
+    /// location, so the next (possibly synthetic) function doesn't
+    /// inherit a DILocation pointing into this function's subprogram.
+    fn endFunctionDebug(self: *LLVMEmitter) void {
+        self.di_scope = null;
+        c.LLVMSetCurrentDebugLocation2(self.builder, null);
+    }
+
+    /// Set the builder's current debug location from an instruction span,
+    /// scoped to the current function's subprogram. No-op when debug info
+    /// is off (`di_scope == null`).
+    fn setInstDebugLocation(self: *LLVMEmitter, span: Span) void {
+        const scope = self.di_scope orelse return; // no subprogram: nothing to attach to
+        const src = self.sourceForFile(self.current_func_file);
+        const loc = errors.SourceLoc.compute(src, span.start); // NOTE(review): recomputed per instruction — confirm cost on large files
+        const di_loc = c.LLVMDIBuilderCreateDebugLocation(self.context, loc.line, loc.col, scope, null); // null = no inlined-at
+        c.LLVMSetCurrentDebugLocation2(self.builder, di_loc);
+    }
+
+    fn finalizeDebugInfo(self: *LLVMEmitter) void {
+        if (self.di_builder == null) return; // builder exists only when initDebugInfo enabled debug info
+        c.LLVMDIBuilderFinalize(self.di_builder);
     }
 
     /// Synthesize a module constructor that populates each interned
@@ -1528,6 +1723,10 @@ pub const LLVMEmitter = struct {
         self.current_func_is_main = std.mem.eql(u8, name, "main");
         self.current_func_idx = func_idx;
 
+        // DWARF: describe this function and make it the scope for the
+        // per-instruction locations set in emitInst (no-op if off).
+        self.beginFunctionDebug(func, llvm_func, name);
+
         // Clear ref_map and pre-map parameter refs
         self.ref_map.clearRetainingCapacity();
         self.ref_counter = 0;
@@ -1586,6 +1785,9 @@ pub const LLVMEmitter = struct {
 
         // Fixup PHI nodes: scan all blocks for branches that pass args
         self.fixupPhiNodes(func, func_idx);
+
+        // DWARF: leave no stale location for the next function.
+        self.endFunctionDebug();
     }
 
     /// After emitting all blocks, fill in PHI incoming values from branch args.
@@ -1635,6 +1837,9 @@ pub const LLVMEmitter = struct {
     // ── Instruction emission ────────────────────────────────────────
 
     fn emitInst(self: *LLVMEmitter, instruction: *const Inst, func_idx: u32) void {
+        // DWARF: stamp every LLVM instruction this op emits with the sx
+        // source location (no-op when debug info is off).
+ self.setInstDebugLocation(instruction.span); switch (instruction.op) { // ── Constants ─────────────────────────────────────────── .const_int => |val| { diff --git a/src/llvm_api.zig b/src/llvm_api.zig index 9e0e158..6bdf8bf 100644 --- a/src/llvm_api.zig +++ b/src/llvm_api.zig @@ -1,6 +1,7 @@ pub const c = @cImport({ @cInclude("llvm-c/Core.h"); @cInclude("llvm-c/Analysis.h"); + @cInclude("llvm-c/DebugInfo.h"); @cInclude("llvm-c/Target.h"); @cInclude("llvm-c/TargetMachine.h");