feat: transitive quoted includes participate in the c-object cache key

The key previously covered the #source bytes + the block's DECLARED
headers, so a unit whose impl is a thin wrapper over an undeclared
header (vendors/kb_text_shape: two-line impl.c, all code in
kb/kb_text_shape.h) would serve STALE cached objects after an
upstream upgrade. collectIncludeDepBytes now walks the transitive
closure of quoted #include lines (includer-dir first, then -I dirs;
angle/system includes never participate; unresolvable names skip) and
the dep contents fold into the key — no sidecar, no compare logic, a
changed header is just a different key. Verified live: appending to
kb_text_shape.h mints a new cache entry; reverting hits the old one.
This commit is contained in:
agra
2026-06-12 18:48:56 +03:00
parent b06776d6e9
commit 4b9324e585
2 changed files with 126 additions and 23 deletions

View File

@@ -3,12 +3,13 @@ const c_import = @import("c_import.zig");
const SRC = "int f(void) { return 1; }";
const HDR = "int f(void);";
const DEP = "#define INNER 1";
const VER = "19.1.7";
const none: []const []const u8 = &.{};
fn baseKey() u64 {
return c_import.cSourceCacheKey(SRC, &.{HDR}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
return c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
}
test "cSourceCacheKey: stable when nothing changes" {
@@ -16,42 +17,52 @@ test "cSourceCacheKey: stable when nothing changes" {
}
test "cSourceCacheKey: source bytes vary the key" {
const other = c_import.cSourceCacheKey("int f(void) { return 2; }", &.{HDR}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
const other = c_import.cSourceCacheKey("int f(void) { return 2; }", &.{HDR}, &.{DEP}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
try std.testing.expect(baseKey() != other);
}
test "cSourceCacheKey: declared header content varies the key" {
const other = c_import.cSourceCacheKey(SRC, &.{"int f(void); int g(void);"}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
const other = c_import.cSourceCacheKey(SRC, &.{"int f(void); int g(void);"}, &.{DEP}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
try std.testing.expect(baseKey() != other);
}
test "cSourceCacheKey: transitive dep content varies the key" {
    // Identical to the baseKey() inputs except for the content of the single dep.
    const edited_dep: []const []const u8 = &.{"#define INNER 2"};
    const with_edited_dep = c_import.cSourceCacheKey(SRC, &.{HDR}, edited_dep, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
    try std.testing.expect(with_edited_dep != baseKey());

    // The same string must hash differently depending on whether it is
    // supplied as a declared header or as a transitive dep.
    const probe: []const []const u8 = &.{"X"};
    const in_header_slot = c_import.cSourceCacheKey(SRC, probe, none, none, none, none, VER, null, null);
    const in_dep_slot = c_import.cSourceCacheKey(SRC, none, probe, none, none, none, VER, null, null);
    try std.testing.expect(in_header_slot != in_dep_slot);
}
test "cSourceCacheKey: defines vary the key (value and order)" {
const v2 = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{"A=2"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
const v2 = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{"A=2"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
try std.testing.expect(baseKey() != v2);
const ab = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{ "A=1", "B=1" }, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
const ba = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{ "B=1", "A=1" }, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
const ab = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{ "A=1", "B=1" }, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
const ba = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{ "B=1", "A=1" }, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
try std.testing.expect(ab != ba);
}
test "cSourceCacheKey: flags vary the key" {
const other = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{"A=1"}, &.{"-O3"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
const other = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{"A=1"}, &.{"-O3"}, &.{"inc"}, VER, "arm64-apple-darwin", "/sdk");
try std.testing.expect(baseKey() != other);
}
test "cSourceCacheKey: a define is not a flag (same string, different role)" {
const as_define = c_import.cSourceCacheKey(SRC, none, &.{"X"}, none, none, VER, null, null);
const as_flag = c_import.cSourceCacheKey(SRC, none, none, &.{"X"}, none, VER, null, null);
const as_define = c_import.cSourceCacheKey(SRC, none, none, &.{"X"}, none, none, VER, null, null);
const as_flag = c_import.cSourceCacheKey(SRC, none, none, none, &.{"X"}, none, VER, null, null);
try std.testing.expect(as_define != as_flag);
}
test "cSourceCacheKey: include dirs vary the key" {
const other = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{"A=1"}, &.{"-O2"}, &.{"other"}, VER, "arm64-apple-darwin", "/sdk");
const other = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{"A=1"}, &.{"-O2"}, &.{"other"}, VER, "arm64-apple-darwin", "/sdk");
try std.testing.expect(baseKey() != other);
}
test "cSourceCacheKey: llvm version varies the key" {
const other = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, "20.0.0", "arm64-apple-darwin", "/sdk");
const other = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, "20.0.0", "arm64-apple-darwin", "/sdk");
try std.testing.expect(baseKey() != other);
}
@@ -65,13 +76,32 @@ test "objectMagicOk: accepts Mach-O and ELF, rejects garbage and truncation" {
}
test "cSourceCacheKey: triple and sysroot vary the key; absent is not empty" {
const other_triple = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "x86_64-apple-darwin", "/sdk");
const other_triple = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "x86_64-apple-darwin", "/sdk");
try std.testing.expect(baseKey() != other_triple);
const other_sysroot = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/ndk");
const other_sysroot = c_import.cSourceCacheKey(SRC, &.{HDR}, &.{DEP}, &.{"A=1"}, &.{"-O2"}, &.{"inc"}, VER, "arm64-apple-darwin", "/ndk");
try std.testing.expect(baseKey() != other_sysroot);
const absent = c_import.cSourceCacheKey(SRC, none, none, none, none, VER, null, null);
const empty = c_import.cSourceCacheKey(SRC, none, none, none, none, VER, "", "");
const absent = c_import.cSourceCacheKey(SRC, none, none, none, none, none, VER, null, null);
const empty = c_import.cSourceCacheKey(SRC, none, none, none, none, none, VER, "", "");
try std.testing.expect(absent != empty);
}
test "scanQuotedIncludes: quoted forms collected in order, angle and noise skipped" {
    // Fixture lines, in order: a plain quoted include, a quoted include with
    // blanks around `#`, an angle include, a directive that merely starts
    // with "include", a non-directive line, an empty quoted name, and a
    // final plain quoted include. Only the three non-empty quoted includes
    // are expected back.
    const src =
        \\#include "a.h"
        \\ # include "sub/b.h"
        \\#include <system.h>
        \\#includex "not_an_include.h"
        \\int f(void);
        \\#include ""
        \\#include "c.h"
    ;
    var out = std.ArrayList([]const u8).empty;
    // Registered before the fallible scan so a failure part-way through
    // still releases whatever the list has already grown to.
    defer out.deinit(std.testing.allocator);
    try c_import.scanQuotedIncludes(std.testing.allocator, src, &out);
    try std.testing.expectEqual(@as(usize, 3), out.items.len);
    try std.testing.expectEqualStrings("a.h", out.items[0]);
    try std.testing.expectEqualStrings("sub/b.h", out.items[1]);
    try std.testing.expectEqualStrings("c.h", out.items[2]);
}

View File

@@ -35,17 +35,17 @@ pub const CImportInfo = struct {
/// Cache key for one compiled `#source` member. Everything that can
/// change the produced object participates: the source bytes, the
/// unit's declared `#include` headers BY CONTENT (editing a declared
/// header invalidates), defines / flags / include dirs in declaration
/// order, the LLVM version, and the cross-target (triple + sysroot).
/// Section tags keep equal strings in different roles distinct (a
/// define never aliases a flag, an absent triple never aliases an
/// empty one). Transitive includes of the .c itself do NOT
/// participate — the block's declared surface is the invalidation
/// boundary.
/// unit's declared `#include` headers BY CONTENT, the source's
/// TRANSITIVE quoted includes BY CONTENT (`dep_bytes` — editing any
/// header the compile actually reads invalidates), defines / flags /
/// include dirs in declaration order, the toolchain version, and the
/// cross-target (triple + sysroot). Section tags keep equal strings
/// in different roles distinct (a define never aliases a flag, an
/// absent triple never aliases an empty one).
pub fn cSourceCacheKey(
source_bytes: []const u8,
header_bytes: []const []const u8,
dep_bytes: []const []const u8,
defines: []const []const u8,
flags: []const []const u8,
include_dirs: []const []const u8,
@@ -58,6 +58,8 @@ pub fn cSourceCacheKey(
key = Wyhash.hash(key, source_bytes);
key = Wyhash.hash(key, "\x01headers");
for (header_bytes) |hb| key = Wyhash.hash(key, hb);
key = Wyhash.hash(key, "\x01deps");
for (dep_bytes) |db| key = Wyhash.hash(key, db);
key = Wyhash.hash(key, "\x01defines");
for (defines) |d| key = Wyhash.hash(key, d);
key = Wyhash.hash(key, "\x01flags");
@@ -77,6 +79,75 @@ pub fn cSourceCacheKey(
return key;
}
/// Appends the target of every quoted `#include "name"` directive found
/// in `source` to `out`, preserving the order in which they appear.
///
/// A line qualifies when, after optional leading spaces/tabs, it reads
/// `#`, optional blanks, the word `include`, optional blanks, and then a
/// non-empty name enclosed in double quotes on that same line. Angle
/// includes (`<...>`), empty names, and unterminated quotes are ignored.
/// The scan is purely line-based and does not evaluate `#if` blocks, so
/// includes in inactive branches are collected as well.
///
/// The appended slices are views into `source`; `allocator` is only used
/// to grow `out`.
pub fn scanQuotedIncludes(allocator: std.mem.Allocator, source: []const u8, out: *std.ArrayList([]const u8)) !void {
    const blanks = " \t";
    const keyword = "include";

    var lines = std.mem.splitScalar(u8, source, '\n');
    while (lines.next()) |raw_line| {
        // Every preprocessor directive starts with '#' once indentation is removed.
        const directive = std.mem.trimStart(u8, raw_line, blanks);
        if (directive.len == 0) continue;
        if (directive[0] != '#') continue;

        // Blanks are allowed between '#' and the directive name.
        const after_hash = std.mem.trimStart(u8, directive[1..], blanks);
        if (!std.mem.startsWith(u8, after_hash, keyword)) continue;

        // Any other character directly after the keyword (as in
        // `#includex`) ends up in front of the operand and fails the
        // opening-quote check below.
        const operand = std.mem.trimStart(u8, after_hash[keyword.len..], blanks);
        if (operand.len < 2) continue;
        if (operand[0] != '"') continue;

        const after_quote = operand[1..];
        const name_len = std.mem.indexOfScalar(u8, after_quote, '"') orelse continue;
        if (name_len == 0) continue; // `#include ""` names nothing

        try out.append(allocator, after_quote[0..name_len]);
    }
}
/// Reads the contents of every file in the transitive closure of quoted
/// includes reachable from `root_path`/`root_bytes`, so that they can
/// participate in the cache key.
///
/// Each include name is tried against its includer's directory first and
/// then against `include_dirs` in order; the first candidate that can be
/// read wins. A candidate that cannot be read is skipped, and a name with
/// no readable candidate contributes nothing. Files are visited
/// breadth-first and at most once, keyed by normalized path; the root
/// itself is pre-marked as visited so a header that includes it back is
/// not returned as its own dependency.
///
/// Returns the file contents in discovery order. The outer slice and
/// every element are allocated from `allocator` and owned by the caller.
/// All bookkeeping (work queue, visited set, candidate paths) lives in a
/// scratch arena that is released before returning. On error, contents
/// read so far are freed and the error is propagated.
fn collectIncludeDepBytes(
    allocator: std.mem.Allocator,
    io: std.Io,
    root_path: []const u8,
    root_bytes: []const u8,
    include_dirs: []const []const u8,
) ![]const []const u8 {
    const Pending = struct { path: []const u8, bytes: []const u8 };
    // Upper bound on the size of a single dependency file.
    const max_dep_size = 64 * 1024 * 1024;

    // Everything that is not part of the result is allocated here and
    // dropped in one go, on success and on error alike.
    var scratch_state = std.heap.ArenaAllocator.init(allocator);
    defer scratch_state.deinit();
    const scratch = scratch_state.allocator();

    var dep_bytes = std.ArrayList([]const u8).empty;
    errdefer {
        for (dep_bytes.items) |owned| allocator.free(owned);
        dep_bytes.deinit(allocator);
    }

    var visited = std.StringHashMap(void).init(scratch);
    try visited.put(try std.fs.path.resolve(scratch, &.{root_path}), {});

    var queue = std.ArrayList(Pending).empty;
    try queue.append(scratch, .{ .path = root_path, .bytes = root_bytes });

    var idx: usize = 0;
    while (idx < queue.items.len) : (idx += 1) {
        // Copied by value: the queue may reallocate while this entry is processed.
        const item = queue.items[idx];
        var incs = std.ArrayList([]const u8).empty;
        try scanQuotedIncludes(scratch, item.bytes, &incs);
        const base = dirName(item.path);
        for (incs.items) |inc| {
            // Search order: the includer's own directory, then the include dirs.
            var candidates = std.ArrayList([]const u8).empty;
            try candidates.append(scratch, try std.fs.path.join(scratch, &.{ base, inc }));
            for (include_dirs) |dir| {
                try candidates.append(scratch, try std.fs.path.join(scratch, &.{ dir, inc }));
            }
            for (candidates.items) |cand| {
                const norm = try std.fs.path.resolve(scratch, &.{cand});
                // Already collected: this include resolves to a known file.
                if (visited.contains(norm)) break;
                // Best effort: an unreadable candidate just falls through to the next one.
                const bytes = std.Io.Dir.readFileAlloc(.cwd(), io, cand, allocator, .limited(max_dep_size)) catch continue;
                // Hand ownership to `dep_bytes` immediately so the errdefer
                // above covers these bytes on any later failure.
                dep_bytes.append(allocator, bytes) catch |err| {
                    allocator.free(bytes);
                    return err;
                };
                try visited.put(norm, {});
                try queue.append(scratch, .{ .path = cand, .bytes = bytes });
                break;
            }
        }
    }
    return try dep_bytes.toOwnedSlice(allocator);
}
/// Handle returned from loadCObjectsForJIT — caller must call unload() after JIT.
pub const CImportHandle = struct {
dylib_handle: ?*anyopaque = null,
@@ -416,9 +487,11 @@ pub fn compileCToObjects(
var cache_path: ?[:0]const u8 = null;
if (cache_ok) {
if (std.Io.Dir.readFileAlloc(.cwd(), io, src, allocator, .limited(64 * 1024 * 1024))) |src_bytes| {
const dep_bytes = collectIncludeDepBytes(allocator, io, src, src_bytes, inc_dirs.items) catch &.{};
const key = cSourceCacheKey(
src_bytes,
header_bytes.items,
dep_bytes,
info.defines,
info.flags,
inc_dirs.items,