P2.2: fix put_file content-addressing — hash the published bytes (single source read)
put_file hashed the file at the source path, then copied the source again — two reads. A source mutated between those two reads would publish bytes whose digest != the returned key, breaking the content-addressed invariant. Now put_file copies the source once into a provisional staging file, derives the key from the SHA-256 of that staged file (the exact bytes published), then dedups or atomically renames it into place. This guarantees key == digest(published object) with a single source read. The acceptance test is extended: it re-hashes the stored object and asserts it equals the returned key (and the std.hash / shasum of the fixture), asserts cross-path dedup (put_file and put_bytes of identical content share one object), and asserts the staging temp is cleaned up on both the success and dedup paths.
This commit is contained in:
@@ -48,6 +48,17 @@ entry_count :: (dir: string) -> string {
|
||||
return res.stdout;
|
||||
}
|
||||
|
||||
// Number of `put_file` staging temps (`incoming-*`) left under `dir`,
// returned as a string of decimal digits so callers can compare it to "0".
|
||||
// 0 means every file-source put cleaned up its staging copy.
// NOTE(review): `dir` is spliced into the shell command unquoted, so it must
// be a shell-safe path (no spaces or metacharacters).
|
||||
incoming_count :: (dir: string) -> string {
|
||||
// Pipeline: list `dir` one entry per line (ls errors are discarded via
// 2>/dev/null), count the lines that start with "incoming-", then delete
// every non-digit character (including the trailing newline) with `tr -dc`.
cmd := concat("ls -1 ", concat(dir, " 2>/dev/null | grep -c '^incoming-' | tr -dc '0-9'"));
|
||||
r := process.run(cmd);
|
||||
// A null result is treated as a test failure before the result is unwrapped.
process.assert(r != null, "ls/grep must run");
|
||||
res := r!;
|
||||
// Fallback: if the pipeline produced no digits at all, report zero temps.
if res.stdout.len == 0 { return "0"; }
|
||||
return res.stdout;
|
||||
}
|
||||
|
||||
main :: () -> s32 {
|
||||
root := ".sx-tmp/store-cas";
|
||||
process.run(concat("rm -rf ", root)); // fresh root, even after a crashed prior run
|
||||
@@ -99,17 +110,44 @@ main :: () -> s32 {
|
||||
process.assert(!st.has(missing), "failed publish must leave no object");
|
||||
print(" atomicity: staged write invisible; failed publish leaves no object\n");
|
||||
|
||||
// ── 4. put_file: file source, same key + bytes ──────────────────────
|
||||
// ── 4. put_file: single source read, key == digest of published object
|
||||
src := ".sx-tmp/store-cas-src.bin";
|
||||
file_bytes := "the quick brown fox\n";
|
||||
file_bytes := "file-source-bytes-123"; // shell-safe: no spaces/newlines
|
||||
process.assert(fs.write_file(src, file_bytes), "fixture source file must be written");
|
||||
|
||||
fkey, fe := st.put_file(src);
|
||||
process.assert(!fe, "put_file must succeed");
|
||||
process.assert(fkey == stdhash_key(file_bytes), "put_file key must equal std.hash of the file bytes");
|
||||
process.assert(st.has(fkey), "put_file object must be published");
|
||||
|
||||
// The returned key must be the SHA-256 of the bytes ACTUALLY published —
|
||||
// re-hash the stored object and confirm it equals the key (and equals
|
||||
// std.hash + shasum -a 256 of the original fixture).
|
||||
fstored := fs.read_file(st.object_path(fkey));
|
||||
process.assert(fstored != null, "published object must be readable");
|
||||
process.assert(fstored! == file_bytes, "put_file stored bytes must equal the file");
|
||||
print(" put_file: key {} published\n", fkey);
|
||||
process.assert(stdhash_key(fstored!) == fkey, "key must equal SHA-256 of the published object");
|
||||
process.assert(fkey == stdhash_key(file_bytes), "put_file key must equal std.hash of the file bytes");
|
||||
process.assert(fkey == shasum_key(file_bytes), "put_file key must equal shasum -a 256");
|
||||
process.assert(incoming_count(st.staging_dir()) == "0", "put_file must clean up its staging temp");
|
||||
objs_after_file := entry_count(st.objects_dir());
|
||||
print(" put_file: key {} == digest(published object)\n", fkey);
|
||||
|
||||
// Cross-path dedup: put_bytes of identical content yields the SAME key
|
||||
// and adds no second object; the stored bytes are not rewritten.
|
||||
bkey, be := st.put_bytes(file_bytes);
|
||||
process.assert(!be, "cross-path put_bytes must succeed");
|
||||
process.assert(bkey == fkey, "put_file and put_bytes of identical content share a key");
|
||||
process.assert(entry_count(st.objects_dir()) == objs_after_file, "cross-path dedup adds no object");
|
||||
afterb := fs.read_file(st.object_path(fkey));
|
||||
process.assert(afterb! == file_bytes, "cross-path dedup must not rewrite the object");
|
||||
|
||||
// A repeat put_file hits dedup and also drops its staging temp.
|
||||
fkey2, fe2 := st.put_file(src);
|
||||
process.assert(!fe2, "repeat put_file must succeed");
|
||||
process.assert(fkey2 == fkey, "repeat put_file dedup yields the same key");
|
||||
process.assert(entry_count(st.objects_dir()) == objs_after_file, "repeat put_file adds no object");
|
||||
process.assert(incoming_count(st.staging_dir()) == "0", "dedup put_file must clean up its staging temp");
|
||||
print(" put_file: cross-path dedup, one object, staging cleaned\n");
|
||||
|
||||
// ── cleanup ─────────────────────────────────────────────────────────
|
||||
process.run(concat("rm -rf ", root));
|
||||
|
||||
Reference in New Issue
Block a user