put_file hashed the source path, then copied the source again — two reads. A source mutated in between would publish bytes whose digest != returned key, breaking the content-addressed invariant. Now copy the source once into a provisional staging file, derive the key from the SHA-256 of that staged file (the exact bytes published), then dedup/atomic-rename. Guarantees key == digest(published object) with a single source read. Extends the acceptance test: re-hashes the stored object and asserts it equals the returned key (and std.hash / shasum of the fixture), asserts cross-path dedup (put_file and put_bytes of identical content share one object), and asserts the staging temp is cleaned up on both the success and dedup paths.
159 lines
8.2 KiB
Plaintext
159 lines
8.2 KiB
Plaintext
// Acceptance for P2.2 — the content-addressed artifact store.
|
|
//
|
|
// Drives a fresh store rooted under `.sx-tmp/` (never /tmp) and asserts
|
|
// the four Slice-3 invariants:
|
|
// 1. put → object lands at `objects/<sha256>` and its bytes round-trip;
|
|
// the storage key equals std.hash, an independent `shasum -a 256`,
|
|
// and the pinned SHA-256("abc") vector.
|
|
// 2. dedup — identical bytes are not stored twice and an existing
|
|
// object is never rewritten.
|
|
// 3. atomicity — a staged-but-unpublished write is invisible at the
|
|
// final path, and a publish that fails before/at the rename leaves
|
|
// no object.
|
|
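// 4. put_file — a file source produces the same key and bytes as
// put_bytes of identical content (one shared object), the key equals
// the SHA-256 of the object actually published, and the `incoming-*`
// staging temp is removed on both the publish and dedup paths.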
|
|
// Exits 0 only if every assertion holds (process.assert aborts otherwise).
|
|
#import "modules/std.sx";
|
|
fs :: #import "modules/fs.sx";
|
|
hash :: #import "modules/std/hash.sx";
|
|
process :: #import "modules/process.sx";
|
|
#import "../src/store/store.sx";
|
|
|
|
// SHA-256("abc"), the FIPS 180-4 one-block known-answer vector.
|
|
ABC_SHA256 :: "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";
|
|
|
|
// Reference key for `s`: its SHA-256 as computed by std.hash (not by the
// store), returned as a heap string key of 64 hex characters.
stdhash_key :: (s: string) -> string {
    digest := hash.sha256_hex(s);
    // View the 64 hex characters of the digest as a string, then let
    // substr produce the key that is handed back to the caller.
    hex := string.{ ptr = @digest[0], len = 64 };
    return substr(hex, 0, 64);
}
|
|
|
|
// First 64 hex chars of `shasum -a 256` over `bytes`, computed via the shell.
//
// `bytes` is handed to printf inside single quotes, so spaces and shell
// metacharacters are hashed literally instead of being word-split (which
// would make printf reuse its '%s' format per word and hash different
// bytes) or interpreted by the shell. `bytes` must still not contain a
// single quote; the fixtures here are plain ASCII.
//
// Aborts via process.assert if the command cannot be run, exits non-zero,
// or prints fewer than 64 characters.
shasum_key :: (bytes: string) -> string {
    cmd := concat("printf '%s' '", concat(bytes, "' | shasum -a 256"));
    r := process.run(cmd);
    process.assert(r != null, "shasum -a 256 must run");
    res := r!;
    process.assert(res.exit_code == 0, "shasum -a 256 must exit 0");
    // Make a truncated or empty output a named failure rather than a bad slice.
    process.assert(res.stdout.len >= 64, "shasum -a 256 must print a 64-char digest");
    return substr(res.stdout, 0, 64);
}
|
|
|
|
// Counts the directory entries under `dir` by running `ls -1 | wc -l` and
// keeping only the digits of the output. The count is returned as the
// decimal string the shell printed, not as an integer.
entry_count :: (dir: string) -> string {
    listing := concat("ls -1 ", dir);
    pipeline := concat(listing, " | wc -l | tr -dc '0-9'");
    ran := process.run(pipeline);
    process.assert(ran != null, "ls/wc must run");
    out := ran!;
    return out.stdout;
}
|
|
|
|
// Counts the `put_file` staging temps (entries named `incoming-*`) left
// under `dir`, as a decimal string. "0" means every file-source put removed
// its staging copy. Errors from `ls` are discarded, and an empty output is
// reported as "0".
incoming_count :: (dir: string) -> string {
    filter := " 2>/dev/null | grep -c '^incoming-' | tr -dc '0-9'";
    pipeline := concat("ls -1 ", concat(dir, filter));
    ran := process.run(pipeline);
    process.assert(ran != null, "ls/grep must run");
    out := ran!;
    if out.stdout.len != 0 { return out.stdout; }
    return "0";
}
|
|
|
|
// Acceptance driver: builds a fresh store under `.sx-tmp/store-cas`, checks
// content addressing, dedup, atomicity and put_file in turn, then removes
// the store root and the fixture source file. Returns 0 when every
// assertion holds; process.assert aborts the run otherwise.
main :: () -> s32 {
    root := ".sx-tmp/store-cas";
    process.run(concat("rm -rf ", root)); // fresh root, even after a crashed prior run

    st := Store.init(root);

    // ── 1. put + content addressing ─────────────────────────────────────
    fixture := "abc";
    key, e := st.put_bytes(fixture);
    process.assert(!e, "put_bytes(abc) must succeed");
    process.assert(key == ABC_SHA256, "key must equal pinned SHA-256(abc) vector");
    process.assert(key == stdhash_key(fixture), "store key must equal std.hash digest");
    process.assert(key == shasum_key(fixture), "store key must equal shasum -a 256");
    print(" store == std.hash == shasum == vector: {}\n", key);

    process.assert(st.has(key), "object must exist at objects/<sha256>");
    stored := fs.read_file(st.object_path(key));
    process.assert(stored != null, "stored object must be readable");
    process.assert(stored! == fixture, "stored bytes must equal the input");

    // ── 2. dedup: same bytes, one object, never rewritten ───────────────
    // Overwrite the object on disk; a deduped re-put must NOT touch it.
    process.assert(fs.write_file(st.object_path(key), "TAMPERED"), "tamper write must succeed");
    key2, e2 := st.put_bytes(fixture);
    process.assert(!e2, "second put_bytes must succeed");
    process.assert(key2 == key, "dedup: identical bytes yield the same key");
    after := fs.read_file(st.object_path(key));
    // Check readability before unwrapping so a failed read is a named failure.
    process.assert(after != null, "dedup: object must still be readable after re-put");
    process.assert(after! == "TAMPERED", "dedup: existing object must not be rewritten");
    process.assert(entry_count(st.objects_dir()) == "1", "dedup: exactly one object stored");
    // Restore the real bytes so the store is left consistent.
    process.assert(fs.write_file(st.object_path(key), fixture), "restore write must succeed");
    print(" dedup: one object, copy skipped on re-put\n");

    // ── 3. atomicity: staged write is invisible until publish ───────────
    pending := "interrupted-upload-bytes";
    pkey := stdhash_key(pending);
    process.assert(!st.has(pkey), "fresh store: pending object must be absent");
    sp, se := st.stage_write(pkey, pending);
    process.assert(!se, "stage_write must succeed");
    process.assert(fs.exists(sp), "staged file must exist after stage_write");
    process.assert(!st.has(pkey), "atomicity: object must NOT exist before the rename");

    // A publish whose staging source is missing fails and creates nothing.
    missing := "1111111111111111111111111111111111111111111111111111111111111111";
    process.assert(!st.has(missing), "precondition: no object for the missing key");
    failed := false;
    st.publish(st.staging_path(missing), missing) catch { failed = true; };
    process.assert(failed, "publish of a missing staging file must fail");
    process.assert(!st.has(missing), "failed publish must leave no object");
    print(" atomicity: staged write invisible; failed publish leaves no object\n");

    // ── 4. put_file: single source read, key == digest of published object
    src := ".sx-tmp/store-cas-src.bin";
    file_bytes := "file-source-bytes-123"; // shell-safe: no spaces/newlines
    process.assert(fs.write_file(src, file_bytes), "fixture source file must be written");

    fkey, fe := st.put_file(src);
    process.assert(!fe, "put_file must succeed");
    process.assert(st.has(fkey), "put_file object must be published");

    // The returned key must be the SHA-256 of the bytes ACTUALLY published —
    // re-hash the stored object and confirm it equals the key (and equals
    // std.hash + shasum -a 256 of the original fixture).
    fstored := fs.read_file(st.object_path(fkey));
    process.assert(fstored != null, "published object must be readable");
    process.assert(fstored! == file_bytes, "put_file stored bytes must equal the file");
    process.assert(stdhash_key(fstored!) == fkey, "key must equal SHA-256 of the published object");
    process.assert(fkey == stdhash_key(file_bytes), "put_file key must equal std.hash of the file bytes");
    process.assert(fkey == shasum_key(file_bytes), "put_file key must equal shasum -a 256");
    process.assert(incoming_count(st.staging_dir()) == "0", "put_file must clean up its staging temp");
    // Baseline object count for the dedup checks below.
    objs_after_file := entry_count(st.objects_dir());
    print(" put_file: key {} == digest(published object)\n", fkey);

    // Cross-path dedup: put_bytes of identical content yields the SAME key
    // and adds no second object; the stored bytes are not rewritten.
    bkey, be := st.put_bytes(file_bytes);
    process.assert(!be, "cross-path put_bytes must succeed");
    process.assert(bkey == fkey, "put_file and put_bytes of identical content share a key");
    process.assert(entry_count(st.objects_dir()) == objs_after_file, "cross-path dedup adds no object");
    afterb := fs.read_file(st.object_path(fkey));
    // Check readability before unwrapping so a failed read is a named failure.
    process.assert(afterb != null, "cross-path dedup: object must still be readable");
    process.assert(afterb! == file_bytes, "cross-path dedup must not rewrite the object");

    // A repeat put_file hits dedup and also drops its staging temp.
    fkey2, fe2 := st.put_file(src);
    process.assert(!fe2, "repeat put_file must succeed");
    process.assert(fkey2 == fkey, "repeat put_file dedup yields the same key");
    process.assert(entry_count(st.objects_dir()) == objs_after_file, "repeat put_file adds no object");
    process.assert(incoming_count(st.staging_dir()) == "0", "dedup put_file must clean up its staging temp");
    print(" put_file: cross-path dedup, one object, staging cleaned\n");

    // ── cleanup ─────────────────────────────────────────────────────────
    process.run(concat("rm -rf ", root));
    fs.delete_file(src);

    print("store_content_addressed: ALL CASES PASS\n");
    return 0;
}
|