P2.2: content-addressed artifact store (staging -> atomic move, dedup)

Local blob store under src/store/, the first real consumer of std.hash.
Objects are addressed by lowercase-hex SHA-256: the digest is the storage
key and bytes live at <root>/objects/<sha256>.

- put_bytes / put_file compute the digest via std.hash, write to a
  staging file, then atomically rename into objects/<sha256>. The rename
  is the only step that publishes, so an interrupted/failed write never
  leaves a torn object at the final path.
- Dedup: an already-published object short-circuits without re-staging.
- stage_write/stage_copy + publish expose the two phases for the test.

tests/store_content_addressed.sx asserts the storage key equals std.hash,
an independent `shasum -a 256`, and the pinned SHA-256("abc") vector;
that dedup stores one object and never rewrites it; that a staged write
is invisible until publish and a failed publish leaves no object; and
that put_file round-trips bytes. Gate: make build + make test both green.
This commit is contained in:
agra
2026-06-06 00:34:21 +03:00
parent b552958378
commit 68c002ab06
3 changed files with 240 additions and 0 deletions

117
src/store/store.sx Normal file
View File

@@ -0,0 +1,117 @@
// =====================================================================
// store.sx — content-addressed blob store (subplan 02, Slice 3).
//
// Objects are addressed by the lowercase-hex SHA-256 of their bytes:
// the digest IS the storage key, and the bytes live at
// `<root>/objects/<digest>`. This key is what populates an
// Artifact.sha256 / Artifact.storage_key at the domain boundary.
//
// Publish is a two-phase write: bytes are first written to
// `<root>/staging/<key>`, then atomically renamed into
// `<root>/objects/<key>`. The rename is the only operation that makes an
// object visible at its final path, so an interrupted or failed write
// never leaves a torn object — a half-written staging file is not
// reachable as `objects/<key>`. Staging and objects both live under
// `<root>`, so the rename stays on one filesystem and is atomic — provided
// neither directory is itself a separate mount point.
//
// Dedup: identical bytes hash to the same key, so a put whose object
// already exists returns immediately without re-staging or rewriting.
// =====================================================================
#import "modules/std.sx";
fs :: #import "modules/fs.sx";
hash :: #import "modules/std/hash.sx";
// Failure classes for a put, one per step of the pipeline:
//   Stage   — the staging step failed: the staging directory could not be
//             created, or the write/copy into `staging/<key>` failed
//             (raised by stage_write / stage_copy).
//   Publish — the objects directory could not be created, or the move of
//             the staged file into `objects/<key>` failed (raised by
//             publish).
//   Source  — the input file could not be hashed because the file hasher
//             returned null (raised by digest_of_file).
StoreErr :: error {
Stage,
Publish,
Source,
}
// Turn a 64-byte hex digest, received by value, into a `string` key.
//
// `d` is this function's own copy of the array, so a string view pointing
// at it is only usable while this call is running. The view is therefore
// copied out through `substr` (whole range, 0..64) before returning; the
// result is what callers keep and use as a path component.
digest_to_key :: (d: [64]u8) -> string {
    stack_view := string.{ ptr = @d[0], len = 64 };
    return substr(stack_view, 0, 64);
}
// Storage key for an in-memory buffer: hash `bytes` with
// `hash.sha256_hex` and hand the resulting digest to `digest_to_key`
// so the caller gets a `string` key rather than a by-value array.
digest_of_bytes :: (bytes: string) -> string {
    return digest_to_key(hash.sha256_hex(bytes));
}
// Storage key for the file at `path`.
//
// `hash.sha256_file` yields null when it cannot produce a digest; that is
// reported to the caller as `StoreErr.Source`. Otherwise the digest is
// unwrapped and converted to a `string` key via `digest_to_key`.
digest_of_file :: (path: string) -> (string, !StoreErr) {
    hashed := hash.sha256_file(path);
    if hashed == null { raise error.Source; }
    return digest_to_key(hashed!);
}
// Content-addressed blob store rooted at `root`.
//
// Layout: published objects live at `<root>/objects/<key>`, in-flight
// writes at `<root>/staging/<key>`. `key` is the lowercase-hex SHA-256 of
// the object's bytes, as produced by digest_of_bytes / digest_of_file.
Store :: struct {
    root: string;

    // Build a store handle over `root`. Nothing is touched on disk here;
    // the staging and objects directories are created on demand by
    // stage_write / stage_copy / publish.
    init :: (root: string) -> Store {
        return Store.{ root = root };
    }

    // Path helpers. All four are plain `path_join`s under `root`.
    objects_dir :: (self: *Store) -> string { return path_join(self.root, "objects"); }
    staging_dir :: (self: *Store) -> string { return path_join(self.root, "staging"); }
    object_path :: (self: *Store, key: string) -> string { return path_join(self.root, "objects", key); }
    staging_path :: (self: *Store, key: string) -> string { return path_join(self.root, "staging", key); }

    // True once something exists at `objects/<key>`, i.e. once `key` has
    // been published. Staged-but-unpublished data does not count.
    has :: (self: *Store, key: string) -> bool {
        return fs.exists(self.object_path(key));
    }

    // Phase 1 (in-memory source): write `bytes` to `staging/<key>` and
    // return that staging path. Nothing is placed under `objects/` here.
    // Raises `Stage` if the staging directory cannot be created or the
    // write fails.
    stage_write :: (self: *Store, key: string, bytes: string) -> (string, !StoreErr) {
        if !fs.create_dir_all(self.staging_dir()) { raise error.Stage; }
        sp := self.staging_path(key);
        if !fs.write_file(sp, bytes) { raise error.Stage; }
        return sp;
    }

    // Phase 1 (file source): copy the file at `src` to `staging/<key>` and
    // return that staging path. Raises `Stage` if the staging directory
    // cannot be created or the copy fails.
    stage_copy :: (self: *Store, key: string, src: string) -> (string, !StoreErr) {
        if !fs.create_dir_all(self.staging_dir()) { raise error.Stage; }
        sp := self.staging_path(key);
        if !fs.copy_file(src, sp) { raise error.Stage; }
        return sp;
    }

    // Phase 2: move the staged file `staged` to `objects/<key>`. This is
    // the only place an object appears at its final path. Raises `Publish`
    // if the objects directory cannot be created or the move fails.
    // NOTE(review): atomicity rests on `fs.move` being a same-filesystem
    // rename — presumed from the file header, confirm against modules/fs.sx.
    publish :: (self: *Store, staged: string, key: string) -> !StoreErr {
        if !fs.create_dir_all(self.objects_dir()) { raise error.Publish; }
        if !fs.move(staged, self.object_path(key)) { raise error.Publish; }
        return;
    }

    // Store in-memory bytes and return their storage key.
    //
    // Dedup: if `objects/<key>` already exists the key is returned at once,
    // with no staging write and no move. Otherwise the bytes are staged and
    // then published. Propagates `Stage` / `Publish` from the two phases.
    // NOTE(review): when publish fails the staged file is left in
    // `staging/`; no cleanup is attempted here.
    put_bytes :: (self: *Store, bytes: string) -> (string, !StoreErr) {
        key := digest_of_bytes(bytes);
        if self.has(key) { return key; }
        sp := try self.stage_write(key, bytes);
        try self.publish(sp, key);
        return key;
    }

    // Store the contents of the file at `path` and return the storage key
    // of the bytes that were actually stored.
    //
    // The source file is read twice: once to hash it (for the dedup check)
    // and once to copy it into staging. If the file is modified between
    // those two reads, the staged bytes no longer match the first digest.
    // Publishing them under that digest would create an object whose key is
    // not its SHA-256, and dedup would then keep that bad object forever.
    // To rule this out, the staged copy is re-hashed and published under
    // its own digest, which is also the key returned. For an unchanged
    // source this is the same key as the first hash. The price is one extra
    // read of the staged file.
    //
    // Raises `Source` if `path` cannot be hashed, `Stage` if the copy fails
    // or the staged copy cannot be hashed, `Publish` if the move fails.
    // NOTE(review): as in put_bytes, a failed publish leaves the staged
    // file in `staging/`.
    put_file :: (self: *Store, path: string) -> (string, !StoreErr) {
        key := try digest_of_file(path);
        if self.has(key) { return key; }
        sp := try self.stage_copy(key, path);

        // Hash what was really copied. `staging/<key>` is only a temporary
        // name; the final object name comes from the staged bytes.
        staged := hash.sha256_file(sp);
        if staged == null { raise error.Stage; }
        d := staged!;
        actual := digest_to_key(d);

        try self.publish(sp, actual);
        return actual;
    }
}