P2.2: content-addressed artifact store (staging -> atomic move, dedup)
Local blob store under src/store/, the first real consumer of std.hash.
Objects are addressed by lowercase-hex SHA-256: the digest is the storage
key and bytes live at <root>/objects/<sha256>.
- put_bytes / put_file compute the digest via std.hash, write to a
staging file, then atomically rename into objects/<sha256>. The rename
is the only step that publishes, so an interrupted/failed write never
leaves a torn object at the final path.
- Dedup: an already-published object short-circuits without re-staging.
- stage_write/stage_copy + publish expose the two phases for the test.
tests/store_content_addressed.sx asserts the storage key equals std.hash,
an independent `shasum -a 256`, and the pinned SHA-256("abc") vector;
that dedup stores one object and never rewrites it; that a staged write
is invisible until publish and a failed publish leaves no object; and
that put_file round-trips bytes. Gate: make build + make test both green.
This commit is contained in:
117
src/store/store.sx
Normal file
117
src/store/store.sx
Normal file
@@ -0,0 +1,117 @@
|
||||
// =====================================================================
|
||||
// store.sx — content-addressed blob store (subplan 02, Slice 3).
|
||||
//
|
||||
// Objects are addressed by the lowercase-hex SHA-256 of their bytes:
|
||||
// the digest IS the storage key, and the bytes live at
|
||||
// `<root>/objects/<digest>`. This key is what populates an
|
||||
// Artifact.sha256 / Artifact.storage_key at the domain boundary.
|
||||
//
|
||||
// Publish is a two-phase write: bytes are first written to
|
||||
// `<root>/staging/<key>`, then atomically renamed into
|
||||
// `<root>/objects/<key>`. The rename is the only operation that makes an
|
||||
// object visible at its final path, so an interrupted or failed write
|
||||
// never leaves a torn object — a half-written staging file is not
|
||||
// reachable as `objects/<key>`. Staging and objects share `<root>` (one
|
||||
// filesystem, assuming neither directory is a separate mount), so the rename is atomic.
|
||||
//
|
||||
// Dedup: identical bytes hash to the same key, so a put whose object
|
||||
// already exists returns immediately without re-staging or rewriting.
|
||||
// =====================================================================
|
||||
|
||||
#import "modules/std.sx";
|
||||
fs :: #import "modules/fs.sx";
|
||||
hash :: #import "modules/std/hash.sx";
|
||||
|
||||
// Ways a store operation can fail, one variant per phase of a put.
StoreErr :: error {
    // Staging failed: the staging directory could not be created, or the
    // bytes could not be written/copied into it.
    Stage,
    // Publishing failed: the objects directory could not be created, or
    // the move of the staged file into it did not succeed.
    Publish,
    // The input file could not be hashed (unreadable or missing source).
    Source,
}
|
||||
|
||||
// Turn a by-value `[64]u8` hex digest into a `string` key.
//
// `d` is this function's own copy of the digest, so a string view over it
// stops being valid when the function returns. The view is therefore only
// used as the input to `substr`, whose result (an owned, null-terminated
// copy, per the original note on this function) is what gets returned and
// is safe to keep and to use as a path component.
digest_to_key :: (d: [64]u8) -> string {
    return substr(string.{ ptr = @d[0], len = 64 }, 0, 64);
}
|
||||
|
||||
// Storage key for an in-memory buffer: the lowercase-hex SHA-256 of
// `bytes`, as produced by `hash.sha256_hex`, copied into a `string`.
digest_of_bytes :: (bytes: string) -> string {
    return digest_to_key(hash.sha256_hex(bytes));
}
|
||||
|
||||
// Storage key for the file at `path`: the SHA-256 of its contents as
// computed by `hash.sha256_file`, copied into a `string`.
//
// Raises `Source` when `hash.sha256_file` yields null, i.e. the file
// could not be hashed (the original note gives "can't be opened/read").
digest_of_file :: (path: string) -> (string, !StoreErr) {
    hashed := hash.sha256_file(path);
    if hashed == null { raise error.Source; }
    return digest_to_key(hashed!);
}
|
||||
|
||||
// Content-addressed blob store rooted at `root`.
//
// Layout: published objects live at `<root>/objects/<key>`, in-flight
// writes at `<root>/staging/<key>`, where `<key>` is the lowercase-hex
// SHA-256 of the bytes. A put stages first and then moves the staged file
// into `objects/`; the move is the only step that makes an object visible.
//
// NOTE(review): the staging path is derived from the key alone, so two
// concurrent puts of the same content share one staging file. Confirm
// callers are single-writer, or give staging files unique names.
// NOTE(review): when `publish` fails the staged file is left behind in
// `staging/`; nothing here removes it.
Store :: struct {
    root: string;

    // Build a store handle over `root`. No directories are created here;
    // they are created on demand by the stage/publish steps.
    init :: (root: string) -> Store {
        return Store.{ root = root };
    }

    // Directory holding published objects.
    objects_dir :: (self: *Store) -> string { return path_join(self.root, "objects"); }
    // Directory holding staged (not yet published) files.
    staging_dir :: (self: *Store) -> string { return path_join(self.root, "staging"); }
    // Final path of the object stored under `key`.
    object_path :: (self: *Store, key: string) -> string { return path_join(self.root, "objects", key); }
    // Staging path used while writing the object for `key`.
    staging_path :: (self: *Store, key: string) -> string { return path_join(self.root, "staging", key); }

    // True once something exists at `objects/<key>`, i.e. the object has
    // been published.
    has :: (self: *Store, key: string) -> bool {
        return fs.exists(self.object_path(key));
    }

    // Phase 1: write `bytes` to `staging/<key>` and return that staging
    // path. Nothing is visible at `objects/<key>` yet.
    // Raises `Stage` if the staging directory cannot be created or the
    // write fails.
    stage_write :: (self: *Store, key: string, bytes: string) -> (string, !StoreErr) {
        if !fs.create_dir_all(self.staging_dir()) { raise error.Stage; }
        sp := self.staging_path(key);
        if !fs.write_file(sp, bytes) { raise error.Stage; }
        return sp;
    }

    // Phase 1 (file source): copy the file at `src` to `staging/<key>`
    // and return that staging path.
    // Raises `Stage` if the staging directory cannot be created or the
    // copy fails.
    stage_copy :: (self: *Store, key: string, src: string) -> (string, !StoreErr) {
        if !fs.create_dir_all(self.staging_dir()) { raise error.Stage; }
        sp := self.staging_path(key);
        if !fs.copy_file(src, sp) { raise error.Stage; }
        return sp;
    }

    // Phase 2: move the staged file `staged` to `objects/<key>`. This is
    // the step that publishes the object.
    // Raises `Publish` if the objects directory cannot be created or the
    // move fails.
    publish :: (self: *Store, staged: string, key: string) -> !StoreErr {
        if !fs.create_dir_all(self.objects_dir()) { raise error.Publish; }
        if !fs.move(staged, self.object_path(key)) { raise error.Publish; }
        return;
    }

    // Store in-memory `bytes` and return their storage key.
    // Dedup: if the object is already published, the key is returned
    // without staging or publishing again.
    // Raises `Stage` / `Publish` from the respective phase.
    put_bytes :: (self: *Store, bytes: string) -> (string, !StoreErr) {
        key := digest_of_bytes(bytes);
        if self.has(key) { return key; }
        sp := try self.stage_write(key, bytes);
        try self.publish(sp, key);
        return key;
    }

    // Store the contents of the file at `path` and return their storage
    // key.
    //
    // The source is read twice (once to hash, once to copy), so it may
    // change in between. To keep the invariant that an object's key is the
    // SHA-256 of its bytes, the key used for publishing is recomputed from
    // the staged copy — the exact bytes that will be moved into
    // `objects/` — rather than trusted from the first read.
    //
    // Dedup: if the first hash is already published, it is returned
    // without staging.
    // Raises `Source` if the input (or the staged copy, since
    // `digest_of_file` is reused for it) cannot be hashed, and
    // `Stage` / `Publish` from the respective phase.
    put_file :: (self: *Store, path: string) -> (string, !StoreErr) {
        // First pass: hash the source. Used for the dedup check and as the
        // staging file name only.
        hint := try digest_of_file(path);
        if self.has(hint) { return hint; }
        sp := try self.stage_copy(hint, path);
        // Second pass: hash what was actually staged. This equals `hint`
        // unless the source changed between the two reads.
        key := try digest_of_file(sp);
        try self.publish(sp, key);
        return key;
    }
}
|
||||
Reference in New Issue
Block a user