F1.2: std.hash zero-heap [64]u8 hex API + chunked file + pinned vectors

Make the SHA-256 digest path allocation-free (foundation heap-discipline):

- final() and sha256_hex() now return the 64-char lowercase hex digest as
  a [64]u8 by value on the stack; the cstring(64) heap allocation is gone.
- sha256_file() streams the file in fixed 64KB stack chunks via open_file/
  File.read/File.close (defer-closed on every path) instead of slurping it
  with read_file; peak memory is O(chunk), not O(filesize).

Tests (compare via a zero-copy string view over the [64]u8):
- 0710 updated to the by-value API (output unchanged).
- 0711 known-answer vectors: "", "abc", NIST-56/112, padding boundaries
  {0,55,56,57,63,64,65,119,120}, and 1000 / 1,000,000 'a' repeats, each
  pinned to its published digest (cross-checked with shasum -a 256).
- 0712 streaming equivalence (one-shot == byte-at-a-time == split-mid-block
  == split-on-boundary) plus sha256_file(temp) == in-memory digest.

src/ untouched. zig build && zig build test && tests/run_examples.sh green.
This commit is contained in:
agra
2026-06-04 00:08:46 +03:00
parent ee1e097335
commit f9bc593bb8
10 changed files with 232 additions and 19 deletions

View File

@@ -3,15 +3,19 @@
// Known-answer vectors (empty, "abc", the 112-byte NIST multi-block
// vector) plus the streaming invariant: feeding the same bytes in
// several `update` chunks yields the same digest as the one-shot call.
//
// The digest is a zero-heap `[64]u8` returned by value; tests build a
// `string` view over it (no copy) to compare against the pinned hex.
#import "modules/std.sx";
#import "modules/std/hash.sx";
check :: (label: string, got: string, want: string) {
if got == want {
check :: (label: string, got: [64]u8, want: string) {
view := string.{ ptr = @got[0], len = 64 };
if view == want {
print("{}: ok\n", label);
} else {
print("{}: FAIL got {} want {}\n", label, got, want);
print("{}: FAIL got {} want {}\n", label, view, want);
}
}
@@ -31,7 +35,6 @@ main :: () {
h.update("abcdefghbcdefghicdefghijdefghijke"); // 33 bytes
h.update("fghijklfghijklmghijklmnhijklmno"); // crosses block edge
h.update("ijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu");
streamed := h.final();
check("stream-eq-oneshot",
if streamed == sha256_hex(multi) then "yes" else "no", "yes");
check("stream-eq-oneshot", h.final(),
"cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1");
}

View File

@@ -0,0 +1,82 @@
// SHA-256 known-answer vectors for `modules/std/hash.sx`.
//
// Pins published digests for: the empty input, "abc", the two NIST
// multi-block sample vectors, the padding/length boundaries around the
// 56-byte (one-block-with-length) and 64-byte (block) edges, and the
// classic large repeats (1000 and 1,000,000 'a'). Each expected hex is
// hard-coded from FIPS 180-4 / NIST CAVP and cross-checked with
// `shasum -a 256`.
//
// The digest is a zero-heap `[64]u8`; we compare it via a `string` view
// (no copy). Repeat vectors are built by streaming an 'a'-filled stack
// buffer, so even the 1,000,000 case allocates nothing on the heap.
#import "modules/std.sx";
#import "modules/std/hash.sx";
// Compare the by-value `[64]u8` hex digest `got` against the expected hex
// text `want` and print a single `ok` / `FAIL` line tagged with `label`.
check :: (label: string, got: [64]u8, want: string) {
    // Wrap the 64 digest bytes in a `string` so `==` and `{}` apply to them.
    hex := string.{ ptr = @got[0], len = 64 };
    if hex == want {
        print("{}: ok\n", label);
        return;
    }
    print("{}: FAIL got {} want {}\n", label, hex, want);
}
// Hash `total` copies of the byte 'a' and return the hex digest. The input
// is fed from a single 1000-byte buffer in pieces of at most 1000 bytes, so
// the 1,000,000-byte vector never needs a buffer of its own size. When
// `total` is 0 the feeding loop does not execute and the result is the
// digest of the empty input.
hash_a :: (total: s64) -> [64]u8 {
    block : [1000]u8 = ---;
    i := 0;
    while i < 1000 {
        block[i] = 97; // 97 = 'a'
        i += 1;
    }

    hasher := init();
    left := total;
    while left > 0 {
        // Feed whatever is left, capped at the buffer size.
        n := left;
        if n > 1000 { n = 1000; }
        hasher.update(string.{ ptr = @block[0], len = n });
        left -= n;
    }
    hasher.final()
}
// Run every known-answer vector in a fixed order; each `check` call prints
// one `<label>: ok` or `<label>: FAIL ...` line.
main :: () {
    // Digest of zero input bytes; expected from both the one-shot call on
    // "" and from hash_a(0).
    empty_hex := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";

    // NIST CAVP sample messages (56-byte and 112-byte multi-block).
    msg_56 := "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
    msg_112 := "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";

    check("empty", sha256_hex(""), empty_hex);
    check("abc", sha256_hex("abc"),
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad");
    check("nist-56", sha256_hex(msg_56),
        "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1");
    check("nist-112", sha256_hex(msg_112),
        "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1");

    // Lengths on either side of the 56- and 64-byte edges (and their
    // second-block counterparts 119/120), all built from 'a' repeats so
    // only the length varies between cases.
    check("len-0", hash_a(0), empty_hex);
    check("len-55", hash_a(55),
        "9f4390f8d30c2dd92ec9f095b65e2b9ae9b0a925a5258e241c9f1e910f734318");
    check("len-56", hash_a(56),
        "b35439a4ac6f0948b6d6f9e3c6af0f5f590ce20f1bde7090ef7970686ec6738a");
    check("len-57", hash_a(57),
        "f13b2d724659eb3bf47f2dd6af1accc87b81f09f59f2b75e5c0bed6589dfe8c6");
    check("len-63", hash_a(63),
        "7d3e74a05d7db15bce4ad9ec0658ea98e3f06eeecf16b4c6fff2da457ddc2f34");
    check("len-64", hash_a(64),
        "ffe054fe7ae0cb6dc65c3af9b61d5209f439851db43d0ba5997337df154668eb");
    check("len-65", hash_a(65),
        "635361c48bb9eab14198e76ea8ab7f1a41685d6ad62aa9146d301d4f17eb0ae0");
    check("len-119", hash_a(119),
        "31eba51c313a5c08226adf18d4a359cfdfd8d2e816b13f4af952f7ea6584dcfb");
    check("len-120", hash_a(120),
        "2f3d335432c70b580af0e8e1b3674a7c020d683aa5f73aaaedfdc55af904c21c");

    // Large 'a' repeats: 1000 and 1,000,000 bytes.
    check("a-1000", hash_a(1000),
        "41edece42d63e8d9bf515a9ba6932e1c20cbc9f5a5d134645adb5db1b9737ea3");
    check("a-1000000", hash_a(1000000),
        "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0");
}

View File

@@ -0,0 +1,76 @@
// SHA-256 streaming-equivalence + file hashing for `modules/std/hash.sx`.
//
// The chunk boundary must not affect the result: feeding the same bytes
// one-shot, one byte at a time, split mid-block, and split exactly on a
// 64-byte block boundary all yield the same digest, anchored to a pinned
// value. Then `sha256_file` of a written temp file must equal the
// in-memory digest of the same bytes — the streaming file path agrees
// with the buffered path.
//
// All comparisons go through `string` views over the zero-heap `[64]u8`
// digests; the byte/split updates view directly into the input buffer
// (no `substr`, no copies).
#import "modules/std.sx";
#import "modules/std/hash.sx";
#import "modules/fs.sx";
// 112-byte NIST multi-block vector — long enough that a 64-byte split is
// a genuine block boundary and a 30-byte split lands mid-block.
MSG :: "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
PIN :: "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1";
// Compare the by-value `[64]u8` hex digest `got` against the expected hex
// text `want` and print a single `ok` / `FAIL` line tagged with `label`.
check :: (label: string, got: [64]u8, want: string) {
    // Wrap the 64 digest bytes in a `string` so `==` and `{}` apply to them.
    hex := string.{ ptr = @got[0], len = 64 };
    if hex == want {
        print("{}: ok\n", label);
        return;
    }
    print("{}: FAIL got {} want {}\n", label, hex, want);
}
// Print `<label>: ok` when `ok` is true and `<label>: FAIL` otherwise, for
// results that are already reduced to a boolean.
report :: (label: string, ok: bool) {
    if !ok {
        print("{}: FAIL\n", label);
        return;
    }
    print("{}: ok\n", label);
}
// Feed `data` to a fresh hasher with one `update` call per byte, each call
// taking a length-1 `string` view at the current offset, then return the
// final hex digest.
stream_by_byte :: (data: string) -> [64]u8 {
    hasher := init();
    count := data.len;
    pos := 0;
    while pos < count {
        hasher.update(string.{ ptr = @data[pos], len = 1 });
        pos += 1;
    }
    hasher.final()
}
// Feed `data` to a fresh hasher as exactly two `update` calls: the first
// `at` bytes, then the remaining `data.len - at` bytes. Both pieces are
// `string` views pointing into `data`.
// NOTE(review): `at` is not range-checked; `@data[at]` presumably needs
// 0 <= at < data.len — confirm. The callers in this file pass 30 and 64
// with a 112-byte message.
stream_split :: (data: string, at: s64) -> [64]u8 {
    hasher := init();
    head := string.{ ptr = @data[0], len = at };
    tail_len := data.len - at;
    tail := string.{ ptr = @data[at], len = tail_len };
    hasher.update(head);
    hasher.update(tail);
    hasher.final()
}
// Run the streaming-equivalence checks against the pinned digest, then
// write MSG to a temp file and verify that `sha256_file` of that file
// matches both the in-memory digest and the pinned value. Each check
// prints one `<label>: ok` / `<label>: FAIL ...` line.
main :: () {
    check("oneshot-pinned", sha256_hex(MSG), PIN);
    check("byte-at-a-time", stream_by_byte(MSG), PIN);
    check("split-mid-block", stream_split(MSG, 30), PIN); // 30: mid first block
    check("split-on-boundary", stream_split(MSG, 64), PIN); // 64: exact block edge

    // sha256_file (streaming) must equal the in-memory digest.
    path := "/tmp/sx_0712_stream.bin";
    if !write_file(path, MSG) { print("file-write: FAIL\n"); return; }

    maybe := sha256_file(path);
    if maybe == null {
        print("file-eq-memory: FAIL (open)\n");
        // The temp file was written successfully above; remove it here as
        // well so this failure path does not leave it behind in /tmp.
        delete_file(path);
        return;
    }
    file_digest := maybe!;
    mem_digest := sha256_hex(MSG);

    // Compare the two digests through `string` views over their bytes.
    fv := string.{ ptr = @file_digest[0], len = 64 };
    mv := string.{ ptr = @mem_digest[0], len = 64 };
    report("file-eq-memory", fv == mv);
    check("file-pinned", file_digest, PIN);

    delete_file(path);
}

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,15 @@
empty: ok
abc: ok
nist-56: ok
nist-112: ok
len-0: ok
len-55: ok
len-56: ok
len-57: ok
len-63: ok
len-64: ok
len-65: ok
len-119: ok
len-120: ok
a-1000: ok
a-1000000: ok

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,6 @@
oneshot-pinned: ok
byte-at-a-time: ok
split-mid-block: ok
split-on-boundary: ok
file-eq-memory: ok
file-pinned: ok