F1.2: std.hash zero-heap [64]u8 hex API + chunked file + pinned vectors
Make the SHA-256 digest path allocation-free (foundation heap-discipline):
- final() and sha256_hex() now return the 64-char lowercase hex digest as
a [64]u8 by value on the stack; the cstring(64) heap allocation is gone.
- sha256_file() streams the file in fixed 64KB stack chunks via open_file/
File.read/File.close (defer-closed on every path) instead of slurping it
with read_file; peak memory is O(chunk), not O(filesize).
Tests (compare via a zero-copy string view over the [64]u8):
- 0710 updated to the by-value API (output unchanged).
- 0711 known-answer vectors: "", "abc", NIST-56/112, padding boundaries
{0,55,56,57,63,64,65,119,120}, and 1000 / 1,000,000 'a' repeats, each
pinned to its published digest (cross-checked with shasum -a 256).
- 0712 streaming equivalence (one-shot == byte-at-a-time == split-mid-block
== split-on-boundary) plus sha256_file(temp) == in-memory digest.
src/ untouched. zig build && zig build test && tests/run_examples.sh green.
This commit is contained in:
@@ -3,15 +3,19 @@
|
||||
// Known-answer vectors (empty, "abc", the 112-byte NIST multi-block
|
||||
// vector) plus the streaming invariant: feeding the same bytes in
|
||||
// several `update` chunks yields the same digest as the one-shot call.
|
||||
//
|
||||
// The digest is a zero-heap `[64]u8` returned by value; tests build a
|
||||
// `string` view over it (no copy) to compare against the pinned hex.
|
||||
|
||||
#import "modules/std.sx";
|
||||
#import "modules/std/hash.sx";
|
||||
|
||||
check :: (label: string, got: string, want: string) {
|
||||
if got == want {
|
||||
check :: (label: string, got: [64]u8, want: string) {
|
||||
view := string.{ ptr = @got[0], len = 64 };
|
||||
if view == want {
|
||||
print("{}: ok\n", label);
|
||||
} else {
|
||||
print("{}: FAIL got {} want {}\n", label, got, want);
|
||||
print("{}: FAIL got {} want {}\n", label, view, want);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,7 +35,6 @@ main :: () {
|
||||
h.update("abcdefghbcdefghicdefghijdefghijke"); // 33 bytes
|
||||
h.update("fghijklfghijklmghijklmnhijklmno"); // crosses block edge
|
||||
h.update("ijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu");
|
||||
streamed := h.final();
|
||||
check("stream-eq-oneshot",
|
||||
if streamed == sha256_hex(multi) then "yes" else "no", "yes");
|
||||
check("stream-eq-oneshot", h.final(),
|
||||
"cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1");
|
||||
}
|
||||
|
||||
82
examples/0711-modules-sha256-vectors.sx
Normal file
82
examples/0711-modules-sha256-vectors.sx
Normal file
@@ -0,0 +1,82 @@
|
||||
// SHA-256 known-answer vectors for `modules/std/hash.sx`.
|
||||
//
|
||||
// Pins published digests for: the empty input, "abc", the two NIST
|
||||
// multi-block sample vectors, the padding/length boundaries around the
|
||||
// 56-byte (one-block-with-length) and 64-byte (block) edges, and the
|
||||
// classic large repeats (1000 and 1,000,000 'a'). Each expected hex is
|
||||
// hard-coded from FIPS 180-4 / NIST CAVP and cross-checked with
|
||||
// `shasum -a 256`.
|
||||
//
|
||||
// The digest is a zero-heap `[64]u8`; we compare it via a `string` view
|
||||
// (no copy). Repeat vectors are built by streaming an 'a'-filled stack
|
||||
// buffer, so even the 1,000,000 case allocates nothing on the heap.
|
||||
|
||||
#import "modules/std.sx";
|
||||
#import "modules/std/hash.sx";
|
||||
|
||||
// Compare a `[64]u8` hex digest against the expected hex string and print
// either "<label>: ok" or a FAIL line showing both values. The comparison
// and the FAIL output go through a `string` view over `got` (no copy).
check :: (label: string, got: [64]u8, want: string) {
    hex := string.{ ptr = @got[0], len = 64 };
    if hex == want {
        print("{}: ok\n", label);
        return;
    }
    print("{}: FAIL got {} want {}\n", label, hex, want);
}
|
||||
|
||||
// Hex digest of `total` repetitions of the byte 'a'. The input is never
// built in full: a 1000-byte stack buffer of 'a' is fed to the hasher
// repeatedly, with a shorter final slice when fewer than 1000 bytes are
// left. When `total` is 0 the feed loop never runs, so the result is the
// digest of the empty input.
hash_a :: (total: s64) -> [64]u8 {
    // Fill the reusable buffer once.
    block : [1000]u8 = ---;
    i := 0;
    while i < 1000 {
        block[i] = 97; // 97 = 'a'
        i += 1;
    }

    hasher := init();
    left := total;
    while left > 0 {
        // Feed at most one full buffer per iteration.
        n := if left < 1000 then left else 1000;
        hasher.update(string.{ ptr = @block[0], len = n });
        left -= n;
    }
    hasher.final()
}
|
||||
|
||||
// Runs every pinned vector in a fixed order; each `check` prints one
// line, so the order here is the order of the output.
main :: () {
    // Shortest inputs.
    check("empty", sha256_hex(""),
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
    check("abc", sha256_hex("abc"),
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad");

    // NIST sample messages: 56 bytes and 112 bytes (multi-block).
    nist56 := "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
    check("nist-56", sha256_hex(nist56),
        "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1");
    nist112 := "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
    check("nist-112", sha256_hex(nist112),
        "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1");

    // Lengths on either side of the 56-byte padding edge and the 64-byte
    // block edge (and the same edges one block later), all-'a' content.
    check("len-0", hash_a(0),
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
    check("len-55", hash_a(55),
        "9f4390f8d30c2dd92ec9f095b65e2b9ae9b0a925a5258e241c9f1e910f734318");
    check("len-56", hash_a(56),
        "b35439a4ac6f0948b6d6f9e3c6af0f5f590ce20f1bde7090ef7970686ec6738a");
    check("len-57", hash_a(57),
        "f13b2d724659eb3bf47f2dd6af1accc87b81f09f59f2b75e5c0bed6589dfe8c6");
    check("len-63", hash_a(63),
        "7d3e74a05d7db15bce4ad9ec0658ea98e3f06eeecf16b4c6fff2da457ddc2f34");
    check("len-64", hash_a(64),
        "ffe054fe7ae0cb6dc65c3af9b61d5209f439851db43d0ba5997337df154668eb");
    check("len-65", hash_a(65),
        "635361c48bb9eab14198e76ea8ab7f1a41685d6ad62aa9146d301d4f17eb0ae0");
    check("len-119", hash_a(119),
        "31eba51c313a5c08226adf18d4a359cfdfd8d2e816b13f4af952f7ea6584dcfb");
    check("len-120", hash_a(120),
        "2f3d335432c70b580af0e8e1b3674a7c020d683aa5f73aaaedfdc55af904c21c");

    // Large 'a' repeats.
    check("a-1000", hash_a(1000),
        "41edece42d63e8d9bf515a9ba6932e1c20cbc9f5a5d134645adb5db1b9737ea3");
    check("a-1000000", hash_a(1000000),
        "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0");
}
|
||||
76
examples/0712-modules-sha256-streaming.sx
Normal file
76
examples/0712-modules-sha256-streaming.sx
Normal file
@@ -0,0 +1,76 @@
|
||||
// SHA-256 streaming-equivalence + file hashing for `modules/std/hash.sx`.
|
||||
//
|
||||
// The chunk boundary must not affect the result: feeding the same bytes
|
||||
// one-shot, one byte at a time, split mid-block, and split exactly on a
|
||||
// 64-byte block boundary all yield the same digest, anchored to a pinned
|
||||
// value. Then `sha256_file` of a written temp file must equal the
|
||||
// in-memory digest of the same bytes — the streaming file path agrees
|
||||
// with the buffered path.
|
||||
//
|
||||
// All comparisons go through `string` views over the zero-heap `[64]u8`
|
||||
// digests; the byte/split updates view directly into the input buffer
|
||||
// (no `substr`, no copies).
|
||||
|
||||
#import "modules/std.sx";
|
||||
#import "modules/std/hash.sx";
|
||||
#import "modules/fs.sx";
|
||||
|
||||
// 112-byte NIST multi-block vector — long enough that a 64-byte split is
|
||||
// a genuine block boundary and a 30-byte split lands mid-block.
|
||||
MSG :: "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
|
||||
PIN :: "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1";
|
||||
|
||||
// Compare a `[64]u8` hex digest against the expected hex string and print
// either "<label>: ok" or a FAIL line showing both values. The comparison
// and the FAIL output go through a `string` view over `got` (no copy).
check :: (label: string, got: [64]u8, want: string) {
    hex := string.{ ptr = @got[0], len = 64 };
    if hex == want {
        print("{}: ok\n", label);
        return;
    }
    print("{}: FAIL got {} want {}\n", label, hex, want);
}
|
||||
|
||||
// Print "<label>: ok" when `ok` is true, "<label>: FAIL" otherwise.
report :: (label: string, ok: bool) {
    if ok {
        print("{}: ok\n", label);
    } else {
        print("{}: FAIL\n", label);
    }
}
|
||||
|
||||
// Hash `data` by feeding it to the hasher one byte per `update` call.
// Each update receives a length-1 view pointing into `data` itself.
stream_by_byte :: (data: string) -> [64]u8 {
    hasher := init();
    pos := 0;
    while pos < data.len {
        one := string.{ ptr = @data[pos], len = 1 };
        hasher.update(one);
        pos += 1;
    }
    hasher.final()
}
|
||||
|
||||
// Hash `data` in exactly two `update` calls: the first `at` bytes, then
// the remaining `data.len - at` bytes. Both pieces are views into `data`.
// NOTE(review): `at` is not validated here; callers presumably keep it
// inside the buffer (the call sites in this file use 30 and 64 on a
// 112-byte message) — confirm before reusing with `at == data.len`.
stream_split :: (data: string, at: s64) -> [64]u8 {
    head := string.{ ptr = @data[0], len = at };
    tail := string.{ ptr = @data[at], len = data.len - at };

    hasher := init();
    hasher.update(head);
    hasher.update(tail);
    hasher.final()
}
|
||||
|
||||
// Streaming-equivalence checks followed by the file-hashing check.
//
// Part 1 hashes MSG one-shot, byte-at-a-time, split mid-block and split
// on the 64-byte block edge; every variant must equal PIN.
//
// Part 2 writes MSG to a temp file and requires `sha256_file` to agree
// with the in-memory digest and with PIN. The temp file is removed on
// every exit path once it has been written, including the early return
// taken when `sha256_file` yields null.
main :: () {
    check("oneshot-pinned", sha256_hex(MSG), PIN);
    check("byte-at-a-time", stream_by_byte(MSG), PIN);
    check("split-mid-block", stream_split(MSG, 30), PIN);   // 30: mid first block
    check("split-on-boundary", stream_split(MSG, 64), PIN); // 64: exact block edge

    // sha256_file (streaming) must equal the in-memory digest.
    path := "/tmp/sx_0712_stream.bin";
    if !write_file(path, MSG) { print("file-write: FAIL\n"); return; }

    // Registered only after a successful write, so there is a file to
    // delete; deferring it keeps the failure path below from leaving the
    // temp file behind.
    defer delete_file(path);

    maybe := sha256_file(path);
    if maybe == null { print("file-eq-memory: FAIL (open)\n"); return; }
    file_digest := maybe!;
    mem_digest := sha256_hex(MSG);

    // Compare the two by-value digests through string views (no copy).
    fv := string.{ ptr = @file_digest[0], len = 64 };
    mv := string.{ ptr = @mem_digest[0], len = 64 };
    report("file-eq-memory", fv == mv);
    check("file-pinned", file_digest, PIN);
}
|
||||
1
examples/expected/0711-modules-sha256-vectors.exit
Normal file
1
examples/expected/0711-modules-sha256-vectors.exit
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
1
examples/expected/0711-modules-sha256-vectors.stderr
Normal file
1
examples/expected/0711-modules-sha256-vectors.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
15
examples/expected/0711-modules-sha256-vectors.stdout
Normal file
15
examples/expected/0711-modules-sha256-vectors.stdout
Normal file
@@ -0,0 +1,15 @@
|
||||
empty: ok
|
||||
abc: ok
|
||||
nist-56: ok
|
||||
nist-112: ok
|
||||
len-0: ok
|
||||
len-55: ok
|
||||
len-56: ok
|
||||
len-57: ok
|
||||
len-63: ok
|
||||
len-64: ok
|
||||
len-65: ok
|
||||
len-119: ok
|
||||
len-120: ok
|
||||
a-1000: ok
|
||||
a-1000000: ok
|
||||
1
examples/expected/0712-modules-sha256-streaming.exit
Normal file
1
examples/expected/0712-modules-sha256-streaming.exit
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
1
examples/expected/0712-modules-sha256-streaming.stderr
Normal file
1
examples/expected/0712-modules-sha256-streaming.stderr
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
6
examples/expected/0712-modules-sha256-streaming.stdout
Normal file
6
examples/expected/0712-modules-sha256-streaming.stdout
Normal file
@@ -0,0 +1,6 @@
|
||||
oneshot-pinned: ok
|
||||
byte-at-a-time: ok
|
||||
split-mid-block: ok
|
||||
split-on-boundary: ok
|
||||
file-eq-memory: ok
|
||||
file-pinned: ok
|
||||
@@ -7,17 +7,27 @@
|
||||
// `& MASK32`, so the result is identical regardless of the host's
|
||||
// native integer width or overflow behaviour.
|
||||
//
|
||||
// Zero-heap: the digest path never touches `context.allocator`. The
|
||||
// hash is a fixed `[64]u8` of lowercase hex returned by value on the
|
||||
// stack, and file hashing streams the input in fixed-size chunks, so
|
||||
// peak memory is O(chunk) regardless of file size.
|
||||
//
|
||||
// Streaming API (the by-value `init` / `*self` pattern):
|
||||
//
|
||||
// h := hash.init(); // Sha256, stack-local
|
||||
// h.update("hello, "); // absorb across calls
|
||||
// h.update("world");
|
||||
// digest := h.final(); // 64-char lowercase hex
|
||||
// digest := h.final(); // [64]u8, 64-char lowercase hex, by value
|
||||
//
|
||||
// One-shot convenience:
|
||||
//
|
||||
// digest := hash.sha256_hex("abc");
|
||||
// digest := hash.sha256_file("path\0"); // ?string, null on I/O error
|
||||
// digest := hash.sha256_hex("abc"); // [64]u8 by value
|
||||
// digest := hash.sha256_file("path\0"); // ?[64]u8, null on I/O error
|
||||
//
|
||||
// To print or compare a digest, build a `string` VIEW over it (no copy):
|
||||
//
|
||||
// d := hash.sha256_hex("abc");
|
||||
// view := string.{ ptr = @d[0], len = 64 };
|
||||
// =====================================================================
|
||||
|
||||
#import "modules/std.sx";
|
||||
@@ -127,8 +137,9 @@ Sha256 :: struct {
|
||||
}
|
||||
|
||||
// Finish: apply FIPS padding and emit the 32-byte digest as 64
|
||||
// lowercase hex characters. The state is consumed by this call.
|
||||
final :: (self: *Sha256) -> string {
|
||||
// lowercase hex characters in a stack `[64]u8`, returned by value.
|
||||
// The state is consumed by this call. No heap allocation.
|
||||
final :: (self: *Sha256) -> [64]u8 {
|
||||
bit_len := self.total_len * 8;
|
||||
|
||||
// 0x80 terminator, then zero-pad until 56 bytes mod 64.
|
||||
@@ -156,7 +167,7 @@ Sha256 :: struct {
|
||||
self.process_block();
|
||||
self.buf_len = 0;
|
||||
|
||||
digest := cstring(64);
|
||||
digest : [64]u8 = ---;
|
||||
i := 0;
|
||||
while i < 8 {
|
||||
word := self.h[i] & MASK32;
|
||||
@@ -196,16 +207,32 @@ init :: () -> Sha256 {
|
||||
s
|
||||
}
|
||||
|
||||
// One-shot: digest of a single buffer as 64-char lowercase hex.
|
||||
sha256_hex :: (data: string) -> string {
|
||||
// One-shot: digest of a single buffer as 64-char lowercase hex,
|
||||
// returned by value. No heap allocation.
|
||||
sha256_hex :: (data: string) -> [64]u8 {
|
||||
h := init();
|
||||
h.update(data);
|
||||
h.final()
|
||||
}
|
||||
|
||||
// Digest of a file's contents. Returns null if the file can't be read.
|
||||
sha256_file :: (path: [:0]u8) -> ?string {
|
||||
content := read_file(path);
|
||||
if content == null { return null; }
|
||||
sha256_hex(content!)
|
||||
// Digest of a file's contents, returned by value. Streams the file in
|
||||
// fixed 64KB chunks, so peak memory is O(chunk) even for multi-hundred-
|
||||
// MB artifacts. Returns null if the file can't be opened or if a read
|
||||
// returns a negative count. No heap allocation: the chunk buffer is a stack array.
|
||||
sha256_file :: (path: [:0]u8) -> ?[64]u8 {
|
||||
handle := open_file(path, .read);
|
||||
if handle == null { return null; }
|
||||
file := handle!;
|
||||
defer file.close(); // registered before the loop so the early return below also closes the file
|
||||
|
||||
h := init();
|
||||
chunk : [65536]u8 = ---; // 65536 bytes = the 64KB read buffer, reused for every read
|
||||
reading := true;
|
||||
while reading {
|
||||
n := file.read(string.{ ptr = @chunk[0], len = 65536 });
|
||||
if n < 0 { return null; } // negative count: treated as a read failure, no digest produced
|
||||
if n == 0 { reading = false; } // zero bytes read: stop the loop
|
||||
if n > 0 { h.update(string.{ ptr = @chunk[0], len = n }); } // absorb only the n bytes actually read
|
||||
}
|
||||
h.final()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user