F1.2: std.hash zero-heap [64]u8 hex API + chunked file + pinned vectors
Make the SHA-256 digest path allocation-free (foundation heap-discipline):
- final() and sha256_hex() now return the 64-char lowercase hex digest as
a [64]u8 by value on the stack; the cstring(64) heap allocation is gone.
- sha256_file() streams the file in fixed 64KB stack chunks via open_file/
File.read/File.close (defer-closed on every path) instead of slurping it
with read_file; peak memory is O(chunk), not O(filesize).
Tests (compare via a zero-copy string view over the [64]u8):
- 0710 updated to the by-value API (output unchanged).
- 0711 known-answer vectors: "", "abc", NIST-56/112, padding boundaries
{0,55,56,57,63,64,65,119,120}, and 1000 / 1,000,000 'a' repeats, each
pinned to its published digest (cross-checked with shasum -a 256).
- 0712 streaming equivalence (one-shot == byte-at-a-time == split-mid-block
== split-on-boundary) plus sha256_file(temp) == in-memory digest.
src/ untouched. zig build && zig build test && tests/run_examples.sh green.
This commit is contained in:
@@ -3,15 +3,19 @@
|
|||||||
// Known-answer vectors (empty, "abc", the 112-byte NIST multi-block
|
// Known-answer vectors (empty, "abc", the 112-byte NIST multi-block
|
||||||
// vector) plus the streaming invariant: feeding the same bytes in
|
// vector) plus the streaming invariant: feeding the same bytes in
|
||||||
// several `update` chunks yields the same digest as the one-shot call.
|
// several `update` chunks yields the same digest as the one-shot call.
|
||||||
|
//
|
||||||
|
// The digest is a zero-heap `[64]u8` returned by value; tests build a
|
||||||
|
// `string` view over it (no copy) to compare against the pinned hex.
|
||||||
|
|
||||||
#import "modules/std.sx";
|
#import "modules/std.sx";
|
||||||
#import "modules/std/hash.sx";
|
#import "modules/std/hash.sx";
|
||||||
|
|
||||||
check :: (label: string, got: string, want: string) {
|
check :: (label: string, got: [64]u8, want: string) {
|
||||||
if got == want {
|
view := string.{ ptr = @got[0], len = 64 };
|
||||||
|
if view == want {
|
||||||
print("{}: ok\n", label);
|
print("{}: ok\n", label);
|
||||||
} else {
|
} else {
|
||||||
print("{}: FAIL got {} want {}\n", label, got, want);
|
print("{}: FAIL got {} want {}\n", label, view, want);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -31,7 +35,6 @@ main :: () {
|
|||||||
h.update("abcdefghbcdefghicdefghijdefghijke"); // 33 bytes
|
h.update("abcdefghbcdefghicdefghijdefghijke"); // 33 bytes
|
||||||
h.update("fghijklfghijklmghijklmnhijklmno"); // crosses block edge
|
h.update("fghijklfghijklmghijklmnhijklmno"); // crosses block edge
|
||||||
h.update("ijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu");
|
h.update("ijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu");
|
||||||
streamed := h.final();
|
check("stream-eq-oneshot", h.final(),
|
||||||
check("stream-eq-oneshot",
|
"cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1");
|
||||||
if streamed == sha256_hex(multi) then "yes" else "no", "yes");
|
|
||||||
}
|
}
|
||||||
|
|||||||
82
examples/0711-modules-sha256-vectors.sx
Normal file
82
examples/0711-modules-sha256-vectors.sx
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
// SHA-256 known-answer vectors for `modules/std/hash.sx`.
|
||||||
|
//
|
||||||
|
// Pins published digests for: the empty input, "abc", the two NIST
|
||||||
|
// multi-block sample vectors, the padding/length boundaries around the
|
||||||
|
// 56-byte (one-block-with-length) and 64-byte (block) edges, and the
|
||||||
|
// classic large repeats (1000 and 1,000,000 'a'). Each expected hex is
|
||||||
|
// hard-coded from FIPS 180-4 / NIST CAVP and cross-checked with
|
||||||
|
// `shasum -a 256`.
|
||||||
|
//
|
||||||
|
// The digest is a zero-heap `[64]u8`; we compare it via a `string` view
|
||||||
|
// (no copy). Repeat vectors are built by streaming an 'a'-filled stack
|
||||||
|
// buffer, so even the 1,000,000 case allocates nothing on the heap.
|
||||||
|
|
||||||
|
#import "modules/std.sx";
|
||||||
|
#import "modules/std/hash.sx";
|
||||||
|
|
||||||
|
check :: (label: string, got: [64]u8, want: string) {
|
||||||
|
view := string.{ ptr = @got[0], len = 64 };
|
||||||
|
if view == want {
|
||||||
|
print("{}: ok\n", label);
|
||||||
|
} else {
|
||||||
|
print("{}: FAIL got {} want {}\n", label, view, want);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Digest of `total` bytes of 'a', streamed in 1000-byte chunks so peak
|
||||||
|
// memory stays O(chunk) for the 1,000,000 case. `total == 0` yields the
|
||||||
|
// empty-input digest (the loop body never runs).
|
||||||
|
hash_a :: (total: s64) -> [64]u8 {
|
||||||
|
chunk : [1000]u8 = ---;
|
||||||
|
k := 0;
|
||||||
|
while k < 1000 { chunk[k] = 97; k += 1; } // 97 = 'a'
|
||||||
|
|
||||||
|
h := init();
|
||||||
|
remaining := total;
|
||||||
|
while remaining > 0 {
|
||||||
|
take := if remaining < 1000 then remaining else 1000;
|
||||||
|
h.update(string.{ ptr = @chunk[0], len = take });
|
||||||
|
remaining -= take;
|
||||||
|
}
|
||||||
|
h.final()
|
||||||
|
}
|
||||||
|
|
||||||
|
main :: () {
|
||||||
|
check("empty", sha256_hex(""),
|
||||||
|
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
|
||||||
|
check("abc", sha256_hex("abc"),
|
||||||
|
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad");
|
||||||
|
|
||||||
|
// NIST CAVP sample vectors (56-byte and 112-byte multi-block).
|
||||||
|
check("nist-56", sha256_hex("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"),
|
||||||
|
"248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1");
|
||||||
|
check("nist-112", sha256_hex("abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"),
|
||||||
|
"cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1");
|
||||||
|
|
||||||
|
// Padding/length boundaries around the 56- and 64-byte edges, using
|
||||||
|
// 'a' repeats so the boundary is exercised independently of content.
|
||||||
|
check("len-0", hash_a(0),
|
||||||
|
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
|
||||||
|
check("len-55", hash_a(55),
|
||||||
|
"9f4390f8d30c2dd92ec9f095b65e2b9ae9b0a925a5258e241c9f1e910f734318");
|
||||||
|
check("len-56", hash_a(56),
|
||||||
|
"b35439a4ac6f0948b6d6f9e3c6af0f5f590ce20f1bde7090ef7970686ec6738a");
|
||||||
|
check("len-57", hash_a(57),
|
||||||
|
"f13b2d724659eb3bf47f2dd6af1accc87b81f09f59f2b75e5c0bed6589dfe8c6");
|
||||||
|
check("len-63", hash_a(63),
|
||||||
|
"7d3e74a05d7db15bce4ad9ec0658ea98e3f06eeecf16b4c6fff2da457ddc2f34");
|
||||||
|
check("len-64", hash_a(64),
|
||||||
|
"ffe054fe7ae0cb6dc65c3af9b61d5209f439851db43d0ba5997337df154668eb");
|
||||||
|
check("len-65", hash_a(65),
|
||||||
|
"635361c48bb9eab14198e76ea8ab7f1a41685d6ad62aa9146d301d4f17eb0ae0");
|
||||||
|
check("len-119", hash_a(119),
|
||||||
|
"31eba51c313a5c08226adf18d4a359cfdfd8d2e816b13f4af952f7ea6584dcfb");
|
||||||
|
check("len-120", hash_a(120),
|
||||||
|
"2f3d335432c70b580af0e8e1b3674a7c020d683aa5f73aaaedfdc55af904c21c");
|
||||||
|
|
||||||
|
// Large repeats (FIPS 180-4 / classic vectors).
|
||||||
|
check("a-1000", hash_a(1000),
|
||||||
|
"41edece42d63e8d9bf515a9ba6932e1c20cbc9f5a5d134645adb5db1b9737ea3");
|
||||||
|
check("a-1000000", hash_a(1000000),
|
||||||
|
"cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0");
|
||||||
|
}
|
||||||
76
examples/0712-modules-sha256-streaming.sx
Normal file
76
examples/0712-modules-sha256-streaming.sx
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
// SHA-256 streaming-equivalence + file hashing for `modules/std/hash.sx`.
|
||||||
|
//
|
||||||
|
// The chunk boundary must not affect the result: feeding the same bytes
|
||||||
|
// one-shot, one byte at a time, split mid-block, and split exactly on a
|
||||||
|
// 64-byte block boundary all yield the same digest, anchored to a pinned
|
||||||
|
// value. Then `sha256_file` of a written temp file must equal the
|
||||||
|
// in-memory digest of the same bytes — the streaming file path agrees
|
||||||
|
// with the buffered path.
|
||||||
|
//
|
||||||
|
// All comparisons go through `string` views over the zero-heap `[64]u8`
|
||||||
|
// digests; the byte/split updates view directly into the input buffer
|
||||||
|
// (no `substr`, no copies).
|
||||||
|
|
||||||
|
#import "modules/std.sx";
|
||||||
|
#import "modules/std/hash.sx";
|
||||||
|
#import "modules/fs.sx";
|
||||||
|
|
||||||
|
// 112-byte NIST multi-block vector — long enough that a 64-byte split is
|
||||||
|
// a genuine block boundary and a 30-byte split lands mid-block.
|
||||||
|
MSG :: "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
|
||||||
|
PIN :: "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1";
|
||||||
|
|
||||||
|
check :: (label: string, got: [64]u8, want: string) {
|
||||||
|
view := string.{ ptr = @got[0], len = 64 };
|
||||||
|
if view == want {
|
||||||
|
print("{}: ok\n", label);
|
||||||
|
} else {
|
||||||
|
print("{}: FAIL got {} want {}\n", label, view, want);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
report :: (label: string, ok: bool) {
|
||||||
|
if ok { print("{}: ok\n", label); } else { print("{}: FAIL\n", label); }
|
||||||
|
}
|
||||||
|
|
||||||
|
// Absorb `data` one byte at a time (views of length 1 into the buffer).
|
||||||
|
stream_by_byte :: (data: string) -> [64]u8 {
|
||||||
|
h := init();
|
||||||
|
i := 0;
|
||||||
|
while i < data.len {
|
||||||
|
h.update(string.{ ptr = @data[i], len = 1 });
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
h.final()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Absorb `data` as two updates split at `at` (views into the buffer).
|
||||||
|
stream_split :: (data: string, at: s64) -> [64]u8 {
|
||||||
|
h := init();
|
||||||
|
h.update(string.{ ptr = @data[0], len = at });
|
||||||
|
h.update(string.{ ptr = @data[at], len = data.len - at });
|
||||||
|
h.final()
|
||||||
|
}
|
||||||
|
|
||||||
|
main :: () {
|
||||||
|
check("oneshot-pinned", sha256_hex(MSG), PIN);
|
||||||
|
check("byte-at-a-time", stream_by_byte(MSG), PIN);
|
||||||
|
check("split-mid-block", stream_split(MSG, 30), PIN); // 30: mid first block
|
||||||
|
check("split-on-boundary", stream_split(MSG, 64), PIN); // 64: exact block edge
|
||||||
|
|
||||||
|
// sha256_file (streaming) must equal the in-memory digest.
|
||||||
|
path := "/tmp/sx_0712_stream.bin";
|
||||||
|
if !write_file(path, MSG) { print("file-write: FAIL\n"); return; }
|
||||||
|
|
||||||
|
maybe := sha256_file(path);
|
||||||
|
if maybe == null { print("file-eq-memory: FAIL (open)\n"); return; }
|
||||||
|
file_digest := maybe!;
|
||||||
|
mem_digest := sha256_hex(MSG);
|
||||||
|
|
||||||
|
fv := string.{ ptr = @file_digest[0], len = 64 };
|
||||||
|
mv := string.{ ptr = @mem_digest[0], len = 64 };
|
||||||
|
report("file-eq-memory", fv == mv);
|
||||||
|
check("file-pinned", file_digest, PIN);
|
||||||
|
|
||||||
|
delete_file(path);
|
||||||
|
}
|
||||||
1
examples/expected/0711-modules-sha256-vectors.exit
Normal file
1
examples/expected/0711-modules-sha256-vectors.exit
Normal file
@@ -0,0 +1 @@
|
|||||||
|
0
|
||||||
1
examples/expected/0711-modules-sha256-vectors.stderr
Normal file
1
examples/expected/0711-modules-sha256-vectors.stderr
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
15
examples/expected/0711-modules-sha256-vectors.stdout
Normal file
15
examples/expected/0711-modules-sha256-vectors.stdout
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
empty: ok
|
||||||
|
abc: ok
|
||||||
|
nist-56: ok
|
||||||
|
nist-112: ok
|
||||||
|
len-0: ok
|
||||||
|
len-55: ok
|
||||||
|
len-56: ok
|
||||||
|
len-57: ok
|
||||||
|
len-63: ok
|
||||||
|
len-64: ok
|
||||||
|
len-65: ok
|
||||||
|
len-119: ok
|
||||||
|
len-120: ok
|
||||||
|
a-1000: ok
|
||||||
|
a-1000000: ok
|
||||||
1
examples/expected/0712-modules-sha256-streaming.exit
Normal file
1
examples/expected/0712-modules-sha256-streaming.exit
Normal file
@@ -0,0 +1 @@
|
|||||||
|
0
|
||||||
1
examples/expected/0712-modules-sha256-streaming.stderr
Normal file
1
examples/expected/0712-modules-sha256-streaming.stderr
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
6
examples/expected/0712-modules-sha256-streaming.stdout
Normal file
6
examples/expected/0712-modules-sha256-streaming.stdout
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
oneshot-pinned: ok
|
||||||
|
byte-at-a-time: ok
|
||||||
|
split-mid-block: ok
|
||||||
|
split-on-boundary: ok
|
||||||
|
file-eq-memory: ok
|
||||||
|
file-pinned: ok
|
||||||
@@ -7,17 +7,27 @@
|
|||||||
// `& MASK32`, so the result is identical regardless of the host's
|
// `& MASK32`, so the result is identical regardless of the host's
|
||||||
// native integer width or overflow behaviour.
|
// native integer width or overflow behaviour.
|
||||||
//
|
//
|
||||||
|
// Zero-heap: the digest path never touches `context.allocator`. The
|
||||||
|
// hash is a fixed `[64]u8` of lowercase hex returned by value on the
|
||||||
|
// stack, and file hashing streams the input in fixed-size chunks, so
|
||||||
|
// peak memory is O(chunk) regardless of file size.
|
||||||
|
//
|
||||||
// Streaming API (the by-value `init` / `*self` pattern):
|
// Streaming API (the by-value `init` / `*self` pattern):
|
||||||
//
|
//
|
||||||
// h := hash.init(); // Sha256, stack-local
|
// h := hash.init(); // Sha256, stack-local
|
||||||
// h.update("hello, "); // absorb across calls
|
// h.update("hello, "); // absorb across calls
|
||||||
// h.update("world");
|
// h.update("world");
|
||||||
// digest := h.final(); // 64-char lowercase hex
|
// digest := h.final(); // [64]u8, 64-char lowercase hex, by value
|
||||||
//
|
//
|
||||||
// One-shot convenience:
|
// One-shot convenience:
|
||||||
//
|
//
|
||||||
// digest := hash.sha256_hex("abc");
|
// digest := hash.sha256_hex("abc"); // [64]u8 by value
|
||||||
// digest := hash.sha256_file("path\0"); // ?string, null on I/O error
|
// digest := hash.sha256_file("path\0"); // ?[64]u8, null on I/O error
|
||||||
|
//
|
||||||
|
// To print or compare a digest, build a `string` VIEW over it (no copy):
|
||||||
|
//
|
||||||
|
// d := hash.sha256_hex("abc");
|
||||||
|
// view := string.{ ptr = @d[0], len = 64 };
|
||||||
// =====================================================================
|
// =====================================================================
|
||||||
|
|
||||||
#import "modules/std.sx";
|
#import "modules/std.sx";
|
||||||
@@ -127,8 +137,9 @@ Sha256 :: struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Finish: apply FIPS padding and emit the 32-byte digest as 64
|
// Finish: apply FIPS padding and emit the 32-byte digest as 64
|
||||||
// lowercase hex characters. The state is consumed by this call.
|
// lowercase hex characters in a stack `[64]u8`, returned by value.
|
||||||
final :: (self: *Sha256) -> string {
|
// The state is consumed by this call. No heap allocation.
|
||||||
|
final :: (self: *Sha256) -> [64]u8 {
|
||||||
bit_len := self.total_len * 8;
|
bit_len := self.total_len * 8;
|
||||||
|
|
||||||
// 0x80 terminator, then zero-pad until 56 bytes mod 64.
|
// 0x80 terminator, then zero-pad until 56 bytes mod 64.
|
||||||
@@ -156,7 +167,7 @@ Sha256 :: struct {
|
|||||||
self.process_block();
|
self.process_block();
|
||||||
self.buf_len = 0;
|
self.buf_len = 0;
|
||||||
|
|
||||||
digest := cstring(64);
|
digest : [64]u8 = ---;
|
||||||
i := 0;
|
i := 0;
|
||||||
while i < 8 {
|
while i < 8 {
|
||||||
word := self.h[i] & MASK32;
|
word := self.h[i] & MASK32;
|
||||||
@@ -196,16 +207,32 @@ init :: () -> Sha256 {
|
|||||||
s
|
s
|
||||||
}
|
}
|
||||||
|
|
||||||
// One-shot: digest of a single buffer as 64-char lowercase hex.
|
// One-shot: digest of a single buffer as 64-char lowercase hex,
|
||||||
sha256_hex :: (data: string) -> string {
|
// returned by value. No heap allocation.
|
||||||
|
sha256_hex :: (data: string) -> [64]u8 {
|
||||||
h := init();
|
h := init();
|
||||||
h.update(data);
|
h.update(data);
|
||||||
h.final()
|
h.final()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Digest of a file's contents. Returns null if the file can't be read.
|
// Digest of a file's contents, returned by value. Streams the file in
|
||||||
sha256_file :: (path: [:0]u8) -> ?string {
|
// fixed 64KB chunks, so peak memory is O(chunk) even for multi-hundred-
|
||||||
content := read_file(path);
|
// MB artifacts. Returns null if the file can't be opened or a read fails. No heap
|
||||||
if content == null { return null; }
|
// allocation: the chunk buffer is a stack array.
|
||||||
sha256_hex(content!)
|
sha256_file :: (path: [:0]u8) -> ?[64]u8 {
|
||||||
|
handle := open_file(path, .read);
|
||||||
|
if handle == null { return null; }
|
||||||
|
file := handle!;
|
||||||
|
defer file.close();
|
||||||
|
|
||||||
|
h := init();
|
||||||
|
chunk : [65536]u8 = ---;
|
||||||
|
reading := true;
|
||||||
|
while reading {
|
||||||
|
n := file.read(string.{ ptr = @chunk[0], len = 65536 });
|
||||||
|
if n < 0 { return null; }
|
||||||
|
if n == 0 { reading = false; }
|
||||||
|
if n > 0 { h.update(string.{ ptr = @chunk[0], len = n }); }
|
||||||
|
}
|
||||||
|
h.final()
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user