diff --git a/examples/0710-modules-sha256.sx b/examples/0710-modules-sha256.sx index 0ce8df8..e89602d 100644 --- a/examples/0710-modules-sha256.sx +++ b/examples/0710-modules-sha256.sx @@ -3,15 +3,19 @@ // Known-answer vectors (empty, "abc", the 112-byte NIST multi-block // vector) plus the streaming invariant: feeding the same bytes in // several `update` chunks yields the same digest as the one-shot call. +// +// The digest is a zero-heap `[64]u8` returned by value; tests build a +// `string` view over it (no copy) to compare against the pinned hex. #import "modules/std.sx"; #import "modules/std/hash.sx"; -check :: (label: string, got: string, want: string) { - if got == want { +check :: (label: string, got: [64]u8, want: string) { + view := string.{ ptr = @got[0], len = 64 }; + if view == want { print("{}: ok\n", label); } else { - print("{}: FAIL got {} want {}\n", label, got, want); + print("{}: FAIL got {} want {}\n", label, view, want); } } @@ -31,7 +35,6 @@ main :: () { h.update("abcdefghbcdefghicdefghijdefghijke"); // 33 bytes h.update("fghijklfghijklmghijklmnhijklmno"); // crosses block edge h.update("ijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"); - streamed := h.final(); - check("stream-eq-oneshot", - if streamed == sha256_hex(multi) then "yes" else "no", "yes"); + check("stream-eq-oneshot", h.final(), + "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1"); } diff --git a/examples/0711-modules-sha256-vectors.sx b/examples/0711-modules-sha256-vectors.sx new file mode 100644 index 0000000..2d5138d --- /dev/null +++ b/examples/0711-modules-sha256-vectors.sx @@ -0,0 +1,82 @@ +// SHA-256 known-answer vectors for `modules/std/hash.sx`. +// +// Pins published digests for: the empty input, "abc", the two NIST +// multi-block sample vectors, the padding/length boundaries around the +// 56-byte (one-block-with-length) and 64-byte (block) edges, and the +// classic large repeats (1000 and 1,000,000 'a'). 
Each expected hex is +// hard-coded from FIPS 180-4 / NIST CAVP and cross-checked with +// `shasum -a 256`. +// +// The digest is a zero-heap `[64]u8`; we compare it via a `string` view +// (no copy). Repeat vectors are built by streaming an 'a'-filled stack +// buffer, so even the 1,000,000 case allocates nothing on the heap. + +#import "modules/std.sx"; +#import "modules/std/hash.sx"; + +check :: (label: string, got: [64]u8, want: string) { + view := string.{ ptr = @got[0], len = 64 }; + if view == want { + print("{}: ok\n", label); + } else { + print("{}: FAIL got {} want {}\n", label, view, want); + } +} + +// Digest of `total` bytes of 'a', streamed in 1000-byte chunks so peak +// memory stays O(chunk) for the 1,000,000 case. `total == 0` yields the +// empty-input digest (the loop body never runs). +hash_a :: (total: s64) -> [64]u8 { + chunk : [1000]u8 = ---; + k := 0; + while k < 1000 { chunk[k] = 97; k += 1; } // 97 = 'a' + + h := init(); + remaining := total; + while remaining > 0 { + take := if remaining < 1000 then remaining else 1000; + h.update(string.{ ptr = @chunk[0], len = take }); + remaining -= take; + } + h.final() +} + +main :: () { + check("empty", sha256_hex(""), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + check("abc", sha256_hex("abc"), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"); + + // NIST CAVP sample vectors (56-byte and 112-byte multi-block). + check("nist-56", sha256_hex("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"), + "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"); + check("nist-112", sha256_hex("abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"), + "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1"); + + // Padding/length boundaries around the 56- and 64-byte edges, using + // 'a' repeats so the boundary is exercised independently of content. 
+ check("len-0", hash_a(0), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + check("len-55", hash_a(55), + "9f4390f8d30c2dd92ec9f095b65e2b9ae9b0a925a5258e241c9f1e910f734318"); + check("len-56", hash_a(56), + "b35439a4ac6f0948b6d6f9e3c6af0f5f590ce20f1bde7090ef7970686ec6738a"); + check("len-57", hash_a(57), + "f13b2d724659eb3bf47f2dd6af1accc87b81f09f59f2b75e5c0bed6589dfe8c6"); + check("len-63", hash_a(63), + "7d3e74a05d7db15bce4ad9ec0658ea98e3f06eeecf16b4c6fff2da457ddc2f34"); + check("len-64", hash_a(64), + "ffe054fe7ae0cb6dc65c3af9b61d5209f439851db43d0ba5997337df154668eb"); + check("len-65", hash_a(65), + "635361c48bb9eab14198e76ea8ab7f1a41685d6ad62aa9146d301d4f17eb0ae0"); + check("len-119", hash_a(119), + "31eba51c313a5c08226adf18d4a359cfdfd8d2e816b13f4af952f7ea6584dcfb"); + check("len-120", hash_a(120), + "2f3d335432c70b580af0e8e1b3674a7c020d683aa5f73aaaedfdc55af904c21c"); + + // Large repeats (FIPS 180-4 / classic vectors). + check("a-1000", hash_a(1000), + "41edece42d63e8d9bf515a9ba6932e1c20cbc9f5a5d134645adb5db1b9737ea3"); + check("a-1000000", hash_a(1000000), + "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"); +} diff --git a/examples/0712-modules-sha256-streaming.sx b/examples/0712-modules-sha256-streaming.sx new file mode 100644 index 0000000..de01e41 --- /dev/null +++ b/examples/0712-modules-sha256-streaming.sx @@ -0,0 +1,76 @@ +// SHA-256 streaming-equivalence + file hashing for `modules/std/hash.sx`. +// +// The chunk boundary must not affect the result: feeding the same bytes +// one-shot, one byte at a time, split mid-block, and split exactly on a +// 64-byte block boundary all yield the same digest, anchored to a pinned +// value. Then `sha256_file` of a written temp file must equal the +// in-memory digest of the same bytes — the streaming file path agrees +// with the buffered path. 
+// +// All comparisons go through `string` views over the zero-heap `[64]u8` +// digests; the byte/split updates view directly into the input buffer +// (no `substr`, no copies). + +#import "modules/std.sx"; +#import "modules/std/hash.sx"; +#import "modules/fs.sx"; + +// 112-byte NIST multi-block vector — long enough that a 64-byte split is +// a genuine block boundary and a 30-byte split lands mid-block. +MSG :: "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"; +PIN :: "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1"; + +check :: (label: string, got: [64]u8, want: string) { + view := string.{ ptr = @got[0], len = 64 }; + if view == want { + print("{}: ok\n", label); + } else { + print("{}: FAIL got {} want {}\n", label, view, want); + } +} + +report :: (label: string, ok: bool) { + if ok { print("{}: ok\n", label); } else { print("{}: FAIL\n", label); } +} + +// Absorb `data` one byte at a time (views of length 1 into the buffer). +stream_by_byte :: (data: string) -> [64]u8 { + h := init(); + i := 0; + while i < data.len { + h.update(string.{ ptr = @data[i], len = 1 }); + i += 1; + } + h.final() +} + +// Absorb `data` as two buffer views split at `at`; assumes 0 <= at < data.len. +stream_split :: (data: string, at: s64) -> [64]u8 { + h := init(); + h.update(string.{ ptr = @data[0], len = at }); + h.update(string.{ ptr = @data[at], len = data.len - at }); + h.final() +} + +main :: () { + check("oneshot-pinned", sha256_hex(MSG), PIN); + check("byte-at-a-time", stream_by_byte(MSG), PIN); + check("split-mid-block", stream_split(MSG, 30), PIN); // 30: mid first block + check("split-on-boundary", stream_split(MSG, 64), PIN); // 64: exact block edge + + // sha256_file (streaming) must equal the in-memory digest. 
+ path := "/tmp/sx_0712_stream.bin"; + if !write_file(path, MSG) { print("file-write: FAIL\n"); return; } + // Remove the temp file on every exit path, including the early return below. + defer delete_file(path); + + maybe := sha256_file(path); + if maybe == null { print("file-eq-memory: FAIL (open)\n"); return; } + file_digest := maybe!; + mem_digest := sha256_hex(MSG); + + fv := string.{ ptr = @file_digest[0], len = 64 }; + mv := string.{ ptr = @mem_digest[0], len = 64 }; + report("file-eq-memory", fv == mv); + check("file-pinned", file_digest, PIN); +} diff --git a/examples/expected/0711-modules-sha256-vectors.exit b/examples/expected/0711-modules-sha256-vectors.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0711-modules-sha256-vectors.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0711-modules-sha256-vectors.stderr b/examples/expected/0711-modules-sha256-vectors.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0711-modules-sha256-vectors.stderr @@ -0,0 +1 @@ + diff --git a/examples/expected/0711-modules-sha256-vectors.stdout b/examples/expected/0711-modules-sha256-vectors.stdout new file mode 100644 index 0000000..a4efed1 --- /dev/null +++ b/examples/expected/0711-modules-sha256-vectors.stdout @@ -0,0 +1,15 @@ +empty: ok +abc: ok +nist-56: ok +nist-112: ok +len-0: ok +len-55: ok +len-56: ok +len-57: ok +len-63: ok +len-64: ok +len-65: ok +len-119: ok +len-120: ok +a-1000: ok +a-1000000: ok diff --git a/examples/expected/0712-modules-sha256-streaming.exit b/examples/expected/0712-modules-sha256-streaming.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0712-modules-sha256-streaming.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0712-modules-sha256-streaming.stderr b/examples/expected/0712-modules-sha256-streaming.stderr new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/expected/0712-modules-sha256-streaming.stderr @@ -0,0 +1 @@ + diff --git 
a/examples/expected/0712-modules-sha256-streaming.stdout b/examples/expected/0712-modules-sha256-streaming.stdout new file mode 100644 index 0000000..c54646f --- /dev/null +++ b/examples/expected/0712-modules-sha256-streaming.stdout @@ -0,0 +1,6 @@ +oneshot-pinned: ok +byte-at-a-time: ok +split-mid-block: ok +split-on-boundary: ok +file-eq-memory: ok +file-pinned: ok diff --git a/library/modules/std/hash.sx b/library/modules/std/hash.sx index 62385e1..99c065f 100644 --- a/library/modules/std/hash.sx +++ b/library/modules/std/hash.sx @@ -7,17 +7,27 @@ // `& MASK32`, so the result is identical regardless of the host's // native integer width or overflow behaviour. // +// Zero-heap: the digest path never touches `context.allocator`. The +// hash is a fixed `[64]u8` of lowercase hex returned by value on the +// stack, and file hashing streams the input in fixed-size chunks, so +// peak memory is O(chunk) regardless of file size. +// // Streaming API (the by-value `init` / `*self` pattern): // // h := hash.init(); // Sha256, stack-local // h.update("hello, "); // absorb across calls // h.update("world"); -// digest := h.final(); // 64-char lowercase hex +// digest := h.final(); // [64]u8, 64-char lowercase hex, by value // // One-shot convenience: // -// digest := hash.sha256_hex("abc"); -// digest := hash.sha256_file("path\0"); // ?string, null on I/O error +// digest := hash.sha256_hex("abc"); // [64]u8 by value +// digest := hash.sha256_file("path\0"); // ?[64]u8, null on I/O error +// +// To print or compare a digest, build a `string` VIEW over it (no copy): +// +// d := hash.sha256_hex("abc"); +// view := string.{ ptr = @d[0], len = 64 }; // ===================================================================== #import "modules/std.sx"; @@ -127,8 +137,9 @@ Sha256 :: struct { } // Finish: apply FIPS padding and emit the 32-byte digest as 64 - // lowercase hex characters. The state is consumed by this call. 
- final :: (self: *Sha256) -> string { + // lowercase hex characters in a stack `[64]u8`, returned by value. + // The state is consumed by this call. No heap allocation. + final :: (self: *Sha256) -> [64]u8 { bit_len := self.total_len * 8; // 0x80 terminator, then zero-pad until 56 bytes mod 64. @@ -156,7 +167,7 @@ Sha256 :: struct { self.process_block(); self.buf_len = 0; - digest := cstring(64); + digest : [64]u8 = ---; i := 0; while i < 8 { word := self.h[i] & MASK32; @@ -196,16 +207,32 @@ init :: () -> Sha256 { s } -// One-shot: digest of a single buffer as 64-char lowercase hex. -sha256_hex :: (data: string) -> string { +// One-shot: digest of a single buffer as 64-char lowercase hex, +// returned by value. No heap allocation. +sha256_hex :: (data: string) -> [64]u8 { h := init(); h.update(data); h.final() } -// Digest of a file's contents. Returns null if the file can't be read. -sha256_file :: (path: [:0]u8) -> ?string { - content := read_file(path); - if content == null { return null; } - sha256_hex(content!) +// Digest of a file's contents, returned by value. Streams the file in +// fixed 64KB chunks, so peak memory is O(chunk) even for multi-hundred- +// MB artifacts. Returns null if the file can't be opened or a read +// returns a negative count. No heap allocation: the chunk is a stack array. +sha256_file :: (path: [:0]u8) -> ?[64]u8 { + handle := open_file(path, .read); + if handle == null { return null; } + file := handle!; + defer file.close(); + + h := init(); + chunk : [65536]u8 = ---; + reading := true; + while reading { + n := file.read(string.{ ptr = @chunk[0], len = 65536 }); + if n < 0 { return null; } + if n == 0 { reading = false; } + if n > 0 { h.update(string.{ ptr = @chunk[0], len = n }); } + } + h.final() }