Make the SHA-256 digest path allocation-free (foundation heap-discipline):

- final() and sha256_hex() now return the 64-char lowercase hex digest as
  a [64]u8 by value on the stack; the cstring(64) heap allocation is gone.
- sha256_file() streams the file in fixed 64KB stack chunks via open_file/
  File.read/File.close (defer-closed on every path) instead of slurping it
  with read_file; peak memory is O(chunk), not O(filesize).

Tests (compare via a zero-copy string view over the [64]u8):

- 0710 updated to the by-value API (output unchanged).
- 0711 known-answer vectors: "", "abc", NIST-56/112, padding boundaries
  {0,55,56,57,63,64,65,119,120}, and 1000 / 1,000,000 'a' repeats, each
  pinned to its published digest (cross-checked with shasum -a 256).
- 0712 streaming equivalence (one-shot == byte-at-a-time == split-mid-block
  == split-on-boundary) plus sha256_file(temp) == in-memory digest.

src/ untouched. zig build && zig build test && tests/run_examples.sh green.
239 lines
8.2 KiB
Plaintext
239 lines
8.2 KiB
Plaintext
// =====================================================================
// hash.sx — streaming SHA-256 (FIPS 180-4), pure sx.
//
// Content addressing is security-critical, so the digest is computed
// in-process: no shelling out, no platform crypto library. All 32-bit
// word arithmetic is done in s64 and masked back to 32 bits with
// `& MASK32`, so the result is identical regardless of the host's
// native integer width or overflow behaviour.
//
// Zero-heap: the digest path never touches `context.allocator`. The
// hash is a fixed `[64]u8` of lowercase hex returned by value on the
// stack, and file hashing streams the input in fixed-size chunks, so
// peak memory is O(chunk) regardless of file size.
//
// Streaming API (the by-value `init` / `*self` pattern):
//
//     h := hash.init();            // Sha256, stack-local
//     h.update("hello, ");         // absorb across calls
//     h.update("world");
//     digest := h.final();         // [64]u8, 64-char lowercase hex, by value
//
// One-shot convenience:
//
//     digest := hash.sha256_hex("abc");        // [64]u8 by value
//     digest := hash.sha256_file("path\0");    // ?[64]u8, null on I/O error
//
// To print or compare a digest, build a `string` VIEW over it (no copy):
//
//     d := hash.sha256_hex("abc");
//     view := string.{ ptr = @d[0], len = 64 };
// =====================================================================
|
|
|
|
#import "modules/std.sx";
|
|
#import "modules/fs.sx";
|
|
|
|
// Mask selecting bits 0..31 (2^32 - 1). The hash works on 32-bit words
// held in s64 values; each addition and rotation in this file is ANDed
// with this mask so that nothing above bit 31 survives.
MASK32 :: 0xffffffff;
|
|
|
|
// SHA-256 round constants K[0..63] (FIPS 180-4 §4.2.2): the first 32
// bits of the fractional parts of the cube roots of the first 64 primes.
// Indexed by round number in `Sha256.process_block`. Laid out four per
// row; the trailing comment gives the index range of each row.
K : [64]s64 = .[
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, //  0..3
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, //  4..7
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, //  8..11
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, // 12..15
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, // 16..19
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, // 20..23
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, // 24..27
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, // 28..31
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, // 32..35
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, // 36..39
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, // 40..43
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, // 44..47
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, // 48..51
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, // 52..55
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, // 56..59
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, // 60..63
];
|
|
|
|
// Rotate a 32-bit word right by `n` bit positions.
//
// `word` is expected to hold a value already confined to bits 0..31;
// every call in this file uses an `n` between 2 and 25. The result is
// masked, so it is again a 32-bit value.
rotr :: (word: s64, n: s64) -> s64 {
    // Bits that stay inside the word after moving right.
    kept := word >> n;
    // Bits that fall off the low end re-enter at the top; the surplus
    // above bit 31 is removed by the mask below.
    wrapped := word << (32 - n);
    (kept | wrapped) & MASK32
}
|
|
|
|
// Σ0(x) = ROTR2(x) ^ ROTR13(x) ^ ROTR22(x). Applied to working variable
// `a` in each compression round.
big_sigma0 :: (x: s64) -> s64 {
    r2 := rotr(x, 2);
    r13 := rotr(x, 13);
    r22 := rotr(x, 22);
    r2 ^ r13 ^ r22
}
|
|
// Σ1(x) = ROTR6(x) ^ ROTR11(x) ^ ROTR25(x). Applied to working variable
// `e` in each compression round.
big_sigma1 :: (x: s64) -> s64 {
    r6 := rotr(x, 6);
    r11 := rotr(x, 11);
    r25 := rotr(x, 25);
    r6 ^ r11 ^ r25
}
|
|
// σ0(x) = ROTR7(x) ^ ROTR18(x) ^ SHR3(x). Used when extending the
// message schedule. The last term is a plain shift, not a rotate.
small_sigma0 :: (x: s64) -> s64 {
    r7 := rotr(x, 7);
    r18 := rotr(x, 18);
    s3 := x >> 3;
    r7 ^ r18 ^ s3
}
|
|
// σ1(x) = ROTR17(x) ^ ROTR19(x) ^ SHR10(x). Used when extending the
// message schedule. The last term is a plain shift, not a rotate.
small_sigma1 :: (x: s64) -> s64 {
    r17 := rotr(x, 17);
    r19 := rotr(x, 19);
    s10 := x >> 10;
    r17 ^ r19 ^ s10
}
|
|
|
|
// Incremental SHA-256 state. Obtain one from `init()`, feed it with
// `update`, and read the hex digest with `final`.
Sha256 :: struct {
    h: [8]s64;        // chaining words H0..H7, each kept within 32 bits
    buf: [64]u8;      // block currently being assembled
    buf_len: s64;     // number of filled bytes in `buf` (0..63 between calls)
    total_len: s64;   // count of message bytes passed to `update` so far

    // Compress the 64-byte block held in `buf` into `h`
    // (FIPS 180-4 §6.2.2). Reads all 64 bytes of `buf`; leaves
    // `buf_len` and `total_len` untouched, so callers reset `buf_len`.
    process_block :: (self: *Sha256) {
        // Message schedule: words 0..15 are the block read as
        // big-endian 32-bit integers, words 16..63 are derived.
        sched : [64]s64 = ---;
        i := 0;
        while i < 16 {
            off := i * 4;
            b0 := (cast(s64) self.buf[off]);
            b1 := (cast(s64) self.buf[off + 1]);
            b2 := (cast(s64) self.buf[off + 2]);
            b3 := (cast(s64) self.buf[off + 3]);
            sched[i] = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
            i += 1;
        }
        // `i` is 16 here; keep going to fill the derived words.
        while i < 64 {
            s0 := small_sigma0(sched[i - 15]);
            s1 := small_sigma1(sched[i - 2]);
            sched[i] = (s1 + sched[i - 7] + s0 + sched[i - 16]) & MASK32;
            i += 1;
        }

        // Working variables a..h live in v[0]..v[7].
        v : [8]s64 = ---;
        k := 0;
        while k < 8 {
            v[k] = self.h[k];
            k += 1;
        }

        round := 0;
        while round < 64 {
            // Ch(e, f, g). `~` sets bits above 31 in the s64, but the
            // AND with the 32-bit `g` removes them again.
            choose := (v[4] & v[5]) ^ (~v[4] & v[6]);
            // Maj(a, b, c).
            majority := (v[0] & v[1]) ^ (v[0] & v[2]) ^ (v[1] & v[2]);
            t1 := (v[7] + big_sigma1(v[4]) + choose + K[round] + sched[round]) & MASK32;
            t2 := (big_sigma0(v[0]) + majority) & MASK32;

            // Shift the variables down one slot, from h back to a,
            // injecting t1 into e and t1 + t2 into a.
            v[7] = v[6];
            v[6] = v[5];
            v[5] = v[4];
            v[4] = (v[3] + t1) & MASK32;
            v[3] = v[2];
            v[2] = v[1];
            v[1] = v[0];
            v[0] = (t1 + t2) & MASK32;
            round += 1;
        }

        // Fold the result of this block into the chaining value.
        k = 0;
        while k < 8 {
            self.h[k] = (self.h[k] + v[k]) & MASK32;
            k += 1;
        }
    }

    // Feed `data` into the hash. May be called any number of times;
    // bytes are staged in `buf` and a block is compressed each time 64
    // of them have accumulated. An empty `data` changes nothing.
    update :: (self: *Sha256, data: string) {
        // Track the fill level locally; `process_block` does not read
        // `buf_len` or `total_len`, so both are written back once.
        fill := self.buf_len;
        pos := 0;
        while pos < data.len {
            self.buf[fill] = data[pos];
            fill += 1;
            pos += 1;
            if fill == 64 {
                self.process_block();
                fill = 0;
            }
        }
        self.buf_len = fill;
        // `pos` equals the number of bytes consumed by the loop above.
        self.total_len += pos;
    }

    // Complete the hash: append the FIPS 180-4 padding, compress the
    // remaining block(s), and return the 32-byte digest rendered as 64
    // lowercase hex characters in a `[64]u8`, by value.
    //
    // The state is used up by this call: `h` is left holding the final
    // chaining value and is not re-initialised, so start from a fresh
    // `init()` before hashing another message.
    final :: (self: *Sha256) -> [64]u8 {
        // Message length in bits, written into the trailer below.
        msg_bits := self.total_len * 8;

        // Mandatory 0x80 terminator right after the message bytes.
        self.buf[self.buf_len] = 0x80;
        self.buf_len += 1;

        // Fewer than 8 bytes remain for the length field: zero-fill
        // this block, compress it, and continue in a fresh block.
        if self.buf_len > 56 {
            while self.buf_len < 64 {
                self.buf[self.buf_len] = 0;
                self.buf_len += 1;
            }
            self.process_block();
            self.buf_len = 0;
        }

        // Zero-fill up to offset 56, where the length field begins.
        while self.buf_len < 56 {
            self.buf[self.buf_len] = 0;
            self.buf_len += 1;
        }

        // Bytes 56..63: the bit length as a 64-bit big-endian integer,
        // most significant byte first.
        shift := 56;
        slot := 56;
        while slot < 64 {
            self.buf[slot] = xx ((msg_bits >> shift) & 0xFF);
            shift = shift - 8;
            slot += 1;
        }
        self.process_block();
        self.buf_len = 0;

        // Render H0..H7 as hex, eight nibbles per word, highest
        // nibble first.
        hex : [64]u8 = ---;
        word_idx := 0;
        while word_idx < 8 {
            word := self.h[word_idx] & MASK32;
            nib := 0;
            while nib < 8 {
                down := (7 - nib) * 4;
                hex[word_idx * 8 + nib] = nibble_hex((word >> down) & 0xF);
                nib += 1;
            }
            word_idx += 1;
        }
        hex
    }
}
|
|
|
|
// Map a nibble value 0..15 to its lowercase hexadecimal ASCII byte.
// Values 0..9 become '0'..'9' (ASCII 48..57); values 10..15 become
// 'a'..'f' (ASCII 97..102). The caller is expected to pass only 0..15.
nibble_hex :: (n: s64) -> u8 {
    // 48 is '0'. 87 is 'a' (97) minus 10, so that 10 maps to 'a'.
    offset := if n < 10 then 48 else 87;
    xx (n + offset)
}
|
|
|
|
// Build a fresh SHA-256 state and return it by value. The caller binds
// it to a local and invokes the `*self` methods on that local.
//
// `h` is loaded with the eight SHA-256 initial hash words and both
// counters start at zero. `buf` is deliberately left uninitialised:
// `update` and `final` write each byte of it before a block is
// compressed.
init :: () -> Sha256 {
    // Initial hash value H(0), in order H0..H7.
    iv : [8]s64 = .[
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    ];

    state : Sha256 = ---;
    state.total_len = 0;
    state.buf_len = 0;
    k := 0;
    while k < 8 {
        state.h[k] = iv[k];
        k += 1;
    }
    state
}
|
|
|
|
// Hash one in-memory buffer in a single call. Returns the digest as 64
// lowercase hex characters in a `[64]u8`, by value. Equivalent to
// `init`, one `update(data)`, then `final` on a local state.
sha256_hex :: (data: string) -> [64]u8 {
    hasher := init();
    hasher.update(data);
    hex := hasher.final();
    hex
}
|
|
|
|
// Hash the contents of the file at `path` and return the digest as 64
// lowercase hex characters, by value.
//
// The file is read through a local 65536-byte array, one chunk at a
// time, so memory use does not grow with file size. Returns null when
// the file cannot be opened or when a read reports a negative count.
// Once opened, the file is closed on every exit path via `defer`.
sha256_file :: (path: [:0]u8) -> ?[64]u8 {
    opened := open_file(path, .read);
    if opened == null { return null; }
    file := opened!;
    defer file.close();

    hasher := init();
    scratch : [65536]u8 = ---;
    // View over the whole scratch buffer, reused for every read.
    window := string.{ ptr = @scratch[0], len = 65536 };

    pending := true;
    while pending {
        got := file.read(window);
        // Negative count: treat as a read failure.
        if got < 0 { return null; }
        // Absorb only the bytes this read actually produced.
        if got > 0 { hasher.update(string.{ ptr = @scratch[0], len = got }); }
        // Zero bytes: end of file, leave the loop.
        if got == 0 { pending = false; }
    }
    hasher.final()
}
|