Files
sx/library/modules/std/hash.sx
agra f9bc593bb8 F1.2: std.hash zero-heap [64]u8 hex API + chunked file + pinned vectors
Make the SHA-256 digest path allocation-free (foundation heap-discipline):

- final() and sha256_hex() now return the 64-char lowercase hex digest as
  a [64]u8 by value on the stack; the cstring(64) heap allocation is gone.
- sha256_file() streams the file in fixed 64KB stack chunks via open_file/
  File.read/File.close (defer-closed on every path) instead of slurping it
  with read_file; peak memory is O(chunk), not O(filesize).

Tests (compare via a zero-copy string view over the [64]u8):
- 0710 updated to the by-value API (output unchanged).
- 0711 known-answer vectors: "", "abc", NIST-56/112, padding boundaries
  {0,55,56,57,63,64,65,119,120}, and 1000 / 1,000,000 'a' repeats, each
  pinned to its published digest (cross-checked with shasum -a 256).
- 0712 streaming equivalence (one-shot == byte-at-a-time == split-mid-block
  == split-on-boundary) plus sha256_file(temp) == in-memory digest.

src/ untouched. zig build && zig build test && tests/run_examples.sh green.
2026-06-04 00:08:46 +03:00

239 lines
8.2 KiB
Plaintext

// =====================================================================
// hash.sx — streaming SHA-256 (FIPS 180-4), pure sx.
//
// Content addressing is security-critical, so the digest is computed
// in-process: no shelling out, no platform crypto library. All 32-bit
// word arithmetic is done in s64 and masked back to 32 bits with
// `& MASK32`, so the result is identical regardless of the host's
// native integer width or overflow behaviour.
//
// Zero-heap: the digest path never touches `context.allocator`. The
// hash is a fixed `[64]u8` of lowercase hex returned by value on the
// stack, and file hashing streams the input in fixed-size chunks, so
// peak memory is O(chunk) regardless of file size.
//
// Streaming API (the by-value `init` / `*self` pattern):
//
// h := hash.init(); // Sha256, stack-local
// h.update("hello, "); // absorb across calls
// h.update("world");
// digest := h.final(); // [64]u8, 64-char lowercase hex, by value
//
// One-shot convenience:
//
// digest := hash.sha256_hex("abc"); // [64]u8 by value
// digest := hash.sha256_file("path\0"); // ?[64]u8, null on I/O error
//
// To print or compare a digest, build a `string` VIEW over it (no copy):
//
// d := hash.sha256_hex("abc");
// view := string.{ ptr = @d[0], len = 64 };
// =====================================================================
#import "modules/std.sx";
#import "modules/fs.sx";
// Mask selecting the low 32 bits of an s64. SHA-256 is defined over
// 32-bit words, but this module carries them in s64 values, so sums
// and rotates are ANDed with this mask to discard anything that
// spilled above bit 31.
MASK32 :: 0xFFFFFFFF;
// Round constants K[0..63] — the first 32 bits of the fractional parts
// of the cube roots of the first 64 primes (FIPS 180-4 §4.2.2).
// Indexed by the round number in the compression loop. Stored as s64
// like every other word in this module, so they can be summed with the
// working variables before the result is masked back to 32 bits.
// Layout: eight constants per row, in round order.
K : [64]s64 = .[
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];
// Rotate a 32-bit word right by `n` bit positions.
//   word: the value to rotate; must already be masked to 32 bits.
//   n:    rotation distance in bits.
// Returns the rotated word, masked to 32 bits.
rotr :: (word: s64, n: s64) -> s64 {
    shifted := word >> n;          // upper bits moved down by n
    wrapped := word << (32 - n);   // low n bits carried round to the top
    // The left shift also pushes bits above bit 31; the mask drops them.
    (shifted | wrapped) & MASK32
}
// Σ0 of FIPS 180-4 §4.1.2, applied to working variable `a` in each
// round: ROTR 2 ^ ROTR 13 ^ ROTR 22. `x` must already be masked to
// 32 bits.
big_sigma0 :: (x: s64) -> s64 {
    rot2  := rotr(x, 2);
    rot13 := rotr(x, 13);
    rot22 := rotr(x, 22);
    rot2 ^ rot13 ^ rot22
}
// Σ1 of FIPS 180-4 §4.1.2, applied to working variable `e` in each
// round: ROTR 6 ^ ROTR 11 ^ ROTR 25. `x` must already be masked to
// 32 bits.
big_sigma1 :: (x: s64) -> s64 {
    rot6  := rotr(x, 6);
    rot11 := rotr(x, 11);
    rot25 := rotr(x, 25);
    rot6 ^ rot11 ^ rot25
}
// σ0 of FIPS 180-4 §4.1.2, used when expanding the message schedule:
// ROTR 7 ^ ROTR 18 ^ SHR 3. `x` must already be masked to 32 bits so
// the plain shift brings in zeros from the top.
small_sigma0 :: (x: s64) -> s64 {
    rot7  := rotr(x, 7);
    rot18 := rotr(x, 18);
    shr3  := x >> 3;
    rot7 ^ rot18 ^ shr3
}
// σ1 of FIPS 180-4 §4.1.2, used when expanding the message schedule:
// ROTR 17 ^ ROTR 19 ^ SHR 10. `x` must already be masked to 32 bits so
// the plain shift brings in zeros from the top.
small_sigma1 :: (x: s64) -> s64 {
    rot17 := rotr(x, 17);
    rot19 := rotr(x, 19);
    shr10 := x >> 10;
    rot17 ^ rot19 ^ shr10
}
// Streaming SHA-256 state. Obtain one from `init()`, feed it with
// `update` as many times as needed, then call `final` once to get the
// hex digest.
Sha256 :: struct {
    h: [8]s64;        // chaining value H0..H7; every word stays within 32 bits
    buf: [64]u8;      // the block currently being assembled
    buf_len: s64;     // number of bytes filled in `buf` (0..63 between calls)
    total_len: s64;   // message bytes absorbed so far

    // Compress the full 64-byte block held in `buf` into `h`
    // (FIPS 180-4 §6.2.2). Reads `buf` and `h`, writes only `h`;
    // `buf_len` and `total_len` are left for the caller to manage.
    process_block :: (self: *Sha256) {
        sched : [64]s64 = ---;   // message schedule W[0..63]

        // W[0..15]: the block read as sixteen big-endian 32-bit words.
        i := 0;
        while i < 16 {
            off := i * 4;
            b0 := cast(s64) self.buf[off];
            b1 := cast(s64) self.buf[off + 1];
            b2 := cast(s64) self.buf[off + 2];
            b3 := cast(s64) self.buf[off + 3];
            sched[i] = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
            i += 1;
        }

        // W[16..63]: each word is derived from four earlier ones.
        j := 16;
        while j < 64 {
            s0 := small_sigma0(sched[j - 15]);
            s1 := small_sigma1(sched[j - 2]);
            sched[j] = (s1 + sched[j - 7] + s0 + sched[j - 16]) & MASK32;
            j += 1;
        }

        // Working variables start from the current chaining value.
        a := self.h[0];
        b := self.h[1];
        c := self.h[2];
        d := self.h[3];
        e := self.h[4];
        f := self.h[5];
        g := self.h[6];
        hh := self.h[7];

        // 64 rounds of the compression function.
        step := 0;
        while step < 64 {
            // `~e` sets the bits above 31 as well; ANDing with the
            // masked `g` clears them again.
            choose := (e & f) ^ (~e & g);
            majority := (a & b) ^ (a & c) ^ (b & c);
            t1 := (hh + big_sigma1(e) + choose + K[step] + sched[step]) & MASK32;
            t2 := (big_sigma0(a) + majority) & MASK32;

            // Shift the variables down one slot, feeding t1 into `e`
            // and t1 + t2 into `a`.
            hh = g;
            g = f;
            f = e;
            e = (d + t1) & MASK32;
            d = c;
            c = b;
            b = a;
            a = (t1 + t2) & MASK32;
            step += 1;
        }

        // Fold the working variables back into the chaining value.
        self.h[0] = (self.h[0] + a) & MASK32;
        self.h[1] = (self.h[1] + b) & MASK32;
        self.h[2] = (self.h[2] + c) & MASK32;
        self.h[3] = (self.h[3] + d) & MASK32;
        self.h[4] = (self.h[4] + e) & MASK32;
        self.h[5] = (self.h[5] + f) & MASK32;
        self.h[6] = (self.h[6] + g) & MASK32;
        self.h[7] = (self.h[7] + hh) & MASK32;
    }

    // Absorb the bytes of `data`. May be called any number of times;
    // bytes are staged in `buf` and compressed each time a full
    // 64-byte block has accumulated.
    update :: (self: *Sha256, data: string) {
        taken := 0;
        while taken < data.len {
            self.buf[self.buf_len] = data[taken];
            self.buf_len += 1;
            if self.buf_len == 64 {
                self.process_block();
                self.buf_len = 0;
            }
            taken += 1;
        }
        // One increment per byte copied above.
        self.total_len += taken;
    }

    // Finish the hash: append the FIPS 180-4 §5.1.1 padding, compress
    // the last block(s) and return the 32-byte digest as 64 lowercase
    // hex characters in a `[64]u8`, by value.
    // The state is not reset afterwards; start a new hash with `init()`.
    final :: (self: *Sha256) -> [64]u8 {
        // Message length in bits, captured before the padding is written.
        msg_bits := self.total_len * 8;

        // Mandatory 0x80 terminator byte.
        self.buf[self.buf_len] = 0x80;
        self.buf_len += 1;

        // If fewer than 8 bytes remain for the length field, zero-fill
        // and compress this block; the length then goes in a fresh one.
        if self.buf_len > 56 {
            while self.buf_len < 64 {
                self.buf[self.buf_len] = 0;
                self.buf_len += 1;
            }
            self.process_block();
            self.buf_len = 0;
        }

        // Zero-fill up to the length field at offset 56.
        while self.buf_len < 56 {
            self.buf[self.buf_len] = 0;
            self.buf_len += 1;
        }

        // Bytes 56..63: the bit length as a 64-bit big-endian integer.
        k := 0;
        while k < 8 {
            shift := (7 - k) * 8;
            self.buf[56 + k] = xx ((msg_bits >> shift) & 0xFF);
            k += 1;
        }
        self.process_block();
        self.buf_len = 0;

        // Hex-encode H0..H7, most significant nibble first: eight
        // characters per word.
        digest : [64]u8 = ---;
        wi := 0;
        while wi < 8 {
            word := self.h[wi] & MASK32;
            nib := 0;
            while nib < 8 {
                nib_shift := (7 - nib) * 4;
                digest[wi * 8 + nib] = nibble_hex((word >> nib_shift) & 0xF);
                nib += 1;
            }
            wi += 1;
        }
        digest
    }
}
// Map a nibble (0..15) to its lowercase hexadecimal ASCII byte:
// 0..9 -> '0'..'9' (48..57), 10..15 -> 'a'..'f' (97..102).
// Callers are expected to pass a value already masked to four bits.
nibble_hex :: (n: s64) -> u8 {
    // 48 is '0'; 87 is 'a' (97) minus 10, so n = 10 lands on 'a'.
    offset := if n < 10 then 48 else 87;
    xx (n + offset)
}
// Build a fresh SHA-256 state and return it by value. The caller binds
// the result to a local variable, whose address then backs the `*self`
// methods (`update`, `final`).
//
// `h` is loaded with the initial hash value of FIPS 180-4 §5.3.3 and
// both counters are zeroed. `buf` is left uninitialised: bytes are
// always written into it before they are read.
init :: () -> Sha256 {
    // Initial hash value H0..H7.
    iv : [8]s64 = .[
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    ];

    state : Sha256 = ---;
    state.buf_len = 0;
    state.total_len = 0;

    i := 0;
    while i < 8 {
        state.h[i] = iv[i];
        i += 1;
    }
    state
}
// One-shot helper: hash a single in-memory buffer.
//   data: the bytes to hash (may be empty).
// Returns the digest as 64 lowercase hex characters in a `[64]u8`,
// by value. The hasher is a local and nothing is allocated here.
sha256_hex :: (data: string) -> [64]u8 {
    hasher := init();
    hasher.update(data);
    digest := hasher.final();
    digest
}
// Hash the contents of the file at `path`.
//   path: zero-terminated path handed to `open_file`.
// Returns the digest as 64 lowercase hex characters, or null when the
// file cannot be opened or a read reports a negative count.
//
// The file is consumed in 64KB pieces through a fixed-size local
// buffer, so memory use does not grow with file size and this function
// allocates nothing itself. The file is closed on every exit path.
sha256_file :: (path: [:0]u8) -> ?[64]u8 {
    handle := open_file(path, .read);
    if handle == null { return null; }
    file := handle!;
    defer file.close();

    hasher := init();
    chunk : [65536]u8 = ---;

    // Read, absorb, repeat: a positive count is data, zero is
    // end-of-file, negative is an error.
    got := file.read(string.{ ptr = @chunk[0], len = 65536 });
    while got > 0 {
        hasher.update(string.{ ptr = @chunk[0], len = got });
        got = file.read(string.{ ptr = @chunk[0], len = 65536 });
    }
    if got < 0 { return null; }
    hasher.final()
}