F1.1: std.hash — streaming SHA-256 in library/modules/std/hash.sx
Add a pure-sx streaming SHA-256 (FIPS 180-4) stdlib module, importable as `#import "modules/std/hash.sx";`. All 32-bit word arithmetic is done in s64 and masked back with `& MASK32`, so digests are deterministic and platform-independent — no shelling out, no native crypto.

API:
- init() -> Sha256 (by-value *self pattern)
- update(*Sha256, string) (multi-block + partial-block buffering)
- final(*Sha256) -> string (32-byte digest as 64 lowercase hex characters)
- sha256_hex(string) -> string (one-shot)
- sha256_file([:0]u8) -> ?string (digest of a file via fs.read_file; null if the file can't be read)

Verified against FIPS/NIST known-answer vectors and `shasum -a 256`: "", "abc", the 56- and 112-byte multi-block vectors, 1000×'a', and the 64/65-byte block boundaries; chunked update() matches the one-shot call.

examples/0710-modules-sha256.sx pins the KAT vectors and the streaming invariant. Gate green (zig build, zig build test, run_examples 370/0/0/0).
This commit is contained in:
211
library/modules/std/hash.sx
Normal file
211
library/modules/std/hash.sx
Normal file
@@ -0,0 +1,211 @@
|
||||
// =====================================================================
|
||||
// hash.sx — streaming SHA-256 (FIPS 180-4), pure sx.
|
||||
//
|
||||
// Content addressing is security-critical, so the digest is computed
|
||||
// in-process: no shelling out, no platform crypto library. All 32-bit
|
||||
// word arithmetic is done in s64 and masked back to 32 bits with
|
||||
// `& MASK32`, so the result is identical regardless of the host's
|
||||
// native integer width or overflow behaviour.
|
||||
//
|
||||
// Streaming API (the by-value `init` / `*self` pattern):
|
||||
//
|
||||
// h := hash.init(); // Sha256, stack-local
|
||||
// h.update("hello, "); // absorb across calls
|
||||
// h.update("world");
|
||||
// digest := h.final(); // 64-char lowercase hex
|
||||
//
|
||||
// One-shot convenience:
|
||||
//
|
||||
// digest := hash.sha256_hex("abc");
|
||||
// digest := hash.sha256_file("path\0"); // ?string, null on I/O error
|
||||
// =====================================================================
|
||||
|
||||
#import "modules/std.sx";
|
||||
#import "modules/fs.sx";
|
||||
|
||||
// Low 32 bits. SHA-256 is defined over 32-bit words; every add/rotate
|
||||
// result is masked back through this so the carry never escapes bit 31.
|
||||
// Equals 2^32 - 1; `x & MASK32` truncates an s64 to its low 32 bits.
MASK32 :: 0xFFFFFFFF;
|
||||
|
||||
// SHA-256 round constants K[0..63] (FIPS 180-4 §4.2.2): the first 32 bits
// of the fractional parts of the cube roots of the first 64 primes.
// Laid out four per row, so row r holds K[4r .. 4r+3]. Every entry fits in
// 32 bits; they are stored as s64 to match the rest of the arithmetic.
K : [64]s64 = .[
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];
|
||||
|
||||
// Rotate a 32-bit value right by `n` bit positions.
//
// Precondition: `word` has no bits set above bit 31. The rotation is
// emulated in s64: the bits shifted out on the right are re-inserted by
// the left shift, and the final mask discards whatever the left shift
// pushed past bit 31.
rotr :: (word: s64, n: s64) -> s64 {
    shifted_down := word >> n;
    wrapped_up   := word << (32 - n);
    (shifted_down | wrapped_up) & MASK32
}
|
||||
|
||||
// Upper-case sigma-0 of FIPS 180-4: ROTR 2 ^ ROTR 13 ^ ROTR 22.
// Applied to working variable `a` in each compression round.
// `x` must already be masked to 32 bits (see rotr).
big_sigma0 :: (x: s64) -> s64 {
    r2  := rotr(x, 2);
    r13 := rotr(x, 13);
    r22 := rotr(x, 22);
    r2 ^ r13 ^ r22
}
|
||||
// Upper-case sigma-1 of FIPS 180-4: ROTR 6 ^ ROTR 11 ^ ROTR 25.
// Applied to working variable `e` in each compression round.
// `x` must already be masked to 32 bits (see rotr).
big_sigma1 :: (x: s64) -> s64 {
    r6  := rotr(x, 6);
    r11 := rotr(x, 11);
    r25 := rotr(x, 25);
    r6 ^ r11 ^ r25
}
|
||||
// Lower-case sigma-0 of FIPS 180-4: ROTR 7 ^ ROTR 18 ^ SHR 3.
// Used when extending the message schedule.
// `x` must already be masked to 32 bits: the plain `>> 3` is done in s64
// and would otherwise pull bits above bit 31 down into the result.
small_sigma0 :: (x: s64) -> s64 {
    r7   := rotr(x, 7);
    r18  := rotr(x, 18);
    shr3 := x >> 3;
    r7 ^ r18 ^ shr3
}
|
||||
// Lower-case sigma-1 of FIPS 180-4: ROTR 17 ^ ROTR 19 ^ SHR 10.
// Used when extending the message schedule.
// `x` must already be masked to 32 bits: the plain `>> 10` is done in s64
// and would otherwise pull bits above bit 31 down into the result.
small_sigma1 :: (x: s64) -> s64 {
    r17   := rotr(x, 17);
    r19   := rotr(x, 19);
    shr10 := x >> 10;
    r17 ^ r19 ^ shr10
}
|
||||
|
||||
// Streaming SHA-256 context. Create one with `init()`, feed it with
// `update`, and obtain the hex digest with `final`.
Sha256 :: struct {
    h:         [8]s64;   // chaining value H0..H7; every entry stays within 32 bits
    buf:       [64]u8;   // bytes of the block currently being assembled
    buf_len:   s64;      // number of valid bytes in `buf` (0..63 between calls)
    total_len: s64;      // count of message bytes absorbed so far

    // Compression function: folds the full 64-byte block held in `buf`
    // into `h`. Does not touch `buf_len` or `total_len`; callers reset
    // `buf_len` themselves.
    process_block :: (self: *Sha256) {
        sched : [64]s64 = ---;

        // Message schedule, words 0..15: the block read as sixteen
        // big-endian 32-bit words.
        word_idx := 0;
        while word_idx < 16 {
            off := word_idx * 4;
            b0 := cast(s64) self.buf[off];
            b1 := cast(s64) self.buf[off + 1];
            b2 := cast(s64) self.buf[off + 2];
            b3 := cast(s64) self.buf[off + 3];
            sched[word_idx] = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
            word_idx += 1;
        }

        // Message schedule, words 16..63: each derived from four earlier
        // words, sum reduced modulo 2^32.
        ext := 16;
        while ext < 64 {
            s0 := small_sigma0(sched[ext - 15]);
            s1 := small_sigma1(sched[ext - 2]);
            sched[ext] = (s1 + sched[ext - 7] + s0 + sched[ext - 16]) & MASK32;
            ext += 1;
        }

        // Working variables a..h of FIPS 180-4 (`hh` stands in for `h`).
        a  := self.h[0];
        b  := self.h[1];
        c  := self.h[2];
        d  := self.h[3];
        e  := self.h[4];
        f  := self.h[5];
        g  := self.h[6];
        hh := self.h[7];

        r := 0;
        while r < 64 {
            // `~e` sets bits above 31 in s64, but the `& g` clears them
            // again because `g` is already confined to 32 bits.
            chosen   := (e & f) ^ (~e & g);
            majority := (a & b) ^ (a & c) ^ (b & c);
            t1 := (hh + big_sigma1(e) + chosen + K[r] + sched[r]) & MASK32;
            t2 := (big_sigma0(a) + majority) & MASK32;

            // Rotate the working variables down one slot.
            hh = g;
            g  = f;
            f  = e;
            e  = (d + t1) & MASK32;
            d  = c;
            c  = b;
            b  = a;
            a  = (t1 + t2) & MASK32;

            r += 1;
        }

        // Feed-forward: add the round output into the chaining value.
        self.h[0] = (self.h[0] + a)  & MASK32;
        self.h[1] = (self.h[1] + b)  & MASK32;
        self.h[2] = (self.h[2] + c)  & MASK32;
        self.h[3] = (self.h[3] + d)  & MASK32;
        self.h[4] = (self.h[4] + e)  & MASK32;
        self.h[5] = (self.h[5] + f)  & MASK32;
        self.h[6] = (self.h[6] + g)  & MASK32;
        self.h[7] = (self.h[7] + hh) & MASK32;
    }

    // Feed `data` into the hash. May be called any number of times;
    // bytes are copied into `buf` one at a time and the block is
    // compressed the moment it reaches 64 bytes.
    update :: (self: *Sha256, data: string) {
        cursor := 0;
        while cursor < data.len {
            slot := self.buf_len;
            self.buf[slot] = data[cursor];
            self.buf_len = slot + 1;
            self.total_len += 1;
            cursor += 1;

            if self.buf_len == 64 {
                self.process_block();
                self.buf_len = 0;
            }
        }
    }

    // Complete the hash: append the FIPS 180-4 padding and length, run
    // the last compression(s), and return the digest as 64 lowercase hex
    // characters. `h` and `total_len` are not re-initialised afterwards,
    // so start the next digest from a fresh `init()`.
    final :: (self: *Sha256) -> string {
        // Capture the length first; padding bytes must not be counted.
        message_bits := self.total_len * 8;

        // Mandatory 0x80 marker byte. `buf_len` is at most 63 on entry,
        // so there is always room for it.
        self.buf[self.buf_len] = 0x80;
        self.buf_len += 1;

        // If fewer than 8 bytes remain for the length field, zero-fill
        // the rest of this block, compress it, and continue in a new one.
        if 56 < self.buf_len {
            while self.buf_len < 64 {
                self.buf[self.buf_len] = 0;
                self.buf_len += 1;
            }
            self.process_block();
            self.buf_len = 0;
        }

        // Zero-fill up to the length field at offset 56.
        while self.buf_len < 56 {
            self.buf[self.buf_len] = 0;
            self.buf_len += 1;
        }

        // Message length in bits as a 64-bit big-endian integer in
        // bytes 56..63.
        shift := 56;
        slot := 56;
        while slot < 64 {
            self.buf[slot] = xx ((message_bits >> shift) & 0xFF);
            shift = shift - 8;
            slot += 1;
        }
        self.process_block();
        self.buf_len = 0;

        // Hex-encode the 32 digest bytes: byte k is taken from word
        // k >> 2, most significant byte first.
        // NOTE(review): `cstring` is defined outside this file; presumably
        // it returns a writable 64-character string — confirm.
        digest := cstring(64);
        byte_idx := 0;
        while byte_idx < 32 {
            word := self.h[byte_idx >> 2] & MASK32;
            octet := (word >> ((3 - (byte_idx & 3)) * 8)) & 0xFF;
            out := byte_idx * 2;
            digest[out]     = nibble_hex((octet >> 4) & 0xF);
            digest[out + 1] = nibble_hex(octet & 0xF);
            byte_idx += 1;
        }
        digest
    }
}
|
||||
|
||||
// Map a nibble (0..15) to its lowercase hexadecimal ASCII code.
//   0..9   -> '0'..'9'  (48 is '0')
//   10..15 -> 'a'..'f'  (87 is 'a' - 10, i.e. 97 - 10)
nibble_hex :: (n: s64) -> u8 {
    if n < 10 then xx (48 + n) else xx (87 + n)
}
|
||||
|
||||
// Build a fresh SHA-256 context and return it by value. Bind the result
// to a local; the `*self` methods then operate on that local's address.
//
// `buf` is deliberately left uninitialised: with `buf_len` at 0, every
// byte of it is written by `update`/`final` before `process_block`
// reads it.
init :: () -> Sha256 {
    state : Sha256 = ---;

    state.buf_len = 0;
    state.total_len = 0;

    // Initial hash value H(0) from FIPS 180-4 §5.3.3.
    state.h[0] = 0x6a09e667;
    state.h[1] = 0xbb67ae85;
    state.h[2] = 0x3c6ef372;
    state.h[3] = 0xa54ff53a;
    state.h[4] = 0x510e527f;
    state.h[5] = 0x9b05688c;
    state.h[6] = 0x1f83d9ab;
    state.h[7] = 0x5be0cd19;

    state
}
|
||||
|
||||
// Convenience wrapper: hash one complete buffer and return the digest as
// 64 lowercase hex characters. Equivalent to init + a single update +
// final.
sha256_hex :: (data: string) -> string {
    hasher := init();
    hasher.update(data);
    hasher.final()
}
|
||||
|
||||
// Hash the contents of the file at `path`, which is loaded in one piece
// via `read_file`. Yields null when `read_file` returns null; otherwise
// the 64-character lowercase hex digest.
sha256_file :: (path: [:0]u8) -> ?string {
    file_bytes := read_file(path);
    if file_bytes == null { return null; }
    sha256_hex(file_bytes!)
}
|
||||
Reference in New Issue
Block a user