diff --git a/examples/0710-modules-sha256.sx b/examples/0710-modules-sha256.sx
new file mode 100644
index 0000000..0ce8df8
--- /dev/null
+++ b/examples/0710-modules-sha256.sx
@@ -0,0 +1,37 @@
+// Streaming SHA-256 (FIPS 180-4) from `modules/std/hash.sx`.
+//
+// Known-answer vectors (empty, "abc", the 112-byte NIST multi-block
+// vector) plus the streaming invariant: feeding the same bytes in
+// several `update` chunks yields the same digest as the one-shot call.
+
+#import "modules/std.sx";
+#import "modules/std/hash.sx";
+
+check :: (label: string, got: string, want: string) {
+    if got == want {
+        print("{}: ok\n", label);
+    } else {
+        print("{}: FAIL got {} want {}\n", label, got, want);
+    }
+}
+
+main :: () {
+    check("empty", sha256_hex(""),
+        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+    check("abc", sha256_hex("abc"),
+        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad");
+
+    // 112-byte input — spans more than one 64-byte block.
+    multi := "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+    check("multi", sha256_hex(multi),
+        "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1");
+
+    // Streaming must equal one-shot regardless of chunk boundaries.
+    h := init();
+    h.update("abcdefghbcdefghicdefghijdefghijke"); // 33 bytes
+    h.update("fghijklfghijklmghijklmnhijklmno"); // 31 bytes: ends exactly on the 64-byte block edge
+    h.update("ijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"); // remaining 48 bytes
+    streamed := h.final();
+    check("stream-eq-oneshot",
+        if streamed == sha256_hex(multi) then "yes" else "no", "yes");
+}
diff --git a/examples/expected/0710-modules-sha256.exit b/examples/expected/0710-modules-sha256.exit
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/examples/expected/0710-modules-sha256.exit
@@ -0,0 +1 @@
+0
diff --git a/examples/expected/0710-modules-sha256.stderr b/examples/expected/0710-modules-sha256.stderr
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/examples/expected/0710-modules-sha256.stderr
@@ -0,0 +1 @@
+
diff --git a/examples/expected/0710-modules-sha256.stdout b/examples/expected/0710-modules-sha256.stdout
new file mode 100644
index 0000000..dff482f
--- /dev/null
+++ b/examples/expected/0710-modules-sha256.stdout
@@ -0,0 +1,4 @@
+empty: ok
+abc: ok
+multi: ok
+stream-eq-oneshot: ok
diff --git a/library/modules/std/hash.sx b/library/modules/std/hash.sx
new file mode 100644
index 0000000..62385e1
--- /dev/null
+++ b/library/modules/std/hash.sx
@@ -0,0 +1,211 @@
+// =====================================================================
+// hash.sx — streaming SHA-256 (FIPS 180-4), pure sx.
+//
+// Content addressing is security-critical, so the digest is computed
+// in-process: no shelling out, no platform crypto library. All 32-bit
+// word arithmetic is done in s64 and masked back to 32 bits with
+// `& MASK32`, so the result is identical regardless of the host's
+// native integer width or overflow behaviour.
+//
+// Streaming API (the by-value `init` / `*self` pattern):
+//
+//   h := hash.init();          // Sha256, stack-local
+//   h.update("hello, ");       // absorb across calls
+//   h.update("world");
+//   digest := h.final();       // 64-char lowercase hex
+//
+// One-shot convenience:
+//
+//   digest := hash.sha256_hex("abc");
+//   digest := hash.sha256_file("path\0"); // ?string, null on I/O error
+// =====================================================================
+
+#import "modules/std.sx";
+#import "modules/fs.sx";
+
+// Low 32 bits. SHA-256 is defined over 32-bit words; every add/rotate
+// result is masked back through this so the carry never escapes bit 31.
+MASK32 :: 0xFFFFFFFF;
+
+// Round constants K[0..63] — the first 32 bits of the fractional parts
+// of the cube roots of the first 64 primes (FIPS 180-4 §4.2.2).
+K : [64]s64 = .[
+    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+];
+
+// 32-bit right rotate. `word` must already be masked to 32 bits.
+rotr :: (word: s64, n: s64) -> s64 {
+    ((word >> n) | (word << (32 - n))) & MASK32 // the mask drops bits that `<<` pushed above bit 31
+}
+
+big_sigma0 :: (x: s64) -> s64 { rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22) } // Σ0, FIPS 180-4 §4.1.2
+big_sigma1 :: (x: s64) -> s64 { rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25) } // Σ1
+small_sigma0 :: (x: s64) -> s64 { rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3) } // σ0 (last term is a plain shift)
+small_sigma1 :: (x: s64) -> s64 { rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10) } // σ1 (last term is a plain shift)
+
+Sha256 :: struct {
+    h: [8]s64; // running hash state (each entry masked to 32 bits)
+    buf: [64]u8; // partial-block buffer
+    buf_len: s64; // bytes currently in `buf` (0..63)
+    total_len: s64; // total bytes absorbed so far
+
+    // Crunch the 64-byte block currently in `buf` into the state.
+    process_block :: (self: *Sha256) {
+        w : [64]s64 = ---; // message schedule; every entry is assigned below before it is read
+        t := 0;
+        while t < 16 { // W[0..15]: the block read as big-endian 32-bit words
+            base := t * 4;
+            w[t] = ((cast(s64) self.buf[base]) << 24)
+                | ((cast(s64) self.buf[base + 1]) << 16)
+                | ((cast(s64) self.buf[base + 2]) << 8)
+                | (cast(s64) self.buf[base + 3]);
+            t += 1;
+        }
+        t = 16; // t is already 16 when the loop above exits
+        while t < 64 { // W[16..63]: schedule expansion from earlier words
+            w[t] = (small_sigma1(w[t - 2]) + w[t - 7] + small_sigma0(w[t - 15]) + w[t - 16]) & MASK32;
+            t += 1;
+        }
+
+        a := self.h[0];
+        b := self.h[1];
+        c := self.h[2];
+        d := self.h[3];
+        e := self.h[4];
+        f := self.h[5];
+        g := self.h[6];
+        hh := self.h[7];
+
+        t = 0;
+        while t < 64 {
+            ch := (e & f) ^ (~e & g); // `& g` clears the high bits that `~e` sets in the s64
+            temp1 := (hh + big_sigma1(e) + ch + K[t] + w[t]) & MASK32;
+            maj := (a & b) ^ (a & c) ^ (b & c);
+            temp2 := (big_sigma0(a) + maj) & MASK32;
+            hh = g;
+            g = f;
+            f = e;
+            e = (d + temp1) & MASK32;
+            d = c;
+            c = b;
+            b = a;
+            a = (temp1 + temp2) & MASK32;
+            t += 1;
+        }
+
+        self.h[0] = (self.h[0] + a) & MASK32; // fold the working variables back into the state
+        self.h[1] = (self.h[1] + b) & MASK32;
+        self.h[2] = (self.h[2] + c) & MASK32;
+        self.h[3] = (self.h[3] + d) & MASK32;
+        self.h[4] = (self.h[4] + e) & MASK32;
+        self.h[5] = (self.h[5] + f) & MASK32;
+        self.h[6] = (self.h[6] + g) & MASK32;
+        self.h[7] = (self.h[7] + hh) & MASK32;
+    }
+
+    // Absorb `data`. Safe to call repeatedly; partial blocks are buffered
+    // and crunched as soon as 64 bytes accumulate.
+    update :: (self: *Sha256, data: string) {
+        i := 0;
+        while i < data.len {
+            self.buf[self.buf_len] = data[i];
+            self.buf_len += 1;
+            self.total_len += 1;
+            if self.buf_len == 64 {
+                self.process_block();
+                self.buf_len = 0;
+            }
+            i += 1;
+        }
+    }
+
+    // Finish: apply FIPS padding and emit the 32-byte digest as 64
+    // lowercase hex characters. `h`/`total_len` are not reset: do not reuse `self`.
+    final :: (self: *Sha256) -> string {
+        bit_len := self.total_len * 8;
+
+        // 0x80 terminator, then zero-pad until 56 bytes mod 64.
+        self.buf[self.buf_len] = 0x80;
+        self.buf_len += 1;
+        if self.buf_len == 64 {
+            self.process_block();
+            self.buf_len = 0;
+        }
+        while self.buf_len != 56 { // runs through a full extra block when fewer than 8 bytes are free
+            self.buf[self.buf_len] = 0;
+            self.buf_len += 1;
+            if self.buf_len == 64 {
+                self.process_block();
+                self.buf_len = 0;
+            }
+        }
+
+        // 64-bit big-endian message length in bits.
+        b := 0;
+        while b < 8 {
+            self.buf[56 + b] = xx ((bit_len >> ((7 - b) * 8)) & 0xFF);
+            b += 1;
+        }
+        self.process_block();
+        self.buf_len = 0;
+
+        digest := cstring(64); // NOTE(review): assumes cstring(64) yields a writable 64-byte string — confirm
+        i := 0;
+        while i < 8 {
+            word := self.h[i] & MASK32;
+            j := 0;
+            while j < 4 { // most significant byte of each word first
+                byte_val := (word >> ((3 - j) * 8)) & 0xFF;
+                pos := (i * 4 + j) * 2;
+                digest[pos] = nibble_hex((byte_val >> 4) & 0xF);
+                digest[pos + 1] = nibble_hex(byte_val & 0xF);
+                j += 1;
+            }
+            i += 1;
+        }
+        digest
+    }
+}
+
+// Lowercase-hex ASCII byte for a 0..15 nibble. 48='0', 97='a'.
+nibble_hex :: (n: s64) -> u8 {
+    if n < 10 then xx (n + 48) else xx (n - 10 + 97)
+}
+
+// Returns the SHA-256 state by value; the caller binds it to a local
+// whose address backs the `*self` methods.
+init :: () -> Sha256 {
+    fresh : Sha256 = ---;
+    fresh.total_len = 0;
+    fresh.buf_len = 0;
+    fresh.h[0] = 0x6a09e667; // initial hash value H(0), FIPS 180-4 §5.3.3
+    fresh.h[1] = 0xbb67ae85;
+    fresh.h[2] = 0x3c6ef372;
+    fresh.h[3] = 0xa54ff53a;
+    fresh.h[4] = 0x510e527f;
+    fresh.h[5] = 0x9b05688c;
+    fresh.h[6] = 0x1f83d9ab;
+    fresh.h[7] = 0x5be0cd19;
+    fresh
+}
+
+// Convenience wrapper: hash one in-memory buffer, return 64 hex chars.
+sha256_hex :: (data: string) -> string {
+    hasher := init();
+    hasher.update(data);
+    hasher.final()
+}
+
+// Hashes the file at `path`; yields null when `read_file` returns null.
+sha256_file :: (path: [:0]u8) -> ?string {
+    loaded := read_file(path);
+    if loaded == null { return null; }
+    sha256_hex(loaded!)
+}