From 4552ed61f6602c7c9a2abe9a8fc5fa4436d5199a Mon Sep 17 00:00:00 2001 From: agra Date: Thu, 4 Jun 2026 00:47:30 +0300 Subject: [PATCH] std/json: value model + zero-alloc writer with stable key order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add library/modules/std/json.sx — the JSON value model and writer (reader lands in a later step). Value model: a tagged union over null/bool/integer(s64)/string/array/ object. Objects are an ORDERED list of (key,value) pairs preserving INSERTION ORDER (no hash map, never sorted/deduped). Integers only — no fraction/exponent this milestone. Heap discipline: - Scalars carry no heap; string values are VIEWS into caller memory (never copied into the node). - Composite nodes (Array/Object) own growable child storage, allocated through an EXPLICIT allocator parameter on the builder methods (arr.add(v, alloc) / obj.put(key, val, alloc), mirroring List.append) — never the implicit context allocator. - The writer adds ZERO output allocations: it emits into a caller- provided Sink, either a fixed []u8 buffer (overflow raises, never truncates) or streaming straight to an fs.File through a small caller staging buffer (no whole-document string; peak memory O(staging)). Integer digits format in a stack [20]u8; s64 MIN is handled by formatting in negative space. Sink/IO/overflow surface on the ! error channel. examples/0713-modules-json-writer.sx builds a nested object + array + string with every escape kind + negative int + bool + null, then asserts the EXACT bytes (insertion order, escaping) from both the buffer sink and the file-streaming sink, plus the overflow-raises path. --- examples/0713-modules-json-writer.sx | 95 +++++ .../expected/0713-modules-json-writer.exit | 1 + .../expected/0713-modules-json-writer.stderr | 0 .../expected/0713-modules-json-writer.stdout | 5 + library/modules/std/json.sx | 335 ++++++++++++++++++ 5 files changed, 436 insertions(+) create mode 100644 examples/0713-modules-json-writer.sx create mode 100644 examples/expected/0713-modules-json-writer.exit create mode 100644 examples/expected/0713-modules-json-writer.stderr create mode 100644 examples/expected/0713-modules-json-writer.stdout create mode 100644 library/modules/std/json.sx diff --git a/examples/0713-modules-json-writer.sx b/examples/0713-modules-json-writer.sx new file mode 100644 index 0000000..67e2db8 --- /dev/null +++ b/examples/0713-modules-json-writer.sx @@ -0,0 +1,95 @@ +// JSON value model + writer from `modules/std/json.sx`. +// +// Builds a representative value — a nested object holding a string with +// every escape kind (quote, newline, tab, backslash, a raw control byte), +// a negative integer, a bool, null, an array, and a nested object — then +// serializes it two ways and asserts the EXACT bytes: +// +// 1. into a caller-owned `[]u8` buffer (returns bytes written), +// 2. streaming straight to a file through an 8-byte staging buffer +// (small on purpose, so the writer flushes many times and no +// whole-document string is ever held). +// +// Both must yield byte-for-byte the same pinned document, with keys in +// INSERTION ORDER. A too-small buffer must raise `error.Overflow` rather +// than truncate. The model is built through an explicit Arena allocator +// and freed in one `deinit`; the writer path allocates nothing. + +#import "modules/std.sx"; +#import "modules/std/json.sx"; +#import "modules/fs.sx"; + +// The exact document the writer must produce (insertion order, escaping). +EXPECT :: "{\"name\":\"a\\\"b\\n\",\"tab\":\"x\\ty\",\"bs\":\"c\\\\d\",\"ctrl\":\"\\u0001\",\"n\":-7,\"ok\":true,\"nil\":null,\"xs\":[1,-2,3],\"nested\":{\"k\":\"v\"}}"; + +report :: (label: string, ok: bool) { + if ok { print("{}: ok\n", label); } else { print("{}: FAIL\n", label); } +} + +build :: (alloc: Allocator) -> Value { + // A raw control byte (0x01) viewed as a 1-byte string — exercises the + // `\u00XX` path that has no named shorthand. String values are VIEWS, + // so the bytes must outlive the writes: back them with `alloc` (the + // arena), not a local that dies when `build` returns. + cbytes : [*]u8 = xx alloc.alloc(1); + cbytes[0] = 1; + ctrl := string.{ ptr = cbytes, len = 1 }; + + nested : Object = .{}; + nested.put("k", .str("v"), alloc); + + xs : Array = .{}; + xs.add(.int_(1), alloc); + xs.add(.int_(0 - 2), alloc); + xs.add(.int_(3), alloc); + + obj : Object = .{}; + obj.put("name", .str("a\"b\n"), alloc); // quote + newline + obj.put("tab", .str("x\ty"), alloc); // tab + obj.put("bs", .str("c\\d"), alloc); // backslash + obj.put("ctrl", .str(ctrl), alloc); // raw control byte ->  + obj.put("n", .int_(0 - 7), alloc); // negative int + obj.put("ok", .bool_(true), alloc); + obj.put("nil", .null_, alloc); + obj.put("xs", .array(xs), alloc); + obj.put("nested", .object(nested), alloc); + + return .object(obj); +} + +main :: () -> ! { + gpa := GPA.init(); + arena := Arena.init(xx gpa, 4096); + defer arena.deinit(); + + root := build(xx arena); + + // 1. Write into a caller buffer; assert exact bytes + byte count. + buf : [512]u8 = ---; + n := try write_to_buffer(root, string.{ ptr = @buf[0], len = 512 }); + view := string.{ ptr = @buf[0], len = n }; + print("doc: {}\n", view); + report("buffer-exact", view == EXPECT); + report("buffer-len", n == EXPECT.len); + + // 2. A buffer that is one byte too small must raise Overflow. + tight : []u8 = string.{ ptr = @buf[256], len = EXPECT.len - 1 }; + _, oerr := write_to_buffer(root, tight); + report("overflow-raised", oerr == error.Overflow); + + // 3. Stream to a file through a tiny staging buffer (forces flushes); + // read it back and assert it equals the same document. + path := "/tmp/sx_0713_json.json"; + fh := open_file(path, .write); + if fh == null { print("open: FAIL\n"); return; } + f := fh!; + stage : [8]u8 = ---; + try write_to_file(root, @f, string.{ ptr = @stage[0], len = 8 }); + f.close(); + + back := read_file(path); + if back == null { print("file-read: FAIL\n"); return; } + report("file-exact", back! == EXPECT); + delete_file(path); + return; +} diff --git a/examples/expected/0713-modules-json-writer.exit b/examples/expected/0713-modules-json-writer.exit new file mode 100644 index 0000000..573541a --- /dev/null +++ b/examples/expected/0713-modules-json-writer.exit @@ -0,0 +1 @@ +0 diff --git a/examples/expected/0713-modules-json-writer.stderr b/examples/expected/0713-modules-json-writer.stderr new file mode 100644 index 0000000..e69de29 diff --git a/examples/expected/0713-modules-json-writer.stdout b/examples/expected/0713-modules-json-writer.stdout new file mode 100644 index 0000000..40d30cd --- /dev/null +++ b/examples/expected/0713-modules-json-writer.stdout @@ -0,0 +1,5 @@ +doc: {"name":"a\"b\n","tab":"x\ty","bs":"c\\d","ctrl":"\u0001","n":-7,"ok":true,"nil":null,"xs":[1,-2,3],"nested":{"k":"v"}} +buffer-exact: ok +buffer-len: ok +overflow-raised: ok +file-exact: ok diff --git a/library/modules/std/json.sx b/library/modules/std/json.sx new file mode 100644 index 0000000..f43c27c --- /dev/null +++ b/library/modules/std/json.sx @@ -0,0 +1,335 @@ +// ===================================================================== +// json.sx — JSON value model + writer (stable key order), pure sx. +// +// This module delivers the JSON VALUE MODEL and the WRITER. The reader +// (parser) lands separately; this file never reads JSON text. +// +// NUMBERS ARE INTEGERS ONLY (s64) for this milestone — there is no +// fraction or exponent. A JSON value is one of: null, bool, integer, +// string, array, object. +// +// STABLE KEY ORDER: an object is NOT a hash map. It is an ORDERED list +// of (key, value) pairs that preserves INSERTION ORDER. Keys are never +// sorted and never reordered to deduplicate — the order you `put` them +// in is the order the writer emits them. This is the "stable key order" +// guarantee the manifest / db.json rely on. +// +// HEAP DISCIPLINE (binding, see heap-discipline.md): +// - Scalars (null / bool / int) carry no heap. +// - String values are VIEWS (`string`) into caller-owned memory; the +// node never copies the input bytes. +// - Composite nodes (array / object) hold unbounded children, so they +// genuinely need dynamic storage — but every allocation goes through +// an EXPLICIT `allocator` parameter on the builder method +// (`arr.add(v, alloc)` / `obj.put(key, val, alloc)`, mirroring +// `List.append`). Allocation NEVER falls back to the implicit +// context allocator silently. +// - The WRITER adds ZERO output allocations. It emits into a +// CALLER-PROVIDED sink: either a fixed `[]u8` buffer the caller owns +// (overflow is reported, never silently truncated) or, streaming, +// straight to an `fs.File` through a small caller-provided staging +// buffer — so the db.json path holds no whole-document string in +// memory (peak memory is O(staging), not O(document)). Integer +// digits are formatted in a stack `[20]u8`. Sink/IO/overflow +// failures surface on the error channel (`!JsonError`). +// +// Building a value (`#import "modules/std/json.sx";` brings these in): +// +// obj : Object = .{}; +// obj.put("name", .str(name_view), alloc); // name_view: a `string` +// obj.put("size", .int_(123), alloc); +// arr : Array = .{}; +// arr.add(.int_(1), alloc); +// obj.put("xs", .array(arr), alloc); +// root : Value = .object(obj); +// +// Writing into a caller buffer (returns bytes written; raises on overflow): +// +// out : [4096]u8 = ---; +// n := try write_to_buffer(root, string.{ ptr = @out[0], len = 4096 }); +// +// Streaming straight to a file (no whole-document string): +// +// f := open_file("db.json\0", .write)!; +// stage : [4096]u8 = ---; +// try write_to_file(root, @f, string.{ ptr = @stage[0], len = 4096 }); +// f.close(); +// ===================================================================== + +#import "modules/std.sx"; +#import "modules/fs.sx"; + +// The writer's failure contract: a too-small caller buffer (Overflow) or +// a short/failed file write (Io). Surfaced on the error channel — never a +// silent truncation or default. +JsonError :: error { Overflow, Io } + +// ── Value model ────────────────────────────────────────────────────── +// +// `Value` is a tagged union over the six JSON kinds. `null_`/`bool_`/ +// `int_` are scalars; `str` is a VIEW into caller memory; `array` and +// `object` own growable child storage (see Array / Object). +// +// `Value` is defined before Array / Object so its `[*]Value` / +// `[*]Member` back-references resolve; Array / Object refer back to +// `Value` in turn (mutual recursion through pointers — each composite +// holds a pointer to its children, so the layout is finite). + +Value :: enum { + null_; + bool_: bool; + int_: s64; + str: string; // view into caller-owned bytes; not copied + array: Array; + object: Object; +} + +// One ordered object entry. `key` is a view (not copied); `val` is owned +// by value inside the object's backing store. +Member :: struct { + key: string; + val: Value; +} + +// Ordered list of values. Same growable layout as `List`, but concrete +// (not generic) so it can be an enum payload, and its builder takes the +// allocator explicitly. +Array :: struct { + items: [*]Value = null; + len: s64 = 0; + cap: s64 = 0; + + // Append `v`, preserving order. Grows the backing store through the + // explicit `alloc` when full (doubling), freeing the old buffer. + add :: (self: *Array, v: Value, alloc: Allocator) { + if self.len >= self.cap { + new_cap := if self.cap == 0 then 4 else self.cap * 2; + new_items : [*]Value = xx alloc.alloc(new_cap * size_of(Value)); + if self.len > 0 { + memcpy(new_items, self.items, self.len * size_of(Value)); + alloc.dealloc(self.items); + } + self.items = new_items; + self.cap = new_cap; + } + self.items[self.len] = v; + self.len += 1; + } + + deinit :: (self: *Array, alloc: Allocator) { + if self.items != null { alloc.dealloc(self.items); } + self.items = null; + self.len = 0; + self.cap = 0; + } +} + +// Ordered list of (key, value) pairs — INSERTION ORDER preserved, never +// sorted, never deduplicated. Growable backing store through the +// explicit `alloc`. +Object :: struct { + items: [*]Member = null; + len: s64 = 0; + cap: s64 = 0; + + // Append a (key, val) pair at the end. Does not check for or merge a + // duplicate key — insertion order is the contract; a repeated key is + // emitted twice, in the order added. + put :: (self: *Object, key: string, v: Value, alloc: Allocator) { + if self.len >= self.cap { + new_cap := if self.cap == 0 then 4 else self.cap * 2; + new_items : [*]Member = xx alloc.alloc(new_cap * size_of(Member)); + if self.len > 0 { + memcpy(new_items, self.items, self.len * size_of(Member)); + alloc.dealloc(self.items); + } + self.items = new_items; + self.cap = new_cap; + } + self.items[self.len] = Member.{ key = key, val = v }; + self.len += 1; + } + + deinit :: (self: *Object, alloc: Allocator) { + if self.items != null { alloc.dealloc(self.items); } + self.items = null; + self.len = 0; + self.cap = 0; + } +} + +// ── Sink ───────────────────────────────────────────────────────────── +// +// A single concrete output sink with two modes, chosen by `file`: +// - BUFFER mode (`file == null`): bytes land directly in the caller's +// `dst`; when `dst` fills, `put`/`put_byte` raise `error.Overflow`. +// `pos` is the running byte count. +// - FILE mode (`file != null`): `dst` is a caller-provided STAGING +// buffer; when it fills it is flushed to `file` and reused, so peak +// memory is O(dst) regardless of document size. `flush()` writes any +// remaining staged bytes. The staging buffer must be non-empty. +// +// The sink owns NO heap and holds NO whole-document string. + +Sink :: struct { + dst: []u8; // caller-owned destination (buffer mode) or staging (file mode) + pos: s64 = 0; // bytes currently in `dst` + file: *File = null; // null => buffer mode + + put_byte :: (self: *Sink, b: u8) -> !JsonError { + if self.pos >= self.dst.len { + if self.file == null { raise error.Overflow; } + try self.flush(); + } + self.dst[self.pos] = b; + self.pos += 1; + return; + } + + put :: (self: *Sink, bytes: string) -> !JsonError { + i := 0; + while i < bytes.len { + try self.put_byte(bytes[i]); + i += 1; + } + return; + } + + // File mode: write staged bytes and reset. Buffer mode: no-op. + flush :: (self: *Sink) -> !JsonError { + if self.file == null { return; } + if self.pos == 0 { return; } + n := self.file.write(string.{ ptr = @self.dst[0], len = self.pos }); + if n != self.pos { raise error.Io; } + self.pos = 0; + return; + } +} + +// ── Writer ─────────────────────────────────────────────────────────── + +// Lowercase-hex ASCII byte for a 0..15 nibble. 48='0', 97='a'. +hex_digit :: (n: s64) -> u8 { + if n < 10 then xx (n + 48) else xx (n - 10 + 97) +} + +// `\u00XX` escape for a control byte (< 0x20). Two hex nibbles; the high +// byte of the code point is always 0x00 here. +write_u_escape :: (c: u8, sink: *Sink) -> !JsonError { + try sink.put_byte(92); // backslash + try sink.put_byte(117); // 'u' + try sink.put_byte(48); // '0' + try sink.put_byte(48); // '0' + try sink.put_byte(hex_digit((cast(s64) c >> 4) & 0xF)); + try sink.put_byte(hex_digit(cast(s64) c & 0xF)); + return; +} + +// Emit a JSON string: opening quote, escaped body, closing quote. Escapes +// quote, backslash, the named control shorthands (\b \t \n \f \r), and +// any other control byte (< 0x20) as `\u00XX`. Bytes >= 0x20 (including +// UTF-8 continuation bytes) pass through unchanged. +write_string :: (s: string, sink: *Sink) -> !JsonError { + try sink.put_byte(34); // opening quote + i := 0; + while i < s.len { + c := s[i]; + if c == 34 { try sink.put_byte(92); try sink.put_byte(34); } // \" + else if c == 92 { try sink.put_byte(92); try sink.put_byte(92); } // \\ + else if c == 8 { try sink.put_byte(92); try sink.put_byte(98); } // \b + else if c == 9 { try sink.put_byte(92); try sink.put_byte(116); } // \t + else if c == 10 { try sink.put_byte(92); try sink.put_byte(110); } // \n + else if c == 12 { try sink.put_byte(92); try sink.put_byte(102); } // \f + else if c == 13 { try sink.put_byte(92); try sink.put_byte(114); } // \r + else if c < 32 { try write_u_escape(c, sink); } + else { try sink.put_byte(c); } + i += 1; + } + try sink.put_byte(34); // closing quote + return; +} + +// Emit a signed integer in decimal, no allocation. Digits are formed in a +// stack buffer working in NEGATIVE space so s64 MIN +// (-9223372036854775808) — whose magnitude is not representable as a +// positive s64 — serializes correctly. +write_int :: (n: s64, sink: *Sink) -> !JsonError { + if n == 0 { try sink.put_byte(48); return; } + tmp : [20]u8 = ---; // 19 digits + sign is the s64 worst case + neg := n < 0; + v := n; + if !neg { v = 0 - n; } // fold positives into negative space + i := 20; + while v < 0 { + i -= 1; + d := 0 - (v % 10); // sx `%` keeps the dividend's sign; this is 0..9 + tmp[i] = xx (d + 48); + v = v / 10; // truncates toward zero + } + if neg { i -= 1; tmp[i] = 45; } // '-' + try sink.put(string.{ ptr = @tmp[i], len = 20 - i }); + return; +} + +// Serialize one value into `sink`. Recurses for arrays / objects. +write_value :: (v: Value, sink: *Sink) -> !JsonError { + if v == { + case .null_: try sink.put("null"); + case .bool_: try sink.put(if v.bool_ then "true" else "false"); + case .int_: try write_int(v.int_, sink); + case .str: try write_string(v.str, sink); + case .array: try write_array(v.array, sink); + case .object: try write_object(v.object, sink); + } + return; +} + +write_array :: (arr: Array, sink: *Sink) -> !JsonError { + try sink.put_byte(91); // '[' + i := 0; + while i < arr.len { + if i > 0 { try sink.put_byte(44); } // ',' + try write_value(arr.items[i], sink); + i += 1; + } + try sink.put_byte(93); // ']' + return; +} + +// Emits members in stored order — the insertion order guarantee. +write_object :: (obj: Object, sink: *Sink) -> !JsonError { + try sink.put_byte(123); // '{' + i := 0; + while i < obj.len { + if i > 0 { try sink.put_byte(44); } // ',' + try write_string(obj.items[i].key, sink); + try sink.put_byte(58); // ':' + try write_value(obj.items[i].val, sink); + i += 1; + } + try sink.put_byte(125); // '}' + return; +} + +// ── Public entry points ────────────────────────────────────────────── + +// Serialize `v` into the caller-owned buffer `dst`. Returns the number of +// bytes written. Raises `error.Overflow` if `dst` is too small (the +// partial contents of `dst` are then undefined — nothing is truncated +// silently). No allocation. +write_to_buffer :: (v: Value, dst: []u8) -> (s64, !JsonError) { + sink := Sink.{ dst = dst }; + try write_value(v, @sink); + return sink.pos; +} + +// Serialize `v` straight to an open `file`, staging through the caller- +// owned `staging` buffer (which must be non-empty). No whole-document +// string is ever held — peak extra memory is O(staging). Raises +// `error.Io` on a short/failed write. No allocation. +write_to_file :: (v: Value, file: *File, staging: []u8) -> !JsonError { + sink := Sink.{ dst = staging, file = file }; + try write_value(v, @sink); + try sink.flush(); + return; +}