// =====================================================================
// json.sx — JSON value model + writer (stable key order), pure sx.
//
// This module delivers the JSON VALUE MODEL and the WRITER. The reader
// (parser) lands separately; this file never reads JSON text.
//
// NUMBERS ARE INTEGERS ONLY (s64) for this milestone — there is no
// fraction or exponent. A JSON value is one of: null, bool, integer,
// string, array, object.
//
// STABLE KEY ORDER: an object is NOT a hash map. It is an ORDERED list
// of (key, value) pairs that preserves INSERTION ORDER. Keys are never
// sorted and never reordered to deduplicate — the order you `put` them
// in is the order the writer emits them. This is the "stable key order"
// guarantee the manifest / db.json rely on.
//
// HEAP DISCIPLINE (binding, see heap-discipline.md):
//   - Scalars (null / bool / int) carry no heap.
//   - String values are VIEWS (`string`) into caller-owned memory; the
//     node never copies the input bytes.
//   - Composite nodes (array / object) hold unbounded children, so they
//     genuinely need dynamic storage — but every allocation goes through
//     an EXPLICIT `allocator` parameter on the builder method
//     (`arr.add(v, alloc)` / `obj.put(key, val, alloc)`, mirroring
//     `List.append`). Allocation NEVER falls back to the implicit
//     context allocator silently.
//   - The WRITER adds ZERO output allocations. It emits into a
//     CALLER-PROVIDED sink: either a fixed `[]u8` buffer the caller owns
//     (overflow is reported, never silently truncated) or, streaming,
//     straight to an `fs.File` through a small caller-provided staging
//     buffer — so the db.json path holds no whole-document string in
//     memory (peak memory is O(staging), not O(document)). Integer
//     digits are formatted in a stack `[20]u8`. Sink/IO/overflow
//     failures surface on the error channel (`!JsonError`).
//
// Building a value (`#import "modules/std/json.sx";` brings these in):
//
//     obj : Object = .{};
//     obj.put("name", .str(name_view), alloc);   // name_view: a `string`
//     obj.put("size", .int_(123), alloc);
//     arr : Array = .{};
//     arr.add(.int_(1), alloc);
//     obj.put("xs", .array(arr), alloc);
//     root : Value = .object(obj);
//
// Writing into a caller buffer (returns bytes written; raises on overflow):
//
//     out : [4096]u8 = ---;
//     n := try write_to_buffer(root, string.{ ptr = @out[0], len = 4096 });
//
// Streaming straight to a file (no whole-document string):
//
//     f := open_file("db.json\0", .write)!;
//     stage : [4096]u8 = ---;
//     try write_to_file(root, @f, string.{ ptr = @stage[0], len = 4096 });
//     f.close();
// =====================================================================

#import "modules/std.sx";
#import "modules/fs.sx";

// The writer's failure contract: a too-small caller buffer (Overflow) or
// a short/failed file write (Io). Surfaced on the error channel — never a
// silent truncation or default.
JsonError :: error { Overflow, Io }

// ── Value model ──────────────────────────────────────────────────────
//
// `Value` is a tagged union over the six JSON kinds. `null_`/`bool_`/
// `int_` are scalars; `str` is a VIEW into caller memory; `array` and
// `object` own growable child storage (see Array / Object).
//
// `Value` is defined before Array / Object so that their `[*]Value` /
// `[*]Member` item pointers resolve; `Value` refers to Array / Object in
// turn through its `array` / `object` payloads (mutual recursion through
// pointers — each composite holds a pointer to its children, so the
// layout is finite).
Value :: enum {
    null_;
    bool_: bool;
    int_: s64;
    str: string;     // view into caller-owned bytes; not copied
    array: Array;
    object: Object;
}

// One ordered object entry. `key` is a view (not copied); `val` is owned
// by value inside the object's backing store.
Member :: struct {
    key: string;
    val: Value;
}

// Ordered list of values.
// Same growable layout as `List`, but concrete (not generic) so it can be
// an enum payload, and its builder takes the allocator explicitly.
Array :: struct {
    items: [*]Value = null;   // backing store; null while empty
    len: s64 = 0;             // number of values stored
    cap: s64 = 0;             // number of values `items` has room for

    // Append `v`, preserving order. Grows the backing store through the
    // explicit `alloc` when full (4 slots first, then doubling), copying
    // the existing values across and freeing the old buffer.
    // NOTE(review): the result of `alloc.alloc` is not checked — confirm
    // the allocator aborts rather than returning null on exhaustion.
    add :: (self: *Array, v: Value, alloc: Allocator) {
        if self.len >= self.cap {
            new_cap := if self.cap == 0 then 4 else self.cap * 2;
            new_items : [*]Value = xx alloc.alloc(new_cap * size_of(Value));
            if self.len > 0 {
                memcpy(new_items, self.items, self.len * size_of(Value));
                alloc.dealloc(self.items);
            }
            self.items = new_items;
            self.cap = new_cap;
        }
        self.items[self.len] = v;
        self.len += 1;
    }

    // Release this array's own backing store through `alloc` and reset to
    // the empty state. Shallow: storage owned by nested array / object
    // children is not visited here.
    deinit :: (self: *Array, alloc: Allocator) {
        if self.items != null { alloc.dealloc(self.items); }
        self.items = null;
        self.len = 0;
        self.cap = 0;
    }
}

// Ordered list of (key, value) pairs — INSERTION ORDER preserved, never
// sorted, never deduplicated. Growable backing store through the
// explicit `alloc`.
Object :: struct {
    items: [*]Member = null;  // backing store; null while empty
    len: s64 = 0;             // number of members stored
    cap: s64 = 0;             // number of members `items` has room for

    // Append a (key, val) pair at the end. Does not check for or merge a
    // duplicate key — insertion order is the contract; a repeated key is
    // emitted twice, in the order added. Growth policy matches
    // `Array.add` (4 slots first, then doubling).
    // NOTE(review): the result of `alloc.alloc` is not checked — confirm
    // the allocator aborts rather than returning null on exhaustion.
    put :: (self: *Object, key: string, v: Value, alloc: Allocator) {
        if self.len >= self.cap {
            new_cap := if self.cap == 0 then 4 else self.cap * 2;
            new_items : [*]Member = xx alloc.alloc(new_cap * size_of(Member));
            if self.len > 0 {
                memcpy(new_items, self.items, self.len * size_of(Member));
                alloc.dealloc(self.items);
            }
            self.items = new_items;
            self.cap = new_cap;
        }
        self.items[self.len] = Member.{ key = key, val = v };
        self.len += 1;
    }

    // Release this object's own backing store through `alloc` and reset
    // to the empty state. Shallow: storage owned by nested array / object
    // member values is not visited here.
    deinit :: (self: *Object, alloc: Allocator) {
        if self.items != null { alloc.dealloc(self.items); }
        self.items = null;
        self.len = 0;
        self.cap = 0;
    }
}

// ── Sink ─────────────────────────────────────────────────────────────
//
// A single concrete output sink with two modes, chosen by `file`:
//   - BUFFER mode (`file == null`): bytes land directly in the caller's
//     `dst`; when `dst` fills, `put`/`put_byte` raise `error.Overflow`.
//     `pos` is the running byte count.
//   - FILE mode (`file != null`): `dst` is a caller-provided STAGING
//     buffer; when it fills it is flushed to `file` and reused, so peak
//     memory is O(dst) regardless of document size. `flush()` writes any
//     remaining staged bytes. A zero-length staging buffer cannot hold a
//     byte even after a flush, so `put_byte` raises `error.Overflow` for
//     it instead of storing out of bounds.
//
// The sink owns NO heap and holds NO whole-document string.
Sink :: struct {
    dst: []u8;            // caller-owned destination (buffer mode) or staging (file mode)
    pos: s64 = 0;         // bytes currently in `dst`
    file: *File = null;   // null => buffer mode

    // Append one byte. Buffer mode: raises `error.Overflow` when `dst` is
    // full. File mode: flushes the full staging buffer first (propagating
    // `error.Io`), then stores the byte.
    put_byte :: (self: *Sink, b: u8) -> !JsonError {
        if self.pos >= self.dst.len {
            if self.file == null { raise error.Overflow; }
            try self.flush();
            // `flush` leaves `pos == 0`; if there is still no room, `dst`
            // has zero length and the store below would be out of bounds.
            if self.pos >= self.dst.len { raise error.Overflow; }
        }
        self.dst[self.pos] = b;
        self.pos += 1;
        return;
    }

    // Append every byte of `bytes`, in order, via `put_byte`; stops at the
    // first error.
    put :: (self: *Sink, bytes: string) -> !JsonError {
        i := 0;
        while i < bytes.len {
            try self.put_byte(bytes[i]);
            i += 1;
        }
        return;
    }

    // File mode: write staged bytes and reset. Buffer mode: no-op.
    // Raises `error.Io` when the file accepts a different number of bytes
    // than were staged; `pos` is left untouched in that case.
    flush :: (self: *Sink) -> !JsonError {
        if self.file == null { return; }
        if self.pos == 0 { return; }
        n := self.file.write(string.{ ptr = @self.dst[0], len = self.pos });
        if n != self.pos { raise error.Io; }
        self.pos = 0;
        return;
    }
}

// ── Writer ───────────────────────────────────────────────────────────

// Lowercase-hex ASCII byte for a 0..15 nibble. 48='0', 97='a'.
hex_digit :: (n: s64) -> u8 {
    if n < 10 then xx (n + 48) else xx (n - 10 + 97)
}

// `\u00XX` escape for a control byte (< 0x20). Two hex nibbles; the high
// byte of the code point is always 0x00 here.
write_u_escape :: (c: u8, sink: *Sink) -> !JsonError {
    try sink.put_byte(92);    // backslash
    try sink.put_byte(117);   // 'u'
    try sink.put_byte(48);    // '0'
    try sink.put_byte(48);    // '0'
    try sink.put_byte(hex_digit((cast(s64) c >> 4) & 0xF));
    try sink.put_byte(hex_digit(cast(s64) c & 0xF));
    return;
}

// Emit a JSON string: opening quote, escaped body, closing quote. Escapes
// quote, backslash, the named control shorthands (\b \t \n \f \r), and
// any other control byte (< 0x20) as `\u00XX`. Bytes >= 0x20 (including
// UTF-8 continuation bytes) pass through unchanged.
write_string :: (s: string, sink: *Sink) -> !JsonError {
    try sink.put_byte(34);   // opening quote
    i := 0;
    while i < s.len {
        c := s[i];
        if c == 34 { try sink.put_byte(92); try sink.put_byte(34); }             // \"
        else if c == 92 { try sink.put_byte(92); try sink.put_byte(92); }        // \\
        else if c == 8 { try sink.put_byte(92); try sink.put_byte(98); }         // \b
        else if c == 9 { try sink.put_byte(92); try sink.put_byte(116); }        // \t
        else if c == 10 { try sink.put_byte(92); try sink.put_byte(110); }       // \n
        else if c == 12 { try sink.put_byte(92); try sink.put_byte(102); }       // \f
        else if c == 13 { try sink.put_byte(92); try sink.put_byte(114); }       // \r
        else if c < 32 { try write_u_escape(c, sink); }
        else { try sink.put_byte(c); }
        i += 1;
    }
    try sink.put_byte(34);   // closing quote
    return;
}

// Emit a signed integer in decimal, no allocation.
// Digits are formed in a stack buffer working in NEGATIVE space so s64 MIN
// (-9223372036854775808) — whose magnitude is not representable as a
// positive s64 — serializes correctly.
write_int :: (n: s64, sink: *Sink) -> !JsonError {
    // Zero never enters the digit loop below, so it is emitted directly.
    if n == 0 {
        try sink.put_byte(48);   // '0'
        return;
    }

    digits : [20]u8 = ---;       // sign + 19 digits is the s64 worst case
    is_negative := n < 0;

    // Non-positive working copy: positives are negated, negatives kept.
    rest := if is_negative then n else 0 - n;

    // Fill `digits` from the back; `start` is the index of the first used byte.
    start := 20;
    while rest != 0 {
        digit := 0 - (rest % 10);   // `%` keeps the dividend's sign, so this is 0..9
        start -= 1;
        digits[start] = xx (digit + 48);
        rest = rest / 10;           // truncates toward zero
    }

    if is_negative {
        start -= 1;
        digits[start] = 45;         // '-'
    }

    try sink.put(string.{ ptr = @digits[start], len = 20 - start });
    return;
}

// Write a single value to `sink`, dispatching on its kind. Array and
// object values recurse through `write_array` / `write_object`.
write_value :: (v: Value, sink: *Sink) -> !JsonError {
    if v == {
        case .null_:
            try sink.put("null");
        case .bool_:
            if v.bool_ { try sink.put("true"); } else { try sink.put("false"); }
        case .int_:
            try write_int(v.int_, sink);
        case .str:
            try write_string(v.str, sink);
        case .array:
            try write_array(v.array, sink);
        case .object:
            try write_object(v.object, sink);
    }
    return;
}

// Write `arr` as `[v0,v1,...]` with no whitespace; elements go out in
// stored order.
write_array :: (arr: Array, sink: *Sink) -> !JsonError {
    try sink.put_byte(91);   // '['
    idx := 0;
    while idx < arr.len {
        // A comma precedes every element except the first.
        if idx != 0 { try sink.put_byte(44); }   // ','
        try write_value(arr.items[idx], sink);
        idx += 1;
    }
    try sink.put_byte(93);   // ']'
    return;
}

// Write `obj` as `{"k0":v0,"k1":v1,...}` with no whitespace. Members go
// out in stored order — the insertion order guarantee.
write_object :: (obj: Object, sink: *Sink) -> !JsonError {
    try sink.put_byte(123);   // '{'
    idx := 0;
    while idx < obj.len {
        // A comma precedes every member except the first.
        if idx != 0 { try sink.put_byte(44); }   // ','
        entry := obj.items[idx];
        try write_string(entry.key, sink);
        try sink.put_byte(58);                   // ':'
        try write_value(entry.val, sink);
        idx += 1;
    }
    try sink.put_byte(125);   // '}'
    return;
}

// ── Public entry points ──────────────────────────────────────────────

// Serialize `v` into the caller-owned buffer `dst`. Returns the number of
// bytes written.
// Raises `error.Overflow` if `dst` is too small (the partial contents of
// `dst` are then undefined — nothing is truncated silently). No
// allocation.
write_to_buffer :: (v: Value, dst: []u8) -> (s64, !JsonError) {
    sink := Sink.{ dst = dst };   // `file` defaults to null => buffer mode
    try write_value(v, @sink);
    return sink.pos;
}

// Serialize `v` straight to an open `file`, staging through the caller-
// owned `staging` buffer. No whole-document string is ever held — peak
// extra memory is O(staging). No allocation.
//
// Raises:
//   - `error.Io` if `file` is null, or on a short/failed write.
//   - `error.Overflow` if `staging` is empty (it cannot hold even one
//     byte between flushes).
write_to_file :: (v: Value, file: *File, staging: []u8) -> !JsonError {
    // A null `file` would put the Sink in buffer mode: a document that
    // fits in `staging` would then "succeed" without writing anything.
    if file == null { raise error.Io; }

    // Every value emits at least one byte, and the sink stores each byte
    // into `staging` before flushing; an empty buffer has nowhere to put it.
    if staging.len == 0 { raise error.Overflow; }

    sink := Sink.{ dst = staging, file = file };
    try write_value(v, @sink);
    try sink.flush();   // push out whatever is still staged
    return;
}