Merge branch 'flow/sx-foundation/F2.1' into dist-foundation

This commit is contained in:
agra
2026-06-04 01:15:26 +03:00
5 changed files with 447 additions and 0 deletions

View File

@@ -0,0 +1,106 @@
// JSON value model + writer from `modules/std/json.sx`.
//
// Builds a representative value — a nested object holding a string with
// every escape kind (quote, newline, tab, backslash, a raw control byte),
// integers spanning zero / a small negative / a small positive / s64 MIN
// (-9223372036854775808) / s64 MAX (9223372036854775807), a bool, null, an
// array, and a nested object — then serializes it two ways and asserts the
// EXACT bytes:
//
// 1. into a caller-owned `[]u8` buffer (returns bytes written),
// 2. streaming straight to a file through an 8-byte staging buffer
// (small on purpose, so the writer flushes many times and no
// whole-document string is ever held).
//
// Both must yield byte-for-byte the same pinned document, with keys in
// INSERTION ORDER. A too-small buffer must raise `error.Overflow` rather
// than truncate. The model is built through an explicit Arena allocator
// and freed in one `deinit`; the writer path allocates nothing.
#import "modules/std.sx";
#import "modules/std/json.sx";
#import "modules/fs.sx";
// The exact document the writer must produce: members in insertion order,
// with every string escape already applied. This is an sx string literal,
// so each quote and backslash of the JSON text is itself escaped here
// (for example `\\n` below stands for the two JSON characters `\n`).
EXPECT :: "{\"name\":\"a\\\"b\\n\",\"tab\":\"x\\ty\",\"bs\":\"c\\\\d\",\"ctrl\":\"\\u0001\",\"n\":-7,\"zero\":0,\"pos\":7,\"min\":-9223372036854775808,\"max\":9223372036854775807,\"ok\":true,\"nil\":null,\"xs\":[1,-2,3],\"nested\":{\"k\":\"v\"}}";
// Print one check line: "<label>: ok" when `ok` holds, "<label>: FAIL"
// otherwise.
report :: (label: string, ok: bool) {
    if !ok {
        print("{}: FAIL\n", label);
    } else {
        print("{}: ok\n", label);
    }
}
// Build the representative document. Every allocation (the control byte
// and the array / object backing stores) goes through `alloc`. Members
// are inserted in exactly the order EXPECT lists them.
build :: (alloc: Allocator) -> Value {
    // A single raw control byte (0x01) exposed as a 1-byte string; it has
    // no named shorthand, so it exercises the `\u00XX` path. String values
    // are VIEWS, so the byte must outlive `build`: it is taken from
    // `alloc` rather than from a local that dies on return.
    raw : [*]u8 = xx alloc.alloc(1);
    raw[0] = 1;
    ctrl_view := string.{ ptr = raw, len = 1 };

    // {"k":"v"}
    inner : Object = .{};
    inner.put("k", .str("v"), alloc);

    // [1,-2,3]
    nums : Array = .{};
    nums.add(.int_(1), alloc);
    nums.add(.int_(0 - 2), alloc);
    nums.add(.int_(3), alloc);

    // s64 MIN: its magnitude (9223372036854775808) is not a representable
    // positive s64 literal, so it is computed as (0 - MAX) - 1.
    s64_min := 0 - 9223372036854775807 - 1;

    doc : Object = .{};
    doc.put("name", .str("a\"b\n"), alloc);      // quote + newline
    doc.put("tab", .str("x\ty"), alloc);         // tab
    doc.put("bs", .str("c\\d"), alloc);          // backslash
    doc.put("ctrl", .str(ctrl_view), alloc);     // raw control byte -> \u0001
    doc.put("n", .int_(0 - 7), alloc);           // small negative int
    doc.put("zero", .int_(0), alloc);            // zero
    doc.put("pos", .int_(7), alloc);             // small positive int
    doc.put("min", .int_(s64_min), alloc);       // s64 MIN
    doc.put("max", .int_(9223372036854775807), alloc); // s64 MAX
    doc.put("ok", .bool_(true), alloc);
    doc.put("nil", .null_, alloc);
    doc.put("xs", .array(nums), alloc);
    doc.put("nested", .object(inner), alloc);
    return .object(doc);
}
// Driver: build the document in an arena, then check the three writer
// behaviours (exact buffer output, overflow on a too-small buffer, and
// streamed file output), printing one "<check>: ok|FAIL" line per check.
main :: () -> ! {
    gpa := GPA.init();
    arena := Arena.init(xx gpa, 4096);
    defer arena.deinit();
    doc := build(xx arena);

    // 1. Serialize into a caller-owned buffer; the bytes and the returned
    //    count must both match EXPECT exactly.
    out : [512]u8 = ---;
    written := try write_to_buffer(doc, string.{ ptr = @out[0], len = 512 });
    text := string.{ ptr = @out[0], len = written };
    print("doc: {}\n", text);
    report("buffer-exact", text == EXPECT);
    report("buffer-len", written == EXPECT.len);

    // 2. A destination one byte shorter than the document must raise
    //    Overflow. It is carved from the upper half of `out` (offset 256).
    tight_dst : []u8 = string.{ ptr = @out[256], len = EXPECT.len - 1 };
    _, overflow_err := write_to_buffer(doc, tight_dst);
    report("overflow-raised", overflow_err == error.Overflow);

    // 3. Stream to a file through an 8-byte staging buffer (forcing many
    //    flushes), read the file back and compare with EXPECT. The file
    //    lives in the repo-local, gitignored scratch dir and is unlinked
    //    afterwards so nothing leaks and concurrent runs do not fight over
    //    a shared /tmp name.
    if !create_dir_all(".sx-tmp") { print("mkdir: FAIL\n"); return; }
    scratch := ".sx-tmp/sx_0713_json.json";
    handle := open_file(scratch, .write);
    if handle == null { print("open: FAIL\n"); return; }
    out_file := handle!;
    staging : [8]u8 = ---;
    try write_to_file(doc, @out_file, string.{ ptr = @staging[0], len = 8 });
    out_file.close();
    readback := read_file(scratch);
    delete_file(scratch);
    if readback == null { print("file-read: FAIL\n"); return; }
    report("file-exact", readback! == EXPECT);
    return;
}

View File

@@ -0,0 +1 @@
0

View File

@@ -0,0 +1,5 @@
doc: {"name":"a\"b\n","tab":"x\ty","bs":"c\\d","ctrl":"\u0001","n":-7,"zero":0,"pos":7,"min":-9223372036854775808,"max":9223372036854775807,"ok":true,"nil":null,"xs":[1,-2,3],"nested":{"k":"v"}}
buffer-exact: ok
buffer-len: ok
overflow-raised: ok
file-exact: ok

335
library/modules/std/json.sx Normal file
View File

@@ -0,0 +1,335 @@
// =====================================================================
// json.sx — JSON value model + writer (stable key order), pure sx.
//
// This module delivers the JSON VALUE MODEL and the WRITER. The reader
// (parser) lands separately; this file never reads JSON text.
//
// NUMBERS ARE INTEGERS ONLY (s64) for this milestone — there is no
// fraction or exponent. A JSON value is one of: null, bool, integer,
// string, array, object.
//
// STABLE KEY ORDER: an object is NOT a hash map. It is an ORDERED list
// of (key, value) pairs that preserves INSERTION ORDER. Keys are never
// sorted and never reordered to deduplicate — the order you `put` them
// in is the order the writer emits them. This is the "stable key order"
// guarantee the manifest / db.json rely on.
//
// HEAP DISCIPLINE (binding, see heap-discipline.md):
// - Scalars (null / bool / int) carry no heap.
// - String values are VIEWS (`string`) into caller-owned memory; the
// node never copies the input bytes.
// - Composite nodes (array / object) hold unbounded children, so they
// genuinely need dynamic storage — but every allocation goes through
// an EXPLICIT `allocator` parameter on the builder method
// (`arr.add(v, alloc)` / `obj.put(key, val, alloc)`, mirroring
// `List.append`). Allocation NEVER falls back to the implicit
// context allocator silently.
// - The WRITER adds ZERO output allocations. It emits into a
// CALLER-PROVIDED sink: either a fixed `[]u8` buffer the caller owns
// (overflow is reported, never silently truncated) or, streaming,
// straight to an `fs.File` through a small caller-provided staging
// buffer — so the db.json path holds no whole-document string in
// memory (peak memory is O(staging), not O(document)). Integer
// digits are formatted in a stack `[20]u8`. Sink/IO/overflow
// failures surface on the error channel (`!JsonError`).
//
// Building a value (`#import "modules/std/json.sx";` brings these in):
//
// obj : Object = .{};
// obj.put("name", .str(name_view), alloc); // name_view: a `string`
// obj.put("size", .int_(123), alloc);
// arr : Array = .{};
// arr.add(.int_(1), alloc);
// obj.put("xs", .array(arr), alloc);
// root : Value = .object(obj);
//
// Writing into a caller buffer (returns bytes written; raises on overflow):
//
// out : [4096]u8 = ---;
// n := try write_to_buffer(root, string.{ ptr = @out[0], len = 4096 });
//
// Streaming straight to a file (no whole-document string):
//
// f := open_file("db.json\0", .write)!;
// stage : [4096]u8 = ---;
// try write_to_file(root, @f, string.{ ptr = @stage[0], len = 4096 });
// f.close();
// =====================================================================
#import "modules/std.sx";
#import "modules/fs.sx";
// The writer's failure contract, surfaced on the error channel — never a
// silent truncation or default:
//   Overflow — the caller-provided buffer is too small for the output.
//   Io       — a file write failed or was short.
JsonError :: error { Overflow, Io }
// ── Value model ──────────────────────────────────────────────────────
//
// `Value` is a tagged union over the six JSON kinds. `null_`/`bool_`/
// `int_` are scalars; `str` is a VIEW into caller memory; `array` and
// `object` own growable child storage (see Array / Object).
//
// `Value` is defined before Array / Object so that the `[*]Value` /
// `[*]Member` back-references inside those two types resolve; `Value`
// in turn embeds Array / Object as payloads (mutual recursion through
// pointers — each composite holds a pointer to its children, so the
// layout is finite).
Value :: enum {
// JSON null; carries no payload.
null_;
// JSON true / false.
bool_: bool;
// JSON number; integers only.
int_: s64;
str: string; // view into caller-owned bytes; not copied
// Ordered child values (see Array).
array: Array;
// Ordered (key, value) members (see Object).
object: Object;
}
// One ordered object entry: a key paired with its value. `key` is a view
// (not copied); `val` is stored by value inside the object's backing
// store.
Member :: struct {
// Member name; a view into caller-owned bytes.
key: string;
// Member value, held inline.
val: Value;
}
// Ordered, growable sequence of values. Concrete rather than generic so
// it can be an enum payload; every builder call takes its allocator
// explicitly.
Array :: struct {
    items: [*]Value = null;
    len: s64 = 0;
    cap: s64 = 0;

    // Append `v` at the end, preserving order. When the store is full it
    // is regrown through `alloc` (4 slots initially, doubling afterwards),
    // existing elements are copied over and the old store is released.
    add :: (self: *Array, v: Value, alloc: Allocator) {
        if self.len >= self.cap {
            grown : s64 = 4;
            if self.cap != 0 { grown = self.cap * 2; }
            fresh : [*]Value = xx alloc.alloc(grown * size_of(Value));
            if self.len > 0 {
                memcpy(fresh, self.items, self.len * size_of(Value));
                alloc.dealloc(self.items);
            }
            self.items = fresh;
            self.cap = grown;
        }
        self.items[self.len] = v;
        self.len += 1;
    }

    // Release this array's own backing store and reset it to empty.
    // Shallow: elements that are themselves arrays / objects are not
    // visited.
    deinit :: (self: *Array, alloc: Allocator) {
        if self.items != null {
            alloc.dealloc(self.items);
        }
        self.items = null;
        self.cap = 0;
        self.len = 0;
    }
}
// Ordered, growable sequence of (key, value) members. INSERTION ORDER is
// preserved: members are never sorted and never deduplicated. Every
// builder call takes its allocator explicitly.
Object :: struct {
    items: [*]Member = null;
    len: s64 = 0;
    cap: s64 = 0;

    // Append the pair (key, v) at the end. No duplicate-key check is
    // made — a repeated key is stored (and later emitted) twice, in the
    // order added. When the store is full it is regrown through `alloc`
    // (4 slots initially, doubling afterwards) and the old store released.
    put :: (self: *Object, key: string, v: Value, alloc: Allocator) {
        if self.len >= self.cap {
            grown : s64 = 4;
            if self.cap != 0 { grown = self.cap * 2; }
            fresh : [*]Member = xx alloc.alloc(grown * size_of(Member));
            if self.len > 0 {
                memcpy(fresh, self.items, self.len * size_of(Member));
                alloc.dealloc(self.items);
            }
            self.items = fresh;
            self.cap = grown;
        }
        self.items[self.len] = Member.{ key = key, val = v };
        self.len += 1;
    }

    // Release this object's own backing store and reset it to empty.
    // Shallow: member values that are themselves arrays / objects are not
    // visited.
    deinit :: (self: *Object, alloc: Allocator) {
        if self.items != null {
            alloc.dealloc(self.items);
        }
        self.items = null;
        self.cap = 0;
        self.len = 0;
    }
}
// ── Sink ─────────────────────────────────────────────────────────────
//
// A single concrete output sink with two modes, chosen by `file`:
// - BUFFER mode (`file == null`): bytes land directly in the caller's
// `dst`; when `dst` fills, `put`/`put_byte` raise `error.Overflow`.
// `pos` is the running byte count.
// - FILE mode (`file != null`): `dst` is a caller-provided STAGING
// buffer; when it fills it is flushed to `file` and reused, so peak
// memory is O(dst) regardless of document size. `flush()` writes any
// remaining staged bytes. The staging buffer must be non-empty.
//
// The sink owns NO heap and holds NO whole-document string.
// Output sink used by the writer. `file == null` selects BUFFER mode
// (bytes accumulate in `dst`; running out of room raises Overflow);
// otherwise FILE mode (`dst` is a staging buffer that is flushed to
// `file` whenever it fills, then reused).
Sink :: struct {
    dst: []u8;          // caller-owned destination (buffer mode) or staging (file mode)
    pos: s64 = 0;       // bytes currently in `dst`
    file: *File = null; // null => buffer mode

    // Append one byte.
    // Raises error.Overflow when `dst` is full in buffer mode, or when the
    // staging buffer has zero capacity in file mode; propagates error.Io
    // from a failed flush.
    put_byte :: (self: *Sink, b: u8) -> !JsonError {
        if self.pos >= self.dst.len {
            if self.file == null { raise error.Overflow; }
            try self.flush();
            // A flush only resets `pos`; it cannot create room in a
            // zero-length staging buffer. Storing anyway would write past
            // the end of `dst`, so report it instead.
            if self.pos >= self.dst.len { raise error.Overflow; }
        }
        self.dst[self.pos] = b;
        self.pos += 1;
        return;
    }

    // Append every byte of `bytes`, in order, via `put_byte` (so a fill
    // part-way through flushes or raises exactly as single bytes would).
    put :: (self: *Sink, bytes: string) -> !JsonError {
        i := 0;
        while i < bytes.len {
            try self.put_byte(bytes[i]);
            i += 1;
        }
        return;
    }

    // File mode: write the staged bytes to `file` and reset `pos`; a
    // write that does not accept all staged bytes raises error.Io.
    // Buffer mode, or nothing staged: no-op.
    flush :: (self: *Sink) -> !JsonError {
        if self.file == null { return; }
        if self.pos == 0 { return; }
        n := self.file.write(string.{ ptr = @self.dst[0], len = self.pos });
        if n != self.pos { raise error.Io; }
        self.pos = 0;
        return;
    }
}
// ── Writer ───────────────────────────────────────────────────────────
// Map a nibble value 0..15 to its lowercase hexadecimal ASCII byte:
// 0..9 -> '0'..'9' (48..57), 10..15 -> 'a'..'f' (97..102).
hex_digit :: (n: s64) -> u8 {
    if n >= 10 then xx (n + 87) else xx (n + 48)
}
// Emit the six-byte `\u00XX` escape for byte `c`: the fixed prefix
// backslash, 'u', '0', '0', then the high and low nibbles of `c` as
// lowercase hex. Used for control bytes (< 0x20), whose code point's high
// byte is always 0x00.
write_u_escape :: (c: u8, sink: *Sink) -> !JsonError {
    try sink.put("\\u00");
    code := cast(s64) c;
    try sink.put_byte(hex_digit((code >> 4) & 0xF));
    try sink.put_byte(hex_digit(code & 0xF));
    return;
}
// Emit `s` as a JSON string literal: opening quote, escaped body, closing
// quote. Quote and backslash are backslash-escaped; \b \t \n \f \r use
// their named shorthands; any other byte below 0x20 becomes `\u00XX`.
// Bytes >= 0x20 (including UTF-8 continuation bytes) pass through
// unchanged.
write_string :: (s: string, sink: *Sink) -> !JsonError {
    try sink.put_byte(34);                    // opening quote
    at := 0;
    while at < s.len {
        c := s[at];

        // Letter that follows the backslash for a two-byte escape;
        // 0 means "no two-byte escape for this byte".
        esc : u8 = 0;
        if c == 34      { esc = 34; }         // \"
        else if c == 92 { esc = 92; }         // \\
        else if c == 8  { esc = 98; }         // \b
        else if c == 9  { esc = 116; }        // \t
        else if c == 10 { esc = 110; }        // \n
        else if c == 12 { esc = 102; }        // \f
        else if c == 13 { esc = 114; }        // \r

        if esc != 0 {
            try sink.put_byte(92);
            try sink.put_byte(esc);
        }
        else if c < 32 { try write_u_escape(c, sink); }
        else { try sink.put_byte(c); }

        at += 1;
    }
    try sink.put_byte(34);                    // closing quote
    return;
}
// Emit `n` in decimal with no allocation. Digits are produced right to
// left into a 20-byte stack buffer (19 digits + sign is the s64 worst
// case). All arithmetic is done on a NEGATIVE copy of the value so that
// s64 MIN (-9223372036854775808), whose magnitude has no positive s64
// representation, serializes correctly.
write_int :: (n: s64, sink: *Sink) -> !JsonError {
    if n == 0 { try sink.put_byte(48); return; }

    digits : [20]u8 = ---;
    // Positives are folded into negative space; negatives are used as-is.
    rest := if n < 0 then n else 0 - n;
    at := 20;
    while rest < 0 {
        at -= 1;
        // sx `%` keeps the dividend's sign, so `rest % 10` is -9..0 and
        // subtracting it from '0' (48) yields '0'..'9'.
        digits[at] = xx (48 - (rest % 10));
        rest = rest / 10;                     // truncates toward zero
    }
    if n < 0 { at -= 1; digits[at] = 45; }    // '-'
    try sink.put(string.{ ptr = @digits[at], len = 20 - at });
    return;
}
// Serialize one value into `sink`, dispatching on its kind. Scalars are
// emitted directly (`null`, `true` / `false`, decimal integer); strings go
// through write_string; arrays and objects recurse via write_array /
// write_object. Any error raised by the sink or a helper propagates to the
// caller.
write_value :: (v: Value, sink: *Sink) -> !JsonError {
if v == {
case .null_: try sink.put("null");
case .bool_: try sink.put(if v.bool_ then "true" else "false");
case .int_: try write_int(v.int_, sink);
case .str: try write_string(v.str, sink);
case .array: try write_array(v.array, sink);
case .object: try write_object(v.object, sink);
}
return;
}
// Emit `arr` as `[v0,v1,...]`: elements in stored order, separated by
// commas, with no whitespace. An empty array is written as `[]`.
write_array :: (arr: Array, sink: *Sink) -> !JsonError {
    try sink.put_byte(91);                    // '['
    if arr.len > 0 {
        // First element carries no separator; every later one is
        // preceded by a comma.
        try write_value(arr.items[0], sink);
        k := 1;
        while k < arr.len {
            try sink.put_byte(44);            // ','
            try write_value(arr.items[k], sink);
            k += 1;
        }
    }
    try sink.put_byte(93);                    // ']'
    return;
}
// Emit `obj` as `{"k0":v0,"k1":v1,...}` with no whitespace. Members are
// written in stored order — this is the insertion-order guarantee. Keys
// are escaped the same way as string values.
write_object :: (obj: Object, sink: *Sink) -> !JsonError {
    try sink.put_byte(123);                   // '{'
    need_comma := false;
    idx := 0;
    while idx < obj.len {
        if need_comma { try sink.put_byte(44); } // ','
        need_comma = true;
        try write_string(obj.items[idx].key, sink);
        try sink.put_byte(58);                // ':'
        try write_value(obj.items[idx].val, sink);
        idx += 1;
    }
    try sink.put_byte(125);                   // '}'
    return;
}
// ── Public entry points ──────────────────────────────────────────────
// Serialize `v` into the caller-owned buffer `dst` and return the number
// of bytes written. Raises `error.Overflow` when `dst` is too small; the
// partial contents of `dst` are then undefined — nothing is truncated
// silently. No allocation.
write_to_buffer :: (v: Value, dst: []u8) -> (s64, !JsonError) {
    // No `file` set, so the sink runs in buffer mode.
    out := Sink.{ dst = dst };
    try write_value(v, @out);
    written := out.pos;
    return written;
}
// Serialize `v` straight to the open `file`, staging output through the
// caller-owned `staging` buffer (which must be non-empty). The document
// is never held as a whole string — peak extra memory is O(staging).
// Raises `error.Io` on a short or failed write. No allocation.
write_to_file :: (v: Value, file: *File, staging: []u8) -> !JsonError {
    // A non-null `file` puts the sink in file mode.
    staged := Sink.{ dst = staging, file = file };
    try write_value(v, @staged);
    // Push out whatever is still sitting in the staging buffer.
    try staged.flush();
    return;
}