Files
sx/examples/0715-modules-json-suite.sx
agra 1905d35507 F2.3: pin std.json round-trip + malformed-input suite (examples/0715)
Add 0715-modules-json-suite as the single comprehensive pinned suite for
std.json (mirrors 0711 for std.hash), alongside the focused 0713/0714 demos:

- ROUND-TRIP build->write->parse->write over a document covering EVERY value
  kind (a string with every escape form \" \\ \b \f \n \r \t plus a \u00XX
  control, integers 0 / negative / s64 MIN / s64 MAX, bool, null, array,
  nested object) with insertion-order assertions, exact writer bytes, and
  parse-then-rewrite idempotence.
- DECODE positives: \/, the full named-escape set, \uXXXX (BMP 1- and 2-byte)
  plus a surrogate pair, the escaped control forms, and raw multi-byte UTF-8
  round-tripping through writer + reader.
- MALFORMED matrix: one assertion per JsonParseError variant and its key
  edges (UnexpectedToken, UnexpectedEnd, BadEscape, BadNumber incl. leading
  zero / lone '-' / fraction / exponent / overflow, TrailingGarbage,
  BadControlChar), each asserted to raise.

Pure test work: src/ and library/ untouched, no json.sx change needed. Every
model is built through an explicit Arena allocator (heap discipline).
2026-06-04 02:57:32 +03:00

224 lines
11 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Comprehensive pinned suite for `modules/std/json.sx` (writer F2.1 +
// reader F2.2). Mirrors what 0711 did for std.hash: it LOCKS IN the full
// round-trip and the complete malformed-input matrix as one coherent
// pinned example. (0713/0714 stay as the focused writer/reader demos with
// their heap-discipline narrative; this file is the correctness lock-in.)
//
// PART A — ROUND-TRIP. Build a representative document covering EVERY
// value kind (nested object + array, a string carrying every escape
// kind `\" \\ \b \f \n \r \t` and a `\u00XX` control, integers 0 /
// small-negative / s64 MIN (-9223372036854775808) / s64 MAX
// (9223372036854775807), bool, null) through an explicit Arena, then
// `build -> write -> parse -> write`: assert the writer's EXACT bytes,
// assert `parse` then re-`write` reproduces them (idempotent), and
// spot-check the parsed tree's STRUCTURE incl. INSERTION ORDER.
// PART B — DECODE POSITIVES. `\/`, the full named-escape set, `\uXXXX`
// for BMP code points that encode to 1 and 2 UTF-8 bytes plus a SURROGATE
// PAIR, the escaped control forms, and raw multi-byte UTF-8 round-tripping
// through writer + reader.
// PART C — MALFORMED MATRIX. One assertion per `JsonParseError` variant
// and its key edges, each asserted to RAISE (never crash, never accept).
//
// Every model is built through an explicit Arena allocator (heap
// discipline): scalars carry no heap, string values are views, composites
// and decoded strings go through `alloc`, and the writer allocates nothing.
#import "modules/std.sx";
#import "modules/std/json.sx";
// The writer's EXACT output for the PART A document (insertion order,
// canonical escaping). Hand-pinned so a writer regression fails loudly in
// the example itself, not only in the captured golden.
//
// Once the sx string-literal escaping is removed, the pinned JSON text is:
//   {"esc":"\"\\\b\t\n\f\r\u0001","zero":0,"neg":-7,
//    "min":-9223372036854775808,"max":9223372036854775807,
//    "ok":true,"nil":null,"xs":[1,-2,3],"nested":{"k":"v"}}
// (shown wrapped here; the literal itself is a single line with no spaces).
// Every `\\` in the literal below is one literal backslash in that text, and
// every `\"` is one literal double quote.
EXPECT :: "{\"esc\":\"\\\"\\\\\\b\\t\\n\\f\\r\\u0001\",\"zero\":0,\"neg\":-7,\"min\":-9223372036854775808,\"max\":9223372036854775807,\"ok\":true,\"nil\":null,\"xs\":[1,-2,3],\"nested\":{\"k\":\"v\"}}";
// Print one result line for a single check: `<label>: ok` when `ok` is
// true, `<label>: FAIL` otherwise. Each line ends with a newline.
report :: (label: string, ok: bool) {
    if ok == false {
        print("{}: FAIL\n", label);
    } else {
        print("{}: ok\n", label);
    }
}
// Half-open interval test: true exactly when lo <= x < hi.
// `lo` is included, `hi` is excluded, so an empty interval (lo == hi)
// contains nothing.
in_range :: (x: s64, lo: s64, hi: s64) -> bool {
    // Below the lower bound: outside.
    if x < lo { return false; }
    // At or above `lo`; inside only if strictly below the upper bound.
    return x < hi;
}
// Report whether `parse(src)` failed with exactly the error `want`.
//
// The result of `parse` is destructured instead of being wrapped in `try`:
// the parsed value is discarded and only the error slot is inspected, so a
// malformed input is observed here rather than aborting the example.
//   src   - JSON text to parse
//   want  - the JsonParseError variant the input is expected to raise
//   alloc - allocator handed to `parse`
raises :: (src: string, want: JsonParseError, alloc: Allocator) -> bool {
    _, err := parse(src, alloc);
    return err == want;
}
// Report whether a JSON string containing the RAW byte `b` is rejected with
// BadControlChar. The 5-byte input is: quote, 'a', <the byte b>, 'b', quote.
// It is assembled in a byte buffer because a raw control byte can't appear
// in an sx string literal.
ctrl_raises :: (b: u8, alloc: Allocator) -> bool {
    quoted : [5]u8 = ---;
    quoted[0] = 34;  // opening "
    quoted[1] = 97;  // 'a'
    quoted[2] = b;   // the raw byte under test
    quoted[3] = 98;  // 'b'
    quoted[4] = 34;  // closing "
    src := string.{ ptr = @quoted[0], len = 5 };
    return raises(src, error.BadControlChar, alloc);
}
// Construct the PART A document: an object with one member per value kind,
// added in exactly the order the writer is expected to emit them.
//
// The bytes behind the `esc` member come from `alloc` rather than a local
// buffer: string values are VIEWS over their bytes, so the bytes have to
// outlive this function.
build :: (alloc: Allocator) -> Value {
    // One raw byte per escape kind; the trailing comment on each line shows
    // the escaped form expected in the writer's output.
    raw : [*]u8 = xx alloc.alloc(8);
    raw[0] = 34;  // "   -> \"
    raw[1] = 92;  // \   -> \\
    raw[2] = 8;   // BS  -> \b
    raw[3] = 9;   // TAB -> \t
    raw[4] = 10;  // LF  -> \n
    raw[5] = 12;  // FF  -> \f
    raw[6] = 13;  // CR  -> \r
    raw[7] = 1;   // SOH -> \u0001 (control with no named shorthand)
    escaped := string.{ ptr = raw, len = 8 };

    // Child object: {"k":"v"}.
    inner : Object = .{};
    inner.put("k", .str("v"), alloc);

    // Child array: [1,-2,3].
    ints : Array = .{};
    ints.add(.int_(1), alloc);
    ints.add(.int_(0 - 2), alloc);
    ints.add(.int_(3), alloc);

    // Top-level object; the order of these calls is the insertion order.
    doc : Object = .{};
    doc.put("esc", .str(escaped), alloc);
    doc.put("zero", .int_(0), alloc);
    doc.put("neg", .int_(0 - 7), alloc);
    // s64 MIN: its magnitude 9223372036854775808 does not fit in a positive
    // s64, so it is computed as (0 - s64 MAX) - 1 instead of written directly.
    doc.put("min", .int_(0 - 9223372036854775807 - 1), alloc);
    doc.put("max", .int_(9223372036854775807), alloc);
    doc.put("ok", .bool_(true), alloc);
    doc.put("nil", .null_, alloc);
    doc.put("xs", .array(ints), alloc);
    doc.put("nested", .object(inner), alloc);
    return .object(doc);
}
// Entry point. Runs the three parts of the suite in order (A: round-trip,
// B: decode positives, C: malformed-input matrix) and prints one
// `<label>: ok` / `<label>: FAIL` line per check via `report`, followed by
// `=== DONE ===`. The line order is part of the output, so the checks must
// stay in this order.
//
// NOTE(review): the positive-path calls use `try`; with the `!` return type
// a failure there presumably propagates out of `main` instead of printing a
// FAIL line — confirm against the language's error semantics.
main :: () -> ! {
// All allocations below go through `a`, which is the arena converted to an
// Allocator; the deferred `deinit` releases the arena when `main` exits.
gpa := GPA.init();
arena := Arena.init(xx gpa, 16384);
defer arena.deinit();
a : Allocator = xx arena;
// ── PART A. build -> write -> parse -> write ─────────────────────────
root := build(a);
// Writer output goes into a fixed local buffer; only the first `n` bytes
// (the count returned by `write_to_buffer`) are viewed as `canon`.
buf : [512]u8 = ---;
n := try write_to_buffer(root, string.{ ptr = @buf[0], len = 512 });
canon := string.{ ptr = @buf[0], len = n };
print("doc: {}\n", canon); // golden pins the exact bytes
report("rt-exact", canon == EXPECT);
report("rt-len", n == EXPECT.len);
// parse the writer's output, then re-serialize: must reproduce it byte
// for byte (writer/reader are inverses on the canonical form).
tree2 := try parse(canon, a);
buf2 : [512]u8 = ---;
n2 := try write_to_buffer(tree2, string.{ ptr = @buf2[0], len = 512 });
canon2 := string.{ ptr = @buf2[0], len = n2 };
report("rt-idempotent", canon2 == canon);
// Structure of the parsed tree: insertion order + every value kind.
// The root is read as an object with no tag check, and the indexed reads
// below (items[0] .. items[8]) are not guarded by the `st-count` result.
o := tree2.object;
report("st-count", o.len == 9);
report("st-order",
o.items[0].key == "esc" and o.items[1].key == "zero" and
o.items[2].key == "neg" and o.items[3].key == "min" and
o.items[4].key == "max" and o.items[5].key == "ok" and
o.items[6].key == "nil" and o.items[7].key == "xs" and
o.items[8].key == "nested");
// The escaped string survives the round-trip back to its 8 raw bytes.
// Expected bytes, in order: " \ BS TAB LF FF CR SOH.
eexp : [8]u8 = ---;
eexp[0] = 34; eexp[1] = 92; eexp[2] = 8; eexp[3] = 9;
eexp[4] = 10; eexp[5] = 12; eexp[6] = 13; eexp[7] = 1;
report("st-esc", o.items[0].val.str == string.{ ptr = @eexp[0], len = 8 });
report("st-zero", o.items[1].val.int_ == 0);
report("st-neg", o.items[2].val.int_ == 0 - 7);
// s64 MIN is spelled as (0 - s64 MAX) - 1.
report("st-min", o.items[3].val.int_ == 0 - 9223372036854775807 - 1);
report("st-max", o.items[4].val.int_ == 9223372036854775807);
report("st-bool", o.items[5].val.bool_ == true);
// null carries no payload to compare, so match on the variant instead.
is_null := if o.items[6].val == { case .null_: true; else: false; };
report("st-null", is_null);
xs := o.items[7].val.array;
report("st-xs", xs.len == 3 and xs.items[0].int_ == 1 and
xs.items[1].int_ == 0 - 2 and xs.items[2].int_ == 3);
sub := o.items[8].val.object;
report("st-nested", sub.len == 1 and sub.items[0].key == "k" and
sub.items[0].val.str == "v");
// ── PART B. decode positives ─────────────────────────────────────────
// `\/` decodes to a bare slash (the writer emits it unescaped, so this
// is a parse-only form).
slash := try parse("\"\\/\"", a);
report("dec-slash", slash.str == "/");
// The full named-escape set in one string: \" \\ \/ \b \f \n \r \t.
esc := try parse("\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"", a);
// Expected decoded bytes, in order: " \ / BS FF LF CR TAB.
sexp : [8]u8 = ---;
sexp[0] = 34; sexp[1] = 92; sexp[2] = 47; sexp[3] = 8;
sexp[4] = 12; sexp[5] = 10; sexp[6] = 13; sexp[7] = 9;
report("dec-escapes", esc.str == string.{ ptr = @sexp[0], len = 8 });
// \uXXXX: BMP 1-byte (A), BMP 2-byte (é), and a SURROGATE PAIR (😀).
// "Aé😀" -> 41 | C3 A9 | F0 9F 98 80 (7 bytes).
uni := try parse("\"\\u0041\\u00e9\\uD83D\\uDE00\"", a);
uexp : [7]u8 = ---;
uexp[0] = 0x41; uexp[1] = 0xC3; uexp[2] = 0xA9;
uexp[3] = 0xF0; uexp[4] = 0x9F; uexp[5] = 0x98; uexp[6] = 0x80;
report("dec-surrogate", uni.str == string.{ ptr = @uexp[0], len = 7 });
// POSITIVE counterpart to BadControlChar: the ESCAPED control forms
// backslash-t, backslash-n and backslash-u-0009 decode to 09 0A 09.
ectrl := try parse("\"\\t\\n\\u0009\"", a);
cexp : [3]u8 = ---;
cexp[0] = 9; cexp[1] = 10; cexp[2] = 9;
report("dec-esc-ctrl", ectrl.str == string.{ ptr = @cexp[0], len = 3 });
// Raw multi-byte UTF-8 (>= 0x80) round-trips writer -> reader unchanged.
// Same 7 bytes as the \uXXXX case above, but supplied raw through the
// allocator-backed buffer instead of as escapes.
ubytes : [*]u8 = xx a.alloc(7);
ubytes[0] = 0x41; ubytes[1] = 0xC3; ubytes[2] = 0xA9;
ubytes[3] = 0xF0; ubytes[4] = 0x9F; ubytes[5] = 0x98; ubytes[6] = 0x80;
uval : Value = .str(string.{ ptr = ubytes, len = 7 });
ubuf : [64]u8 = ---;
un := try write_to_buffer(uval, string.{ ptr = @ubuf[0], len = 64 });
uback := try parse(string.{ ptr = @ubuf[0], len = un }, a);
report("rt-utf8", uback.str == string.{ ptr = @ubytes[0], len = 7 });
// ── PART C. malformed-input matrix — one assertion per variant + edge ─
// Each check goes through `raises` / `ctrl_raises`, which inspect the
// error returned by `parse` without `try`, so a rejected input yields a
// result line rather than ending the run.
// UnexpectedToken: bad literal, non-string key, missing comma.
report("err-token-literal", raises("xyz", error.UnexpectedToken, a));
report("err-token-key", raises("{1:2}", error.UnexpectedToken, a));
report("err-token-comma", raises("[1 2]", error.UnexpectedToken, a));
// UnexpectedEnd: truncated object / array / string.
report("err-end-object", raises("{\"a\":", error.UnexpectedEnd, a));
report("err-end-array", raises("[1,", error.UnexpectedEnd, a));
report("err-end-string", raises("\"abc", error.UnexpectedEnd, a));
// BadEscape: unknown escape, non-hex \u, high surrogate not followed by
// a low surrogate.
report("err-esc-unknown", raises("\"a\\xb\"", error.BadEscape, a));
report("err-esc-bad-hex", raises("\"\\uZZZZ\"", error.BadEscape, a));
report("err-esc-surrogate", raises("\"\\uD83D\\u0041\"", error.BadEscape, a));
// BadNumber: leading zero, lone minus, fraction, exponent, and an
// integer just past s64 MAX (overflow).
report("err-num-leadzero", raises("01", error.BadNumber, a));
report("err-num-lonedash", raises("-", error.BadNumber, a));
report("err-num-fraction", raises("1.5", error.BadNumber, a));
report("err-num-exponent", raises("1e9", error.BadNumber, a));
report("err-num-overflow", raises("9223372036854775808", error.BadNumber, a));
// TrailingGarbage: junk after a complete value.
report("err-trail-array", raises("[1,2] x", error.TrailingGarbage, a));
report("err-trail-scalar", raises("null x", error.TrailingGarbage, a));
// BadControlChar: a raw control byte (< 0x20) inside a string.
report("err-ctrl-tab", ctrl_raises(9, a)); // raw 0x09
report("err-ctrl-lf", ctrl_raises(10, a)); // raw 0x0A
report("err-ctrl-nul", ctrl_raises(0, a)); // raw 0x00
print("=== DONE ===\n");
return;
}