225 lines
11 KiB
Plaintext
225 lines
11 KiB
Plaintext
// Comprehensive pinned suite for `modules/std/json.sx` (writer F2.1 +
// reader F2.2). Mirrors what 0711 did for std.hash: it LOCKS IN the full
// round-trip and the complete malformed-input matrix as one coherent
// pinned example. (0713/0714 stay as the focused writer/reader demos with
// their heap-discipline narrative; this file is the correctness lock-in.)
//
// PART A — ROUND-TRIP. Build a representative document covering EVERY
//   value kind (nested object + array, a string carrying every escape
//   kind `\" \\ \b \f \n \r \t` and a `\u00XX` control, integers 0 /
//   small-negative / s64 MIN (-9223372036854775808) / s64 MAX
//   (9223372036854775807), bool, null) through an explicit Arena, then
//   `build -> write -> parse -> write`: assert the writer's EXACT bytes,
//   assert `parse` then re-`write` reproduces them (idempotent), and
//   spot-check the parsed tree's STRUCTURE incl. INSERTION ORDER.
// PART B — DECODE POSITIVES. `\/`, the full named-escape set, `\uXXXX`
//   (BMP 1- and 2-byte) and a SURROGATE PAIR, the escaped control forms,
//   and raw multi-byte UTF-8 round-tripping through writer + reader.
// PART C — MALFORMED MATRIX. One assertion per `JsonParseError` variant
//   and its key edges, each asserted to RAISE (never crash, never accept).
//
// Every model is built through an explicit Arena allocator (heap
// discipline): scalars carry no heap, string values are views, composites
// and decoded strings go through `alloc`, and the writer allocates nothing.
|
||
#import "modules/std.sx";
|
||
#import "modules/std/mem.sx"; // `Allocator` is non-transitive: name it, import it.
|
||
#import "modules/std/json.sx";
|
||
|
||
// The writer's EXACT output for the PART A document (insertion order,
// canonical escaping). Hand-pinned so a writer regression fails loudly in
// the example itself, not only in the captured golden.
// Keys appear in the order `build` inserts them; the "esc" payload is the
// escaped form of the bytes 34 92 8 9 10 12 13 1, in that order.
EXPECT :: "{\"esc\":\"\\\"\\\\\\b\\t\\n\\f\\r\\u0001\",\"zero\":0,\"neg\":-7,\"min\":-9223372036854775808,\"max\":9223372036854775807,\"ok\":true,\"nil\":null,\"xs\":[1,-2,3],\"nested\":{\"k\":\"v\"}}";
|
||
|
||
// Print one result line for the check named `label`:
// "<label>: ok" when `ok` is true, "<label>: FAIL" otherwise.
report :: (label: string, ok: bool) {
    if ok {
        print("{}: ok\n", label);
    } else {
        print("{}: FAIL\n", label);
    }
}
|
||
|
||
// Membership test for the half-open interval [lo, hi): true when
// lo <= x and x < hi. An empty interval (lo >= hi) contains nothing.
in_range :: (x: s64, lo: s64, hi: s64) -> bool {
    // Below the inclusive lower bound: out of range.
    if x < lo { return false; }
    // Otherwise only the exclusive upper bound is left to check.
    return x < hi;
}
|
||
|
||
// Reports whether `parse(src)` raised exactly the error `want`.
// The result is destructured rather than `try`-ed, so the error tag is
// captured as a value and a malformed input never aborts the example.
raises :: (src: string, want: JsonParseError, alloc: Allocator) -> bool {
    // The parsed value is irrelevant here; only the error tag is inspected.
    _, got := parse(src, alloc);
    return got == want;
}
|
||
|
||
// Reports whether parsing the 5-byte text `"a<b>b"` (a quoted string whose
// middle byte is the RAW control byte `b`) raises BadControlChar. The
// input is assembled in a byte buffer because a raw control byte can't
// appear in an sx string literal.
ctrl_raises :: (b: u8, alloc: Allocator) -> bool {
    quoted : [5]u8 = ---;
    quoted[0] = 34;  // opening "
    quoted[1] = 97;  // a
    quoted[2] = b;   // the byte under test
    quoted[3] = 98;  // b
    quoted[4] = 34;  // closing "

    src := string.{ ptr = @quoted[0], len = 5 };
    return raises(src, error.BadControlChar, alloc);
}
|
||
|
||
// Construct the PART A document: one entry per value kind, added in the
// insertion order the writer must emit. The bytes behind the `esc` value
// (one byte per escape kind) are obtained from `alloc`, because string
// values are VIEWS and therefore must outlive `build`.
build :: (alloc: Allocator) -> Value {
    // Raw, unescaped payload of the "esc" entry.
    esc_bytes : [*]u8 = xx alloc.alloc_bytes(8);
    esc_bytes[0] = 34;  // "   -> \"
    esc_bytes[1] = 92;  // \   -> \\
    esc_bytes[2] = 8;   // BS  -> \b
    esc_bytes[3] = 9;   // TAB -> \t
    esc_bytes[4] = 10;  // LF  -> \n
    esc_bytes[5] = 12;  // FF  -> \f
    esc_bytes[6] = 13;  // CR  -> \r
    esc_bytes[7] = 1;   // SOH -> (control with no named shorthand)
    esc_view := string.{ ptr = esc_bytes, len = 8 };

    // {"k":"v"}: the nested-object value.
    inner_obj : Object = .{};
    inner_obj.put("k", .str("v"), alloc);

    // [1,-2,3]: the array value.
    int_list : Array = .{};
    int_list.add(.int_(1), alloc);
    int_list.add(.int_(0 - 2), alloc);
    int_list.add(.int_(3), alloc);

    // Top-level object; the order of the `put` calls is the pinned key order.
    root_obj : Object = .{};
    root_obj.put("esc", .str(esc_view), alloc);
    root_obj.put("zero", .int_(0), alloc);
    root_obj.put("neg", .int_(0 - 7), alloc);
    // s64 MIN: its magnitude (9223372036854775808) is not a representable
    // positive s64 literal, so compute it as (0 - MAX) - 1.
    root_obj.put("min", .int_(0 - 9223372036854775807 - 1), alloc);
    root_obj.put("max", .int_(9223372036854775807), alloc);
    root_obj.put("ok", .bool_(true), alloc);
    root_obj.put("nil", .null_, alloc);
    root_obj.put("xs", .array(int_list), alloc);
    root_obj.put("nested", .object(inner_obj), alloc);
    return .object(root_obj);
}
|
||
|
||
main :: () -> ! {
    // Every model below is allocated through one Arena created from the GPA;
    // `deinit` is deferred right after creation.
    gpa := GPA.init();
    arena := Arena.init(xx gpa, 16384);
    defer arena.deinit();
    arena_alloc : Allocator = xx arena;

    // ── PART A. build -> write -> parse -> write ─────────────────────────
    doc_in := build(arena_alloc);

    out1 : [512]u8 = ---;
    len1 := try write_to_buffer(doc_in, string.{ ptr = @out1[0], len = 512 });
    pass1 := string.{ ptr = @out1[0], len = len1 };
    print("doc: {}\n", pass1);  // golden pins the exact bytes
    report("rt-exact", pass1 == EXPECT);
    report("rt-len", len1 == EXPECT.len);

    // Parse the writer's own output and serialize it again: the second
    // pass must match the first byte for byte (writer/reader are inverses
    // on the canonical form).
    reparsed := try parse(pass1, arena_alloc);
    out2 : [512]u8 = ---;
    len2 := try write_to_buffer(reparsed, string.{ ptr = @out2[0], len = 512 });
    pass2 := string.{ ptr = @out2[0], len = len2 };
    report("rt-idempotent", pass2 == pass1);

    // Structure of the parsed tree: insertion order + every value kind.
    top := reparsed.object;
    report("st-count", top.len == 9);
    report("st-order",
        top.items[0].key == "esc" and top.items[1].key == "zero" and
        top.items[2].key == "neg" and top.items[3].key == "min" and
        top.items[4].key == "max" and top.items[5].key == "ok" and
        top.items[6].key == "nil" and top.items[7].key == "xs" and
        top.items[8].key == "nested");

    // The escaped string survives the round-trip back to its 8 raw bytes.
    esc_want : [8]u8 = ---;
    esc_want[0] = 34; esc_want[1] = 92; esc_want[2] = 8;  esc_want[3] = 9;
    esc_want[4] = 10; esc_want[5] = 12; esc_want[6] = 13; esc_want[7] = 1;
    esc_want_view := string.{ ptr = @esc_want[0], len = 8 };
    report("st-esc", top.items[0].val.str == esc_want_view);

    report("st-zero", top.items[1].val.int_ == 0);
    report("st-neg", top.items[2].val.int_ == 0 - 7);
    report("st-min", top.items[3].val.int_ == 0 - 9223372036854775807 - 1);
    report("st-max", top.items[4].val.int_ == 9223372036854775807);
    report("st-bool", top.items[5].val.bool_ == true);

    nil_is_null := if top.items[6].val == { case .null_: true; else: false; };
    report("st-null", nil_is_null);

    nums := top.items[7].val.array;
    report("st-xs", nums.len == 3 and nums.items[0].int_ == 1 and
        nums.items[1].int_ == 0 - 2 and nums.items[2].int_ == 3);

    inner := top.items[8].val.object;
    report("st-nested", inner.len == 1 and inner.items[0].key == "k" and
        inner.items[0].val.str == "v");

    // ── PART B. decode positives ─────────────────────────────────────────
    // `\/` decodes to a bare slash (the writer emits it unescaped, so this
    // is a parse-only form).
    slash_val := try parse("\"\\/\"", arena_alloc);
    report("dec-slash", slash_val.str == "/");

    // The full named-escape set in one string: \" \\ \/ \b \f \n \r \t.
    named_val := try parse("\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"", arena_alloc);
    named_want : [8]u8 = ---;
    named_want[0] = 34; named_want[1] = 92; named_want[2] = 47; named_want[3] = 8;
    named_want[4] = 12; named_want[5] = 10; named_want[6] = 13; named_want[7] = 9;
    named_want_view := string.{ ptr = @named_want[0], len = 8 };
    report("dec-escapes", named_val.str == named_want_view);

    // \uXXXX: BMP 1-byte (A), BMP 2-byte (é), and a SURROGATE PAIR (😀).
    // "Aé😀" -> 41 | C3 A9 | F0 9F 98 80 (7 bytes).
    uni_val := try parse("\"\\u0041\\u00e9\\uD83D\\uDE00\"", arena_alloc);
    uni_want : [7]u8 = ---;
    uni_want[0] = 0x41; uni_want[1] = 0xC3; uni_want[2] = 0xA9;
    uni_want[3] = 0xF0; uni_want[4] = 0x9F; uni_want[5] = 0x98; uni_want[6] = 0x80;
    uni_want_view := string.{ ptr = @uni_want[0], len = 7 };
    report("dec-surrogate", uni_val.str == uni_want_view);

    // POSITIVE counterpart to BadControlChar: the ESCAPED control forms
    // backslash-t, backslash-n and backslash-u-0009 decode to 09 0A 09.
    ctrl_val := try parse("\"\\t\\n\\u0009\"", arena_alloc);
    ctrl_want : [3]u8 = ---;
    ctrl_want[0] = 9; ctrl_want[1] = 10; ctrl_want[2] = 9;
    ctrl_want_view := string.{ ptr = @ctrl_want[0], len = 3 };
    report("dec-esc-ctrl", ctrl_val.str == ctrl_want_view);

    // Raw multi-byte UTF-8 (>= 0x80) round-trips writer -> reader unchanged.
    utf8_raw : [*]u8 = xx arena_alloc.alloc_bytes(7);
    utf8_raw[0] = 0x41; utf8_raw[1] = 0xC3; utf8_raw[2] = 0xA9;
    utf8_raw[3] = 0xF0; utf8_raw[4] = 0x9F; utf8_raw[5] = 0x98; utf8_raw[6] = 0x80;
    utf8_doc : Value = .str(string.{ ptr = utf8_raw, len = 7 });
    utf8_out : [64]u8 = ---;
    utf8_len := try write_to_buffer(utf8_doc, string.{ ptr = @utf8_out[0], len = 64 });
    utf8_back := try parse(string.{ ptr = @utf8_out[0], len = utf8_len }, arena_alloc);
    report("rt-utf8", utf8_back.str == string.{ ptr = @utf8_raw[0], len = 7 });

    // ── PART C. malformed-input matrix — one assertion per variant + edge ─
    // UnexpectedToken: bad literal, non-string key, missing comma.
    report("err-token-literal", raises("xyz", error.UnexpectedToken, arena_alloc));
    report("err-token-key", raises("{1:2}", error.UnexpectedToken, arena_alloc));
    report("err-token-comma", raises("[1 2]", error.UnexpectedToken, arena_alloc));

    // UnexpectedEnd: truncated object / array / string.
    report("err-end-object", raises("{\"a\":", error.UnexpectedEnd, arena_alloc));
    report("err-end-array", raises("[1,", error.UnexpectedEnd, arena_alloc));
    report("err-end-string", raises("\"abc", error.UnexpectedEnd, arena_alloc));

    // BadEscape: unknown escape, non-hex \u, high surrogate not followed by
    // a low surrogate.
    report("err-esc-unknown", raises("\"a\\xb\"", error.BadEscape, arena_alloc));
    report("err-esc-bad-hex", raises("\"\\uZZZZ\"", error.BadEscape, arena_alloc));
    report("err-esc-surrogate", raises("\"\\uD83D\\u0041\"", error.BadEscape, arena_alloc));

    // BadNumber: leading zero, lone minus, fraction, exponent, and an
    // integer just past s64 MAX (overflow).
    report("err-num-leadzero", raises("01", error.BadNumber, arena_alloc));
    report("err-num-lonedash", raises("-", error.BadNumber, arena_alloc));
    report("err-num-fraction", raises("1.5", error.BadNumber, arena_alloc));
    report("err-num-exponent", raises("1e9", error.BadNumber, arena_alloc));
    report("err-num-overflow", raises("9223372036854775808", error.BadNumber, arena_alloc));

    // TrailingGarbage: junk after a complete value.
    report("err-trail-array", raises("[1,2] x", error.TrailingGarbage, arena_alloc));
    report("err-trail-scalar", raises("null x", error.TrailingGarbage, arena_alloc));

    // BadControlChar: a raw control byte (< 0x20) inside a string.
    report("err-ctrl-tab", ctrl_raises(9, arena_alloc));   // raw 0x09
    report("err-ctrl-lf", ctrl_raises(10, arena_alloc));   // raw 0x0A
    report("err-ctrl-nul", ctrl_raises(0, arena_alloc));   // raw 0x00

    print("=== DONE ===\n");
    return;
}
|