diff --git a/library/modules/std/sched.sx b/library/modules/std/sched.sx index 97970b7a..60f337fd 100644 --- a/library/modules/std/sched.sx +++ b/library/modules/std/sched.sx @@ -613,7 +613,19 @@ impl Io for Scheduler { // `*void`; `arg` is its single payload pointer. Returns the `*Fiber` as the // opaque handle (the `Future` stashes it in `task`). The fiber inherits this // call's context (Phase 0), so the worker sees this scheduler as `context.io`. + // CONTRACT: `arg` must outlive the fiber's FIRST RUN (i.e. survive until + // `run`/`poll` schedules it), not merely until `spawn_raw` returns — the fiber + // dereferences it later, on its own stack. A stack local that dies before the + // driver loop runs is a use-after-free (the `async` layer keeps it alive by + // owning it on the heap `Future`). spawn_raw :: (self: *Scheduler, entry: *void, arg: *void, opts: SpawnOpts) -> *void { + // A null entry would jump to 0x0 once the fiber runs — bail loudly at the + // spawn site (where the caller is) instead of crashing deep in the loop. + // (A non-fn garbage pointer is undetectable — the caller owns that.) + if entry == null { + print("sched: spawn_raw called with a null entry function\n"); + abort(); + } f := self.spawn(() => { entry_fn : (*void) -> void = xx entry; entry_fn(arg); @@ -640,10 +652,20 @@ impl Io for Scheduler { // Advance the world one step (the unit `run` loops over). Drain every // currently-ready fiber, then fire the earliest pending timer (advancing the - // virtual clock). Returns the new clock after a timer fire, or -1 when nothing - // ran and no timer is pending. NOTE: fd-readiness (kqueue/epoll) blocking is - // NOT driven here yet — `run` remains the canonical driver for fd-bound work; - // `poll` covers the compute + virtual-timer path the async layer needs. + // virtual clock). Returns the new virtual clock after a timer fire, or -1 when + // the ready queue AND the timer set are both empty (the program is quiescent). + // + // `deadline_ms` is RESERVED — the virtual-time single-step jumps the clock to + // the earliest timer regardless of any real-time budget, so there is nothing + // for a deadline to bound here (it matches `CBlockingIo.poll`'s stub shape; a + // real-clock `Io` would honor it). Named for protocol conformance, intentionally + // unread — NOT a silently-dropped argument. + // + // fd-readiness (kqueue/epoll) is NOT driven by `poll` — `run` remains the + // canonical driver for fd-bound work. To avoid the silent-wrong-answer where a + // `-1` ("quiescent") is indistinguishable from "fibers are fd-blocked", bail + // loudly if any io-waiter is pending: a `poll`-only driver can't make progress + // on fds and must use `run` instead. poll :: (self: *Scheduler, deadline_ms: i64) -> i64 { while self.ready_head != null { f := dequeue(self); @@ -666,6 +688,10 @@ impl Io for Scheduler { self.wake(t.fiber); return self.clock_ms; } + if self.io_waiters.len > 0 { + print("sched: poll() cannot drive fd-readiness — use run() for fd-bound fibers\n"); + abort(); + } return -1; } @@ -678,6 +704,13 @@ impl Io for Scheduler { // null (a timer-cancel handle is Phase 3). The fired timer wakes the fiber // through the same `wake` path the run loop uses. arm_timer :: (self: *Scheduler, deadline_ms: i64, park: ParkToken) -> *void { + // MUST be called from inside a fiber — the timer wakes `self.current`, so a + // null current would arm a `fiber = null` timer that segfaults `wake` when + // it fires. Bail loudly, exactly like `sleep`/`suspend_self`/`block_on_fd`. + if self.current == null { + print("sched: arm_timer() called outside a fiber (no running fiber to wake)\n"); + abort(); + } t : Timer = .{ deadline_ms = deadline_ms, fiber = self.current }; self.timers.append(t, self.own_allocator); return null; @@ -753,11 +786,18 @@ T // regardless of linkage), never by an external name — so it needs the convention, // not a public symbol. `abi(.c)` states that intent and keeps the symbol private. // -// One consequence of the C-ABI boundary: an `abi(.c)` fn has no implicit -// `context` param, so `self.body()` runs under the static `__sx_default_context` -// — NOT whatever `push Context { allocator = ... }` was in force at the -// `run()` call site. Fiber bodies do not inherit a caller-scoped allocator; a -// body that needs one must capture it explicitly (the long-lived-container rule). +// The C-ABI boundary means this frame has no implicit `context` param, so the +// body would otherwise run under the static `__sx_default_context`. To make a +// fiber inherit its SPAWNER's context (so `context.io` reaches a fiber-backed +// scheduler, and `context.allocator`/`context.data` carry through), `spawn` +// snapshots the live `context` into `Fiber.dctx` and the body below re-pushes it. +// LIFETIME CONTRACT (consequence of the snapshot): every capability in the +// spawn-time context — `allocator`, `io`, `data` — must OUTLIVE the fiber, since +// the body dereferences them later through `dctx`. Protocol values (`io`, +// `allocator`) borrow a receiver (often a stack local via `xx`); that receiver, +// and any `data` pointee, must stay alive until the fiber is reaped. A fiber that +// grows a long-lived container through the inherited `context.allocator` is +// subject to the usual long-lived-container rule (CLAUDE.md). fib_dispatch :: (self: *Fiber) abi(.c) { // Run the body under the context captured at spawn (`Fiber.dctx`), so the // fiber inherits its spawner's `context` — notably `context.io` resolves to a