// std/net/epoll — raw epoll bindings: the linux twin of std/net/kqueue.
// linux-only by definition; the OS-neutral Loop facade over both backends is
// std.event. Import this module explicitly — like its kqueue sibling it
// deliberately does not ride the std.sx barrel.
//
// One epoll instance multiplexes readiness for any number of fds: a registered
// fd reports through `epoll_wait` when its interest mask (EPOLLIN / EPOLLOUT)
// fires, and an idle registration costs nothing — the head-of-line-free
// substrate the event Loop and an httpz-shaped server worker stand on.
//
// ── How this differs from kqueue (and why the surface is shaped this way) ──
//  - ONE registration per fd carries a combined events MASK; changing the mask
//    is EPOLL_CTL_MOD, not a second EVFILT_* add. The Loop (std.event) tracks
//    the per-fd mask and feeds the full mask on each change.
//  - `epoll_event` echoes back a single 64-bit `data` word, NOT the fd in a
//    separate field the way kqueue's `ident` is the fd. We stash the fd in the
//    low 32 bits of `data` (`data_lo`) so `epoll_wait` reports which fd fired;
//    a caller wanting a wider udata keeps its own fd→udata map.
//  - EOF is EPOLLHUP / EPOLLRDHUP flags on a readable event, not kqueue's
//    EV_EOF; an async registration error is EPOLLERR.
//
// ── struct epoll_event layout (the one real ABI landmine) ──────────────────
//   struct epoll_event { uint32_t events; epoll_data_t data; };   // data is a
//   union { void* ptr; int fd; uint32_t u32; uint64_t u64; } (8 bytes).
//   On x86_64 the struct is __attribute__((packed)) → 12 bytes, `data` at
//   offset 4. On every other arch (aarch64) it is naturally aligned → 16 bytes,
//   `data` at offset 8.
// sx has no packed-struct primitive, so we model the
// 8-byte `data` union as two u32 halves and let the field layout fall out per
// arch:
//     x86_64 : { events@0, data_lo@4, data_hi@8 }           → 12 bytes
//     aarch64: { events@0, pad@4, data_lo@8, data_hi@12 }   → 16 bytes
// Every field is a u32 at a 4-aligned offset, so no packed attribute and no
// unaligned 8-byte access is ever needed — yet `size_of(EpollEvent)` and the
// `[N]EpollEvent` stride come out byte-exact for the kernel ABI on both
// arches, and `epoll_wait` can fill a plain `[]EpollEvent` directly. (Both
// arches are little-endian, so the fd — an `int` in the union — is the low
// word, `data_lo`.) This struct-per-arch shape was chosen over raw byte-offset
// poking deliberately: idiomatic field reads, no scalar-pointer indexing
// (issue 0155), no unaligned u64.
//
// VALIDATION NOTE: the dev host is aarch64-macOS — there is no linux box to run
// this against, so this module is currently IR-only verified: the arch-correct
// layout (12-byte / 16-byte stride, fd offset) surfaces as the struct shape in
// `sx ir --target *-linux`, and the whole module lowers clean. Runtime
// correctness (syscall behavior, the kernel-filled event array, EPOLLRDHUP
// semantics) validates end-to-end only on a linux runner — mirror of how the
// Win64 switch was IR-only until a Windows VM appeared (CHECKPOINT-FIBERS
// B1.3b-1).
//
// No `#import "modules/build.sx"` despite the `inline if ARCH` below: a
// top-level `inline if OS/ARCH/POINTER_SIZE` conditional is resolved by the
// compiler's flatten pre-pass (imports.zig — name-matched against the target),
// NOT by reading build.sx's `ARCH` global as a value. Skipping the import keeps
// this module's IR self-contained (libc only) — no std/compiler/bundle baggage.

// The C library handle that this module's `extern libc` declarations name.
libc :: #library "c";

// struct epoll_event, arch-exact (see the header).
// Both arch variants carry the same three fields that consumers read —
// `events`, `data_lo` (the fd) and `data_hi` — so calling code never branches
// on the architecture; the extra `pad` in the non-x86_64 variant is never
// touched.
inline if ARCH == .x86_64 {
    // Packed kernel layout: `data` starts at offset 4, 12 bytes in total.
    EpollEvent :: struct {
        events:  u32 = 0;
        data_lo: u32 = 0;   // the fd (union's low 32 bits)
        data_hi: u32 = 0;
    }
} else {
    // Naturally aligned kernel layout: `data` starts at offset 8, 16 bytes in total.
    EpollEvent :: struct {
        events:  u32 = 0;
        pad:     u32 = 0;   // alignment pad before the 8-aligned data union
        data_lo: u32 = 0;   // the fd (union's low 32 bits)
        data_hi: u32 = 0;
    }
}

// ── interest mask (events) ─────────────────────────────────────────────────
EPOLLIN      :u32: 0x001;
EPOLLPRI     :u32: 0x002;
EPOLLOUT     :u32: 0x004;
EPOLLERR     :u32: 0x008;
EPOLLHUP     :u32: 0x010;
EPOLLRDHUP   :u32: 0x2000;       // peer half-closed (drain, then close)
EPOLLET      :u32: 0x80000000;   // edge-triggered
EPOLLONESHOT :u32: 0x40000000;   // disarm after one delivery

// ── epoll_ctl ops ──────────────────────────────────────────────────────────
EPOLL_CTL_ADD :i32: 1;
EPOLL_CTL_DEL :i32: 2;
EPOLL_CTL_MOD :i32: 3;

// epoll_create1 / eventfd flags (== O_CLOEXEC).
EPOLL_CLOEXEC :i32: 0x80000;
EFD_CLOEXEC   :i32: 0x80000;
EFD_NONBLOCK  :i32: 0x800;

// ── raw libc entry points ──────────────────────────────────────────────────
epoll_create1 :: (flags: i32) -> i32 extern libc;
epoll_ctl     :: (epfd: i32, op: i32, fd: i32, event: *EpollEvent) -> i32 extern libc;
epoll_wait    :: (epfd: i32, events: *EpollEvent, maxevents: i32, timeout: i32) -> i32 extern libc;

// eventfd: the cross-thread wake channel (epoll's answer to EVFILT_USER).
eventfd :: (initval: u32, flags: i32) -> i32 extern libc;

// errno, bound locally on linux (`__errno_location`; darwin's is `__error`,
// but this module only ever lowers under a linux target).
// libc's errno slot, reached through the `__errno_location` symbol.
errno_slot_ep :: () -> *i32 extern libc "__errno_location";
// errno value that `ep_wait` treats as "interrupted, reissue the wait".
EINTR_EP :: 4;

// ── readiness-flag helpers over one event ──────────────────────────────────

// True when the event has EPOLLIN set.
ev_readable :: (e: EpollEvent) -> bool {
    hit := e.events & EPOLLIN;
    return hit != 0;
}

// True when the event has EPOLLOUT set.
ev_writable :: (e: EpollEvent) -> bool {
    hit := e.events & EPOLLOUT;
    return hit != 0;
}

// True when either EPOLLHUP (full close) or EPOLLRDHUP (peer half-closed) is
// set — the caller should drain, then close.
ev_eof :: (e: EpollEvent) -> bool {
    eof_mask := EPOLLHUP | EPOLLRDHUP;
    return (e.events & eof_mask) != 0;
}

// True when the event has EPOLLERR set.
ev_err :: (e: EpollEvent) -> bool {
    hit := e.events & EPOLLERR;
    return hit != 0;
}

// Recover the fd that was stored in the low half of `data` at registration.
ev_fd :: (e: EpollEvent) -> i32 {
    fd : i32 = xx e.data_lo;
    return fd;
}

// ── thin wrappers ──────────────────────────────────────────────────────────

// Open a new epoll instance with close-on-exec set. Returns the descriptor
// from `epoll_create1` unchanged, so a negative value means failure.
ep_create :: () -> i32 {
    epfd := epoll_create1(EPOLL_CLOEXEC);
    return epfd;
}

// Apply a single registration change on `epfd`: `op` is one of the
// EPOLL_CTL_* ops and `events` is the interest mask for `fd`. The fd is also
// written into the payload's `data_lo` so a later `epoll_wait` reports which
// fd fired. Returns true when `epoll_ctl` returned 0. For EPOLL_CTL_DEL the
// kernel ignores the event payload.
ep_ctl :: (epfd: i32, op: i32, fd: i32, events: u32) -> bool {
    payload : EpollEvent = .{ events = events, data_lo = xx fd };
    rc := epoll_ctl(epfd, op, fd, @payload);
    return rc == 0;
}

// Collect ready events into `events` (room for `maxev` entries), blocking for
// at most `timeout_ms` (negative = forever). Returns the number of events
// written (0 = timeout), or -1 on any failure other than EINTR — an
// interrupted wait is simply reissued (mirror of kqueue's kq_wait).
//
// NOTE(review): `maxev` is handed to the kernel unchecked, so the caller must
// keep it within the capacity of `events` — confirm at call sites.
// NOTE(review): a retry after EINTR passes the full `timeout_ms` again, so the
// total time blocked can exceed `timeout_ms` if signals keep arriving.
ep_wait :: (epfd: i32, events: []EpollEvent, maxev: i32, timeout_ms: i32) -> i32 {
    while true {
        ready := epoll_wait(epfd, @events[0], maxev, timeout_ms);
        if ready >= 0 { return ready; }

        // Negative return: only EINTR falls through to another iteration.
        err := errno_slot_ep().*;
        if err != EINTR_EP { return -1; }
    }
    return -1;
}