diff --git a/examples/diagnostics/1192-diagnostics-many-pointer-open-slice.sx b/examples/diagnostics/1192-diagnostics-many-pointer-open-slice.sx new file mode 100644 index 00000000..38741e90 --- /dev/null +++ b/examples/diagnostics/1192-diagnostics-many-pointer-open-slice.sx @@ -0,0 +1,12 @@ +// Slicing a many-pointer `[*]T` requires an explicit upper bound — it carries +// no length, so an open-ended `mp[lo..]` has no bound to resolve and would +// otherwise build a garbage-length slice. This guards that diagnostic. +// (Companion: examples/types/0195 covers the valid explicit-bound form.) +#import "modules/std.sx"; + +main :: () -> i64 { + a : [4]i64 = .[5, 6, 7, 8]; + mp : [*]i64 = xx @a[0]; + s := mp[1..]; // ERROR: many-pointer slice needs an explicit hi + return s.len; +} diff --git a/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.exit b/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.exit new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.exit @@ -0,0 +1 @@ +1 diff --git a/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.stderr b/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.stderr new file mode 100644 index 00000000..49babf5a --- /dev/null +++ b/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.stderr @@ -0,0 +1,5 @@ +error: slicing a many-pointer `[*]T` requires an explicit upper bound (`mp[lo..hi]`) — it has no length + --> examples/diagnostics/1192-diagnostics-many-pointer-open-slice.sx:10:10 + | +10 | s := mp[1..]; // ERROR: many-pointer slice needs an explicit hi + | ^^ diff --git a/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.stdout b/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.stdout new file mode 100644 index 00000000..8b137891 --- /dev/null +++ 
b/examples/diagnostics/expected/1192-diagnostics-many-pointer-open-slice.stdout @@ -0,0 +1 @@ + diff --git a/examples/types/0195-types-many-pointer-slice.sx b/examples/types/0195-types-many-pointer-slice.sx new file mode 100644 index 00000000..0a1f7605 --- /dev/null +++ b/examples/types/0195-types-many-pointer-slice.sx @@ -0,0 +1,26 @@ +// Slicing a many-pointer `mp[lo..hi]` builds a correct `{ ptr = mp + lo, +// len = hi - lo }` slice — the caller supplies the bounds (a `[*]T` carries no +// length of its own). This makes a `List` (whose `items` is `[*]T`) iterable +// with a `for`-each over `items[0..len]`. +// +// Regression (issue 0159): a many-pointer base previously fell through the +// subslice emitter's `else` arm to an undefined slice (`LLVMGetUndef`), so the +// resulting `.len` was garbage and iterating it segfaulted. +#import "modules/std.sx"; + +main :: () -> i64 { + a : [4]i64 = .[5, 6, 7, 8]; + + // Slice a many-pointer with explicit bounds. + mp : [*]i64 = xx @a[0]; + s := mp[1..4]; // { &a[1], len 3 } + print("mp[1..4]: len={} [{} {} {}]\n", s.len, s[0], s[1], s[2]); // 3 [6 7 8] + + // The payoff: iterate a List with a for-each over items[0..len]. 
+ xs : List(i64) = .{}; + xs.append(10); xs.append(20); xs.append(30); + sum := 0; + for xs.items[0..xs.len] (e) { sum = sum + e; } + print("List for-each sum={}\n", sum); // 60 + return 0; +} diff --git a/examples/types/expected/0195-types-many-pointer-slice.exit b/examples/types/expected/0195-types-many-pointer-slice.exit new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/examples/types/expected/0195-types-many-pointer-slice.exit @@ -0,0 +1 @@ +0 diff --git a/examples/types/expected/0195-types-many-pointer-slice.stderr b/examples/types/expected/0195-types-many-pointer-slice.stderr new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/examples/types/expected/0195-types-many-pointer-slice.stderr @@ -0,0 +1 @@ + diff --git a/examples/types/expected/0195-types-many-pointer-slice.stdout b/examples/types/expected/0195-types-many-pointer-slice.stdout new file mode 100644 index 00000000..21ffb6b5 --- /dev/null +++ b/examples/types/expected/0195-types-many-pointer-slice.stdout @@ -0,0 +1,2 @@ +mp[1..4]: len=3 [6 7 8] +List for-each sum=60 diff --git a/issues/0159-many-pointer-slice-garbage-len.md b/issues/0159-many-pointer-slice-garbage-len.md new file mode 100644 index 00000000..3f8e0310 --- /dev/null +++ b/issues/0159-many-pointer-slice-garbage-len.md @@ -0,0 +1,73 @@ +# issue 0159 — slicing a many-pointer `mp[lo..hi]` produces a garbage slice (wrong `.len`/`.ptr`) + +> **RESOLVED.** Root cause: `emitSubslice` (`src/backend/llvm/ops.zig`) handled a +> struct (slice/string) base and an array base, but a many-pointer `[*]T` base is +> an LLVM *pointer* kind — it fell through to the `else` arm, which mapped the +> result to `LLVMGetUndef(slice_ty)` (a silent-undef default), so the slice's +> `.len`/`.ptr` were garbage. Fix: added a `LLVMPointerTypeKind` branch — the +> base value IS the data pointer, so GEP by `lo` and `len = hi - lo` (the caller +> supplies the bound; no length is read from the unbounded pointer). 
A `List` +> (whose `items` is `[*]T`) is now iterable with `for items[0..len] (e)`, applied +> in `Scheduler.deinit`. Regression: `examples/types/0195-types-many-pointer-slice.sx`. An open-ended `mp[lo..]` (no `hi`) is now a compile error raised in `lowerSliceExpr` (`src/ir/lower/expr.zig`); guard: `examples/diagnostics/1192-diagnostics-many-pointer-open-slice.sx`. +> (The comptime/interp path can't take a many-pointer to a stack array — `xx @a[0]` +> — at comptime; that is a separate pre-existing limitation, not this bug.) + +## Symptom + +Slicing a `[*]T` many-pointer with a range, `mp[lo..hi]`, yields a slice whose +`.len` (and `.ptr`) are garbage — iterating it reads out of bounds and +segfaults. The identical slice of the underlying ARRAY is correct. + +``` +array slice : len=3 s0=5 s2=7 ← a[0..3] (correct) +manyptr slice: len=4340757212 (want 3) ← mp[0..3] (garbage) +``` + +The compiler ACCEPTS `mp[0..hi]` (it type-checks as `[]T`) but lowers it wrong. +specs.md documents many-pointer *indexing* (`mp[2]`) but not *slicing*; either +slicing a many-pointer should build a correct `{ ptr = mp + lo, len = hi - lo }` +slice, or it should be a compile error — a silently-garbage slice (which then +segfaults on use) is the forbidden silent-wrong outcome. + +Practical impact: `for xs.items[0..xs.len] (e)` over a `List` crashes, so a +`List` cannot be iterated with a `for` loop; every consumer uses the +`while i < xs.len { ... xs.items[i] ... }` index loop instead. + +## Reproduction + +```sx +#import "modules/std.sx"; +main :: () -> i64 { + a : [4]i64 = .[5, 6, 7, 8]; + sa : []i64 = a[0..3]; // correct: len=3 + print("array : {}\n", sa.len); + mp : [*]i64 = xx @a[0]; + sm : []i64 = mp[0..3]; // BUG: garbage len + print("manyptr: {}\n", sm.len); + return 0; +} +``` + +(repro: `issues/0159-many-pointer-slice-garbage-len.sx`. The garbage value is +an LLVM `undef` (not a read of real memory), so it is arbitrary and can vary per run — the bug is that it is NOT `3`.) + +## Investigation prompt + +> Slicing a `[*]T` many-pointer with a range (`mp[lo..hi]`) produces a slice +> with a garbage `.len`/`.ptr`, whereas slicing an array (`a[lo..hi]`) is +> correct. 
Repro: `issues/0159-many-pointer-slice-garbage-len.sx`. +> +> Trace the slice-expression lowering (`src/ir/lower/` — the range-index / +> `slice_expr` arm; grep for where `a[lo..hi]` builds a `{ ptr, len }` slice +> aggregate). The array path computes `len = hi - lo` and `ptr = &base[lo]` +> correctly; the many-pointer base falls through to a path that reads a bogus +> length (likely it assumes the base is an array/slice with a known bound, or +> reuses an uninitialized slot). Decide the intended semantics from specs.md +> (§Pointer Types — many-pointer; slicing a many-pointer is currently +> unspecified): if `mp[lo..hi]` is supported, build `{ ptr = mp + lo, +> len = hi - lo }` (the user-supplied `hi`/`lo` ARE the bounds — no length is +> read from the unbounded pointer); if it is NOT supported, emit a diagnostic at +> the lowering site ("cannot slice a many-pointer `[*]T` with an open length; +> …") rather than producing a garbage slice. Verify: `sx run` the repro — +> expect `manyptr: 3` (if supported) or a clean compile error, never a garbage +> length / segfault. Then promote the repro to a regression under `examples/`. diff --git a/library/modules/std/sched.sx b/library/modules/std/sched.sx index 5042ace3..5f56338d 100644 --- a/library/modules/std/sched.sx +++ b/library/modules/std/sched.sx @@ -494,10 +494,8 @@ Scheduler :: struct { self.ready_tail = null; // (2) Free every heap Task allocated by `go`. - i := 0; - while i < self.task_allocs.len { - self.own_allocator.dealloc_bytes(self.task_allocs.items[i]); - i = i + 1; + for self.task_allocs.items[0..self.task_allocs.len] (t) { + self.own_allocator.dealloc_bytes(t); } // (3) Free the List backings (all grown through `own_allocator`). 
diff --git a/src/backend/llvm/ops.zig b/src/backend/llvm/ops.zig index 8b474354..bfa40af8 100644 --- a/src/backend/llvm/ops.zig +++ b/src/backend/llvm/ops.zig @@ -2122,6 +2122,22 @@ pub const Ops = struct { result = c.LLVMBuildInsertValue(self.e.builder, result, new_ptr, 0, "ss.wptr"); result = c.LLVMBuildInsertValue(self.e.builder, result, new_len, 1, "ss.wlen"); self.e.mapRef(result); + } else if (base_kind == c.LLVMPointerTypeKind) { + // Many-pointer `[*]T` (or a raw `*T`): the base value IS the data + // pointer — GEP by `lo` for the new start, `len = hi - lo`. (issue + // 0159: a many-pointer base previously fell to the `else` undef arm, + // producing a slice with a garbage length. The caller supplies the + // bound via `hi`; no length is read from the unbounded pointer.) + var lo_indices = [_]c.LLVMValueRef{lo}; + const new_ptr = c.LLVMBuildGEP2(self.e.builder, elem_ty, base, &lo_indices, 1, "ss.ptr"); + var new_len = c.LLVMBuildSub(self.e.builder, hi, lo, "ss.len"); + if (c.LLVMTypeOf(new_len) != self.e.cached_i64) { + new_len = c.LLVMBuildSExt(self.e.builder, new_len, self.e.cached_i64, "ss.ext"); + } + var result = c.LLVMGetUndef(slice_ty); + result = c.LLVMBuildInsertValue(self.e.builder, result, new_ptr, 0, "ss.wptr"); + result = c.LLVMBuildInsertValue(self.e.builder, result, new_len, 1, "ss.wlen"); + self.e.mapRef(result); } else { self.e.mapRef(c.LLVMGetUndef(slice_ty)); } diff --git a/src/ir/lower/expr.zig b/src/ir/lower/expr.zig index 96fd955d..7bcfdae0 100644 --- a/src/ir/lower/expr.zig +++ b/src/ir/lower/expr.zig @@ -1483,7 +1483,14 @@ pub fn lowerSliceExpr(self: *Lowering, se: *const ast.SliceExpr) Ref { self.lowerExpr(e) else if (!obj_ty.isBuiltin() and self.module.types.get(obj_ty) == .array) self.builder.constInt(@intCast(self.module.types.get(obj_ty).array.length), .i64) - else + else if (!obj_ty.isBuiltin() and self.module.types.get(obj_ty) == .many_pointer) blk: { + // A many-pointer `[*]T` carries no length, so an open-ended slice + // 
`mp[lo..]` has no upper bound to resolve — a `.length` op on it would + // yield a garbage length (issue 0159). Require an explicit `hi`. + if (self.diagnostics) |d| + d.addFmt(.err, se.object.span, "slicing a many-pointer `[*]T` requires an explicit upper bound (`mp[lo..hi]`) — it has no length", .{}); + break :blk self.builder.constInt(0, .i64); + } else self.builder.emit(.{ .length = .{ .operand = obj } }, .i64); if (se.end_inclusive) hi = self.builder.add(hi, self.builder.constInt(1, .i64), .i64); // Subslice of string stays string (same {ptr, i64} layout, correct type category)