diff --git a/specs.md b/specs.md index 5359584..9a5e9a2 100644 --- a/specs.md +++ b/specs.md @@ -182,6 +182,12 @@ Fixed-size arrays with element type and length. ```sx buffer : [5]f32 = .[0, 2, 3.5, 4, 0]; val := buffer[2]; // 3.5 +buffer.len // 5 (compile-time constant, s32) +``` + +Array types can also be constructed programmatically with the `Array` builtin: +```sx +MyArr :: Array(5, s32); // equivalent to [5]s32 ``` ### Slice Types @@ -200,6 +206,23 @@ items.ptr // raw pointer Slices support generic type parameters: `[]$T` introduces type parameter `T` inferred from the element type of the argument (array or slice). +### Subslicing +Arrays, slices, and strings support subslice syntax to create zero-copy views: +```sx +arr : [5]s32 = .[3, 1, 4, 1, 5]; +sub := arr[1..4]; // []s32 → [1, 4, 1] +head := arr[..3]; // []s32 → [3, 1, 4] +tail := arr[2..]; // []s32 → [4, 1, 5] + +msg := "hello world"; +word := msg[6..11]; // string → "world" +``` +- `expr[start..end]` — elements from `start` (inclusive) to `end` (exclusive) +- `expr[start..]` — elements from `start` (inclusive) to the end +- `expr[..end]` — elements from the beginning to `end` (exclusive) +- Result type: `[]T` for arrays/slices, `string` for strings +- No memory allocation — the result points into the original backing storage + ### Vector Types (SIMD) LLVM SIMD vectors, parameterized by length and element type. 
```sx
diff --git a/src/ast.zig b/src/ast.zig
index 5247768..d31ec6c 100644
--- a/src/ast.zig
+++ b/src/ast.zig
@@ -49,6 +49,7 @@ pub const Node = struct {
         array_literal: ArrayLiteral,
         parameterized_type_expr: ParameterizedTypeExpr,
         index_expr: IndexExpr,
+        slice_expr: SliceExpr,
         while_expr: WhileExpr,
         for_expr: ForExpr,
         spread_expr: SpreadExpr,
@@ -299,6 +300,15 @@ pub const IndexExpr = struct {
     index: *Node,
 };
 
+/// Subslice expression `object[start..end]`; either bound may be omitted.
+pub const SliceExpr = struct {
+    object: *Node,
+    /// Inclusive lower bound; `null` for the `object[..end]` form.
+    start: ?*Node = null,
+    /// Exclusive upper bound; `null` for the `object[start..]` form.
+    end: ?*Node = null,
+};
+
 pub const WhileExpr = struct {
     condition: *Node,
     body: *Node,
diff --git a/src/codegen.zig b/src/codegen.zig
index 94058e2..dd619dc 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -2286,6 +2286,9 @@ pub const CodeGen = struct {
             .index_expr => |ie| {
                 return self.genIndexExpr(ie);
             },
+            .slice_expr => |se| {
+                return self.genSliceExpr(se);
+            },
             .call => |call_node| {
                 return self.genCall(call_node);
             },
@@ -3381,6 +3384,12 @@ pub const CodeGen = struct {
                     }
                     return self.emitErrorFmt("no field '{s}' on slice (available: .len, .ptr)", .{fa.field});
                 }
+                if (entry.ty.isArray()) {
+                    if (std.mem.eql(u8, fa.field, "len")) {
+                        return c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), entry.ty.array_type.length, 0);
+                    }
+                    return self.emitErrorFmt("no field '{s}' on array (available: .len)", .{fa.field});
+                }
                 if (entry.ty.isAny()) {
                     const any_val = c.LLVMBuildLoad2(self.builder, self.getAnyStructType(), entry.ptr, "any_load");
                     if (std.mem.eql(u8, fa.field, "tag")) {
@@ -3488,6 +3497,111 @@ pub const CodeGen = struct {
         return self.emitError("index expression requires an array, vector, string, or slice");
     }
 
+    /// Lowers `object[start..end]` to a `{ptr, len}` aggregate pointing into the
+    /// storage of `object`.
+    ///
+    /// A missing `start` defaults to 0. A missing `end` defaults to the array length
+    /// for arrays and to the operand's own `len` field for strings and slices.
+    /// Explicit bounds are coerced to i32 so `end - start` always has operands of
+    /// one width, matching the i32 defaults.
+    ///
+    /// Goes through `emitError` when `object` is not an array, string, or slice,
+    /// when a bound is not an integer, or when a slice element type is unknown.
+    ///
+    /// NOTE(review): no bounds checks are emitted here, so `end < start` or an
+    /// out-of-range bound yields a view outside the operand; confirm whether that
+    /// is rejected elsewhere.
+    fn genSliceExpr(self: *CodeGen, se: ast.SliceExpr) !c.LLVMValueRef {
+        const obj_ty = self.inferType(se.object);
+        const i32_ty = c.LLVMInt32TypeInContext(self.context);
+        const zero = c.LLVMConstInt(i32_ty, 0, 0);
+        const slice_struct_ty = self.getStringStructType();
+
+        // Lower bound (default: 0); generated before the operand and the upper bound.
+        const start_val = if (se.start) |s| try self.sliceBoundToI32(try self.genExpr(s)) else zero;
+
+        if (obj_ty.isArray()) {
+            const arr_llvm_ty = self.typeToLLVM(obj_ty);
+            // Upper bound (default: the compile-time array length).
+            const end_val = if (se.end) |e| try self.sliceBoundToI32(try self.genExpr(e)) else c.LLVMConstInt(i32_ty, obj_ty.array_type.length, 0);
+            // The GEP below needs an address for the array, not the array value.
+            const arr_ptr = blk: {
+                if (se.object.data == .identifier) {
+                    if (self.named_values.get(se.object.data.identifier.name)) |entry| {
+                        break :blk entry.ptr;
+                    }
+                }
+                const arr_val = try self.genExpr(se.object);
+                if (c.LLVMGetTypeKind(c.LLVMTypeOf(arr_val)) == c.LLVMPointerTypeKind) {
+                    break :blk arr_val;
+                }
+                // The operand was generated as an array value rather than an address:
+                // spill it to a temporary so there is storage to point into.
+                const tmp = c.LLVMBuildAlloca(self.builder, arr_llvm_ty, "slice_arr_tmp");
+                _ = c.LLVMBuildStore(self.builder, arr_val, tmp);
+                break :blk tmp;
+            };
+            // &arr[start]: the leading 0 steps through the pointer, `start` picks the element.
+            var indices = [_]c.LLVMValueRef{ zero, start_val };
+            const elem_ptr = c.LLVMBuildGEP2(self.builder, arr_llvm_ty, arr_ptr, &indices, 2, "slice_start");
+            // len = end - start
+            const len_val = c.LLVMBuildSub(self.builder, end_val, start_val, "slice_len");
+            // Build {ptr, len}
+            var result = c.LLVMGetUndef(slice_struct_ty);
+            result = c.LLVMBuildInsertValue(self.builder, result, elem_ptr, 0, "slice_ptr");
+            result = c.LLVMBuildInsertValue(self.builder, result, len_val, 1, "slice_len");
+            return result;
+        }
+
+        if (obj_ty == .string_type or obj_ty.isSlice()) {
+            // Load {ptr, len} from the named variable, or generate the operand expression.
+            const obj_val = blk: {
+                if (se.object.data == .identifier) {
+                    if (self.named_values.get(se.object.data.identifier.name)) |entry| {
+                        break :blk c.LLVMBuildLoad2(self.builder, slice_struct_ty, entry.ptr, "sslice_load");
+                    }
+                }
+                break :blk try self.genExpr(se.object);
+            };
+            const base_ptr = c.LLVMBuildExtractValue(self.builder, obj_val, 0, "sslice_ptr");
+            const base_len = c.LLVMBuildExtractValue(self.builder, obj_val, 1, "sslice_len");
+            // Upper bound (default: the operand's own length).
+            const end_val = if (se.end) |e| try self.sliceBoundToI32(try self.genExpr(e)) else base_len;
+            // The element type sets the pointer stride: i8 for strings, the element type for slices.
+            const elem_llvm_ty = if (obj_ty == .string_type)
+                c.LLVMInt8TypeInContext(self.context)
+            else
+                self.typeToLLVM(Type.fromName(obj_ty.slice_type.element_name) orelse return self.emitError("unknown slice element type"));
+            var gep_indices = [_]c.LLVMValueRef{start_val};
+            const new_ptr = c.LLVMBuildGEP2(self.builder, elem_llvm_ty, base_ptr, &gep_indices, 1, "sslice_off");
+            // len = end - start
+            const len_val = c.LLVMBuildSub(self.builder, end_val, start_val, "sslice_len");
+            // Build {ptr, len}
+            var result = c.LLVMGetUndef(slice_struct_ty);
+            result = c.LLVMBuildInsertValue(self.builder, result, new_ptr, 0, "sslice_ptr");
+            result = c.LLVMBuildInsertValue(self.builder, result, len_val, 1, "sslice_len");
+            return result;
+        }
+
+        return self.emitError("slice expression requires an array, string, or slice");
+    }
+
+    /// Coerces an already-generated slice bound to i32 and returns it.
+    ///
+    /// An i32 value is returned unchanged. Other integer widths are truncated or
+    /// zero-extended; zero-extension is chosen because a valid bound is never
+    /// negative, so it is exact for unsigned and for non-negative signed values.
+    /// A non-integer bound is reported through `emitError`.
+    fn sliceBoundToI32(self: *CodeGen, bound: c.LLVMValueRef) !c.LLVMValueRef {
+        const i32_ty = c.LLVMInt32TypeInContext(self.context);
+        const bound_ty = c.LLVMTypeOf(bound);
+        if (bound_ty == i32_ty) return bound;
+        if (c.LLVMGetTypeKind(bound_ty) != c.LLVMIntegerTypeKind) {
+            return self.emitError("slice bounds must be integers");
+        }
+        return c.LLVMBuildIntCast2(self.builder, bound, i32_ty, 0, "slice_bound");
+    }
+
     fn genBinaryOp(self: *CodeGen, op: ast.BinaryOp.Op, lhs: c.LLVMValueRef, rhs: c.LLVMValueRef, result_type: Type) c.LLVMValueRef {
         // Vector types: dispatch based on element type (LLVM does element-wise automatically)
         if (result_type.isVector()) {
@@ -5018,6 +5132,17 @@ pub const CodeGen = struct {
                     return ty;
                 }
             }
+            if (std.mem.eql(u8, base, "Array")) {
+                if (args.len >= 2) {
+                    const n: u32 = @intCast(self.resolveValueArg(args[0]));
+                    const elem = self.resolveType(args[1]);
+                    const elem_name = elem.displayName(self.allocator) catch return null;
+                    const ty: Type = .{ .array_type = .{ .element_name = elem_name, .length = n } };
+                    const any_name = std.fmt.allocPrint(self.allocator, "[{d}]{s}", .{ n, elem_name }) catch return null;
+                    _ = self.getAnyTypeId(any_name, ty) catch return null;
+                    return ty;
+                }
+            }
             return null;
         }
 
@@ -5300,6 +5425,9 @@ pub const CodeGen = struct {
                 if (obj_ty.isSlice()) {
                     if (std.mem.eql(u8, fa.field, "len")) return Type.s(32);
                 }
+                if (obj_ty.isArray()) {
+                    if (std.mem.eql(u8, fa.field, "len")) return Type.s(32);
+                }
                 if (obj_ty.isAny()) {
                     if (std.mem.eql(u8, fa.field, "tag")) return Type.s(32);
                     if (std.mem.eql(u8, fa.field, "value")) return Type.s(64);
@@ -5338,6 +5466,13 @@ pub const CodeGen = struct {
                 }
                 return Type.s(32);
             },
+            .slice_expr => |se| {
+                const obj_ty = self.inferType(se.object);
+                if (obj_ty == .string_type) return .string_type;
+                if (obj_ty.isArray()) return .{ .slice_type = .{ .element_name =
obj_ty.array_type.element_name } }; + if (obj_ty.isSlice()) return obj_ty; + return .void_type; + }, .array_literal => |al| { if (al.elements.len == 0) return .void_type; const elem_ty = self.inferType(al.elements[0]); diff --git a/src/parser.zig b/src/parser.zig index 44f0f68..952b871 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -906,11 +906,37 @@ pub const Parser = struct { expr = try self.createNode(expr.span.start, .{ .field_access = .{ .object = expr, .field = field } }); } } else if (self.current.tag == .l_bracket) { - // Index access: expr[expr] + // Index or slice access: expr[expr] or expr[start..end] self.advance(); - const index = try self.parseExpr(); - try self.expect(.r_bracket); - expr = try self.createNode(expr.span.start, .{ .index_expr = .{ .object = expr, .index = index } }); + if (self.current.tag == .dot_dot) { + // [..end] + self.advance(); + const end_expr = try self.parseExpr(); + try self.expect(.r_bracket); + expr = try self.createNode(expr.span.start, .{ .slice_expr = .{ + .object = expr, .start = null, .end = end_expr, + } }); + } else { + const first = try self.parseExpr(); + if (self.current.tag == .dot_dot) { + // [start..end] or [start..] 
+ self.advance(); + const end_expr: ?*ast.Node = if (self.current.tag != .r_bracket) + try self.parseExpr() + else + null; + try self.expect(.r_bracket); + expr = try self.createNode(expr.span.start, .{ .slice_expr = .{ + .object = expr, .start = first, .end = end_expr, + } }); + } else { + // [index] — normal index access + try self.expect(.r_bracket); + expr = try self.createNode(expr.span.start, .{ .index_expr = .{ + .object = expr, .index = first, + } }); + } + } } else { break; } diff --git a/src/sema.zig b/src/sema.zig index 2bb9bea..08f1d65 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -347,6 +347,13 @@ pub const Analyzer = struct { } return Type.s(32); }, + .slice_expr => |se| { + const obj_ty = self.inferExprType(se.object); + if (obj_ty == .string_type) return .string_type; + if (obj_ty.isArray()) return .{ .slice_type = .{ .element_name = obj_ty.array_type.element_name } }; + if (obj_ty.isSlice()) return obj_ty; + return .void_type; + }, .while_expr => .void_type, .for_expr => .void_type, .spread_expr => .void_type, @@ -413,7 +420,7 @@ pub const Analyzer = struct { try self.analyzeNode(val); } }, - .enum_decl, .struct_decl, .union_decl, .array_type_expr, .slice_type_expr, .array_literal, .parameterized_type_expr, .index_expr, .insert_expr => {}, + .enum_decl, .struct_decl, .union_decl, .array_type_expr, .slice_type_expr, .array_literal, .parameterized_type_expr, .index_expr, .slice_expr, .insert_expr => {}, .namespace_decl => |ns| { try self.pushScope(); for (ns.decls) |d| { @@ -641,6 +648,7 @@ pub const Analyzer = struct { .array_literal, .parameterized_type_expr, .index_expr, + .slice_expr, => {}, .namespace_decl => |ns| { for (ns.decls) |d| { @@ -673,6 +681,7 @@ pub const Analyzer = struct { .union_literal, .array_literal, .index_expr, + .slice_expr, .type_expr, .insert_expr, .while_expr, @@ -888,6 +897,7 @@ pub fn findNodeAtOffset(node: *Node, offset: u32) ?*Node { .array_literal, .parameterized_type_expr, .index_expr, + .slice_expr, => {}, 
.namespace_decl => |ns| { for (ns.decls) |d| {