From 70435d3c854faa02aaaea399bacf7ffcf39ccfc4 Mon Sep 17 00:00:00 2001 From: agra Date: Tue, 10 Feb 2026 22:47:43 +0200 Subject: [PATCH] pointers --- examples/26-pointers.sx | 28 ++++++ specs.md | 44 ++++++++ src/ast.zig | 17 ++++ src/codegen.zig | 215 +++++++++++++++++++++++++++++++++++++++- src/comptime.zig | 1 + src/lexer.zig | 1 + src/lsp/server.zig | 2 + src/parser.zig | 30 +++++- src/sema.zig | 34 +++++++ src/token.zig | 5 + src/types.zig | 71 +++++++++++++ 11 files changed, 443 insertions(+), 5 deletions(-) create mode 100644 examples/26-pointers.sx diff --git a/examples/26-pointers.sx b/examples/26-pointers.sx new file mode 100644 index 0000000..3a380ff --- /dev/null +++ b/examples/26-pointers.sx @@ -0,0 +1,28 @@ +#import "modules/std.sx"; + +Vec2 :: struct { x, y: f32; } + +set_x :: (p: *Vec2, val: f32) { + p.x = val; +} + +main :: () { + v := Vec2.{ 1.0, 2.0 }; + print("before: {}\n", v); + + set_x(&v, 99.0); + print("after: {}\n", v); + + ptr := &v; + copy := ptr.*; + print("copy: {}\n", copy); + + // null pointer + np : *Vec2 = null; + + // many-pointer indexing + arr : [5]s32 = .[10, 20, 30, 40, 50]; + mp : [*]s32 = &arr[0]; + print("mp[0] = {}\n", mp[0]); + print("mp[2] = {}\n", mp[2]); +} diff --git a/specs.md b/specs.md index 9a5e9a2..1ac8d66 100644 --- a/specs.md +++ b/specs.md @@ -223,6 +223,50 @@ word := msg[6..11]; // string → "world" - Result type: `[]T` for arrays/slices, `string` for strings - No memory allocation — the result points into the original backing storage +### Pointer Types + +| Syntax | Meaning | `.len` | `[i]` | +|--------|---------|--------|-------| +| `*T` | pointer to one T | no | no | +| `[*]T` | many-pointer (buffer) | no | yes | +| `*[N]T` | pointer to array of N T | yes | yes | +| `*[]T` | pointer to slice | yes | yes | + +**Address-of**: `&x` returns a pointer to the variable. +```sx +v := Vec2.{ 1.0, 2.0 }; +ptr := &v; // *Vec2 +``` + +**Dereference**: `p.*` loads the value through the pointer. +```sx +copy := ptr.*; // Vec2 +``` + +**Auto-deref**: `p.field` is sugar for `p.*.field`. +```sx +set_x :: (p: *Vec2, val: f32) { + p.x = val; // auto-deref: p.*.x = val +} +set_x(&v, 99.0); +``` + +**Null**: All pointer types are nullable. `null` is the null pointer literal. +```sx +np : *Vec2 = null; +``` + +**Many-pointer**: `[*]T` supports indexing for buffers of unknown size. +```sx +arr : [5]s32 = .[10, 20, 30, 40, 50]; +mp : [*]s32 = &arr[0]; // *s32 → [*]s32 implicit +val := mp[2]; // 30 +``` + +**Implicit conversions**: +- `*T` → `[*]T` (pointer to element → many-pointer) +- `null` (`*void`) → any `*T` + ### Vector Types (SIMD) LLVM SIMD vectors, parameterized by length and element type. ```sx diff --git a/src/ast.zig b/src/ast.zig index d31ec6c..90f1451 100644 --- a/src/ast.zig +++ b/src/ast.zig @@ -50,6 +50,10 @@ pub const Node = struct { parameterized_type_expr: ParameterizedTypeExpr, index_expr: IndexExpr, slice_expr: SliceExpr, + pointer_type_expr: PointerTypeExpr, + many_pointer_type_expr: ManyPointerTypeExpr, + deref_expr: DerefExpr, + null_literal: void, while_expr: WhileExpr, for_expr: ForExpr, spread_expr: SpreadExpr, @@ -156,6 +160,7 @@ pub const UnaryOp = struct { negate, not, xx, + address_of, }; }; @@ -306,6 +311,18 @@ pub const SliceExpr = struct { end: ?*Node = null, }; +pub const PointerTypeExpr = struct { + pointee_type: *Node, +}; + +pub const ManyPointerTypeExpr = struct { + element_type: *Node, +}; + +pub const DerefExpr = struct { + operand: *Node, +}; + pub const WhileExpr = struct { condition: *Node, body: *Node, diff --git a/src/codegen.zig b/src/codegen.zig index dd619dc..9b37b18 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -240,6 +240,7 @@ pub const CodeGen = struct { const elem_ty = Type.fromName(info.element_name) orelse unreachable; return c.LLVMVectorType(self.typeToLLVM(elem_ty), info.length); }, + .pointer_type, .many_pointer_type => c.LLVMPointerTypeInContext(self.context, 0), .any_type => self.getAnyStructType(), .meta_type => c.LLVMPointerTypeInContext(self.context, 0), }; @@ -347,6 +348,8 @@ pub const CodeGen = struct { .vector_type => |info| try self.getAnyTypeId(try std.fmt.allocPrint(self.allocator, "vec[{d}]{s}", .{ info.length, info.element_name }), ty), .array_type => |info| try self.getAnyTypeId(try std.fmt.allocPrint(self.allocator, "[{d}]{s}", .{ info.length, info.element_name }), ty), .slice_type => |info| try self.getAnyTypeId(try std.fmt.allocPrint(self.allocator, "[]{s}", .{info.element_name}), ty), + .pointer_type => |info| try self.getAnyTypeId(try std.fmt.allocPrint(self.allocator, "*{s}", .{info.pointee_name}), ty), + .many_pointer_type => |info| try self.getAnyTypeId(try std.fmt.allocPrint(self.allocator, "[*]{s}", .{info.element_name}), ty), .meta_type => ANY_TAG_TYPE, else => ANY_TAG_S32, }; @@ -409,6 +412,7 @@ pub const CodeGen = struct { _ = c.LLVMBuildStore(self.builder, val, alloca); break :blk c.LLVMBuildPtrToInt(self.builder, alloca, i64_ty, "any_slice"); }, + .pointer_type, .many_pointer_type => c.LLVMBuildPtrToInt(self.builder, val, i64_ty, "any_ptr"), .meta_type => blk: { // Meta type is a pointer (global string) — convert via ptrtoint break :blk c.LLVMBuildPtrToInt(self.builder, val, i64_ty, "any_type"); @@ -754,6 +758,20 @@ pub const CodeGen = struct { const elem_name = elem_type.displayName(self.allocator) catch unreachable; return .{ .slice_type = .{ .element_name = elem_name } }; } + // Pointer type: *T + if (tn.data == .pointer_type_expr) { + const pte = tn.data.pointer_type_expr; + const pointee_type = self.resolveType(pte.pointee_type); + const pointee_name = pointee_type.displayName(self.allocator) catch unreachable; + return .{ .pointer_type = .{ .pointee_name = pointee_name } }; + } + // Many-pointer type: [*]T + if (tn.data == .many_pointer_type_expr) { + const mpte = tn.data.many_pointer_type_expr; + const elem_type = self.resolveType(mpte.element_type); + const elem_name = elem_type.displayName(self.allocator) catch unreachable; + return .{ .many_pointer_type = .{ .element_name = elem_name } }; + } // Parameterized type: Vector(N, T) or generic struct instantiation if (tn.data == .parameterized_type_expr) { const pte = tn.data.parameterized_type_expr; @@ -1952,6 +1970,18 @@ pub const CodeGen = struct { return self.genIndexAssignment(asgn); } + // Deref assignment: p.* = value; + if (asgn.target.data == .deref_expr) { + const de = asgn.target.data.deref_expr; + const ptr_val = try self.genExpr(de.operand); + const ptr_ty = self.inferType(de.operand); + if (!ptr_ty.isPointer()) return self.emitError("dereference assignment requires a pointer"); + const pointee_ty = self.resolveTypeFromName(ptr_ty.pointer_type.pointee_name) orelse return self.emitError("unknown pointee type"); + const new_val = try self.genExprAsType(asgn.value, pointee_ty); + _ = c.LLVMBuildStore(self.builder, new_val, ptr_val); + return null; + } + // Target must be an identifier if (asgn.target.data != .identifier) return self.emitError("assignment target must be a variable"); const name = asgn.target.data.identifier.name; @@ -2034,10 +2064,37 @@ pub const CodeGen = struct { fn genFieldAssignment(self: *CodeGen, asgn: ast.Assignment) !c.LLVMValueRef { const fa = asgn.target.data.field_access; + + // Handle deref assignment: p.* = val + if (fa.object.data == .identifier and std.mem.eql(u8, fa.field, "*")) { + // This won't happen — p.* is parsed as deref_expr, not field_access + // Kept as safeguard + } + // Object must be an identifier for now if (fa.object.data != .identifier) return self.emitError("field assignment target must be a variable"); const obj_name = fa.object.data.identifier.name; const entry = self.named_values.get(obj_name) orelse return self.emitErrorFmt("undefined variable '{s}'", .{obj_name}); + + // Pointer auto-deref: p.field = val + if (entry.ty.isPointer()) { + const pointee_ty = self.resolveTypeFromName(entry.ty.pointer_type.pointee_name) orelse + return self.emitError("unknown pointee type for field assignment"); + if (pointee_ty.isStruct()) { + const sname = pointee_ty.struct_type; + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const fi = self.findFieldIndex(info, fa.field) orelse return self.emitErrorFmt("no field '{s}' in struct '{s}'", .{ fa.field, sname }); + const field_ty = info.field_types[fi]; + const loaded_ptr = c.LLVMBuildLoad2(self.builder, + c.LLVMPointerTypeInContext(self.context, 0), entry.ptr, "ptr_load"); + const gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, loaded_ptr, @intCast(fi), "pfield_ptr"); + const rhs = try self.genExprAsType(asgn.value, field_ty); + _ = c.LLVMBuildStore(self.builder, rhs, gep); + return null; + } + return self.emitError("field assignment through pointer requires a struct pointee"); + } + if (!entry.ty.isStruct()) return self.emitErrorFmt("field access on non-struct variable '{s}'", .{obj_name}); const sname = entry.ty.struct_type; @@ -2141,7 +2198,19 @@ pub const CodeGen = struct { _ = c.LLVMBuildStore(self.builder, val, gep_ptr); return null; } - return self.emitError("index assignment requires a string, array, or slice target"); + // Many-pointer index assignment: mp[i] = val + if (obj_ty.isManyPointer()) { + const elem_ty = self.resolveTypeFromName(obj_ty.many_pointer_type.element_name) orelse return self.emitError("unknown many-pointer element type"); + const elem_llvm_ty = self.typeToLLVM(elem_ty); + const ptr_val = try self.genExpr(ie.object); + const idx = try self.genExpr(ie.index); + const val = try self.genExprAsType(asgn.value, elem_ty); + var gep_indices = [_]c.LLVMValueRef{idx}; + const gep_ptr = c.LLVMBuildGEP2(self.builder, elem_llvm_ty, ptr_val, &gep_indices, 1, "mptridx"); + _ = c.LLVMBuildStore(self.builder, val, gep_ptr); + return null; + } + return self.emitError("index assignment requires a string, array, slice, or [*] pointer target"); } fn unescapeString(allocator: std.mem.Allocator, raw: []const u8) ![]u8 { @@ -2239,6 +2308,9 @@ pub const CodeGen = struct { // xx requires a target type context (assignment, declaration, argument, return) return self.emitError("'xx' cast requires a target type context"); } + if (unop.op == .address_of) { + return self.genAddressOf(unop.operand); + } const operand = try self.genExpr(unop.operand); return switch (unop.op) { .negate => blk: { @@ -2256,7 +2328,7 @@ pub const CodeGen = struct { c.LLVMBuildNeg(self.builder, operand, "negtmp"); }, .not => c.LLVMBuildNot(self.builder, operand, "nottmp"), - .xx => unreachable, + .xx, .address_of => unreachable, }; }, .struct_literal => |sl| { @@ -2355,6 +2427,18 @@ pub const CodeGen = struct { c.LLVMPositionBuilderAtEnd(self.builder, dead_bb); return null; }, + .deref_expr => |de| { + const ptr_val = try self.genExpr(de.operand); + const ptr_ty = self.inferType(de.operand); + if (ptr_ty.isPointer()) { + const pointee_ty = self.resolveTypeFromName(ptr_ty.pointer_type.pointee_name) orelse return self.emitError("unknown pointee type"); + return c.LLVMBuildLoad2(self.builder, self.typeToLLVM(pointee_ty), ptr_val, "deref"); + } + return self.emitError("dereference requires a pointer type"); + }, + .null_literal => { + return c.LLVMConstNull(c.LLVMPointerTypeInContext(self.context, 0)); + }, .comptime_expr => |ct| { return self.genExpr(ct.expr); }, @@ -2362,6 +2446,57 @@ pub const CodeGen = struct { } } + fn genAddressOf(self: *CodeGen, operand: *Node) !c.LLVMValueRef { + // &x — return the alloca pointer of the variable + if (operand.data == .identifier) { + if (self.named_values.get(operand.data.identifier.name)) |entry| { + return entry.ptr; + } + return self.emitErrorFmt("undefined variable '{s}'", .{operand.data.identifier.name}); + } + // &expr[i] — return GEP pointer to the indexed element + if (operand.data == .index_expr) { + const ie = operand.data.index_expr; + const obj_ty = self.inferType(ie.object); + const idx = try self.genExpr(ie.index); + + if (obj_ty.isArray()) { + if (ie.object.data == .identifier) { + if (self.named_values.get(ie.object.data.identifier.name)) |entry| { + const zero = c.LLVMConstInt(c.LLVMInt32TypeInContext(self.context), 0, 0); + var indices = [_]c.LLVMValueRef{ zero, idx }; + return c.LLVMBuildGEP2(self.builder, self.typeToLLVM(obj_ty), entry.ptr, &indices, 2, "addr_elem"); + } + } + } + if (obj_ty.isSlice() or obj_ty == .string_type) { + const slice_val = try self.genExpr(ie.object); + const ptr = c.LLVMBuildExtractValue(self.builder, slice_val, 0, "slice_ptr"); + const elem_ty = if (obj_ty.isSlice()) + obj_ty.sliceElementType() orelse return self.emitError("unknown slice element type") + else + Type.u(8); + var gep_indices = [_]c.LLVMValueRef{idx}; + return c.LLVMBuildGEP2(self.builder, self.typeToLLVM(elem_ty), ptr, &gep_indices, 1, "addr_elem"); + } + } + // &s.field — return GEP pointer to the struct field + if (operand.data == .field_access) { + const fa = operand.data.field_access; + if (fa.object.data == .identifier) { + if (self.named_values.get(fa.object.data.identifier.name)) |entry| { + if (entry.ty.isStruct()) { + const sname = entry.ty.struct_type; + const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const idx = self.findFieldIndex(info, fa.field) orelse return self.emitErrorFmt("no field '{s}' in struct '{s}'", .{ fa.field, sname }); + return c.LLVMBuildStructGEP2(self.builder, info.llvm_type, entry.ptr, @intCast(idx), "addr_field"); + } + } + } + } + return self.emitError("address-of requires a variable, index, or field expression"); + } + fn registerStructType(self: *CodeGen, sd: ast.StructDecl) anyerror!void { // Generic struct: store as template instead of registering now if (sd.type_params.len > 0) { @@ -3335,6 +3470,34 @@ pub const CodeGen = struct { // Check if the object is a struct or vector variable if (fa.object.data == .identifier) { if (self.named_values.get(fa.object.data.identifier.name)) |entry| { + // Pointer auto-deref: p.field → p.*.field + if (entry.ty.isPointer()) { + const pointee_ty = self.resolveTypeFromName(entry.ty.pointer_type.pointee_name) orelse + return self.emitError("unknown pointee type for auto-deref"); + const loaded_ptr = c.LLVMBuildLoad2(self.builder, + c.LLVMPointerTypeInContext(self.context, 0), entry.ptr, "ptr_load"); + if (pointee_ty.isStruct()) { + const sname = pointee_ty.struct_type; + const info = self.struct_types.get(sname) orelse + return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); + const idx = self.findFieldIndex(info, fa.field) orelse + return self.emitErrorFmt("no field '{s}' in struct '{s}'", .{ fa.field, sname }); + const gep = c.LLVMBuildStructGEP2(self.builder, info.llvm_type, loaded_ptr, + @intCast(idx), "pfield"); + return c.LLVMBuildLoad2(self.builder, self.typeToLLVM(info.field_types[idx]), gep, "pfieldval"); + } + if (pointee_ty.isSlice()) { + const slice_val = c.LLVMBuildLoad2(self.builder, self.getStringStructType(), loaded_ptr, "pslice_load"); + if (std.mem.eql(u8, fa.field, "len")) { + return c.LLVMBuildExtractValue(self.builder, slice_val, 1, "pslice_len"); + } + if (std.mem.eql(u8, fa.field, "ptr")) { + return c.LLVMBuildExtractValue(self.builder, slice_val, 0, "pslice_ptr"); + } + return self.emitErrorFmt("no field '{s}' on *slice (available: .len, .ptr)", .{fa.field}); + } + return self.emitErrorFmt("no field '{s}' on pointer", .{fa.field}); + } if (entry.ty.isStruct()) { const sname = entry.ty.struct_type; const info = self.struct_types.get(sname) orelse return self.emitErrorFmt("unknown struct type '{s}'", .{sname}); @@ -3494,7 +3657,17 @@ pub const CodeGen = struct { const gep = c.LLVMBuildGEP2(self.builder, elem_llvm_ty, ptr, &gep_indices, 1, "sliceidx"); return c.LLVMBuildLoad2(self.builder, elem_llvm_ty, gep, "sliceval"); } - return self.emitError("index expression requires an array, vector, string, or slice"); + // Many-pointer indexing: [*]T — GEP + load + if (obj_ty.isManyPointer()) { + const elem_ty = self.resolveTypeFromName(obj_ty.many_pointer_type.element_name) orelse return self.emitError("unknown many-pointer element type"); + const elem_llvm_ty = self.typeToLLVM(elem_ty); + const ptr_val = try self.genExpr(ie.object); + const idx = try self.genExpr(ie.index); + var gep_indices = [_]c.LLVMValueRef{idx}; + const gep = c.LLVMBuildGEP2(self.builder, elem_llvm_ty, ptr_val, &gep_indices, 1, "mptridx"); + return c.LLVMBuildLoad2(self.builder, elem_llvm_ty, gep, "mptrval"); + } + return self.emitError("index expression requires an array, vector, string, slice, or [*] pointer"); } fn genSliceExpr(self: *CodeGen, se: ast.SliceExpr) !c.LLVMValueRef { @@ -5224,6 +5397,22 @@ pub const CodeGen = struct { return null; } + /// Resolve a type name to a Type, checking primitives + registered structs/unions/enums. + /// Unlike Type.fromName which only handles primitives. + fn resolveTypeFromName(self: *CodeGen, name: []const u8) ?Type { + // Primitives + if (Type.fromName(name)) |t| return t; + // Structs + if (self.struct_types.contains(name)) return .{ .struct_type = name }; + // Unions + if (self.union_types.contains(name)) return .{ .union_type = name }; + // Enums + if (self.enum_types.contains(name)) return .{ .enum_type = name }; + // Type aliases + if (self.type_aliases.get(name)) |target| return self.resolveTypeFromName(target); + return null; + } + fn inferType(self: *CodeGen, node: *Node) Type { return switch (node.data) { .int_literal => Type.s(32), @@ -5376,10 +5565,25 @@ pub const CodeGen = struct { return Type.s(32); }, .unary_op => |unop| { + if (unop.op == .address_of) { + const operand_ty = self.inferType(unop.operand); + const name = operand_ty.displayName(self.allocator) catch return Type.s(32); + return .{ .pointer_type = .{ .pointee_name = name } }; + } return self.inferType(unop.operand); }, + .deref_expr => |de| { + const ptr_ty = self.inferType(de.operand); + if (ptr_ty.isPointer()) return self.resolveTypeFromName(ptr_ty.pointer_type.pointee_name) orelse Type.s(32); + return Type.s(32); + }, + .null_literal => return .{ .pointer_type = .{ .pointee_name = "void" } }, .field_access => |fa| { - const obj_ty = self.inferType(fa.object); + var obj_ty = self.inferType(fa.object); + // Auto-deref: if pointer, unwrap to pointee + if (obj_ty.isPointer()) { + obj_ty = self.resolveTypeFromName(obj_ty.pointer_type.pointee_name) orelse Type.s(32); + } if (obj_ty == .string_type) { if (std.mem.eql(u8, fa.field, "len")) return Type.s(32); if (std.mem.eql(u8, fa.field, "ptr")) return .string_type; @@ -5426,6 +5630,9 @@ pub const CodeGen = struct { if (obj_ty.isSlice()) { return obj_ty.sliceElementType() orelse Type.s(32); } + if (obj_ty.isManyPointer()) { + return self.resolveTypeFromName(obj_ty.many_pointer_type.element_name) orelse Type.s(32); + } return Type.s(32); }, .slice_expr => |se| { diff --git a/src/comptime.zig b/src/comptime.zig index 67bc218..50d6f61 100644 --- a/src/comptime.zig +++ b/src/comptime.zig @@ -456,6 +456,7 @@ pub const Compiler = struct { .negate => try self.emit(.negate), .not => try self.emit(.not), .xx => {}, // cast — handle later + .address_of => {}, // pointers not supported in comptime } }, .comptime_expr => |ct| { diff --git a/src/lexer.zig b/src/lexer.zig index 0c16247..4bbe587 100644 --- a/src/lexer.zig +++ b/src/lexer.zig @@ -164,6 +164,7 @@ pub const Lexer = struct { } return self.makeToken(.percent, start, self.index); }, + '&' => return self.makeToken(.ampersand, start, self.index), '!' => { if (self.peek() == '=') { self.index += 1; diff --git a/src/lsp/server.zig b/src/lsp/server.zig index 5ac347a..f476ed8 100644 --- a/src/lsp/server.zig +++ b/src/lsp/server.zig @@ -733,6 +733,7 @@ pub const Server = struct { .kw_xx, .kw_and, .kw_or, + .kw_null, .hash_run, .hash_import, .hash_insert, @@ -765,6 +766,7 @@ pub const Server = struct { .slash_equal, .percent, .percent_equal, + .ampersand, .arrow, .fat_arrow, .colon_colon, diff --git a/src/parser.zig b/src/parser.zig index 952b871..a93f8eb 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -206,7 +206,14 @@ pub const Parser = struct { fn parseTypeExpr(self: *Parser) anyerror!*Node { const start = self.current.loc.start; - // Array type: [N]T or Slice type: []T + // Pointer type: *T + if (self.current.tag == .star) { + self.advance(); // skip '*' + const pointee_type = try self.parseTypeExpr(); + return try self.createNode(start, .{ .pointer_type_expr = .{ .pointee_type = pointee_type } }); + } + + // Array type: [N]T, Slice type: []T, Many-pointer type: [*]T if (self.current.tag == .l_bracket) { self.advance(); // skip '[' if (self.current.tag == .r_bracket) { @@ -215,6 +222,13 @@ pub const Parser = struct { const elem_type = try self.parseTypeExpr(); return try self.createNode(start, .{ .slice_type_expr = .{ .element_type = elem_type } }); } + if (self.current.tag == .star) { + // Many-pointer type: [*]T + self.advance(); // skip '*' + try self.expect(.r_bracket); // expect ']' + const elem_type = try self.parseTypeExpr(); + return try self.createNode(start, .{ .many_pointer_type_expr = .{ .element_type = elem_type } }); + } const len_node = try self.parseExpr(); try self.expect(.r_bracket); const elem_type = try self.parseTypeExpr(); @@ -819,6 +833,12 @@ pub const Parser = struct { const operand = try self.parseUnary(); return try self.createNode(start, .{ .unary_op = .{ .op = .xx, .operand = operand } }); } + if (self.current.tag == .ampersand) { + const start = self.current.loc.start; + self.advance(); + const operand = try self.parseUnary(); + return try self.createNode(start, .{ .unary_op = .{ .op = .address_of, .operand = operand } }); + } // cast(Type) expr — prefix operator with type parameter if (self.current.tag == .identifier and std.mem.eql(u8, self.tokenSlice(self.current), "cast")) { const saved_lexer = self.lexer; @@ -896,6 +916,10 @@ pub const Parser = struct { .elements = try elements.toOwnedSlice(self.allocator), .type_expr = expr, } }); + } else if (self.current.tag == .star) { + // Dereference: expr.* + self.advance(); + expr = try self.createNode(expr.span.start, .{ .deref_expr = .{ .operand = expr } }); } else { // Field access if (self.current.tag != .identifier) { @@ -985,6 +1009,10 @@ pub const Parser = struct { self.advance(); return try self.createNode(start, .{ .bool_literal = .{ .value = false } }); }, + .kw_null => { + self.advance(); + return try self.createNode(start, .{ .null_literal = {} }); + }, .identifier => { const name = self.tokenSlice(self.current); // Check if this identifier is a type name (e.g. s32, u8, s128) diff --git a/src/sema.zig b/src/sema.zig index 08f1d65..6e6be55 100644 --- a/src/sema.zig +++ b/src/sema.zig @@ -231,6 +231,20 @@ pub const Analyzer = struct { const elem_name = elem_type.displayName(self.allocator) catch return .void_type; return .{ .slice_type = .{ .element_name = elem_name } }; } + // Pointer type: *T + if (tn.data == .pointer_type_expr) { + const pte = tn.data.pointer_type_expr; + const pointee_type = self.resolveTypeNode(pte.pointee_type); + const pointee_name = pointee_type.displayName(self.allocator) catch return .void_type; + return .{ .pointer_type = .{ .pointee_name = pointee_name } }; + } + // Many-pointer type: [*]T + if (tn.data == .many_pointer_type_expr) { + const mpte = tn.data.many_pointer_type_expr; + const elem_type = self.resolveTypeNode(mpte.element_type); + const elem_name = elem_type.displayName(self.allocator) catch return .void_type; + return .{ .many_pointer_type = .{ .element_name = elem_name } }; + } // Parameterized type: Vector(N, T) or generic struct if (tn.data == .parameterized_type_expr) { // For now, skip generic instantiation — just return void_type @@ -375,6 +389,12 @@ pub const Analyzer = struct { } return .void_type; }, + .deref_expr => |de| { + const ptr_ty = self.inferExprType(de.operand); + if (ptr_ty.isPointer()) return ptr_ty.pointerPointeeType() orelse .void_type; + return .void_type; + }, + .null_literal => .void_type, .array_literal => .void_type, .type_expr => |te| .{ .meta_type = .{ .name = te.name } }, else => .void_type, @@ -645,11 +665,17 @@ pub const Analyzer = struct { .import_decl, .array_type_expr, .slice_type_expr, + .pointer_type_expr, + .many_pointer_type_expr, + .null_literal, .array_literal, .parameterized_type_expr, .index_expr, .slice_expr, => {}, + .deref_expr => |de| { + try self.analyzeNode(de.operand); + }, .namespace_decl => |ns| { for (ns.decls) |d| { try self.analyzeNode(d); @@ -682,6 +708,8 @@ pub const Analyzer = struct { .array_literal, .index_expr, .slice_expr, + .deref_expr, + .null_literal, .type_expr, .insert_expr, .while_expr, @@ -894,11 +922,17 @@ pub fn findNodeAtOffset(node: *Node, offset: u32) ?*Node { .import_decl, .array_type_expr, .slice_type_expr, + .pointer_type_expr, + .many_pointer_type_expr, + .null_literal, .array_literal, .parameterized_type_expr, .index_expr, .slice_expr, => {}, + .deref_expr => |de| { + if (findNodeAtOffset(de.operand, offset)) |found| return found; + }, .namespace_decl => |ns| { for (ns.decls) |d| { if (findNodeAtOffset(d, offset)) |found| return found; diff --git a/src/token.zig b/src/token.zig index e946907..574448e 100644 --- a/src/token.zig +++ b/src/token.zig @@ -27,6 +27,7 @@ pub const Tag = enum { kw_and, kw_or, kw_Type, // Type (metatype keyword) + kw_null, // null // Symbols colon, // : @@ -57,6 +58,7 @@ pub const Tag = enum { slash_equal, // /= percent, // % percent_equal, // %= + ampersand, // & // Delimiters l_paren, // ( @@ -109,6 +111,8 @@ pub const Tag = enum { .slash_equal => "/=", .percent => "%", .percent_equal => "%=", + .ampersand => "&", + .kw_null => "null", .l_paren => "(", .r_paren => ")", .l_brace => "{", @@ -166,6 +170,7 @@ pub const keywords = std.StaticStringMap(Tag).initComptime(.{ .{ "and", .kw_and }, .{ "or", .kw_or }, .{ "Type", .kw_Type }, + .{ "null", .kw_null }, }); pub fn getKeyword(bytes: []const u8) ?Tag { diff --git a/src/types.zig b/src/types.zig index 4434c21..82ec214 100644 --- a/src/types.zig +++ b/src/types.zig @@ -18,6 +18,8 @@ pub const Type = union(enum) { union_type: []const u8, array_type: ArrayTypeInfo, slice_type: SliceTypeInfo, + pointer_type: PointerTypeInfo, + many_pointer_type: ManyPointerTypeInfo, vector_type: VectorTypeInfo, any_type, meta_type: MetaTypeInfo, @@ -26,6 +28,14 @@ pub const Type = union(enum) { element_name: []const u8, }; + pub const PointerTypeInfo = struct { + pointee_name: []const u8, + }; + + pub const ManyPointerTypeInfo = struct { + element_name: []const u8, + }; + pub const ArrayTypeInfo = struct { element_name: []const u8, length: u32, @@ -56,6 +66,14 @@ pub const Type = union(enum) { if (std.mem.eql(u8, name, "f32")) return .f32; if (std.mem.eql(u8, name, "f64")) return .f64; if (std.mem.eql(u8, name, "Any")) return .any_type; + // Many-pointer: [*]T + if (name.len >= 4 and name[0] == '[' and name[1] == '*' and name[2] == ']') { + return .{ .many_pointer_type = .{ .element_name = name[3..] } }; + } + // Pointer: *T + if (name.len >= 2 and name[0] == '*') { + return .{ .pointer_type = .{ .pointee_name = name[1..] } }; + } // Variable-width integers: s1..s64, u1..u64 if (name.len >= 2 and (name[0] == 's' or name[0] == 'u')) { const width = std.fmt.parseInt(u8, name[1..], 10) catch return null; @@ -112,6 +130,34 @@ pub const Type = union(enum) { }; } + pub fn isPointer(self: Type) bool { + return switch (self) { + .pointer_type => true, + else => false, + }; + } + + pub fn pointerPointeeType(self: Type) ?Type { + return switch (self) { + .pointer_type => |info| fromName(info.pointee_name), + else => null, + }; + } + + pub fn isManyPointer(self: Type) bool { + return switch (self) { + .many_pointer_type => true, + else => false, + }; + } + + pub fn manyPointerElementType(self: Type) ?Type { + return switch (self) { + .many_pointer_type => |info| fromName(info.element_name), + else => null, + }; + } + pub fn isArray(self: Type) bool { return switch (self) { .array_type => true, @@ -184,6 +230,19 @@ pub const Type = union(enum) { if (self.isSlice() and target.isSlice()) { return std.mem.eql(u8, self.slice_type.element_name, target.slice_type.element_name); } + // Pointer types: compare pointee names by content, null (*void) → any pointer + if (self.isPointer() and target.isPointer()) { + if (std.mem.eql(u8, self.pointer_type.pointee_name, "void")) return true; + return std.mem.eql(u8, self.pointer_type.pointee_name, target.pointer_type.pointee_name); + } + // Many-pointer types: compare element names by content + if (self.isManyPointer() and target.isManyPointer()) { + return std.mem.eql(u8, self.many_pointer_type.element_name, target.many_pointer_type.element_name); + } + // *T → [*]T: pointer to element is implicitly convertible to many-pointer + if (self.isPointer() and target.isManyPointer()) { + return std.mem.eql(u8, self.pointer_type.pointee_name, target.many_pointer_type.element_name); + } const src_float = self.isFloat(); const dst_float = target.isFloat(); @@ -250,6 +309,18 @@ pub const Type = union(enum) { try buf.appendSlice(allocator, info.element_name); return try buf.toOwnedSlice(allocator); }, + .pointer_type => |info| { + var buf = std.ArrayList(u8).empty; + try buf.append(allocator, '*'); + try buf.appendSlice(allocator, info.pointee_name); + return try buf.toOwnedSlice(allocator); + }, + .many_pointer_type => |info| { + var buf = std.ArrayList(u8).empty; + try buf.appendSlice(allocator, "[*]"); + try buf.appendSlice(allocator, info.element_name); + return try buf.toOwnedSlice(allocator); + }, .array_type => |info| { var buf = std.ArrayList(u8).empty; try buf.append(allocator, '[');