From 4aff0041189bc4160d6d841cfc7d8cc82e59ddf2 Mon Sep 17 00:00:00 2001 From: agra Date: Tue, 17 Feb 2026 19:26:43 +0200 Subject: [PATCH] http server --- examples/32-http-server.sx | 14 +- examples/modules/socket.sx | 8 +- examples/modules/std.sx | 49 ++++--- specs.md | 13 +- src/builtins.zig | 14 +- src/codegen.zig | 52 +++---- src/comptime.zig | 280 +++++++++++++++++++++++++++++++++++-- src/lsp/server.zig | 10 +- 8 files changed, 356 insertions(+), 84 deletions(-) diff --git a/examples/32-http-server.sx b/examples/32-http-server.sx index c77dc00..a78aa75 100644 --- a/examples/32-http-server.sx +++ b/examples/32-http-server.sx @@ -15,15 +15,9 @@ main :: () -> s32 { opt : s32 = 1; setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, @opt, 4); - addr := SockAddr.{ - sin_len = 16, - sin_family = 2, - sin_port = htons(PORT), - sin_addr = 0, - sin_zero = 0 - }; + addr := SockAddr.{ sin_len = 16, sin_family = 2, sin_port = htons(PORT) }; - if bind(fd, @addr, 16) < 0 { + if bind(fd, addr, 16) < 0 { print("error: bind()\n"); return 1; } @@ -40,8 +34,8 @@ main :: () -> s32 { if client < 0 { continue; } // Read request - buf := alloc(4096); - read(client, buf, 4096); + buf : [4096]u8 = ---; + read(client, buf, buf.len); // Send response body := "

Hello from sx!

"; diff --git a/examples/modules/socket.sx b/examples/modules/socket.sx index fcacaae..3871f6d 100644 --- a/examples/modules/socket.sx +++ b/examples/modules/socket.sx @@ -9,8 +9,8 @@ setsockopt :: (fd: s32, level: s32, optname: s32, optval: *s32, optlen: u32) -> bind :: (fd: s32, addr: *SockAddr, addrlen: u32) -> s32 #foreign libc; listen :: (fd: s32, backlog: s32) -> s32 #foreign libc; accept :: (fd: s32, addr: *SockAddr, addrlen: *u32) -> s32 #foreign libc; -read :: (fd: s32, buf: [:0]u8, count: s64) -> s64 #foreign libc; -write :: (fd: s32, buf: [:0]u8, count: s64) -> s64 #foreign libc; +read :: (fd: s32, buf: [*]u8, count: s64) -> s64 #foreign libc; +write :: (fd: s32, buf: [*]u8, count: s64) -> s64 #foreign libc; close :: (fd: s32) -> s32 #foreign libc; // Constants (macOS) @@ -24,8 +24,8 @@ SockAddr :: struct { sin_len: u8; sin_family: u8; sin_port: u16; - sin_addr: u32; - sin_zero: u64; + sin_addr: u32 = 0; + sin_zero: u64 = 0; } htons :: (port: s64) -> u16 { diff --git a/examples/modules/std.sx b/examples/modules/std.sx index f4d3df2..eee22f7 100644 --- a/examples/modules/std.sx +++ b/examples/modules/std.sx @@ -4,9 +4,9 @@ sqrt :: (x: $T) -> T #builtin; sin :: (x: $T) -> T #builtin; cos :: (x: $T) -> T #builtin; size_of :: ($T: Type) -> s64 #builtin; -alloc :: (size: s64) -> string #builtin; malloc :: (size: s64) -> *void #builtin; memcpy :: (dst: *void, src: *void, size: s64) -> *void #builtin; +memset :: (dst: *void, val: s64, size: s64) -> void #builtin; free :: (ptr: *void) -> void #builtin; type_of :: (val: $T) -> Type #builtin; type_name :: ($T: Type) -> string #builtin; @@ -18,6 +18,20 @@ field_value_int :: ($T: Type, idx: s64) -> s64 #builtin; field_index :: ($T: Type, val: T) -> s64 #builtin; string :: []u8 #builtin; +CString :: union { + s: string; + struct { ptr: *void; len: s64; }; +} + +cstring :: (size: s64) -> string { + raw := malloc(size + 1); + memset(raw, 0, size + 1); + rs : CString = ---; + rs.ptr = raw; + rs.len = size; + rs.s; +} + int_to_string :: (n: s64) -> string { if n == 0 { return "0"; } neg := n < 0; @@ -26,7 +40,7 @@ int_to_string :: (n: s64) -> string { len := 0; while tmp > 0 { len += 1; tmp = tmp / 10; } total := if neg then len + 1 else len; - buf := alloc(total); + buf := cstring(total); i := total - 1; while v > 0 { buf[i] = (v % 10) + 48; @@ -53,18 +67,17 @@ float_to_string :: (f: f64) -> string { fl := fstr.len; prefix := if neg then 1 else 0; total := prefix + il + 1 + 6; - buf := alloc(total); + buf := cstring(total); pos := 0; if neg { buf[0] = 45; pos = 1; } - i := 0; - while i < il { buf[pos] = istr[i]; pos += 1; i += 1; } + memcpy(@buf[pos], istr.ptr, il); + pos = pos + il; buf[pos] = 46; pos += 1; pad := 6 - fl; - j := 0; - while j < pad { buf[pos] = 48; pos += 1; j += 1; } - k := 0; - while k < fl { buf[pos] = fstr[k]; pos += 1; k += 1; } + memset(@buf[pos], 48, pad); + pos = pos + pad; + memcpy(@buf[pos], fstr.ptr, fl); buf; } @@ -84,7 +97,7 @@ int_to_hex_string :: (n: s64) -> string { g3 := r3 % 65536; if g3 < 0 { g3 = g3 + 65536; } - buf := alloc(16); + buf := cstring(16); // Group 3: digits 0-3 (bits 48-63) i := 3; v := g3; @@ -133,21 +146,15 @@ int_to_hex_string :: (n: s64) -> string { concat :: (a: string, b: string) -> string { al := a.len; bl := b.len; - buf := alloc(al + bl); - i := 0; - while i < al { buf[i] = a[i]; i += 1; } - j := 0; - while j < bl { buf[al + j] = b[j]; j += 1; } + buf := cstring(al + bl); + memcpy(buf.ptr, a.ptr, al); + memcpy(@buf[al], b.ptr, bl); buf; } substr :: (s: string, start: s64, len: s64) -> string { - buf := alloc(len); - i := 0; - while i < len { - buf[i] = s[start + i]; - i += 1; - } + buf := cstring(len); + memcpy(buf.ptr, @s[start], len); buf; } diff --git a/specs.md b/specs.md index 0f753c1..d35502d 100644 --- a/specs.md +++ b/specs.md @@ -932,9 +932,14 @@ Built-in functions are declared in `std.sx` with the `#builtin` suffix, which te ### Math - `sqrt(x: $T) -> T` — square root (maps to LLVM intrinsic) +- `sin(x: $T) -> T` — sine (maps to LLVM intrinsic) +- `cos(x: $T) -> T` — cosine (maps to LLVM intrinsic) ### Memory -- `alloc(size: s64) -> string` — allocate `size` bytes of memory, returned as a string slice +- `malloc(size: s64) -> *void` — allocate `size` bytes of heap memory +- `free(ptr: *void) -> void` — free previously allocated memory +- `memcpy(dst: *void, src: *void, size: s64) -> *void` — copy `size` bytes from `src` to `dst` +- `memset(dst: *void, val: s64, size: s64) -> void` — fill `size` bytes at `dst` with `val` - `size_of($T: Type) -> s64` — size of type `T` in bytes ### Type Introspection @@ -943,7 +948,9 @@ Built-in functions are declared in `std.sx` with the `#builtin` suffix, which te - `field_count($T: Type) -> s64` — returns the number of fields (struct), variants (enum), or elements (vector) in type `T` - `field_name($T: Type, idx: s64) -> string` — returns the name of the `idx`-th field (struct) or variant (enum) of type `T` - `field_value(s: $T, idx: s64) -> Any` — returns the `idx`-th field (struct) or element (vector) of `s`, boxed as `Any` +- `field_value_int($T: Type, idx: s64) -> s64` — returns the integer value of the `idx`-th enum variant - `field_index($T: Type, val: T) -> s64` — returns the sequential variant index for an explicit enum value (reverse of `field_value_int`). Returns `-1` if no variant matches. +- `is_flags($T: Type) -> bool` — returns `true` if `T` is a flags enum (declared with `#flags`) ### Type Conversion - `cast(Type) expr` — prefix operator that converts `expr` to `Type`. Examples: `cast(s32) 3.14`, `cast(f64) n`. When `Type` is a runtime `Type` value inside a type-category match arm, the compiler generates a dispatch switch over all types in the category, monomorphizing the callee for each concrete type. @@ -1031,14 +1038,14 @@ Functions within a namespaced import can call each other without the namespace p mul :: (base: $T, exp: T) -> T { base * exp; } // modules/std/std.sx -print :: (str: string) -> void #builtin; +out :: (str: string) -> void #builtin; // main.sx std :: #import "modules/std.sx"; #import "modules/std/math.sx"; main :: () -> s32 { - std.print("hello there"); + std.out("hello there"); mul(5, 2); } ``` diff --git a/src/builtins.zig b/src/builtins.zig index dfaa760..3a9d091 100644 --- a/src/builtins.zig +++ b/src/builtins.zig @@ -3,10 +3,10 @@ const c = llvm.c; pub const Builtins = struct { printf_fn: c.LLVMValueRef, - calloc_fn: c.LLVMValueRef, malloc_fn: c.LLVMValueRef, free_fn: c.LLVMValueRef, memcpy_fn: c.LLVMValueRef, + memset_fn: c.LLVMValueRef, pub fn init(module: c.LLVMModuleRef, ctx: c.LLVMContextRef) Builtins { const ptr_type = c.LLVMPointerTypeInContext(ctx, 0); @@ -19,11 +19,6 @@ pub const Builtins = struct { const printf_type = c.LLVMFunctionType(i32_type, &printf_params, 1, 1); const printf_fn = c.LLVMAddFunction(module, "printf", printf_type); - // Declare: void* calloc(size_t count, size_t size) - var calloc_params = [_]c.LLVMTypeRef{ i64_type, i64_type }; - const calloc_type = c.LLVMFunctionType(ptr_type, &calloc_params, 2, 0); - const calloc_fn = c.LLVMAddFunction(module, "calloc", calloc_type); - // Declare: void* malloc(size_t size) var malloc_params = [_]c.LLVMTypeRef{i64_type}; const malloc_type = c.LLVMFunctionType(ptr_type, &malloc_params, 1, 0); @@ -39,12 +34,17 @@ pub const Builtins = struct { const memcpy_type = c.LLVMFunctionType(ptr_type, &memcpy_params, 3, 0); const memcpy_fn = c.LLVMAddFunction(module, "memcpy", memcpy_type); + // Declare: void* memset(void* s, int c, size_t n) + var memset_params = [_]c.LLVMTypeRef{ ptr_type, i32_type, i64_type }; + const memset_type = c.LLVMFunctionType(ptr_type, &memset_params, 3, 0); + const memset_fn = c.LLVMAddFunction(module, "memset", memset_type); + return .{ .printf_fn = printf_fn, - .calloc_fn = calloc_fn, .malloc_fn = malloc_fn, .free_fn = free_fn, .memcpy_fn = memcpy_fn, + .memset_fn = memset_fn, }; } }; diff --git a/src/codegen.zig b/src/codegen.zig index b8716f9..b94b7ff 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -1319,7 +1319,7 @@ pub const CodeGen = struct { .void_val => self.constInt32(0), .pointer_val => c.LLVMConstNull(self.ptrType()), .null_val => c.LLVMConstNull(self.ptrType()), - .struct_val, .array_val, .type_val, .function_val => unreachable, + .struct_val, .array_val, .type_val, .function_val, .byte_ptr_val, .union_val => unreachable, }; } @@ -1451,13 +1451,21 @@ pub const CodeGen = struct { } } } - // Safety net: inline declarations that should have been hoisted + // Inline type declarations: resolve by registered name if (tn.data == .struct_decl) { const sn = tn.data.struct_decl.name; if (self.type_registry.get(sn)) |e| { if (e == .struct_info) return .{ .struct_type = sn }; } } + if (tn.data == .union_decl) { + const un = tn.data.union_decl.name; + if (self.type_registry.get(un)) |e| switch (e) { + .union_info => return .{ .union_type = un }, + .tagged_enum => return .{ .union_type = un }, + else => {}, + }; + } if (tn.data == .enum_decl) { const en = tn.data.enum_decl.name; if (self.type_registry.get(en)) |e| switch (e) { @@ -3565,17 +3573,14 @@ pub const CodeGen = struct { var hoisted = inline_sd; hoisted.name = synthetic_name; try self.registerStructType(hoisted); - type_node.data = .{ .type_expr = .{ .name = synthetic_name } }; }, .union_decl => |inline_ud| { var hoisted_ud = inline_ud; hoisted_ud.name = synthetic_name; try self.registerUnionType(hoisted_ud); - type_node.data = .{ .type_expr = .{ .name = synthetic_name } }; }, .enum_decl => |inline_ed| { if (inline_ed.variant_types.len > 0) { - // Tagged enum with payloads var hoisted = inline_ed; hoisted.name = synthetic_name; try self.registerTaggedEnum(hoisted); @@ -3587,7 +3592,6 @@ pub const CodeGen = struct { try self.enum_backing_types.put(synthetic_name, self.typeToLLVM(bt)); } } - type_node.data = .{ .type_expr = .{ .name = synthetic_name } }; }, else => {}, } @@ -4927,21 +4931,7 @@ pub const CodeGen = struct { return self.convertValue(val, src_ty, target_ty); } - fn genAlloc(self: *CodeGen, args: []const *Node) !c.LLVMValueRef { - if (args.len != 1) return self.emitError("alloc expects exactly 1 argument: alloc(size)"); - const builtins = try self.requireBuiltins(); - const size_val = try self.genExpr(args[0]); - const i64_type = self.i64Type(); - // calloc(size + 1, 1) — extra byte for null terminator - const one_i64 = c.LLVMConstInt(i64_type, 1, 0); - const size_plus_one = c.LLVMBuildAdd(self.builder, size_val, one_i64, "szp1"); - const calloc_fn = builtins.calloc_fn; - const calloc_ty = c.LLVMGlobalGetValueType(calloc_fn); - var calloc_args = [_]c.LLVMValueRef{ size_plus_one, one_i64 }; - const ptr = c.LLVMBuildCall2(self.builder, calloc_ty, calloc_fn, &calloc_args, 2, "alloc_ptr"); - // Build string slice: {ptr, size} - return self.buildStringSlice(ptr, size_val); - } + fn genMalloc(self: *CodeGen, args: []const *Node) !c.LLVMValueRef { if (args.len != 1) return self.emitError("malloc expects exactly 1 argument: malloc(size)"); @@ -4974,6 +4964,19 @@ pub const CodeGen = struct { return null; } + fn genMemset(self: *CodeGen, args: []const *Node) !c.LLVMValueRef { + if (args.len != 3) return self.emitError("memset expects 3 arguments: memset(dst, val, size)"); + const builtins = try self.requireBuiltins(); + const dst = try self.genExpr(args[0]); + const val = try self.genExpr(args[1]); + const size_val = try self.genExpr(args[2]); + const val_i32 = self.trunc(val, self.i32Type(), "memset_val"); + const fn_ty = c.LLVMGlobalGetValueType(builtins.memset_fn); + var call_args = [_]c.LLVMValueRef{ dst, val_i32, size_val }; + _ = c.LLVMBuildCall2(self.builder, fn_ty, builtins.memset_fn, &call_args, 3, ""); + return null; + } + fn genVectorExtract(self: *CodeGen, vec_val: c.LLVMValueRef, field: []const u8) !c.LLVMValueRef { if (field.len == 1) { const idx_val = componentToIndex(field[0]) orelse return self.emitErrorFmt("invalid vector component '{c}'", .{field[0]}); @@ -6882,10 +6885,10 @@ pub const CodeGen = struct { if (std.mem.eql(u8, base, "cos")) return self.genMathIntrinsic(call_node, "cos"); if (std.mem.eql(u8, base, "size_of")) return self.genSizeOf(call_node); if (std.mem.eql(u8, base, "cast")) return self.genCast(call_node); - if (std.mem.eql(u8, base, "alloc")) return self.genAlloc(call_node.args); if (std.mem.eql(u8, base, "malloc")) return self.genMalloc(call_node.args); if (std.mem.eql(u8, base, "free")) return self.genFree(call_node.args); if (std.mem.eql(u8, base, "memcpy")) return self.genMemcpy(call_node.args); + if (std.mem.eql(u8, base, "memset")) return self.genMemset(call_node.args); if (std.mem.eql(u8, base, "type_of")) return self.genTypeOf(call_node); if (std.mem.eql(u8, base, "type_name")) return self.genTypeName(call_node); if (std.mem.eql(u8, base, "field_count")) return self.genFieldCount(call_node); @@ -7144,11 +7147,10 @@ pub const CodeGen = struct { if (call_node.args.len > 0) return self.resolveType(call_node.args[0]); return Type.s(64); } - // Built-in: alloc returns string - if (std.mem.eql(u8, base_name, "alloc")) return .string_type; if (std.mem.eql(u8, base_name, "malloc")) return .{ .pointer_type = .{ .pointee_name = "void" } }; if (std.mem.eql(u8, base_name, "free")) return .void_type; - if (std.mem.eql(u8, base_name, "memcpy")) return .void_type; + if (std.mem.eql(u8, base_name, "memcpy")) return .{ .pointer_type = .{ .pointee_name = "void" } }; + if (std.mem.eql(u8, base_name, "memset")) return .void_type; // Check generic templates — infer return type from widened bindings const template = self.generic_templates.get(callee_name) orelse blk: { // Intra-namespace fallback diff --git a/src/comptime.zig b/src/comptime.zig index 48dc6df..3168664 100644 --- a/src/comptime.zig +++ b/src/comptime.zig @@ -21,12 +21,19 @@ pub const Value = union(enum) { type_val: Type, function_val: FunctionVal, pointer_val: PointerValue, + byte_ptr_val: BytePtr, + union_val: UnionValue, null_val: void, pub const PointerValue = struct { target: [*]Value, }; + pub const BytePtr = struct { + data: []u8, + offset: usize, + }; + pub const StructValue = struct { type_name: []const u8, field_names: []const []const u8, @@ -37,6 +44,11 @@ pub const Value = union(enum) { elements: []Value, }; + pub const UnionValue = struct { + type_name: []const u8, + words: []Value, + }; + pub const FunctionVal = struct { name: []const u8, param_count: u8, @@ -123,6 +135,19 @@ pub const Value = union(enum) { const inner = try pv.target[0].format(allocator); return std.fmt.allocPrint(allocator, "@{s}", .{inner}); }, + .byte_ptr_val => |sp| std.fmt.allocPrint(allocator, "", .{sp.offset}), + .union_val => |v| { + var buf: std.ArrayList(u8) = .empty; + try buf.appendSlice(allocator, v.type_name); + try buf.append(allocator, '{'); + for (v.words, 0..) |w, i| { + if (i > 0) try buf.appendSlice(allocator, ", "); + const ws = try w.format(allocator); + try buf.appendSlice(allocator, ws); + } + try buf.append(allocator, '}'); + return buf.items; + }, .null_val => allocator.dupe(u8, "null"), }; } @@ -208,16 +233,24 @@ pub const Instruction = union(enum) { concat, format_to_string, // convert top-of-stack value to string representation + // Unions + make_union: UnionMake, + get_union_field: UnionFieldAccess, + pub const CastInfo = struct { to: ValueKind }; pub const CallInfo = struct { func_name: []const u8, arg_count: u8 }; pub const BuiltinCall = struct { id: BuiltinId, arg_count: u8 }; pub const StructMake = struct { type_name: []const u8, field_count: u16, field_names: []const []const u8 }; pub const FnRef = struct { name: []const u8, param_count: u8 }; + pub const UnionMake = struct { type_name: []const u8, word_count: u16 }; + pub const UnionFieldAccess = struct { word_offset: u16, field_type: UnionFieldType }; }; +pub const UnionFieldType = enum { int, float, bool_k, pointer, string }; + pub const ValueKind = enum { int, float, f32_k, bool_k, string }; -pub const BuiltinId = enum { print, out, sqrt, size_of, cast, alloc }; +pub const BuiltinId = enum { print, out, sqrt, size_of, cast, malloc, free, memcpy, memset }; /// A compiled function or expression — a flat sequence of instructions. pub const Chunk = struct { @@ -273,6 +306,7 @@ pub const Compiler = struct { const Local = struct { name: []const u8, depth: u16, + type_name: ?[]const u8 = null, }; pub fn init(allocator: std.mem.Allocator, sema_result: ?*const sema.SemaResult, root_decls: []const *Node, cg: ?*codegen_mod.CodeGen) Compiler { @@ -347,9 +381,13 @@ pub const Compiler = struct { } /// Look up a struct field index by name, handling pointer auto-deref. + /// Also resolves promoted fields from anonymous struct variants of unions. fn resolveFieldIndex(self: *Compiler, object: *Node, field: []const u8) ?u16 { if (self.sema_result) |sr| { - const obj_ty = sr.type_map.get(object) orelse return null; + const obj_ty = sr.type_map.get(object) orelse { + // Sema doesn't have type info — try union fallback + return self.resolveFieldViaUnion(object, field); + }; const struct_name: ?[]const u8 = if (obj_ty.isStruct()) obj_ty.struct_type else if (obj_ty.isPointer()) @@ -365,10 +403,93 @@ pub const Compiler = struct { } } } + // Fall through to union fallback + return self.resolveFieldViaUnion(object, field); + } + return self.resolveFieldViaUnion(object, field); + } + + fn resolveFieldViaUnion(self: *Compiler, object: *Node, field: []const u8) ?u16 { + const tname = self.getLocalTypeName(object) orelse return null; + return self.resolveUnionPromotedField(tname, field); + } + + /// Find a union declaration in root_decls by name and return its word count + /// (number of 8-byte slots needed for the largest variant). + fn findUnionWordCount(self: *Compiler, type_name: []const u8) ?u16 { + for (self.root_decls) |decl| { + if (decl.data == .union_decl) { + const ud = decl.data.union_decl; + if (std.mem.eql(u8, ud.name, type_name)) { + var max_words: u16 = 0; + for (ud.field_types) |ft| { + var words: u16 = 1; // default: single-word variant + if (ft.data == .struct_decl) { + words = @intCast(ft.data.struct_decl.field_names.len); + } else if (ft.data == .type_expr) { + if (Type.fromTypeExpr(ft)) |ty| { + // string = {ptr, len} = 2 words + if (ty == .string_type) words = 2; + } + } + if (words > max_words) max_words = words; + } + return max_words; + } + } } return null; } + /// Find a union declaration in root_decls by name. + fn findUnionDecl(self: *Compiler, type_name: []const u8) ?ast.UnionDecl { + for (self.root_decls) |decl| { + if (decl.data == .union_decl) { + const ud = decl.data.union_decl; + if (std.mem.eql(u8, ud.name, type_name)) return ud; + } + } + return null; + } + + /// Resolve a promoted field from an anonymous struct variant of a union. + /// Returns the field index within the anonymous struct. + fn resolveUnionPromotedField(self: *Compiler, type_name: []const u8, field: []const u8) ?u16 { + const ud = self.findUnionDecl(type_name) orelse return null; + for (ud.field_types) |ft| { + if (ft.data == .struct_decl) { + const sd = ft.data.struct_decl; + for (sd.field_names, 0..) |fname, idx| { + if (std.mem.eql(u8, fname, field)) return @intCast(idx); + } + } + } + return null; + } + + /// Get the local's type name if the object is an identifier. + fn getLocalTypeName(self: *Compiler, object: *Node) ?[]const u8 { + if (object.data != .identifier) return null; + const slot = self.resolveLocal(object.data.identifier.name) orelse return null; + return self.locals.items[slot].type_name; + } + + /// Map a union variant's type node to a UnionFieldType for get_union_field. + fn nodeToUnionFieldType(_: *Compiler, type_node: *Node) UnionFieldType { + if (type_node.data == .type_expr) { + const name = type_node.data.type_expr.name; + if (std.mem.eql(u8, name, "string")) return .string; + if (std.mem.eql(u8, name, "s64") or std.mem.eql(u8, name, "s32") or + std.mem.eql(u8, name, "u64") or std.mem.eql(u8, name, "u32") or + std.mem.eql(u8, name, "s16") or std.mem.eql(u8, name, "u16") or + std.mem.eql(u8, name, "s8") or std.mem.eql(u8, name, "u8")) return .int; + if (std.mem.eql(u8, name, "f64") or std.mem.eql(u8, name, "f32")) return .float; + if (std.mem.eql(u8, name, "bool")) return .bool_k; + } + // Default to pointer for unknown/complex types + return .pointer; + } + fn resolveLocal(self: *Compiler, name: []const u8) ?u16 { var i = self.locals.items.len; while (i > 0) { @@ -534,13 +655,32 @@ pub const Compiler = struct { self.scope_depth -= 1; }, .var_decl => |vd| { + // Extract type name from annotation + const type_name: ?[]const u8 = if (vd.type_annotation) |ta| + (if (ta.data == .type_expr) ta.data.type_expr.name else null) + else + null; + if (vd.value) |val| { - try self.compileNode(val); + if (val.data == .undef_literal) { + // Undefined init — check if type is a union and emit make_union + if (type_name) |tname| { + if (self.findUnionWordCount(tname)) |wc| { + try self.emit(.{ .make_union = .{ .type_name = tname, .word_count = wc } }); + } else { + try self.emit(.push_void); + } + } else { + try self.emit(.push_void); + } + } else { + try self.compileNode(val); + } } else { try self.emit(.push_void); } const slot: u16 = @intCast(self.locals.items.len); - try self.locals.append(self.allocator, .{ .name = vd.name, .depth = self.scope_depth }); + try self.locals.append(self.allocator, .{ .name = vd.name, .depth = self.scope_depth, .type_name = type_name }); try self.emit(.{ .set_local = slot }); }, .const_decl => |cd| { @@ -725,8 +865,20 @@ pub const Compiler = struct { try self.emit(.{ .get_field = field_idx }); return; } - // Fallback: use field name for well-known string fields - // (sema may not have type info for nodes in imported function bodies) + // Check for union variant access (e.g. rs.s where rs is CString union) + if (self.getLocalTypeName(fa.object)) |tname| { + if (self.findUnionDecl(tname)) |ud| { + for (ud.field_names, ud.field_types) |fname, ftype| { + if (std.mem.eql(u8, fname, fa.field)) { + const uft = self.nodeToUnionFieldType(ftype); + try self.emit(.{ .get_union_field = .{ .word_offset = 0, .field_type = uft } }); + return; + } + } + } + } + // Fallback for untyped field access (e.g. imported function bodies + // without sema info): assume fat pointer layout {ptr=0, len=1} if (std.mem.eql(u8, fa.field, "len")) { try self.emit(.{ .get_field = 1 }); } else { @@ -953,6 +1105,9 @@ pub const VM = struct { if (arr == .array_val) { if (idx >= arr.array_val.elements.len) return error.IndexOutOfBounds; try self.push(.{ .pointer_val = .{ .target = arr.array_val.elements.ptr + idx } }); + } else if (arr == .string_val) { + if (idx > arr.string_val.len) return error.IndexOutOfBounds; + try self.push(.{ .byte_ptr_val = .{ .data = @constCast(arr.string_val), .offset = idx } }); } else { return error.TypeError; } @@ -1088,11 +1243,17 @@ pub const VM = struct { try self.push(.{ .void_val = {} }); } } else if (obj == .string_val) { - // String slice: field 0 = ptr (return string itself), field 1 = len + // String slice: field 0 = ptr (byte-level pointer), field 1 = len if (idx == 1) { try self.push(.{ .int_val = @intCast(obj.string_val.len) }); } else { - try self.push(obj); // ptr → return string itself + try self.push(.{ .byte_ptr_val = .{ .data = @constCast(obj.string_val), .offset = 0 } }); + } + } else if (obj == .union_val) { + if (idx < obj.union_val.words.len) { + try self.push(obj.union_val.words[idx]); + } else { + try self.push(.{ .void_val = {} }); } } else { return error.TypeError; @@ -1109,6 +1270,11 @@ pub const VM = struct { if (idx < sv.fields.len) { sv.fields[idx] = val; } + } else if (target[0] == .union_val) { + const uv = target[0].union_val; + if (idx < uv.words.len) { + uv.words[idx] = val; + } } try self.push(raw_obj); // push pointer back } else if (raw_obj == .struct_val) { @@ -1116,6 +1282,11 @@ pub const VM = struct { raw_obj.struct_val.fields[idx] = val; } try self.push(raw_obj); + } else if (raw_obj == .union_val) { + if (idx < raw_obj.union_val.words.len) { + raw_obj.union_val.words[idx] = val; + } + try self.push(raw_obj); } else { return error.TypeError; } @@ -1201,6 +1372,49 @@ pub const VM = struct { try self.push(.{ .string_val = s }); }, + // Unions + .make_union => |um| { + const words = try self.allocator.alloc(Value, um.word_count); + @memset(words, .{ .void_val = {} }); + try self.push(.{ .union_val = .{ .type_name = um.type_name, .words = words } }); + }, + .get_union_field => |uf| { + const raw_obj = try self.pop(); + const obj = if (raw_obj == .pointer_val) raw_obj.pointer_val.target[0] else raw_obj; + if (obj == .union_val) { + const words = obj.union_val.words; + switch (uf.field_type) { + .string => { + // Reconstruct string_val from words[0] (byte_ptr_val) + words[1] (int_val) + if (uf.word_offset + 1 < words.len or (uf.word_offset == 0 and words.len >= 2)) { + const ptr_word = words[uf.word_offset]; + const len_word = words[uf.word_offset + 1]; + if (ptr_word == .byte_ptr_val) { + const bp = ptr_word.byte_ptr_val; + const len: usize = if (len_word.asInt()) |v| @intCast(@max(0, v)) else 0; + const end = @min(bp.offset + len, bp.data.len); + try self.push(.{ .string_val = bp.data[bp.offset..end] }); + } else { + try self.push(.{ .string_val = "" }); + } + } else { + try self.push(.{ .string_val = "" }); + } + }, + else => { + // Single-word read + if (uf.word_offset < words.len) { + try self.push(words[uf.word_offset]); + } else { + try self.push(.{ .void_val = {} }); + } + }, + } + } else { + return error.TypeError; + } + }, + .cast => {}, } } @@ -1288,6 +1502,13 @@ pub const VM = struct { } return .{ .array_val = .{ .elements = new_elements } }; }, + .union_val => |uv| { + const new_words = try self.allocator.alloc(Value, uv.words.len); + for (uv.words, 0..) |w, i| { + new_words[i] = try self.cloneValue(w); + } + return .{ .union_val = .{ .type_name = uv.type_name, .words = new_words } }; + }, else => val, }; } @@ -1465,18 +1686,53 @@ pub const VM = struct { try self.push(.{ .int_val = 0 }); } }, - .alloc => { - // alloc(size) — allocate zeroed byte buffer, return as string + .malloc => { + // malloc(size) — allocate byte buffer, return as byte_ptr_val if (arg_count >= 1) { const val = try self.pop(); const size: usize = if (val.asInt()) |v| @intCast(@max(0, v)) else 0; const buf = try self.allocator.alloc(u8, size); @memset(buf, 0); - try self.push(.{ .string_val = buf }); + try self.push(.{ .byte_ptr_val = .{ .data = buf, .offset = 0 } }); } else { - try self.push(.{ .string_val = "" }); + try self.push(.{ .byte_ptr_val = .{ .data = &.{}, .offset = 0 } }); } }, + .free => { + // free(ptr) — no-op at comptime (arena cleanup) + if (arg_count >= 1) _ = try self.pop(); + try self.push(.{ .void_val = {} }); + }, + .memcpy => { + // memcpy(dst, src, len) — copy len bytes from src to dst + if (arg_count >= 3) { + const len_val = try self.pop(); + const src_val = try self.pop(); + const dst_val = try self.pop(); + const len: usize = if (len_val.asInt()) |v| @intCast(@max(0, v)) else 0; + if (dst_val == .byte_ptr_val and src_val == .byte_ptr_val) { + const dst = dst_val.byte_ptr_val; + const src = src_val.byte_ptr_val; + @memcpy(dst.data[dst.offset .. dst.offset + len], src.data[src.offset .. src.offset + len]); + } + } + try self.push(.{ .void_val = {} }); + }, + .memset => { + // memset(dst, val, len) — fill len bytes at dst with val + if (arg_count >= 3) { + const len_val = try self.pop(); + const val = try self.pop(); + const dst_val = try self.pop(); + const len: usize = if (len_val.asInt()) |v| @intCast(@max(0, v)) else 0; + const byte: u8 = if (val.asInt()) |v| @intCast(v & 0xFF) else 0; + if (dst_val == .byte_ptr_val) { + const dst = dst_val.byte_ptr_val; + @memset(dst.data[dst.offset .. dst.offset + len], byte); + } + } + try self.push(.{ .void_val = {} }); + }, } } diff --git a/src/lsp/server.zig b/src/lsp/server.zig index f35f204..c51892f 100644 --- a/src/lsp/server.zig +++ b/src/lsp/server.zig @@ -437,7 +437,10 @@ pub const Server = struct { .{ .label = "field_value", .detail = "(s: $T, idx: s32) -> Any" }, .{ .label = "size_of", .detail = "($T: Type) -> s32" }, .{ .label = "cast", .detail = "(Type) expr — prefix type cast" }, - .{ .label = "alloc", .detail = "(size: s32) -> string" }, + .{ .label = "malloc", .detail = "(size: s64) -> *void" }, + .{ .label = "free", .detail = "(ptr: *void) -> void" }, + .{ .label = "memcpy", .detail = "(dst: *void, src: *void, size: s64) -> *void" }, + .{ .label = "memset", .detail = "(dst: *void, val: s64, size: s64) -> void" }, .{ .label = "sqrt", .detail = "(x: $T) -> T" }, }; for (&keywords) |kw| { @@ -876,7 +879,10 @@ pub const Server = struct { .{ .name = "field_value", .label = "field_value(s: $T, idx: s32) -> Any", .params = &.{ "s: $T", "idx: s32" } }, .{ .name = "size_of", .label = "size_of($T: Type) -> s32", .params = &.{"$T: Type"} }, .{ .name = "cast", .label = "cast(Type) expr", .params = &.{"Type"} }, - .{ .name = "alloc", .label = "alloc(size: s32) -> string", .params = &.{"size: s32"} }, + .{ .name = "malloc", .label = "malloc(size: s64) -> *void", .params = &.{"size: s64"} }, + .{ .name = "free", .label = "free(ptr: *void) -> void", .params = &.{"ptr: *void"} }, + .{ .name = "memcpy", .label = "memcpy(dst: *void, src: *void, size: s64) -> *void", .params = &.{ "dst: *void", "src: *void", "size: s64" } }, + .{ .name = "memset", .label = "memset(dst: *void, val: s64, size: s64) -> void", .params = &.{ "dst: *void", "val: s64", "size: s64" } }, .{ .name = "sqrt", .label = "sqrt(x: $T) -> T", .params = &.{"x: $T"} }, .{ .name = "print", .label = "print(fmt: string, args: ..Any)", .params = &.{ "fmt: string", "args: ..Any" } }, .{ .name = "out", .label = "out(str: string) -> void", .params = &.{"str: string"} },