// File: build.zig
const std = @import("std");

/// Declares the build graph for zig-lisp: the importable `zig_lisp` module,
/// the `zig_lisp` executable, and the `run` and `test` top-level steps.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Library module rooted at src/root.zig. It is created with `addModule`
    // and therefore carries a name ("zig_lisp") under which it is registered.
    const lib_module = b.addModule("zig_lisp", .{
        .root_source_file = b.path("src/root.zig"),
        .target = target,
    });

    // Module for the command-line program; it reaches the library through
    // `@import("zig_lisp")`.
    const exe_module = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
        .imports = &.{
            .{ .name = "zig_lisp", .module = lib_module },
        },
    });
    const exe = b.addExecutable(.{
        .name = "zig_lisp",
        .root_module = exe_module,
    });

    // Install the executable as part of the default step.
    b.installArtifact(exe);

    // `zig build run [-- args...]`: the run command depends on the install
    // step and forwards any arguments given after `--` to the program.
    const run_step = b.step("run", "Run the app");
    const run_exe = b.addRunArtifact(exe);
    run_step.dependOn(&run_exe.step);
    run_exe.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded| run_exe.addArgs(forwarded);

    // `zig build test`: one test binary per source file that carries tests.
    const test_step = b.step("test", "Run tests");
    const test_roots = [_][]const u8{ "src/Tokenizer.zig", "src/Parser.zig" };
    for (test_roots) |root_path| {
        const unit_tests = b.addTest(.{
            .root_module = b.createModule(.{
                .root_source_file = b.path(root_path),
                .target = target,
                .optimize = optimize,
            }),
        });
        test_step.dependOn(&b.addRunArtifact(unit_tests).step);
    }
}
// File: build.zig.zon
.{
    // This is the default name used by packages depending on this one. For
    // example, when a user runs `zig fetch --save <url>`, this field is used
    // as the key in the `dependencies` table. Although the user can choose a
    // different name, most users will stick with this provided value.
    //
    // It is redundant to include "zig" in this name because it is already
    // within the Zig package namespace.
    .name = .zig_lisp,
    // This is a [Semantic Version](https://semver.org/).
    // In a future version of Zig it will be used for package deduplication.
    .version = "0.0.0",
    // Together with name, this represents a globally unique package
    // identifier. This field is generated by the Zig toolchain when the
    // package is first created, and then *never changes*. This allows
    // unambiguous detection of one package being an updated version of
    // another.
    //
    // When forking a Zig project, this id should be regenerated (delete the
    // field and run `zig build`) if the upstream project is still maintained.
    // Otherwise, the fork is *hostile*, attempting to take control over the
    // original project's identity. Thus it is recommended to leave the comment
    // on the following line intact, so that it shows up in code reviews that
    // modify the field.
    .fingerprint = 0x8bcfdfe2f76b43c1, // Changing this has security and trust implications.
    // Tracks the earliest Zig version that the package considers to be a
    // supported use case.
    .minimum_zig_version = "0.15.2",
    // This field is optional.
    // Each dependency must either provide a `url` and `hash`, or a `path`.
    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
    // Once all dependencies are fetched, `zig build` no longer requires
    // internet connectivity.
    .dependencies = .{
        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the corresponding
        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
        //    // which will prevent zig from using it.
        //    .url = "https://example.com/foo.tar.gz",
        //
        //    // This is computed from the file contents of the directory of files that is
        //    // obtained after fetching `url` and applying the inclusion rules given by
        //    // `paths`.
        //    //
        //    // This field is the source of truth; packages do not come from a `url`; they
        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
        //    // obtain a package matching this `hash`.
        //    //
        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
        //    .hash = "...",
        //
        //    // When this is provided, the package is found in a directory relative to the
        //    // build root. In this case the package's hash is irrelevant and therefore not
        //    // computed. This field and `url` are mutually exclusive.
        //    .path = "foo",
        //
        //    // When this is set to `true`, a package is declared to be lazily
        //    // fetched. This makes the dependency only get fetched if it is
        //    // actually used.
        //    .lazy = false,
        //},
    },
    // Specifies the set of files and directories that are included in this package.
    // Only files and directories listed here are included in the `hash` that
    // is computed for this package. Only files listed here will remain on disk
    // when using the zig package manager. As a rule of thumb, one should list
    // files required for compilation plus any license(s).
    // Paths are relative to the build root. Use the empty string (`""`) to refer to
    // the build root itself.
    // A directory listed here means that all files within, recursively, are included.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        // For example...
        //"LICENSE",
        //"README.md",
    },
}
// File: src/Parser.zig
const std = @import("std");
const log = std.log;
const Allocator = std.mem.Allocator;
const Writer = std.Io.Writer;
const DoublyLinkedList = std.DoublyLinkedList;
const Node = DoublyLinkedList.Node;

const Tokenizer = @import("Tokenizer.zig");

const Parser = @This();
// Grammar accepted by `Expression.parse`:
//   expression ::= integer | float | string | keyword | symbol | list
//                | ' expression | ` expression | , expression
//   integer    ::= token.integer
//   float      ::= token.float
//   string     ::= token.string
//   list       ::= ( expression... )
// Vectors (`[` ... `]`) are tokenized but not parsed yet; they yield
// `error.InvalidToken`.
//
// TODO: report parse errors together with the offending source line.

/// Everything `Expression.parse` can fail with.
const ParseError = error{
    /// The input ended where an expression was expected.
    EndOfFile,
    /// A token the parser cannot turn into an expression.
    InvalidToken,
    /// A `)` without an opening `(`, or input ending inside a list.
    UnmatchedParenthesis,
    /// Input ended right after `'`, `` ` `` or `,`.
    IncompleteExpression,
    OutOfMemory,
};

/// A parsed Lisp value.
///
/// Ownership: string, keyword and symbol payloads, list elements and quoted
/// sub-expressions all belong to the expression and are released by `deinit`
/// using the allocator they were created with. `parse` and `clone` both
/// return expressions that follow this rule.
pub const Expression = struct {
    const ExpressionTag = enum {
        nil,
        t,
        integer,
        float,
        string,
        keyword,
        symbol,
        list,
        funcall,
        quoted,
    };

    const Tag = union(ExpressionTag) {
        nil,
        t,
        integer: i64,
        float: f64,
        string: []const u8,
        keyword: []const u8,
        symbol: []const u8,
        /// Intrusive list of `Expression`s linked through their `node` field.
        list: DoublyLinkedList,
        /// Same layout as `list`; produced for unquoted parenthesised forms.
        funcall: DoublyLinkedList,
        quoted: *Expression,
    };

    const QuoteTag = enum {
        quote,
        backquote,
    };

    tag: Tag,
    quote: ?QuoteTag = null,
    /// Link used while the expression is an element of a `list`/`funcall`.
    node: Node = .{ .prev = null, .next = null },

    /// Maps `true` to `t` and `false` to `nil`.
    pub fn fromBool(b: bool) Expression {
        return .{ .tag = if (b) .t else .nil };
    }

    /// Everything except `nil` counts as true.
    pub fn toBool(self: Expression) bool {
        return self.tag != .nil;
    }

    /// Deep-copies every element of `source` into a new list.
    /// On failure nothing allocated here is left behind.
    fn cloneExpressionList(
        source: DoublyLinkedList,
        allocator: Allocator,
    ) error{OutOfMemory}!DoublyLinkedList {
        var dest = DoublyLinkedList{ .first = null, .last = null };
        errdefer deinitExpressionList(dest, allocator);

        var it = source.first;
        while (it) |node| : (it = node.next) {
            const elem: *Expression = @fieldParentPtr("node", node);
            const copy = try allocator.create(Expression);
            // Only release the node if cloning fails: once appended it is
            // owned by `dest` (the previous unconditional `defer` freed every
            // node while the list still pointed at it).
            errdefer allocator.destroy(copy);
            copy.* = try elem.clone(allocator);
            dest.append(&copy.node);
        }

        return dest;
    }

    /// Deep-copies the target of a `.quoted` pointer into a fresh allocation.
    fn cloneQuoted(
        quoted: *Expression,
        allocator: Allocator,
    ) error{OutOfMemory}!*Expression {
        const result = try allocator.create(Expression);
        errdefer allocator.destroy(result);
        result.* = try quoted.clone(allocator);
        return result;
    }

    /// Returns a deep copy that shares no memory with `self`.
    /// The copy is unlinked (fresh `node`) and must be released with `deinit`.
    pub fn clone(self: Expression, allocator: Allocator) error{OutOfMemory}!Expression {
        const tag: Tag = switch (self.tag) {
            .nil, .t, .integer, .float => self.tag,
            .string => |string| .{ .string = try allocator.dupe(u8, string) },
            .keyword => |keyword| .{ .keyword = try allocator.dupe(u8, keyword) },
            .symbol => |symbol| .{ .symbol = try allocator.dupe(u8, symbol) },
            .list => |list| .{ .list = try cloneExpressionList(list, allocator) },
            .funcall => |funcall| .{ .funcall = try cloneExpressionList(funcall, allocator) },
            .quoted => |quoted| .{ .quoted = try cloneQuoted(quoted, allocator) },
        };
        return .{ .tag = tag, .quote = self.quote };
    }

    /// Parses the expression that must follow a `'`, `` ` `` or `,` token.
    /// Running out of input here is reported as `IncompleteExpression`.
    fn parseOperand(
        tokenizer: *Tokenizer,
        alloc: Allocator,
        quote: ?QuoteTag,
        comptime marker: []const u8,
    ) ParseError!Expression {
        return Expression.parse(tokenizer, alloc, quote) catch |err| switch (err) {
            error.EndOfFile => {
                log.warn("Unexpected end of file after " ++ marker, .{});
                return error.IncompleteExpression;
            },
            else => return err,
        };
    }

    /// Builds the `.quoted` wrapper for `'expr` / `` `expr ``. An enclosing
    /// quote context (`inherited`) takes precedence when parsing the operand.
    fn parseQuoted(
        tokenizer: *Tokenizer,
        alloc: Allocator,
        inherited: ?QuoteTag,
        comptime kind: QuoteTag,
        comptime marker: []const u8,
    ) ParseError!Expression {
        const expr = try parseOperand(tokenizer, alloc, inherited orelse kind, marker);
        // The operand is fully built; do not leak it if boxing fails.
        errdefer expr.deinit(alloc);
        const quoted = try alloc.create(Expression);
        quoted.* = expr;
        return .{ .tag = .{ .quoted = quoted }, .quote = kind };
    }

    /// `nil` and `t` are constants; any other name becomes an owned symbol.
    fn symbolTag(alloc: Allocator, name: []const u8) error{OutOfMemory}!Tag {
        if (std.mem.eql(u8, name, "nil")) return .nil;
        if (std.mem.eql(u8, name, "t")) return .t;
        return .{ .symbol = try alloc.dupe(u8, name) };
    }

    fn parseIntegerTag(alloc: Allocator, text: []const u8) ParseError!Tag {
        const value = std.fmt.parseInt(i64, text, 10) catch |err| switch (err) {
            error.Overflow => {
                log.debug("Input number `{s}` is too large", .{text});
                return error.InvalidToken;
            },
            // The tokenizer in this project tags a lone "-" as an integer,
            // so this is reachable; such text is a symbol, not a number.
            error.InvalidCharacter => return symbolTag(alloc, text),
        };
        return .{ .integer = value };
    }

    fn parseFloatTag(alloc: Allocator, text: []const u8) ParseError!Tag {
        // "-." is tagged as a float by the tokenizer but has no digits;
        // treat it like any other non-numeric name instead of asserting.
        const value = std.fmt.parseFloat(f64, text) catch return symbolTag(alloc, text);
        return .{ .float = value };
    }

    /// Parses the elements of a list whose `(` has already been consumed,
    /// up to and including the matching `)`.
    fn parseListTag(
        tokenizer: *Tokenizer,
        alloc: Allocator,
        quote: ?QuoteTag,
    ) ParseError!Tag {
        var list: DoublyLinkedList = .{ .first = null, .last = null };
        // Elements collected so far are released if a later one fails.
        errdefer deinitExpressionList(list, alloc);

        while (tokenizer.peek().tag != .r_paren) {
            const expr = Expression.parse(tokenizer, alloc, quote) catch |err| switch (err) {
                error.EndOfFile => return error.UnmatchedParenthesis,
                else => return err,
            };
            errdefer expr.deinit(alloc);
            const elem = try alloc.create(Expression);
            elem.* = expr;
            list.append(&elem.node);
        }
        _ = tokenizer.next(); // consume ')'

        if (list.first == null) return .nil; // `()` is nil
        if (quote != null) return .{ .list = list };
        return .{ .funcall = list };
    }

    /// Parses one expression from `tokenizer`.
    ///
    /// `quote` is the quoting context inherited from enclosing forms: with a
    /// context a parenthesised form becomes a `.list`, without one it becomes
    /// a `.funcall`. Pass `null` at top level.
    ///
    /// The result owns all of its memory (token text is copied with `alloc`),
    /// so it stays valid after the tokenizer's buffer is gone and must be
    /// released with `deinit(alloc)`. On error nothing allocated here remains.
    ///
    /// `.eof` and a stray `)` are reported without being consumed; every
    /// other token is consumed even when an error is returned.
    pub fn parse(
        tokenizer: *Tokenizer,
        alloc: Allocator,
        quote: ?QuoteTag,
    ) ParseError!Expression {
        const token = tokenizer.peek();
        const token_value = tokenizer.tokenValue(token);

        switch (token.tag) {
            .eof => return error.EndOfFile,
            .r_paren => return error.UnmatchedParenthesis,
            else => _ = tokenizer.next(),
        }

        const result: Expression = switch (token.tag) {
            .eof, .r_paren => unreachable, // returned above

            .invalid => {
                log.warn("Invalid token {s}", .{token_value});
                return error.InvalidToken;
            },

            .quote => try parseQuoted(tokenizer, alloc, quote, .quote, "'"),
            .backquote => try parseQuoted(tokenizer, alloc, quote, .backquote, "`"),

            .comma => blk: {
                // A comma cancels an enclosing backquote but not a plain quote.
                const next_quote: ?QuoteTag = if (quote) |q| switch (q) {
                    .backquote => null,
                    .quote => .quote,
                } else null;
                const expr = try parseOperand(tokenizer, alloc, next_quote, ",");
                break :blk .{ .tag = expr.tag, .quote = next_quote };
            },

            .integer => .{ .tag = try parseIntegerTag(alloc, token_value) },
            .float => .{ .tag = try parseFloatTag(alloc, token_value) },

            // Strip the surrounding double quotes; the payload is copied
            // because `deinit` frees it with the allocator.
            .string => .{ .tag = .{
                .string = try alloc.dupe(u8, token_value[1 .. token_value.len - 1]),
            } },
            .keyword => .{ .tag = .{ .keyword = try alloc.dupe(u8, token_value) } },
            .identifier => .{ .tag = try symbolTag(alloc, token_value) },

            .l_paren => .{ .tag = try parseListTag(tokenizer, alloc, quote) },

            else => {
                log.debug("> Unimplemented", .{});
                return error.InvalidToken;
            },
        };

        log.debug("=> EXPRESSION({s}: {f})", .{ @tagName(result.tag), result });
        return result;
    }

    /// Writes the expression in Lisp syntax; used by the `{f}` format specifier.
    pub fn format(self: Expression, writer: *Writer) Writer.Error!void {
        if (self.quote) |q| {
            const c: u8 = switch (q) {
                .quote => '\'',
                .backquote => '`',
            };
            try writer.print("{c}", .{c});
        }

        switch (self.tag) {
            .nil => try writer.print("nil", .{}),
            .t => try writer.print("t", .{}),
            .integer => |value| try writer.print("{}", .{value}),
            .float => |value| try writer.print("{}", .{value}),
            .string => |value| try writer.print("\"{s}\"", .{value}),
            .keyword, .symbol => |value| try writer.print("{s}", .{value}),
            .list, .funcall => |list| {
                try writer.print("(", .{});
                var it = list.first;
                while (it) |node| : (it = node.next) {
                    const elem: *Expression = @fieldParentPtr("node", node);
                    try writer.print("{f}", .{elem.*});
                    if (node.next != null) {
                        try writer.print(" ", .{});
                    }
                }
                try writer.print(")", .{});
            },
            .quoted => |value| try writer.print("{f}", .{value.*}),
        }
    }

    /// Releases every element of `list` together with its node allocation.
    fn deinitExpressionList(list: DoublyLinkedList, allocator: Allocator) void {
        var it = list.first;
        while (it) |node| {
            // Fetch the successor first: `node` lives inside the element
            // that is about to be destroyed.
            it = node.next;
            const elem: *Expression = @fieldParentPtr("node", node);
            elem.deinit(allocator);
            allocator.destroy(elem);
        }
    }

    /// Frees everything the expression owns. `allocator` must be the one the
    /// expression was created with (by `parse` or `clone`).
    pub fn deinit(self: Expression, allocator: Allocator) void {
        switch (self.tag) {
            .nil, .t, .integer, .float => {},
            .string, .keyword, .symbol => |string| allocator.free(string),
            .list, .funcall => |list| deinitExpressionList(list, allocator),
            .quoted => |quoted| {
                quoted.deinit(allocator);
                allocator.destroy(quoted);
            },
        }
    }
};

test "parse, clone and deinit own their memory" {
    const allocator = std.testing.allocator;
    var tokenizer = Tokenizer.init("(define x '(1 2.5 \"s\" :k nil))");

    const expr = try Expression.parse(&tokenizer, allocator, null);
    defer expr.deinit(allocator);
    const copy = try expr.clone(allocator);
    defer copy.deinit(allocator);

    try std.testing.expect(expr.tag == .funcall);
    try std.testing.expect(copy.tag == .funcall);
}

test "failed parse releases partial results" {
    const allocator = std.testing.allocator;
    var tokenizer = Tokenizer.init("(a \"b\" (c");
    try std.testing.expectError(
        error.UnmatchedParenthesis,
        Expression.parse(&tokenizer, allocator, null),
    );
}

test "lone minus parses as a symbol" {
    const allocator = std.testing.allocator;
    var tokenizer = Tokenizer.init("-");
    const expr = try Expression.parse(&tokenizer, allocator, null);
    defer expr.deinit(allocator);
    try std.testing.expectEqualStrings("-", expr.tag.symbol);
}
// File: src/Tokenizer.zig
const std = @import("std");
const Reader = std.Io.Reader;
const Allocator = std.mem.Allocator;
const log = std.log;
const Tokenizer = @This();

/// Half-open byte range `[begin, end)` into the tokenizer's buffer.
const Loc = struct {
    begin: usize,
    end: usize,
};

const Token = struct {
    const Tag = enum {
        invalid,
        eof,
        comment,
        identifier,
        keyword,
        string,
        integer,
        float,
        l_paren,
        r_paren,
        quote,
        comma,
        period,
        backquote,
        l_bracket,
        r_bracket,
    };

    tag: Tag,
    loc: Loc,
};

const State = enum {
    begin,
    invalid,
    semi_colon,
    comment,
    string,
    maybe_integer,
    maybe_float,
    identifier,
    period,
    keyword,
};

/// Number of the line the cursor is on, starting at 1.
line: usize,
state: State,
/// Cursor: index of the next byte to examine.
index: usize,
buffer: [:0]const u8,
/// Token produced by `peek` that `next` has not handed out yet.
cache: ?Token,

/// Creates a tokenizer over `buffer`. The buffer is borrowed, not copied,
/// and must outlive the tokenizer and every slice it returns.
pub fn init(buffer: [:0]const u8) @This() {
    return .{
        .line = 1,
        .cache = null,
        .state = .begin,
        .index = 0,
        .buffer = buffer,
    };
}

/// Returns the source text of `token` as a slice of the buffer.
pub fn tokenValue(self: Tokenizer, token: Token) []const u8 {
    return self.buffer[token.loc.begin..token.loc.end];
}

/// Returns the full source line(s) containing `token`, without the
/// surrounding newline characters.
pub fn tokenLineContext(self: Tokenizer, token: Token) []const u8 {
    // Start just after the last newline before the token (or at the start
    // of the buffer) and stop at the first newline at or after its end.
    const begin = if (std.mem.lastIndexOfScalar(
        u8,
        self.buffer[0..token.loc.begin],
        '\n',
    )) |newline| newline + 1 else 0;
    const end = std.mem.indexOfScalarPos(
        u8,
        self.buffer,
        token.loc.end,
        '\n',
    ) orelse self.buffer.len;

    return self.buffer[begin..end];
}

/// Returns the next token without consuming it.
pub fn peek(self: *Tokenizer) Token {
    if (self.cache) |token| {
        return token;
    }
    self.cache = self.next();
    return self.cache.?;
}

/// Characters that may start an identifier.
fn isSymbolStart(c: u8) bool {
    return switch (c) {
        '|', '~', '/', '!', '@', '$', '%', '^', '&', '*', '_', '+', '=', '<', '>' => true,
        'a'...'z', 'A'...'Z' => true,
        else => false,
    };
}

/// Characters that may continue an identifier or a keyword.
fn isSymbolChar(c: u8) bool {
    return switch (c) {
        '.', '-', '?', ':', '0'...'9' => true,
        else => isSymbolStart(c),
    };
}

/// Consumes and returns the next token. Whitespace and `;;` comments are
/// skipped. At the end of the buffer an `.eof` token is returned, and it is
/// returned again on every further call.
pub fn next(self: *Tokenizer) Token {
    if (self.cache) |token| {
        self.cache = null;
        return token;
    }
    var result = Token{
        .tag = undefined,
        .loc = .{
            .begin = self.index,
            .end = undefined,
        },
    };

    state: switch (State.begin) {
        .begin => switch (self.buffer[self.index]) {
            0 => {
                // A skipped comment may have left `begin` at its first ';'.
                result.loc.begin = self.index;
                if (self.index == self.buffer.len) {
                    result.tag = .eof;
                } else {
                    // NUL byte inside the buffer, not the sentinel.
                    continue :state .invalid;
                }
            },

            '\n' => {
                self.line += 1;
                self.index += 1;
                result.loc.begin = self.index;
                continue :state .begin;
            },

            ' ', '\t', '\r' => {
                self.index += 1;
                result.loc.begin = self.index;
                continue :state .begin;
            },

            ';' => {
                result.loc.begin = self.index;
                continue :state .semi_colon;
            },

            '"' => {
                result.tag = .string;
                continue :state .string;
            },

            '(' => {
                self.index += 1;
                result.tag = .l_paren;
            },

            ')' => {
                self.index += 1;
                result.tag = .r_paren;
            },

            '[' => {
                self.index += 1;
                result.tag = .l_bracket;
            },

            ']' => {
                self.index += 1;
                result.tag = .r_bracket;
            },

            '\'' => {
                self.index += 1;
                result.tag = .quote;
            },

            ',' => {
                self.index += 1;
                result.tag = .comma;
            },

            '`' => {
                self.index += 1;
                result.tag = .backquote;
            },

            ':' => {
                result.tag = .keyword;
                continue :state .keyword;
            },

            '.' => {
                result.tag = .period;
                continue :state .period;
            },

            '-', '0'...'9' => {
                result.tag = .integer;
                continue :state .maybe_integer;
            },

            else => |c| {
                if (isSymbolStart(c)) {
                    result.tag = .identifier;
                    continue :state .identifier;
                }
                log.debug("Found invalid character: '{c}'", .{c});
                continue :state .invalid;
            },
        },

        // A single ';' is not a comment; only ";;" starts one.
        .semi_colon => {
            self.index += 1;
            switch (self.buffer[self.index]) {
                ';' => {
                    result.tag = .comment;
                    continue :state .comment;
                },
                else => continue :state .invalid,
            }
        },

        // Comments run to the end of the line and produce no token.
        .comment => {
            self.index += 1;
            switch (self.buffer[self.index]) {
                0, '\n' => continue :state .begin,
                else => continue :state .comment,
            }
        },

        // "." alone is a period, ".<digit>" starts a float, anything else
        // symbol-like makes it an identifier.
        .period => {
            self.index += 1;
            switch (self.buffer[self.index]) {
                '0'...'9' => {
                    result.tag = .float;
                    continue :state .maybe_float;
                },
                '.', '-', ':' => {
                    result.tag = .identifier;
                    continue :state .identifier;
                },
                else => |c| if (isSymbolStart(c)) {
                    result.tag = .identifier;
                    continue :state .identifier;
                },
            }
        },

        .string => {
            self.index += 1;
            switch (self.buffer[self.index]) {
                0 => {
                    if (self.index != self.buffer.len) {
                        continue :state .invalid;
                    } else {
                        // Unterminated string.
                        result.tag = .invalid;
                    }
                },
                '"' => self.index += 1,
                '\n' => {
                    // Strings may span lines; keep the line counter in step.
                    self.line += 1;
                    continue :state .string;
                },
                0x01...0x09, 0x0b...0x1f, 0x7f => {
                    continue :state .invalid;
                },
                else => continue :state .string,
            }
        },

        .keyword => {
            self.index += 1;
            if (isSymbolChar(self.buffer[self.index])) continue :state .keyword;
        },

        .maybe_float => {
            self.index += 1;
            switch (self.buffer[self.index]) {
                '0'...'9' => continue :state .maybe_float,
                '.', '-', ':' => {
                    result.tag = .identifier;
                    continue :state .identifier;
                },
                else => |c| if (isSymbolStart(c)) {
                    result.tag = .identifier;
                    continue :state .identifier;
                },
            }
        },

        .maybe_integer => {
            self.index += 1;
            switch (self.buffer[self.index]) {
                '.' => {
                    result.tag = .float;
                    continue :state .maybe_float;
                },
                '0'...'9' => continue :state .maybe_integer,
                '-', '?', ':' => {
                    result.tag = .identifier;
                    continue :state .identifier;
                },
                else => |c| if (isSymbolStart(c)) {
                    result.tag = .identifier;
                    continue :state .identifier;
                },
            }
        },

        .identifier => {
            self.index += 1;
            if (isSymbolChar(self.buffer[self.index])) continue :state .identifier;
        },

        // Swallow the rest of the line as one invalid token.
        .invalid => {
            // `.semi_colon` can hand over with the cursor already on the
            // sentinel (input ending in a single ';'); stepping further
            // would read past the end of the buffer.
            if (self.index < self.buffer.len) self.index += 1;
            switch (self.buffer[self.index]) {
                0 => if (self.index == self.buffer.len) {
                    result.tag = .invalid;
                } else {
                    continue :state .invalid;
                },
                '\n' => result.tag = .invalid,
                else => continue :state .invalid,
            }
        },
    }

    result.loc.end = self.index;

    // "-" and "-." reach this point tagged as numbers although they contain
    // no digit; they are ordinary names (e.g. the subtraction function).
    if (result.tag == .integer or result.tag == .float) {
        if (std.mem.indexOfAny(u8, self.tokenValue(result), "0123456789") == null) {
            result.tag = .identifier;
        }
    }

    log.debug("TOKEN({s} : '{s}')", .{ @tagName(result.tag), self.tokenValue(result) });
    return result;
}

/// Tokenizes `input` and checks that it yields exactly `expected_token_tags`
/// followed by `.eof`.
fn testTokenizer(input: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
    var tokenizer = Tokenizer.init(input);
    for (expected_token_tags) |expected_tag| {
        const token = tokenizer.next();
        try std.testing.expectEqual(expected_tag, token.tag);
    }
    const last_token = tokenizer.next();
    try std.testing.expectEqual(Token.Tag.eof, last_token.tag);
}

/// Checks that every input is a single token with tag `expected`.
fn expectSingleTokens(expected: Token.Tag, inputs: []const [:0]const u8) !void {
    for (inputs) |input| try testTokenizer(input, &.{expected});
}

test "strings" {
    try expectSingleTokens(.string, &.{
        "\"mystring\"",
        "\"string with spaces\"",
        "\"multi\nline\nstring\"",
    });
}

test "integer" {
    try expectSingleTokens(.integer, &.{
        "0",  "1",  "2",       "3",  "4",
        "5",  "6",  "7",       "8",  "9",
        "10", "9999999", "-1", "-0123",
    });
}

test "float" {
    try expectSingleTokens(.float, &.{ "0.1", "10.", ".17", "-.42" });
}

test "identifier" {
    try expectSingleTokens(.identifier, &.{
        "simple",
        "/c0m*!1c@t3d_$%^&*.-+=<>?:azAZ09",
        "..",
        "1..",
        "-1a",
        "12/34",
        "-.42-",
    });
}

test "keywords" {
    try expectSingleTokens(.keyword, &.{
        ":simple",
        ":/c0m*!1c@t3d_$%^&*.-+=<>?:azAZ09",
        ":..",
        ":1..",
        ":-1a",
        ":12/34",
        ":-.42-",
    });
}

test "singles" {
    try testTokenizer("()[]", &.{ .l_paren, .r_paren, .l_bracket, .r_bracket });
    try testTokenizer("'`,", &.{ .quote, .backquote, .comma });
    try testTokenizer(".", &.{.period});
}

test "simple list" {
    const input =
        \\(1 "1" :1)
    ;
    try testTokenizer(input, &.{
        .l_paren,
        .integer,
        .string,
        .keyword,
        .r_paren,
    });
}

test "invalid" {
    try expectSingleTokens(.invalid, &.{ "#", "?", "\"", ";" });
}

test "sign without digits is an identifier" {
    try expectSingleTokens(.identifier, &.{ "-", "-." });
    try testTokenizer("(- 1)", &.{ .l_paren, .identifier, .integer, .r_paren });
}

test "comments are skipped" {
    try testTokenizer(";; note\n1", &.{.integer});
    try testTokenizer(";; note", &.{});
}

test "tokenLineContext returns the enclosing line" {
    var tokenizer = Tokenizer.init("(a)\n(b c)\n(d)");

    var token = tokenizer.next();
    try std.testing.expectEqualStrings("(a)", tokenizer.tokenLineContext(token));

    while (!std.mem.eql(u8, tokenizer.tokenValue(token), "c")) token = tokenizer.next();
    try std.testing.expectEqualStrings("(b c)", tokenizer.tokenLineContext(token));

    while (!std.mem.eql(u8, tokenizer.tokenValue(token), "d")) token = tokenizer.next();
    try std.testing.expectEqualStrings("(d)", tokenizer.tokenLineContext(token));
}
// File: src/main.zig
const std = @import("std");
const zig_lisp = @import("zig_lisp");
const Tokenizer = zig_lisp.Tokenizer;
const Expression = zig_lisp.Parser.Expression;
const builtins = zig_lisp.builtins;
const Environment = zig_lisp.Environment;
const log = std.log;
const Reader = std.Io.Reader;
const Writer = std.Io.Writer;
const Allocator = std.mem.Allocator;

pub const std_options = std.Options{
    .log_level = .debug,
    .logFn = logFn,
};

/// Log sink: writes `[level] message` lines to stderr, unbuffered.
/// The scope is ignored. Write failures are dropped on purpose, since there
/// is nowhere left to report them.
pub fn logFn(
    comptime level: log.Level,
    comptime _: @Type(.enum_literal),
    comptime format: []const u8,
    args: anytype,
) void {
    const prefix = "[" ++ comptime level.asText() ++ "] ";
    std.debug.lockStdErr();
    defer std.debug.unlockStdErr();
    var stderr_writer = std.fs.File.stderr().writer(&.{});
    const stderr = &stderr_writer.interface;
    stderr.print(prefix ++ format ++ "\n", args) catch return;
}

/// Read-eval-print loop: reads one line at a time from `stdin`, parses a
/// single expression from it, evaluates it and prints the result to `stdout`.
///
/// Returns normally when `stdin` is exhausted. A line that fails to parse is
/// reported on `stderr` and the loop continues; I/O errors, allocation
/// failure and evaluation errors end the loop and are returned.
fn repl(
    stdin: *Reader,
    stdout: *Writer,
    stderr: *Writer,
    allocator: Allocator,
) !void {
    var env: Environment = .init(stdin, stdout, stderr, allocator);
    const prompt = "(zig-lisp) > ";
    // `writeAll`, not `write`: the latter may accept only part of the bytes.
    try env.stdout.writeAll(prompt);
    try env.stdout.flush();

    while (env.stdin.takeDelimiterInclusive('\n')) |buf| : ({
        try env.stdout.writeAll(prompt);
        try env.stdout.flush();
    }) {
        // The tokenizer needs a NUL-terminated buffer, and `buf` is only a
        // view into the reader, so take a private copy of the line.
        const input: [:0]u8 = try allocator.dupeZ(u8, buf);

        var tokenizer = Tokenizer.init(input);
        const expr = Expression.parse(&tokenizer, env.allocator, null) catch |err| {
            // Nothing was produced, so nothing can still point into `input`.
            allocator.free(input);
            switch (err) {
                // Blank line (or only comments): just prompt again.
                error.EndOfFile => continue,
                error.OutOfMemory => return err,
                // A typo must not terminate the session.
                else => {
                    try env.stderr.print("parse error: {s}\n", .{@errorName(err)});
                    try env.stderr.flush();
                    continue;
                },
            }
        };
        // NOTE(review): `input` is intentionally not freed on this path. The
        // parsed expression, and anything evaluation stores from it, may
        // still reference slices of this buffer unless the parser copies
        // token text. Confirm ownership before adding a free here.

        // NOTE(review): evaluation errors still end the REPL; the error set
        // of `builtins.eval` is not visible from this file.
        const result = try builtins.eval(&env, expr);
        try env.stdout.print("{f}\n", .{result});
    } else |err| switch (err) {
        error.EndOfStream => return,
        else => return err,
    }
}

/// Entry point: wires stdin/stdout/stderr to the REPL.
pub fn main() !void {
    var debug_allocator = std.heap.DebugAllocator(.{}).init;
    const alloc = debug_allocator.allocator();

    // A line longer than this buffer cannot be returned by
    // `takeDelimiterInclusive` and ends the REPL with an error.
    var stdin_buf: [1024]u8 = undefined;
    var stdin_reader = std.fs.File.stdin().readerStreaming(&stdin_buf);
    const stdin = &stdin_reader.interface;

    var stdout_buf: [1024]u8 = undefined;
    var stdout_writer = std.fs.File.stdout().writer(&stdout_buf);
    const stdout = &stdout_writer.interface;

    // Unbuffered, so diagnostics appear immediately.
    var stderr_writer = std.fs.File.stderr().writer(&.{});
    const stderr = &stderr_writer.interface;

    try repl(stdin, stdout, stderr, alloc);
}
var stdin_reader = std.fs.File.stdin().readerStreaming(&stdin_buf); + const stdin = &stdin_reader.interface; + + var stdout_buf: [1024]u8 = undefined; + var stdout_writer = std.fs.File.stdout().writer(&stdout_buf); + const stdout = &stdout_writer.interface; + + var stderr_writer = std.fs.File.stderr().writer(&.{}); + const stderr = &stderr_writer.interface; + + try repl(stdin, stdout, stderr, alloc); +} diff --git a/src/root.zig b/src/root.zig new file mode 100644 index 0000000..968d5a5 --- /dev/null +++ b/src/root.zig @@ -0,0 +1,6 @@ +//! By convention, root.zig is the root source file when making a library. +const std = @import("std"); +pub const Tokenizer = @import("Tokenizer.zig"); +pub const Parser = @import("Parser.zig"); +pub const Environment = @import("vm/Environment.zig"); +pub const builtins = @import("vm/builtins.zig"); diff --git a/src/vm/Environment.zig b/src/vm/Environment.zig new file mode 100644 index 0000000..ca7e94b --- /dev/null +++ b/src/vm/Environment.zig @@ -0,0 +1,45 @@ +const Environment = @This(); +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Expression = @import("../Parser.zig").Expression; +const Reader = std.Io.Reader; +const Writer = std.Io.Writer; + +stdin: *Reader, +stdout: *Writer, +stderr: *Writer, +allocator: Allocator, +binds: std.StringHashMap(Expression), + +pub fn init( + stdin: *Reader, + stdout: *Writer, + stderr: *Writer, + allocator: Allocator, +) Environment { + return .{ + .stdin = stdin, + .stdout = stdout, + .stderr = stderr, + .allocator = allocator, + .binds = .init(allocator), + }; +} + +pub fn bind(self: *Environment, symbol: []const u8, value: Expression) !void { + // We manage the keys from now on + const key = try self.allocator.dupe(u8, symbol); + try self.binds.put(key, value); +} + +pub fn get(self: Environment, symbol: []const u8) !Expression { + return self.binds.get(symbol) orelse error.VoidVariable; +} + +pub fn deinit(self: *Environment) void { + var it = self.keyIterator(); + 
while (it.next()) |key| { + self.allocator.free(key); + } + self.binds.deinit(); +} diff --git a/src/vm/builtins.zig b/src/vm/builtins.zig new file mode 100644 index 0000000..d5b23b3 --- /dev/null +++ b/src/vm/builtins.zig @@ -0,0 +1,151 @@ +const std = @import("std"); +const DoublyLinkedList = std.DoublyLinkedList; +const log = std.log; + +const Expression = @import("../Parser.zig").Expression; +const Environment = @import("Environment.zig"); +const special_forms = @import("special_forms.zig"); + +const builtins = @This(); +const Builtin = *const fn (*Environment, DoublyLinkedList) anyerror!Expression; + +const table = std.StaticStringMap(Builtin).initComptime(.{ + .{ "symbolp", symbolp }, + .{ "boundp", boundp }, + .{ "+", @"+" }, + .{ "*", @"*" }, + .{ "<", @"<" }, + // .{ "<=", @"<=" }, + // .{ "-", @"-" }, + // .{ "/", @"/" }, + // .{ "=", @"=" }, + // .{ ">", @">" }, + // .{ ">=", @">=" }, +}); + +fn objFromBool(b: bool) Expression { + return .{ .tag = if (b) .nil else .t }; +} + +pub fn isBuiltin(symbol: []const u8) ?Builtin { + return table.get(symbol); +} + +pub fn symbolp(_: *Environment, args: DoublyLinkedList) !Expression { + if (args.len() != 1) { + return error.WrongNumberOfArguments; + } + const expr: *Expression = @fieldParentPtr("node", args.first.?); + return objFromBool(symbolpImpl(expr.*)); +} + +pub inline fn symbolpImpl(expr: Expression) bool { + return expr.tag == .symbol; +} + +pub fn boundp(env: *Environment, args: DoublyLinkedList) !Expression { + if (args.len() != 1) { + return error.WrongNumberOfArguments; + } + const arg: *Expression = @fieldParentPtr("node", args.first.?); + if (!symbolpImpl(arg.*)) { + return error.WrongTypeArgument; + } + return objFromBool(boundpImpl(env, arg.*.tag.symbol)); +} + +pub inline fn boundpImpl(env: *const Environment, symbol: []const u8) bool { + _ = env.get(symbol) catch return false; + return true; +} + +pub fn @"+"(env: *Environment, args: DoublyLinkedList) !Expression { + var sum: i64 = 0; + var it = 
args.first; + while (it) |node| : (it = node.next) { + const expr: *Expression = @fieldParentPtr("node", node); + const result = try builtins.eval(env, expr.*); + if (result.tag != .integer) { + return error.WrongTypeArgument; + } + sum += result.tag.integer; + } + return .{ .tag = .{ .integer = sum } }; +} + +pub fn @"*"(env: *Environment, args: DoublyLinkedList) !Expression { + var product: i64 = 1; + var it = args.first; + while (it) |node| : (it = node.next) { + const expr: *Expression = @fieldParentPtr("node", node); + const result = try builtins.eval(env, expr.*); + if (result.tag != .integer) { + return error.WrongTypeArgument; + } + product *= result.tag.integer; + } + return .{ .tag = .{ .integer = product } }; +} + +pub fn @"<"(env: *Environment, args: DoublyLinkedList) !Expression { + if (args.len() < 2) { + return error.WrongNumberOfArguments; + } + + const first: *Expression = @fieldParentPtr("node", args.first.?); + const first_result = try builtins.eval(env, first.*); + const second: *Expression = @fieldParentPtr("node", args.first.?.next.?); + const second_result = try builtins.eval(env, second.*); + if (first_result.tag != .integer or second_result.tag != .integer) { + return error.WrongTypeArgument; + } + + return Expression.fromBool(first_result.tag.integer < second_result.tag.integer); +} + +pub fn message(env: *Environment, list: DoublyLinkedList) anyerror!Expression { + _ = env; // autofix + _ = list; // autofix +} + +fn evalFuncall(env: *Environment, list: DoublyLinkedList) anyerror!Expression { + const form: Expression = @as(*Expression, @fieldParentPtr("node", list.first.?)).*; + const args = DoublyLinkedList{ .first = form.node.next, .last = list.last }; + + if (symbolpImpl(form)) { + if (special_forms.isSpecialForm(form.tag.symbol)) |special_form| { + return special_form(env, args); + } + + if (builtins.isBuiltin(form.tag.symbol)) |builtin| { + return builtin(env, args); + } + } + + const func = try eval(env, form); + _ = func; + return 
error.NotImplemented; +} + +fn evalList(env: *Environment, list: DoublyLinkedList) anyerror!Expression { + var out_list: DoublyLinkedList = .{ .first = null, .last = null }; + var it = list.first; + while (it) |node| : (it = node.next) { + const expr: *Expression = @fieldParentPtr("node", node); + var result = try eval(env, expr.*); + out_list.append(&result.node); + log.debug("{f}", .{result}); + } + return .{ .tag = .{ .list = out_list } }; +} + +pub fn eval(env: *Environment, form: Expression) anyerror!Expression { + const result: Expression = try switch (form.tag) { + .nil, .t, .integer, .float, .string, .keyword => form, + .quoted => |quoted| quoted.*, + .symbol => |symbol| env.get(symbol), + .funcall => |funcall| evalFuncall(env, funcall), + .list => |list| evalList(env, list), + }; + return try result.clone(env.allocator); +} diff --git a/src/vm/special_forms.zig b/src/vm/special_forms.zig new file mode 100644 index 0000000..bf0d61a --- /dev/null +++ b/src/vm/special_forms.zig @@ -0,0 +1,93 @@ +const std = @import("std"); +const DoublyLinkedList = std.DoublyLinkedList; +const Node = DoublyLinkedList.Node; +const log = std.log; + +const Expression = @import("../Parser.zig").Expression; +const builtins = @import("builtins.zig"); +const Environment = @import("Environment.zig"); + +const SpecialForm = *const fn (*Environment, DoublyLinkedList) anyerror!Expression; + +const table = std.StaticStringMap(SpecialForm).initComptime(.{ + .{ "setq", setq }, + .{ "if", @"if" }, + .{ "while", @"while" }, +}); + +pub fn isSpecialForm(symbol: []const u8) ?SpecialForm { + return table.get(symbol); +} + +inline fn isEven(T: type, num: T) bool { + return num & 1 == 0; +} + +inline fn isOdd(T: type, num: T) bool { + return !isEven(T, num); +} + +pub fn setq(env: *Environment, args: DoublyLinkedList) !Expression { + if (isOdd(usize, args.len())) { + return error.WrongNumberOfArguments; + } + + var it = args.first; + var last_result: Expression = .{ .tag = .nil }; + while (it) 
|node| : (it = node.next.?.next) { + const assignee: *Expression = @fieldParentPtr("node", node); + if (!builtins.symbolpImpl(assignee.*)) { + return error.WrongTypeArgument; + } + const expr: *Expression = @fieldParentPtr("node", node.next.?); + const value = try builtins.eval(env, expr.*); + last_result = value; + try env.bind(assignee.tag.symbol, value); + } + return last_result; +} + +pub fn @"if"(env: *Environment, args: DoublyLinkedList) !Expression { + if (args.len() < 2) { // Need at least a condition and a ‘then’ branch + return error.WrongNumberOfArguments; + } + + const condition: *Expression = @fieldParentPtr("node", args.first.?); + const condition_result = try builtins.eval(env, condition.*); + if (condition_result.toBool()) { + const then: *Expression = @fieldParentPtr("node", args.first.?.next.?); + return try builtins.eval(env, then.*); + } + + // else + var last_result = Expression{ .tag = .nil }; + var it = args.first; + while (it) |node| : (it = node.next) { + const else_expr: *Expression = @fieldParentPtr("node", node); + last_result = try builtins.eval(env, else_expr.*); + } + return last_result; +} + +pub fn @"while"(env: *Environment, args: DoublyLinkedList) !Expression { + if (args.len() < 2) { // Need at least a condition and one body expression + return error.WrongNumberOfArguments; + } + + const condition: *Expression = @fieldParentPtr("node", args.first.?); + while (true) { + const condition_result = try builtins.eval(env, condition.*); + if (!condition_result.toBool()) { + break; + } + + // body + var it = args.first; + while (it) |node| : (it = node.next) { + const else_expr: *Expression = @fieldParentPtr("node", node); + _ = try builtins.eval(env, else_expr.*); + } + } + + return .{ .tag = .nil }; +}