From 3b7bab1e1bf130a0c6973fa1a70cd69c2060da6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Fri, 5 Jun 2026 00:26:59 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20L=20Language=20v0.1=20=E7=BC=96?= =?UTF-8?q?=E8=AF=91=E5=99=A8=E5=AE=8C=E6=95=B4=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5 阶段编译流水线: 词法分析 → 语法分析(Pratt) → 语义分析(类型推断) → LLVM IR → .exe 模块: - lexer: 手写状态机, 40 种 Token, // 和 /* */ 注释 - parser: Pratt 表达式解析(9 级优先级) + 递归下降语句/函数 - ast: 14 种节点类型 + 工厂函数 - sema: 作用域链符号表 + 类型推断 + 类型检查 - codegen: AST → LLVM-C API, print_i64/f64/bool 内建 - driver: 命令行 + 流水线串联 + 错误报告 - util: Arena bump allocator (8MB) 测试: 65 单元测试(词法41+语法15+语义9) + 5 集成测试 全部通过 语言特性: i64/f64/bool/void, let不可变变量, if/else, while, 递归函数 --- .gitignore | 37 + CHANGELOG.md | 19 + CLAUDE.md | 184 ++ CMakeLists.txt | 94 + CODE_OF_CONDUCT.md | 36 + CONTRIBUTING.md | 79 + LICENSE | 21 + README.md | 238 ++ SECURITY.md | 40 + docs/PRD.md | 405 +++ .../plans/2026-06-04-l-lang-v0.1.md | 2512 +++++++++++++++++ include/l_lang.h | 38 + src/ast/ast.c | 113 + src/ast/ast.h | 96 + src/codegen/codegen.c | 383 +++ src/codegen/codegen.h | 12 + src/driver/error.c | 49 + src/driver/error.h | 23 + src/driver/main.c | 169 ++ src/lexer/lexer.c | 127 + src/lexer/lexer.h | 13 + src/lexer/token.c | 49 + src/lexer/token.h | 45 + src/parser/parser.c | 325 +++ src/parser/parser.h | 13 + src/sema/sema.c | 264 ++ src/sema/sema.h | 12 + src/sema/symbol.c | 48 + src/sema/symbol.h | 40 + src/util/arena.c | 45 + src/util/arena.h | 17 + test/programs/01_arithmetic.l | 5 + test/programs/02_if_else.l | 9 + test/programs/03_recurse.l | 11 + test/programs/04_fib_recursive.l | 12 + test/programs/05_float.l | 14 + test/test_lexer.c | 53 + test/test_parser.c | 68 + test/test_sema.c | 59 + test/test_utils.h | 27 + 40 files changed, 5804 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 CLAUDE.md 
create mode 100644 CMakeLists.txt create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 SECURITY.md create mode 100644 docs/PRD.md create mode 100644 docs/superpowers/plans/2026-06-04-l-lang-v0.1.md create mode 100644 include/l_lang.h create mode 100644 src/ast/ast.c create mode 100644 src/ast/ast.h create mode 100644 src/codegen/codegen.c create mode 100644 src/codegen/codegen.h create mode 100644 src/driver/error.c create mode 100644 src/driver/error.h create mode 100644 src/driver/main.c create mode 100644 src/lexer/lexer.c create mode 100644 src/lexer/lexer.h create mode 100644 src/lexer/token.c create mode 100644 src/lexer/token.h create mode 100644 src/parser/parser.c create mode 100644 src/parser/parser.h create mode 100644 src/sema/sema.c create mode 100644 src/sema/sema.h create mode 100644 src/sema/symbol.c create mode 100644 src/sema/symbol.h create mode 100644 src/util/arena.c create mode 100644 src/util/arena.h create mode 100644 test/programs/01_arithmetic.l create mode 100644 test/programs/02_if_else.l create mode 100644 test/programs/03_recurse.l create mode 100644 test/programs/04_fib_recursive.l create mode 100644 test/programs/05_float.l create mode 100644 test/test_lexer.c create mode 100644 test/test_parser.c create mode 100644 test/test_sema.c create mode 100644 test/test_utils.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cefb27b --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +/bin/ +/build/ +/.idea/ +/.vscode/ +/.claude/ +/.trae/ +/.dist/ +*.iml + +# 代码图索引 +/.codegraph/ + +# 编译产物 +*.o +*.obj +*.exe +*.out +*.a +*.lib +*.dll +*.exp + +# LLVM IR +*.ll +*.bc + +# 临时文件 +*.tmp +*.swp +*~ +.DS_Store +Thumbs.db + +# 归档 +*.zip +*.tar.gz +*.7z diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..235a3d7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +## 0.1.0 (2026-06-05) + +### Added +- 
词法分析器:手写状态机,40 种 Token 类型,支持 `//` 和 `/* */` 注释 +- 语法分析器:Pratt 表达式解析(9 级优先级)+ 递归下降语句/函数解析 +- AST:14 种节点类型,工厂函数模式创建 +- 语义分析器:作用域链符号表 + 类型推断 + 类型检查 +- LLVM IR 代码生成:全 AST 节点覆盖,内建 `print_i64` / `print_f64` / `print_bool` +- 驱动程序:命令行参数解析 + 编译流水线串联 + `--emit-ir` 调试模式 +- Arena bump allocator (8MB) +- 错误报告:ANSI 红色高亮,文件名:行号:列号 格式 +- 类型系统:`i64` / `f64` / `bool` / `void`,`let` 不可变变量,类型推断 +- 控制流:`if` / `else`,`while` 循环,`return` 语句 +- 函数:多参数、递归、可选返回类型标注 +- 65 个单元测试 (词法 41 + 语法 15 + 语义 9) +- 5 个集成测试 (算术、分支、递归、斐波那契、浮点) +- CMake 构建系统,静态库 + 可执行文件 + 测试分离 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..dfcb439 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,184 @@ +# CLAUDE.md + +## 项目概述 + +L Language v0.1 — 用 C17 实现的静态类型编译型编程语言,Rust 风格语法,LLVM 22.x 后端。经典 5 阶段流水线:词法 → 语法 → 语义 → IR → 可执行文件。 + +## 构建命令 + +```bash +# 配置(仅首次) +cd build +cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:/settings/Language/LLVM" + +# 编译 +mingw32-make -j4 + +# 编译单个目标 +mingw32-make l_lang +mingw32-make l_lang_lib +``` + +## 架构 + +``` +源文件(.l) → 词法分析 → 语法分析 → 语义分析 → IR 生成 → 可执行文件 + Token[] AstNode* 带类型AST LLVM Module .exe +``` + +``` +L Language/ +├── include/ +│ └── l_lang.h 公共头文件 (TypeKind 枚举, 向前声明) +├── src/ +│ ├── lexer/ +│ │ ├── token.h/c Token {kind, start, length, line, col} +│ │ └── lexer.h/c 手写状态机,40 种 Token 类型 +│ ├── parser/ +│ │ └── parser.h/c Pratt 表达式 (9 级优先级) + 递归下降语句 +│ ├── ast/ +│ │ └── ast.h/c 14 种节点 (PROGRAM..IDENT_EXPR) + 工厂函数 +│ ├── sema/ +│ │ ├── symbol.h/c 作用域链 (Scope* parent 链表) +│ │ └── sema.h/c 类型推断 + 类型检查 + 3 个内建函数注册 +│ ├── codegen/ +│ │ └── codegen.h/c AST → LLVM-C API → LLVMModuleRef +│ ├── driver/ +│ │ ├── main.c 入口 + 命令行解析 + 流水线串联 +│ │ └── error.h/c ErrorInfo / ErrorList 错误报告 +│ └── util/ +│ └── arena.h/c Bump allocator (8MB, 8 字节对齐) +├── test/ +│ ├── test_utils.h 断言宏 (ASSERT / TEST_RUN / test_summary) +│ ├── test_lexer.c 词法测试 (41 tests) +│ ├── test_parser.c 语法测试 (15 tests) +│ ├── test_sema.c 语义测试 (9 tests) +│ └── programs/ .l 集成测试 (5 个程序) +├── docs/ +│ ├── 
PRD.md 产品需求文档 +│ └── superpowers/plans/ 实现计划 +├── CMakeLists.txt l_lang_lib (静态库) + l_lang (exe) + 测试 +└── README.md +``` + +## 核心 API 参考 + +### 词法分析 + +```c +// lexer.h +Token* lex(Arena* a, const char* source, const char* filename, + size_t* count, ErrorInfo* error); +// 返回: Token 数组(分配在 arena),出错返回 NULL +// Token: {TokenKind kind, const char* start, int length, int line, int col} +``` + +### 语法分析 + +```c +// parser.h +AstNode* parse(Arena* a, const Token* tokens, size_t count, + const char* filename, ErrorInfo* error); +// 返回: AST_PROGRAM 节点,出错返回 NULL +// 支持: 所有语句 (let/if/while/return) + 表达式 (Pratt precedence climbing) +``` + +### 语义分析 + +```c +// sema.h +void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena); +// 副作用: AST 节点填充 type 字段, errors 收集类型错误 +// 内建: scope_insert_function(print_i64, print_f64, print_bool) +``` + +### 代码生成 + +```c +// codegen.h +LLVMModuleRef codegen_module(AstNode* ast, const char* module_name, + const char** error_msg); +// 返回: 已验证的 LLVM Module,出错返回 NULL +// 内建 print_* 函数生成对应的 printf 调用 +``` + +## 类型系统 + +| L 类型 | LLVM 类型 | C 常量创建 | +|--------|-----------|-----------| +| `i64` | `LLVMInt64Type()` | `LLVMConstInt(ty, val, true)` | +| `f64` | `LLVMDoubleType()` | `LLVMConstReal(ty, val)` | +| `bool` | `LLVMInt1Type()` | `LLVMConstInt(ty, val, false)` | +| `void` | `LLVMVoidType()` | — | + +类型推断规则: +- 字面量:`42` → `i64`, `3.14` → `f64`, `true` → `bool` +- `let x = expr` → 从 expr 推断 +- `let x: i64 = expr` → 显式标注优先 +- 算术运算:i64 + i64 → i64, i64 + f64 → f64 (提升) +- 比较运算:返回 `bool` + +## 运算符优先级 + +| 优先级 | 运算符 | +|--------|--------| +| 70 (最高) | `-` (一元负), `!` (一元非) | +| 60 | `*` `/` `%` | +| 50 | `+` `-` | +| 40 | `==` `!=` `<` `>` `<=` `>=` | +| 30 | `&&` | +| 20 | `\|\|` | +| 10 (最低) | — | + +## 错误处理 + +| 阶段 | 策略 | +|------|------| +| 词法分析 | 首个非法字符 → 立即终止,返回 ErrorInfo | +| 语法分析 | 首个语法错误 → 立即终止,返回 ErrorInfo | +| 语义分析 | 收集所有类型错误到 ErrorList → 批量输出 (ANSI 红色) | +| IR 生成 | LLVMVerifyModule → 返回 char* 错误消息 | +| 链接 | system() 返回值检查 → 打印 exit 
code | +| 分配失败 | arena_alloc 返回 NULL → 逐层检查 | + + +## 测试 + +```bash +# 单元测试 (每个 test_*.c 独立编译运行,各有自己的 main) +./l_lang_lexer_test.exe # 41 个断言 +./l_lang_test.exe # 15 个断言 +./l_lang_sema_test.exe # 9 个断言 + +# 集成测试 (编译 .l → 运行 .exe → 检查输出) +for f in ../test/programs/*.l; do + echo "=== $f ===" + ./l_lang.exe "$f" -o /tmp/out.exe && /tmp/out.exe +done +``` + +## 关键约束 + +- **C17 标准**:`-Wall -Wextra -g`,零编译警告 +- **Arena 分配**:Token、AST、符号表全部从 arena 分配,无 malloc/free 散落 +- **LLVM 路径**:`D:\settings\Language\LLVM`,C API 头文件手动补充(v22.1.7 预编译包缺少部分头文件) +- **链接器**:MinGW 环境用 **gcc** 链接(非 clang,避免 MSVC 依赖) +- **Windows**:仅支持 Windows 11 + MinGW-w64 +- **错误消息**:中文,格式 `文件名:行号:列号: 描述` + +## 已知限制 (v0.1) + +- `let` 变量不可变(无 `mut`),循环计数器无法修改 — 迭代算法需递归实现 +- 无字符串类型(`print_*` 是编译器内建,非语言特性) +- 无数组、结构体、枚举、泛型、trait +- 无模块系统(所有函数在单文件) +- 作用域未清理(同函数内变量名不可重用) +- `main` 返回值未被 OS 使用(需 CRT 包装) + +## 版本号升级清单 + +| 文件 | 字段 | +|------|------| +| `CMakeLists.txt` | `VERSION` 变量 | +| `README.md` | badges | +| `CHANGELOG.md` | 版本标题 | diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..53aacdd --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,94 @@ +cmake_minimum_required(VERSION 3.20) +project(l_lang C) + +set(CMAKE_C_STANDARD 17) +set(CMAKE_C_STANDARD_REQUIRED ON) + +# === LLVM 配置 === +set(LLVM_PREFIX "D:/settings/Language/LLVM" CACHE PATH "LLVM installation prefix") + +# 查找 LLVM(优先使用 CONFIG 模式,失败则手动配置) +find_package(LLVM 19 QUIET CONFIG + HINTS ${LLVM_PREFIX}/lib/cmake/llvm + ${LLVM_PREFIX}/cmake +) + +if(NOT LLVM_FOUND) + message(STATUS "LLVM CONFIG not found — using manual LLVM paths") + set(LLVM_FOUND TRUE) + set(LLVM_INCLUDE_DIRS "${LLVM_PREFIX}/include") + set(LLVM_LIBRARY_DIR "${LLVM_PREFIX}/lib") + set(LLVM_LIBRARIES "${LLVM_PREFIX}/lib/LLVM-C.lib") + # 标记使用手动模式 + set(LLVM_MANUAL_MODE ON) +else() + message(STATUS "LLVM found: ${LLVM_DIR}") + message(STATUS "LLVM includes: ${LLVM_INCLUDE_DIRS}") + message(STATUS "LLVM available libs: ${LLVM_AVAILABLE_LIBS}") + 
set(LLVM_MANUAL_MODE OFF) +endif() + +# === 编译器库(不含 main.c,供测试复用)=== +file(GLOB_RECURSE L_LANG_LIB_SOURCES "src/*.c") +list(REMOVE_ITEM L_LANG_LIB_SOURCES "${CMAKE_SOURCE_DIR}/src/driver/main.c") + +add_library(l_lang_lib STATIC ${L_LANG_LIB_SOURCES}) +target_include_directories(l_lang_lib PRIVATE + ${CMAKE_SOURCE_DIR}/include + ${LLVM_INCLUDE_DIRS} + src/util src/lexer src/ast src/parser src/sema src/codegen src/driver +) +target_compile_options(l_lang_lib PRIVATE -Wall -Wextra -g) + +# === 编译器可执行文件 === +add_executable(l_lang src/driver/main.c) +target_link_libraries(l_lang PRIVATE l_lang_lib) +target_include_directories(l_lang PRIVATE + ${CMAKE_SOURCE_DIR}/include + ${LLVM_INCLUDE_DIRS} + src/util src/lexer src/ast src/parser src/sema src/codegen src/driver +) + +# === 链接 LLVM(通过库)=== +if(LLVM_MANUAL_MODE) + target_link_libraries(l_lang_lib PUBLIC ${LLVM_LIBRARIES}) + message(STATUS "Linking LLVM manually: ${LLVM_LIBRARIES}") +else() + target_link_libraries(l_lang_lib PUBLIC LLVM) +endif() + +# === LLVM 定义 === +if(NOT LLVM_MANUAL_MODE) + target_compile_definitions(l_lang_lib PRIVATE ${LLVM_DEFINITIONS}) +endif() + +# === 测试可执行文件(每个测试文件独立编译,各有自己的 main)=== +# Parser 测试 +add_executable(l_lang_test test/test_parser.c) +target_link_libraries(l_lang_test PRIVATE l_lang_lib) +target_include_directories(l_lang_test PRIVATE + ${CMAKE_SOURCE_DIR}/include + ${LLVM_INCLUDE_DIRS} + src/util src/lexer src/ast src/parser src/sema src/codegen src/driver + test +) + +# Lexer 测试 +add_executable(l_lang_lexer_test test/test_lexer.c) +target_link_libraries(l_lang_lexer_test PRIVATE l_lang_lib) +target_include_directories(l_lang_lexer_test PRIVATE + ${CMAKE_SOURCE_DIR}/include + ${LLVM_INCLUDE_DIRS} + src/util src/lexer src/ast src/parser src/sema src/codegen src/driver + test +) + +# Sema 测试 +add_executable(l_lang_sema_test test/test_sema.c) +target_link_libraries(l_lang_sema_test PRIVATE l_lang_lib) +target_include_directories(l_lang_sema_test PRIVATE + ${CMAKE_SOURCE_DIR}/include + 
${LLVM_INCLUDE_DIRS} + src/util src/lexer src/ast src/parser src/sema src/codegen src/driver + test +) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..14d1247 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,36 @@ +# 贡献者行为准则 + +## 我们的承诺 + +为了营造一个开放和友好的环境,我们作为贡献者和维护者承诺:无论年龄、体型、残障、种族、性别认同和表达、经验水平、国籍、个人外貌、宗教、性取向或身份,参与本项目不会受到骚扰。 + +## 我们的标准 + +有助于创造积极环境的行为包括: + +- 使用友好和包容的语言 +- 尊重不同的观点和经验 +- 优雅地接受建设性批评 +- 关注对社区最有利的事情 +- 对其他社区成员表示同理心 + +不可接受的行为包括: + +- 使用性暗示语言或图像以及不受欢迎的性关注 +- 侮辱/贬损性评论以及人身攻击或政治攻击 +- 公开或私下的骚扰 +- 未经明确许可发布他人的私人信息 + +## 我们的责任 + +项目维护者有责任澄清可接受行为的标准,并应对任何不可接受的行为采取适当和公平的纠正措施。 + +## 范围 + +本行为准则适用于项目空间和代表项目的公共空间。 + +## 执行 + +可通过 GitHub Issues 或直接联系维护者报告辱骂、骚扰或其他不可接受的行为。所有投诉将被审查和调查,并将产生被认为必要且适合情况的回应。 + +本项目改编自 [Contributor Covenant](https://www.contributor-covenant.org) 2.1 版。 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..40d8d1d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,79 @@ +# 贡献指南 + +## 本地开发环境 + +- **GCC** 14.x+ (MinGW-w64) +- **CMake** 3.20+ +- **LLVM** 22.x(需要 C API 库和头文件,路径见 CLAUDE.md) + +## 开发流程 + +1. Fork 本仓库 +2. `git clone <你的 fork>` +3. `git checkout -b feature/xxx` +4. 开发 + 测试 +5. `git commit`(遵循约定式提交格式) +6. `git push` +7. 
提交 Pull Request + +## 运行测试 + +```bash +cd build + +# 单元测试 +./l_lang_lexer_test.exe # 词法分析 +./l_lang_test.exe # 语法分析 +./l_lang_sema_test.exe # 语义分析 + +# 集成测试 +for f in ../test/programs/*.l; do + ./l_lang.exe "$f" -o /tmp/out.exe && /tmp/out.exe +done +``` + +## 代码规范 + +### C 代码 + +- C17 标准,`-Wall -Wextra -g` 零警告 +- 注释用中文 +- 内存统一使用 Arena bump allocator,不在局部函数内 malloc/free +- 错误信息格式:`文件名:行号:列号: 描述` +- 函数聚焦(< 100 行),文件内聚(< 500 行) + +### 编译器架构约定 + +- 每个编译阶段独立模块,通过头文件声明公共接口 +- 新增 AST 节点需同步更新 `sema.c` 和 `codegen.c` +- 新增 Token 类型需同步更新 `lexer.c` 和 `NAMES[]` 数组 +- 新增内建函数需在 `sema.c`(注册签名)和 `codegen.c`(生成 IR)两处实现 + +## 提交格式 + +``` +<类型>: <描述> +``` + +类型:`feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `perf` + +## 项目结构 + +``` +src/ +├── lexer/ 词法分析器 +├── parser/ 语法分析器 +├── ast/ AST 定义 +├── sema/ 语义分析 +├── codegen/ LLVM IR 生成 +├── driver/ 主入口 + 错误报告 +└── util/ 内存池 +test/ 测试 +docs/ 文档 +``` + +## 开始贡献前 + +- 大改动建议先开 Issue 讨论 +- 新语言特性需要在 `test/programs/` 添加对应的集成测试 +- 不要引入编译警告 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fb01715 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 刘航宇 (LHY0125) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..352eff4 --- /dev/null +++ b/README.md @@ -0,0 +1,238 @@ +

+

L Language

+

用 C17 实现的静态类型编译型编程语言

+

+ +

+ version + C + LLVM + GCC + tests + license +

+ +--- + +## 简介 + +L Language 是一门学习型编译语言,手写词法分析、递归下降 + Pratt 解析、语义分析和 LLVM IR 代码生成,最终生成原生可执行文件。语法借鉴 Rust,类型系统支持类型推断。 + +```rust +fn fib(n: i64) -> i64 { + if n < 2 { return n; } + return fib(n - 1) + fib(n - 2); +} + +fn main() -> i64 { + print_i64(fib(10)); // 输出 55 + return 0; +} +``` + +## 架构 + +``` +源码(.l) → 词法分析(Token) → 语法分析(AST) → 语义分析(类型标注) → LLVM IR → 可执行文件 +``` + +```mermaid +graph TB + subgraph 前端["编译器前端"] + Lexer[词法分析器
手写状态机
40 种 Token] + Parser[语法分析器
递归下降 + Pratt
14 种 AST 节点] + Sema[语义分析器
作用域链 + 类型推断
类型检查 + 错误收集] + end + + subgraph 后端["编译器后端"] + Codegen[LLVM IR 生成
AST → LLVM-C API
内建 print 函数] + Link[链接器
clang/lld
生成 .exe] + end + + subgraph 运行时["运行时支持"] + Builtins[内建函数
print_i64 / print_f64
print_bool → printf] + end + Source[源码 .l] --> Lexer + Lexer --> Parser + Parser --> Sema + Sema --> Codegen + Codegen --> Link + Link --> Exe[可执行文件 .exe] + Builtins -.-> Codegen +``` + +### 模块职责 + +| 模块 | 输入 | 输出 | 核心结构 | +|------|------|------|----------| +| `lexer/` | `char*` 源码 | `Token[]` | `Token` {kind, start, length, line, col} | +| `parser/` | `Token[]` | `AstNode*` | 14 种节点 (Program..IdentExpr) | +| `ast/` | — | 工厂函数 | `AstNode` {kind, type, as{union}} | +| `sema/` | `AstNode*` | 类型标注 | `Scope` 作用域链 + `Symbol` 符号表 | +| `codegen/` | `AstNode*` | `LLVMModuleRef` | `CgCtx` {module, builder, var_table} | +| `driver/` | 命令行参数 | exit code | 流水线串联 + 错误报告 | + +## 功能 (v0.1) + +### 类型系统 + +| 类型 | 关键字 | 说明 | +|------|--------|------| +| 64 位有符号整数 | `i64` | `42`, `-7` | +| 64 位浮点数 | `f64` | `3.14`, `-0.5` | +| 布尔值 | `bool` | `true`, `false` | +| 无返回值 | `void` | 函数不返回值时使用 | + +- `let` 不可变变量,支持可选类型标注和类型推断 +- 类型在编译时完全确定,无隐式转换(除 `i64` → `f64` 自动提升) + +### 控制流 + +- `if` / `else` 条件分支(支持 `else if` 链) +- `while` 循环 +- `return` 提前返回(可选带表达式) + +### 函数 + +- 多参数,显式返回类型(可省略,默认 `void`) +- 递归调用 +- 内建函数:`print_i64`, `print_f64`, `print_bool` + +## 安装 + +### 依赖 + +- **GCC** 15.x (MinGW-w64) +- **CMake** ≥ 3.20 +- **LLVM** 22.x(C API 库 + 头文件) + +### 从源码构建 + +```bash +git clone <repo-url> +cd "L Language" +mkdir build && cd build +cmake .. 
-G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:/settings/Language/LLVM" +mingw32-make -j4 +``` + +生成 `l_lang.exe`。 + +## 使用 + +```bash +# 编译并运行 +./l_lang.exe example.l -o example.exe +./example.exe + +# 查看生成的 LLVM IR +./l_lang.exe example.l --emit-ir +``` + +## 开发 + +```bash +# 构建 +cd build && mingw32-make -j4 + +# 运行全部测试 (65 单元 + 5 集成) +./l_lang_lexer_test.exe # 词法分析 (41 tests) +./l_lang_test.exe # 语法分析 (15 tests) +./l_lang_sema_test.exe # 语义分析 (9 tests) + +# 集成测试 +for f in ../test/programs/*.l; do + ./l_lang.exe "$f" -o out.exe && ./out.exe +done +``` + +### 技术栈 + +| 层 | 技术 | +|----|------| +| 实现语言 | C17 (GCC 15.x) | +| 构建系统 | CMake 3.20+ | +| IR 后端 | LLVM 22.1.7 C API | +| 链接器 | clang / lld | +| 内存管理 | Arena bump allocator | +| 测试框架 | 手写断言宏 (ASSERT / TEST_RUN / test_summary) | + +### 项目结构 + +``` +include/l_lang.h # 公共类型定义 (TypeKind, 向前声明) +src/ +├── lexer/ # 词法分析器 +│ ├── token.h/c # Token 类型 + 工具函数 +│ └── lexer.h/c # 状态机 lex() +├── parser/ +│ └── parser.h/c # Pratt 表达式 + 递归下降 parse() +├── ast/ +│ └── ast.h/c # 14 种节点定义 + 创建函数 +├── sema/ +│ ├── symbol.h/c # 作用域链 (查/插) +│ └── sema.h/c # 类型推断 + 检查 sema_analyze() +├── codegen/ +│ └── codegen.h/c # AST → LLVM IR codegen_module() +├── driver/ +│ ├── main.c # 入口 + 命令行 + 流水线串联 +│ └── error.h/c # 错误报告 (ErrorInfo / ErrorList) +└── util/ + └── arena.h/c # Bump allocator (8MB) +test/ +├── test_utils.h # 断言宏 +├── test_lexer.c # 词法测试 (41 tests) +├── test_parser.c # 语法测试 (15 tests) +├── test_sema.c # 语义测试 (9 tests) +└── programs/ # 集成测试 (.l 源文件) +docs/ +├── PRD.md # 产品需求文档 +└── superpowers/plans/ # 实现计划 +``` + +## 错误处理 + +| 阶段 | 策略 | +|------|------| +| 词法分析 | 首个非法字符即终止,报告 文件名:行:列: 错误信息 | +| 语法分析 | 首个语法错误即终止,报告期望 vs 实际 | +| 语义分析 | 收集所有类型错误后批量输出,红色 ANSI 高亮 | +| IR 生成 | LLVMVerifyModule 验证失败 → 输出 LLVM 诊断 | +| 链接 | system() 返回值检查,失败时打印 exit code | + +## 贡献 + +欢迎提交 Issue 和 Pull Request。 + +### 本地开发环境 + +- GCC 14.x+ (MinGW-w64) +- CMake 3.20+ +- LLVM 22.x(需要 C API 库和头文件) + +### 代码规范 + +- C17 标准,`-Wall -Wextra -g` 零警告 +- 注释用中文 +- Arena 
内存池贯穿全流水线,不在局部函数内 malloc +- 错误信息格式:`文件名:行:列: 描述` +- 提交格式:`<类型>: <描述>`(feat/fix/refactor/docs/test/chore) + +## 版本号升级清单 + +版本号需在 **3 个地方** 手动修改: + +| 文件 | 字段 | 说明 | +|------|------|------| +| `CMakeLists.txt` | `VERSION` 变量 | CMake `project()` | +| `README.md` | badges | 文档徽章 | +| `CHANGELOG.md` | 版本标题 | 变更日志 | + +## 许可证 + +MIT License + +## 作者 + +[刘航宇](https://github.com/LHY0125) — 河南理工大学人工智能协会 diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..a9970ff --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,40 @@ +# 安全策略 + +## 报告漏洞 + +如果你发现安全漏洞,请**不要**在公开 Issue 中报告。请通过以下方式私下报告: + +- GitHub: 在 [Security Advisories](https://github.com/LHY0125/l-language/security/advisories) 页面提交 +- 邮件: 联系项目维护者 + +我们会在 **48 小时内**确认收到报告,并在 7 天内提供初步评估和修复计划。 + +## 安全最佳实践 + +### 作为用户 + +- 仅从 [Releases](https://github.com/LHY0125/l-language/releases) 页面下载编译好的二进制文件 +- 编译的 `.l` 源文件在当前目录生成 `.o` 和 `.exe` 文件 +- 不要编译不可信来源的 `.l` 源文件 + +### 作为开发者 + +- 永远不要在源代码中硬编码密钥或凭据 +- 所有外部输入(源文件)在系统边界验证 +- Arena 分配失败时逐层返回 NULL,不做静默回退 +- `vsnprintf` 缓冲区用完时检查返回值 +- `strtoll` / `strtod` 解析 Token 值前检查长度边界 + +## 已知限制 + +- v0.1 不支持沙箱或权限控制 — 编译出的可执行文件具有当前用户的所有权限 +- `print_*` 内建函数直接调用 `printf`,format string 为硬编码常量,无注入风险 +- 不支持文件 I/O、网络、外部 FFI 调用(v0.1 语言能力有限,攻击面极小) + +## 支持版本 + +| 版本 | 支持状态 | +|------|----------| +| v0.1.x | 活跃开发中 | + +> v0.1 处于早期开发阶段,API 和语言语法可能发生破坏性变更。生产环境请勿使用。 diff --git a/docs/PRD.md b/docs/PRD.md new file mode 100644 index 0000000..34d4f8a --- /dev/null +++ b/docs/PRD.md @@ -0,0 +1,405 @@ +# L Language PRD(产品需求文档) + +> 版本: v0.1 | 日期: 2026-06-04 | 作者: 刘航宇 (AI 辅助) + +--- + +## 1. 项目概述 + +### 1.1 一句话描述 + +用 C 语言实现一门静态类型、Rust 风格语法、多范式混合的编译型编程语言 "L Language"。 + +### 1.2 目标 + +| | 短期(v0.1) | 远期 | +|---|-------------|------| +| 定位 | 学习编译器全流程 | 真正能用的通用编程语言 | +| 能力 | 计算器级 — 基本类型、算术、if/while、函数 | 模块、泛型、trait、所有权等 | +| 标准 | 跑通全流水线就是胜利 | 自举 | + +### 1.3 非目标(v0.1 不做) + +- 字符串类型(只有字面量用于 `print`) +- 数组 / 切片 / 结构体 +- 模块系统和多文件编译 +- 泛型、trait、模式匹配 +- 任何标准库 +- 垃圾回收或自动内存管理 + +--- + +## 2. 
语言规范(v0.1) + +### 2.1 类型系统 + +| 类型 | 关键字 | 占位 | 示例 | +|------|--------|------|------| +| 有符号 64 位整数 | `i64` | 64 bit | `42`、`-7` | +| 64 位浮点数 | `f64` | 64 bit | `3.14`、`-0.5` | +| 布尔值 | `bool` | 1 bit | `true`、`false` | +| 无返回值 | `void` | — | 函数不返回值时使用 | + +类型推断规则: +- `let` 声明时从初始化表达式推断类型,无需显式标注 +- 函数参数和返回值必须显式标注类型 +- 变量一旦推断出类型就固定(强类型、静态类型) + +### 2.2 语法(EBNF 摘要) + +```ebnf +program = { function } +function = "fn" IDENT "(" [params] ")" ["->" type] block +params = param { "," param } +param = IDENT ":" type +type = "i64" | "f64" | "bool" | "void" + +block = "{" { statement } [expression] "}" +statement = let_stmt | if_stmt | while_stmt | return_stmt | expr_stmt +let_stmt = "let" IDENT "=" expression ";" (* 变量不可变,无赋值语句 *) +if_stmt = "if" expression block ["else" (if_stmt | block)] +while_stmt = "while" expression block +return_stmt = "return" [expression] ";" +expr_stmt = expression ";" + +expression = logical_or +logical_or = logical_and { "||" logical_and } +logical_and = comparison { "&&" comparison } +comparison = term { ("==" | "!=" | "<" | ">" | "<=" | ">=") term } +term = factor { ("+" | "-") factor } +factor = unary { ("*" | "/" | "%") unary } +unary = ("-" | "!") unary | primary +primary = NUMBER | BOOL | IDENT | call | "(" expression ")" +call = IDENT "(" [args] ")" +args = expression { "," expression } +``` + +### 2.3 内置函数(编译器提供,非语言特性) + +| 函数 | 说明 | +|------|------| +| `print_i64(x: i64) -> void` | 打印整数并换行 | +| `print_f64(x: f64) -> void` | 打印浮点数并换行 | +| `print_bool(x: bool) -> void` | 打印布尔值并换行 | + +### 2.4 示例程序 + +```rust +fn fib(n: i64) -> i64 { + if n < 2 { + return n; + } + return fib(n - 1) + fib(n - 2); +} + +fn main() -> i64 { + let result = fib(10); + print_i64(result); // 输出: 55 + return 0; +} +``` + +--- + +## 3. 
编译器架构 + +### 3.1 整体流水线 + +``` +源文件(.l) ──▶ 词法分析 ──▶ 语法分析 ──▶ 语义分析 ──▶ IR生成 ──▶ 可执行文件(.exe) + Token流 AST 带类型AST LLVM Module 机器码 +``` + +### 3.2 各阶段输入输出 + +| 阶段 | 输入 | 输出 | 关键数据结构 | +|------|------|------|-------------| +| 词法分析 | `char*` 源码 | Token 数组 | `Token`(类型 + 行号 + 列号 + 值)| +| 语法分析 | Token 数组 | AST 根节点 | `AstNode`(递归树,每个节点有类型枚举 + 子节点)| +| 语义分析 | AST 根节点 | 带类型标注的 AST | 在 `AstNode` 上附加 `TypeInfo` | +| IR 生成 | 带类型 AST | LLVM Module | `LLVMModuleRef`、`LLVMValueRef` 等 | +| 代码生成 | LLVM Module | `.exe` 可执行文件 | LLVM 的 `LLVMTargetMachineEmitToFile` | + +### 3.3 错误处理策略 + +- 词法/语法错误:打印 `文件:行号:列号: 错误信息` 后立即终止 +- 语义错误(类型不匹配等):收集当前阶段所有错误后统一输出,再终止 +- 不尝试错误恢复,不做增量编译 + +--- + +## 4. 模块详细设计 + +### 4.1 词法分析器(Lexer) + +**职责**:将源代码字符串转换为 Token 流 + +**Token 类型清单**: + +| 类别 | Token | +|------|-------| +| 关键字 | `fn` `let` `if` `else` `while` `return` `true` `false` | +| 类型 | `i64` `f64` `bool` `void` | +| 字面量 | 整数、浮点数 | +| 标识符 | 用户定义的变量名、函数名 | +| 运算符/分隔 | `+` `-` `*` `/` `%` `==` `!=` `<` `>` `<=` `>=` `&&` `||` `!` `=` `->` `(` `)` `{` `}` `,` `:` `;` | + +**实现要点**: +- 手写状态机,不依赖 flex/lex +- 跳过空白(空格、`\t`、`\r`)和注释(`//` 行注释 + `/* */` 块注释) +- 每个 Token 记录行号和列号,用于错误报告 +- 关键字通过哈希表或完美哈希识别 + +**关键函数签名**: +```c +Token* lex(const char* source, size_t* token_count, ErrorInfo* error); +``` + +### 4.2 语法分析器(Parser) + +**职责**:将 Token 流转换为抽象语法树 + +**实现方式**:手写递归下降解析器(Pratt parsing 处理表达式) + +**AST 节点类型**: + +``` +Program — 程序根节点,包含多个函数 +Function — 函数定义(名称、参数列表、返回类型、函数体) +Parameter — 函数参数(名称、类型) +Block — 代码块,包含语句列表 +LetStmt — let 声明(不可变变量) +IfStmt — if 语句(条件、then块、可选的else块) +WhileStmt — while 循环 +ReturnStmt — return 语句 +BinaryExpr — 二元运算(运算符 + 左右操作数) +UnaryExpr — 一元运算(-、!) +CallExpr — 函数调用 +LiteralExpr — 字面量(整数、浮点、布尔) +IdentifierExpr — 标识符引用 +``` + +**关键函数签名**: +```c +AstNode* parse(const Token* tokens, size_t token_count, ErrorInfo* error); +``` + +### 4.3 语义分析器(Sema / Semantic Analyzer) + +**职责**:类型推断和类型检查 + +**核心工作**: +1. **符号表管理** — 作用域栈(全局作用域 → 函数作用域 → 块作用域) +2. 
**类型推断** — 从 `let x = 42` 推断出 `x: i64` +3. **类型检查** — `if`/`while` 条件必须是 `bool`;二元运算两边类型必须一致 +4. **隐式类型转换** — 整数可自动提升为浮点数(`i64` → `f64`) +5. **函数签名检查** — 调用时参数数量和类型必须匹配声明 +6. **未定义检查** — 所有引用的标识符必须在作用域内已定义 + +**数据结构**: +```c +typedef struct { + const char* name; // 符号名称 + TypeKind type; // 推断出的类型 + SymbolKind kind; // 变量 / 参数 / 函数 + // 函数符号额外信息 + TypeKind return_type; + TypeKind* param_types; + size_t param_count; +} Symbol; + +typedef struct Scope { + Symbol* symbols; // 当前作用域的符号表 + size_t count; + struct Scope* parent; // 上级作用域 +} Scope; +``` + +**关键函数签名**: +```c +void analyze(AstNode* ast, ErrorList* errors); +``` + +### 4.4 LLVM IR 生成器(Codegen) + +**职责**:遍历带类型的 AST,调用 LLVM-C API 生成 LLVM IR + +**类型映射**: + +| L 类型 | LLVM 类型 | +|--------|-----------| +| `i64` | `LLVMInt64Type()` | +| `f64` | `LLVMDoubleType()` | +| `bool` | `LLVMInt1Type()` | +| `void` | `LLVMVoidType()` | + +**各 AST 节点的生成策略**: + +| AST 节点 | IR 生成策略 | +|----------|------------| +| `Function` | 创建 `LLVMAddFunction`,分配 entry BB,生成函数体 | +| `Block` | 顺序生成每条语句/表达式 | +| `LetStmt` | `alloca` 分配栈空间,计算初始化表达式,`store` | +| `BinaryExpr` | 生成左右操作数,按运算符选 `LLVMBuildAdd`/`LLVMBuildSub`/... | +| `IfStmt` | 创建 3 个 BB: then/else/merge,`LLVMBuildCondBr` | +| `WhileStmt` | 创建 cond/body/merge 三个 BB,`LLVMBuildCondBr` + `LLVMBuildBr` | +| `CallExpr` | 查找函数,`LLVMBuildCall2` | +| `ReturnStmt` | `LLVMBuildRet` | +| `LiteralExpr` | `LLVMConstInt`/`LLVMConstReal` | +| 标识符读取 | 从 `alloca` 地址 `LLVMBuildLoad2` | + +**内置函数实现**:`print_i64`/`print_f64`/`print_bool` 在编译器内部用 C `printf` 实现,生成时直接映射到 LLVM IR 调用 `printf`。 + +**关键函数签名**: +```c +LLVMModuleRef codegen(AstNode* ast, const char* module_name); +``` + +### 4.5 驱动层(Driver) + +**职责**:串联各阶段,处理命令行参数 + +``` +l-language.exe [-o ] [--emit-ir] + --emit-ir 输出 LLVM IR 文本(.ll),不生成可执行文件 + -o 指定输出文件名(默认 a.exe) +``` + +**流程**: +1. 读取源文件到内存 +2. 调用 `lex()` → 检查词法错误 +3. 调用 `parse()` → 检查语法错误 +4. 调用 `analyze()` → 检查语义错误 +5. 调用 `codegen()` → 生成 LLVM Module +6. 
调用 `LLVMTargetMachineEmitToFile()` → 输出目标文件 +7. 调用系统链接器(`clang` 或 `gcc`)→ 生成可执行文件 + +--- + +## 5. 开发阶段划分 + +### Phase 1:基础设施(预计 2-3 天) + +- [ ] CMake 构建系统(查找 LLVM、配置编译选项) +- [ ] 词法分析器:完整的 Token 识别 + 注释跳过 +- [ ] 单元测试框架搭建(CUnit 或手写断言宏) +- [ ] 错误报告基础设施(行号/列号 + 彩色输出) + +### Phase 2:表达式计算(预计 2-3 天) + +- [ ] AST 数据结构定义 +- [ ] Pratt 表达式解析器(算术、比较、逻辑) +- [ ] 字面量 + 一元运算 + 二元运算的 IR 生成 +- [ ] 生成第一个可执行文件:`print_i64(1 + 2 * 3)` + +### Phase 3:变量和控制流(预计 3-4 天) + +- [ ] `let` 声明 + 标识符引用 +- [ ] 语义分析:符号表 + 类型推断 +- [ ] `if` / `else` 语句 +- [ ] `while` 循环 +- [ ] `return` 语句 + +### Phase 4:函数(预计 3-4 天) + +- [ ] 函数定义解析(参数 + 返回类型) +- [ ] 函数调用 IR 生成 +- [ ] 作用域链管理 +- [ ] 完整的斐波那契程序跑通 + +### Phase 5:集成验证(预计 1-2 天) + +- [ ] 端到端测试(多个 `.l` 程序编译运行验证结果) +- [ ] README 文档 +- [ ] 编译错误信息完善 + +**总预计工期**:约 2-3 周(每天投入 2-4 小时) + +--- + +## 6. 技术依赖 + +| 依赖 | 版本 | 用途 | +|------|------|------| +| C 编译器 | GCC 14.x (MinGW) | 编译编译器自身 | +| CMake | ≥ 3.20 | 构建系统 | +| LLVM | 19.x | IR 生成 + 目标代码输出 | +| 操作系统 | Windows 11 | 开发和运行 | + +LLVM 安装路径:`D:\settings\Language\LLVM` + +--- + +## 7. 目录结构 + +``` +L Language/ +├── docs/ +│ └── PRD.md 本文档 +├── src/ +│ ├── lexer/ +│ │ ├── lexer.c 词法分析器主逻辑 +│ │ ├── token.c Token 数据结构 +│ │ └── lexer.h +│ ├── parser/ +│ │ ├── parser.c 递归下降 + Pratt 解析 +│ │ └── parser.h +│ ├── ast/ +│ │ ├── ast.c AST 节点创建/销毁 +│ │ └── ast.h +│ ├── sema/ +│ │ ├── sema.c 语义分析 + 类型检查 +│ │ ├── symbol.c 符号表管理 +│ │ └── sema.h +│ ├── codegen/ +│ │ ├── codegen.c LLVM IR 生成 +│ │ └── codegen.h +│ ├── driver/ +│ │ ├── main.c 入口 + 命令行参数 +│ │ └── error.c 错误报告 +│ └── util/ +│ └── arena.c 内存池(简化内存管理) +├── include/ +│ └── l_lang.h 公共头文件(类型定义等) +├── test/ +│ ├── test_lexer.c +│ ├── test_parser.c +│ ├── test_sema.c +│ ├── test_codegen.c +│ └── programs/ .l 测试程序 +│ ├── hello.l +│ ├── fib.l +│ └── ... +├── CMakeLists.txt +└── README.md +``` + +--- + +## 8. 成功标准 + +v0.1 完成的判定标准: + +1. 斐波那契程序(递归 + 循环两个版本)编译并输出正确结果 +2. 至少 3 个不同算例编译运行通过 +3. 
以下语法元素均有覆盖: + - [x] `let` 变量声明和类型推断 + - [x] 算术运算(`+` `-` `*` `/` `%`) + - [x] 比较运算(`==` `!=` `<` `>` `<=` `>=`) + - [x] 逻辑运算(`&&` `||` `!`) + - [x] `if` / `else` 控制流 + - [x] `while` 循环 + - [x] 函数定义和调用 + - [x] 递归 +4. 类型错误能被正确检测并给出可读的错误信息 + +--- + +## 9. 风险与缓解 + +| 风险 | 概率 | 缓解措施 | +|------|------|----------| +| LLVM-C API 复杂度过高 | 中 | 先用 LLVM 官方 Kaleidoscope 教程预热,理解核心 API | +| 类型推断实现困难 | 中 | v0.1 只做最简单的 "从初始化表达式推断",不涉及 Hindley-Milner 或泛型 | +| 递归函数 IR 栈管理出错 | 中 | 所有变量用 `alloca`(栈分配),LLVM 的 `mem2reg` pass 自动优化 | +| Windows/MinGW + LLVM 兼容问题 | 低 | 提前验证 LLVM 安装和 CMake 能找到 LLVM | diff --git a/docs/superpowers/plans/2026-06-04-l-lang-v0.1.md b/docs/superpowers/plans/2026-06-04-l-lang-v0.1.md new file mode 100644 index 0000000..40aff09 --- /dev/null +++ b/docs/superpowers/plans/2026-06-04-l-lang-v0.1.md @@ -0,0 +1,2512 @@ +# L Language v0.1 实现计划 + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** 用 C + LLVM 构建 L Language 编译器的完整 v0.1(计算器级别 → 斐波那契可执行文件) + +**Architecture:** 经典 5 阶段编译器流水线 — 词法分析(Token) → 语法解析(AST) → 语义分析(类型标注) → LLVM IR 生成 → 链接成可执行文件。手写递归下降+Pratt解析器,Arena 内存池简化内存管理。 + +**Tech Stack:** C17, GCC (MinGW), CMake ≥ 3.20, LLVM 19.x (C API), Windows 11 + +--- + +## 文件蓝图 + +``` +L Language/ +├── CMakeLists.txt 构建系统 +├── include/ +│ └── l_lang.h 全局类型定义(Token、AST、类型枚举共享) +├── src/ +│ ├── util/ +│ │ ├── arena.h 内存池声明 +│ │ └── arena.c 内存池实现(bump allocator) +│ ├── lexer/ +│ │ ├── token.h Token 结构体 + 创建函数 +│ │ ├── token.c Token 实现 +│ │ ├── lexer.h 词法分析器声明 +│ │ └── lexer.c 核心词法分析(状态机) +│ ├── ast/ +│ │ ├── ast.h AST 节点类型枚举 + 结构体 +│ │ └── ast.c 节点创建/销毁 +│ ├── parser/ +│ │ ├── parser.h 解析器声明 +│ │ └── parser.c 递归下降 + Pratt 表达式解析 +│ ├── sema/ +│ │ ├── symbol.h 符号表声明 +│ │ ├── symbol.c 符号表实现(作用域链) +│ │ ├── sema.h 语义分析声明 +│ │ └── sema.c 类型推断 + 类型检查 +│ ├── codegen/ +│ │ ├── codegen.h 代码生成声明 +│ │ └── codegen.c AST → LLVM IR +│ ├── driver/ +│ │ ├── error.h 错误报告声明 +│ │ ├── error.c 错误格式化输出 +│ │ └── main.c 入口 + 命令行 + 流水线串联 +├── test/ +│ ├── test_lexer.c 词法分析单元测试 +│ ├── test_parser.c 语法分析单元测试 +│ ├── test_sema.c 语义分析单元测试 +│ ├── test_utils.h 测试断言宏 +│ └── programs/ +│ ├── 01_arithmetic.l 四则运算 + print_i64 +│ ├── 02_if_else.l if/else 控制流 +│ ├── 03_recurse.l 递归 + if 控制流组合 +│ ├── 04_fib_recursive.l 斐波那契递归 +│ └── 05_float.l 浮点运算 + 多函数调用 +``` + +--- + +### Task 1: 项目骨架和构建系统 + +**Files:** +- Create: `CMakeLists.txt` +- Create: `include/l_lang.h` +- Create: `src/util/arena.h`, `src/util/arena.c` +- Create: `src/driver/error.h`, `src/driver/error.c` + +- [ ] **Step 1: 编写 CMakeLists.txt** + +```cmake +cmake_minimum_required(VERSION 3.20) +project(l_lang C) + +set(CMAKE_C_STANDARD 17) +set(CMAKE_C_STANDARD_REQUIRED ON) + +# 查找 LLVM +find_package(LLVM 19 REQUIRED CONFIG) + +message(STATUS "LLVM found: ${LLVM_DIR}") +message(STATUS "LLVM includes: ${LLVM_INCLUDE_DIRS}") +message(STATUS "LLVM libraries: ${LLVM_AVAILABLE_LIBS}") + +# 收集源文件 +file(GLOB_RECURSE L_LANG_SOURCES 
/*
 * Arena: a fixed-capacity bump allocator.
 *
 * Memory is handed out linearly from one malloc'ed buffer and released all at
 * once by arena_destroy(); individual allocations are never freed.
 *
 * The struct carries the tag `Arena` so that this definition is the same type
 * as the `typedef struct Arena Arena;` forward declaration in l_lang.h. With
 * an anonymous struct the two typedefs name different types and any
 * translation unit that includes both headers fails to compile.
 */
typedef struct Arena {
    char*  memory;    /* backing buffer; NULL if creation failed or after destroy */
    size_t capacity;  /* usable bytes in `memory` (0 when `memory` is NULL) */
    size_t offset;    /* bytes handed out so far; never exceeds `capacity` */
} Arena;

Arena arena_create(size_t capacity_mb);
void  arena_destroy(Arena* a);
void* arena_alloc(Arena* a, size_t size);
char* arena_strdup(Arena* a, const char* src);

/*
 * Create an arena of `capacity_mb` mebibytes.
 *
 * On failure (size overflow or malloc failure) the returned arena has
 * memory == NULL and capacity == 0, so every later arena_alloc() returns NULL
 * instead of handing out pointers derived from NULL.
 */
Arena arena_create(size_t capacity_mb) {
    const size_t bytes_per_mb = (size_t)1024 * 1024;
    Arena a = { .memory = NULL, .capacity = 0, .offset = 0 };

    if (capacity_mb > (size_t)-1 / bytes_per_mb) {
        return a;  /* capacity_mb * 1 MiB would wrap around */
    }

    size_t bytes = capacity_mb * bytes_per_mb;
    a.memory = malloc(bytes);
    if (a.memory != NULL) {
        a.capacity = bytes;
    }
    return a;
}

/* Release the backing buffer and reset the arena to the empty state. */
void arena_destroy(Arena* a) {
    if (a == NULL) {
        return;
    }
    free(a->memory);
    a->memory = NULL;
    a->capacity = 0;
    a->offset = 0;
}

/*
 * Allocate `size` bytes, rounded up to a multiple of 8.
 *
 * Returns NULL when the arena is unusable or does not have enough room left.
 * The remaining space is computed as capacity - offset, so a huge `size`
 * cannot wrap the comparison around and appear to fit.
 */
void* arena_alloc(Arena* a, size_t size) {
    if (a == NULL || a->memory == NULL) {
        return NULL;
    }
    if (size > (size_t)-1 - 7) {
        return NULL;  /* rounding up to 8 would overflow */
    }

    size_t rounded = (size + 7) & ~(size_t)7;
    if (rounded > a->capacity - a->offset) {
        return NULL;
    }

    void* ptr = a->memory + a->offset;
    a->offset += rounded;
    return ptr;
}

/*
 * Copy the NUL-terminated string `src` into the arena.
 * Returns NULL if `src` is NULL or the arena is exhausted.
 */
char* arena_strdup(Arena* a, const char* src) {
    if (src == NULL) {
        return NULL;
    }
    size_t len = strlen(src) + 1;  /* include the terminator */
    char* dst = arena_alloc(a, len);
    if (dst == NULL) {
        return NULL;
    }
    memcpy(dst, src, len);
    return dst;
}
/* === Token kinds === */
typedef enum {
    /* keywords */
    TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_RETURN,
    /* type keywords */
    TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID,
    /* literals */
    TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE,
    /* identifier */
    TOK_IDENT,
    /* operators */
    TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
    TOK_EQ, TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
    TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG,
    TOK_ARROW,
    /* delimiters */
    TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE,
    TOK_COMMA, TOK_COLON, TOK_SEMICOLON, TOK_ASSIGN,
    /* special */
    TOK_EOF, TOK_ERROR,
} TokenKind;

/* Identical re-declaration of the forward typedef from l_lang.h (valid C11). */
typedef struct Token Token;

/* A token is a view into the source text; it owns no memory. */
struct Token {
    TokenKind   kind;
    const char* start;   /* first character of the token inside the source buffer */
    int         length;  /* number of characters in the token text */
    int         line;
    int         col;
};

/* Display names, indexed by TokenKind. */
static const char* NAMES[] = {
    [TOK_FN] = "fn", [TOK_LET] = "let", [TOK_IF] = "if",
    [TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_RETURN] = "return",
    [TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_VOID] = "void",
    [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数",
    [TOK_TRUE] = "true", [TOK_FALSE] = "false",
    [TOK_IDENT] = "标识符",
    [TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*",
    [TOK_SLASH] = "/", [TOK_PERCENT] = "%",
    [TOK_EQ] = "=", [TOK_EQ_EQ] = "==", [TOK_BANG_EQ] = "!=",
    [TOK_LT] = "<", [TOK_GT] = ">", [TOK_LT_EQ] = "<=", [TOK_GT_EQ] = ">=",
    [TOK_AND_AND] = "&&", [TOK_PIPE_PIPE] = "||", [TOK_BANG] = "!",
    [TOK_ARROW] = "->",
    [TOK_LPAREN] = "(", [TOK_RPAREN] = ")",
    [TOK_LBRACE] = "{", [TOK_RBRACE] = "}",
    [TOK_COMMA] = ",", [TOK_COLON] = ":", [TOK_SEMICOLON] = ";",
    [TOK_ASSIGN] = "=",
    [TOK_EOF] = "EOF", [TOK_ERROR] = "错误",
};

/*
 * Human-readable name of a token kind, for diagnostics.
 * A value outside the enum yields "?" instead of reading past the table.
 */
const char* tok_name(TokenKind kind) {
    size_t index = (size_t)kind;
    if (index >= sizeof NAMES / sizeof NAMES[0] || NAMES[index] == NULL) {
        return "?";
    }
    return NAMES[index];
}

/* True for the four type keywords: i64, f64, bool, void. */
bool tok_is_type(TokenKind kind) {
    return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_VOID;
}

/*
 * Decimal value of an integer-literal token.
 *
 * The digits are read straight from the token span, so there is no fixed-size
 * scratch buffer for an over-long literal to overflow. Values that do not fit
 * saturate at INT64_MAX, which matches what strtoll() returned on overflow.
 * Scanning stops at the first non-digit.
 */
int64_t tok_int_value(const Token* tok) {
    int64_t value = 0;
    for (int i = 0; i < tok->length; i++) {
        char c = tok->start[i];
        if (c < '0' || c > '9') {
            break;
        }
        int digit = c - '0';
        if (value > (INT64_MAX - digit) / 10) {
            return INT64_MAX;  /* value * 10 + digit would overflow */
        }
        value = value * 10 + digit;
    }
    return value;
}

/*
 * Value of a float-literal token.
 *
 * strtod() needs a NUL-terminated copy limited to the token span; parsing the
 * source in place could consume characters that belong to the next token
 * (e.g. an exponent suffix). Short tokens use a stack buffer, longer ones a
 * temporary heap buffer. Returns 0.0 for an empty token or if that temporary
 * allocation fails.
 */
double tok_float_value(const Token* tok) {
    char  stack_buf[64];
    char* buf = stack_buf;

    if (tok->length <= 0) {
        return 0.0;
    }
    size_t len = (size_t)tok->length;
    if (len >= sizeof stack_buf) {
        buf = malloc(len + 1);
        if (buf == NULL) {
            return 0.0;
        }
    }

    memcpy(buf, tok->start, len);
    buf[len] = '\0';
    double value = strtod(buf, NULL);

    if (buf != stack_buf) {
        free(buf);
    }
    return value;
}
NULL。 +Token* lex(Arena* a, const char* source, const char* filename, + size_t* count, ErrorInfo* error); + +#endif +``` + +- [ ] **Step 2: 实现核心词法分析器 `src/lexer/lexer.c`** + +```c +#include "lexer.h" +#include +#include + +typedef struct { + const char* src; + const char* filename; + int pos; + int line; + int col; +} Lexer; + +static char peek(const Lexer* l) { return l->src[l->pos]; } +static char peek_next(const Lexer* l) { return l->src[l->pos + 1]; } +static void advance(Lexer* l) { + if (l->src[l->pos] == '\n') { l->line++; l->col = 1; } + else { l->col++; } + l->pos++; +} +static void skip_whitespace(Lexer* l) { + while (1) { + char c = peek(l); + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { advance(l); continue; } + if (c == '/' && peek_next(l) == '/') { + while (peek(l) != '\n' && peek(l) != '\0') advance(l); + continue; + } + if (c == '/' && peek_next(l) == '*') { + advance(l); advance(l); + while (peek(l) != '\0' && !(peek(l) == '*' && peek_next(l) == '/')) advance(l); + if (peek(l) != '\0') { advance(l); advance(l); } // skip */ + continue; + } + break; + } +} + +static Token make_token(Lexer* l, TokenKind kind, int start_pos, int len) { + return (Token){.kind = kind, .start = l->src + start_pos, + .length = len, .line = l->line, .col = l->col}; +} + +static Token lex_number(Lexer* l) { + int start = l->pos; + bool is_float = false; + TokenKind kind = TOK_INT_LIT; + while (isdigit(peek(l))) advance(l); + if (peek(l) == '.') { + is_float = true; kind = TOK_FLOAT_LIT; advance(l); + while (isdigit(peek(l))) advance(l); + } + return make_token(l, kind, start, l->pos - start); +} + +static TokenKind check_keyword(const Token* tok) { + #define KW(s, k) if (tok->length == sizeof(s)-1 && memcmp(tok->start, s, sizeof(s)-1) == 0) return k + KW("fn", TOK_FN); KW("let", TOK_LET); + KW("if", TOK_IF); KW("else", TOK_ELSE); + KW("while", TOK_WHILE); KW("return", TOK_RETURN); + KW("i64", TOK_I64); KW("f64", TOK_F64); + KW("bool", TOK_BOOL); KW("void", 
TOK_VOID); + KW("true", TOK_TRUE); KW("false", TOK_FALSE); + #undef KW + return TOK_IDENT; +} + +static Token lex_ident_or_keyword(Lexer* l) { + int start = l->pos; + while (isalnum(peek(l)) || peek(l) == '_') advance(l); + Token t = make_token(l, TOK_IDENT, start, l->pos - start); + t.kind = check_keyword(&t); + return t; +} + +Token* lex(Arena* a, const char* source, const char* filename, + size_t* count, ErrorInfo* error) { + Lexer l = {.src = source, .filename = filename, .pos = 0, .line = 1, .col = 1}; + // 预估容量:源码长度的 1/3 + size_t cap = strlen(source) / 3 + 16; + Token* tokens = arena_alloc(a, cap * sizeof(Token)); + size_t idx = 0; + + while (peek(&l) != '\0') { + skip_whitespace(&l); + if (peek(&l) == '\0') break; + + int line = l.line, col = l.col; + char c = peek(&l); + + #define TOK(k) (tokens[idx++] = make_token(&l, k, l.pos, 1), advance(&l)) + #define TOK2(k, len) (tokens[idx++] = make_token(&l, k, l.pos, len), advance(&l); advance(&l)) + + if (isdigit(c)) { tokens[idx++] = lex_number(&l); } + else if (isalpha(c) || c == '_') { tokens[idx++] = lex_ident_or_keyword(&l); } + else if (c == '+' && peek_next(&l) != '=') TOK(TOK_PLUS) + else if (c == '-' && peek_next(&l) != '>') TOK(TOK_MINUS) + else if (c == '-' && peek_next(&l) == '>') TOK2(TOK_ARROW, 2) + else if (c == '*') TOK(TOK_STAR) + else if (c == '/') TOK(TOK_SLASH) + else if (c == '%') TOK(TOK_PERCENT) + else if (c == '=' && peek_next(&l) == '=') TOK2(TOK_EQ_EQ, 2) + else if (c == '=') TOK(TOK_ASSIGN) + else if (c == '!' 
&& peek_next(&l) == '=') TOK2(TOK_BANG_EQ, 2) + else if (c == '!') TOK(TOK_BANG) + else if (c == '<' && peek_next(&l) == '=') TOK2(TOK_LT_EQ, 2) + else if (c == '<') TOK(TOK_LT) + else if (c == '>' && peek_next(&l) == '=') TOK2(TOK_GT_EQ, 2) + else if (c == '>') TOK(TOK_GT) + else if (c == '&' && peek_next(&l) == '&') TOK2(TOK_AND_AND, 2) + else if (c == '|' && peek_next(&l) == '|') TOK2(TOK_PIPE_PIPE, 2) + else if (c == '(') TOK(TOK_LPAREN) + else if (c == ')') TOK(TOK_RPAREN) + else if (c == '{') TOK(TOK_LBRACE) + else if (c == '}') TOK(TOK_RBRACE) + else if (c == ',') TOK(TOK_COMMA) + else if (c == ':') TOK(TOK_COLON) + else if (c == ';') TOK(TOK_SEMICOLON) + else { + *error = (ErrorInfo){ + .message = "无法识别的字符", + .filename = filename, .line = line, .col = col + }; + return NULL; + } + #undef TOK + #undef TOK2 + } + tokens[idx++] = make_token(&l, TOK_EOF, l.pos, 0); + *count = idx; + return tokens; +} +``` + +- [ ] **Step 3: 编写词法分析测试 `test/test_lexer.c`** + +```c +#include "test_utils.h" +#include "lexer.h" +#include "arena.h" + +void test_simple_tokens() { + Arena a = arena_create(1); + const char* src = "fn main() { return 42; }"; + size_t count; ErrorInfo error = {0}; + Token* tokens = lex(&a, src, "test", &count, &error); + ASSERT(tokens != NULL); + ASSERT(count >= 8); + ASSERT(tokens[0].kind == TOK_FN); + ASSERT(tokens[1].kind == TOK_IDENT); + ASSERT(tokens[2].kind == TOK_LPAREN); + ASSERT(tokens[3].kind == TOK_RPAREN); + ASSERT(tokens[4].kind == TOK_LBRACE); + ASSERT(tokens[5].kind == TOK_RETURN); + ASSERT(tokens[6].kind == TOK_INT_LIT); + ASSERT(tok_int_value(&tokens[6]) == 42); + arena_destroy(&a); +} + +void test_keywords() { + Arena a = arena_create(1); + const char* src = "fn let if else while return i64 f64 bool void true false"; + TokenKind expected[] = {TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE, + TOK_RETURN, TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID, TOK_TRUE, TOK_FALSE, TOK_EOF}; + size_t count; ErrorInfo error = {0}; + Token* tokens = lex(&a, 
/* Per-translation-unit tallies: every ASSERT counts as one "test". */
static int _tests_run = 0;
static int _tests_failed = 0;

/* Report a failed check on stderr and count it; passing checks are silent. */
static inline void test_record_(int passed, const char* file, int line, const char* expr_text) {
    if (passed) {
        return;
    }
    fprintf(stderr, "\033[1;31mFAIL\033[0m %s:%d: %s\n", file, line, expr_text);
    _tests_failed++;
}

/*
 * Check `expr` without aborting: the run counter is bumped first, then the
 * expression is evaluated exactly once and a failure is reported with its
 * source location and text.
 */
#define ASSERT(expr) \
    (_tests_run++, test_record_((expr) ? 1 : 0, __FILE__, __LINE__, #expr))

/* Announce a test function by name on stderr, then call it. */
#define TEST_RUN(func) do { \
    fprintf(stderr, " RUN %s\n", #func); \
    func(); \
} while(0)

/* Print the totals and return the process exit code: 1 if anything failed, else 0. */
static inline int test_summary(void) {
    int passed = _tests_run - _tests_failed;
    fprintf(stderr, "\n%d tests, %d passed, %d failed\n",
            _tests_run, passed, _tests_failed);
    if (_tests_failed > 0) {
        return 1;
    }
    return 0;
}
/* Discriminator for AstNode: selects which member of the node's union is valid. */
typedef enum {
    AST_PROGRAM,
    AST_FUNCTION,
    AST_PARAMETER,
    AST_BLOCK,
    AST_LET_STMT,
    AST_IF_STMT,
    AST_WHILE_STMT,
    AST_RETURN_STMT,
    AST_EXPR_STMT,
    AST_BINARY_EXPR,
    AST_UNARY_EXPR,
    AST_CALL_EXPR,
    AST_LITERAL_EXPR,
    AST_IDENT_EXPR,
} AstKind;

/*
 * Operator codes. Despite the name, this enum is used for both binary and
 * unary expression nodes: OP_NEG and OP_NOT are the unary operators.
 */
typedef enum {
    OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD,         /* arithmetic */
    OP_EQ, OP_NE, OP_LT, OP_GT, OP_LE, OP_GE,       /* comparison */
    OP_AND, OP_OR,                                  /* logical */
    OP_NEG, OP_NOT,                                 /* unary */
} BinaryOp;

// Type information (filled in by the semantic-analysis phase)
typedef struct {
    TypeKind kind;
} TypeInfo;
TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; }; } literal; + // AST_IDENT_EXPR + struct { const char* name; } ident; + } as; +}; + +// 创建节点的辅助函数(内存来自 arena) +AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col); +AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount, + TypeKind ret, AstNode* body, int line, int col); +AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col); +AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col); +AstNode* ast_make_let(void* alloc, const char* name, AstNode* init, int line, int col); +AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col); +AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col); +AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col); +AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col); +AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col); +AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col); +AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col); +AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col); +AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col); +AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col); +AstNode* ast_make_ident(void* alloc, const char* name, int line, int col); + +#endif +``` + +- [ ] **Step 2: 编写 `src/ast/ast.c`** + +```c +#include "ast.h" +#include "arena.h" +#include + +// === 跨模块分配器(供 parser.c、symbol.c 等复用)=== +void* arena_alloc_impl(void* alloc, size_t sz) { + return arena_alloc((Arena*)alloc, sz); +} + +char* arena_strdup_impl(void* alloc, const char* src, size_t len) { + char* dst = arena_alloc_impl(alloc, len 
+ 1); + memcpy(dst, src, len); + dst[len] = '\0'; + return dst; +} + +// 使用宏简化节点创建 +#define NEW(alloc, kind) AstNode* n = (AstNode*)arena_alloc_impl(alloc, sizeof(AstNode)); \ + n->kind = (kind); n->type.kind = TYPE_UNKNOWN; \ + n->line = line; n->col = col + +static void* arena_alloc_impl(void* alloc, size_t sz); // forward + +AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col) { + NEW(alloc, AST_PROGRAM); + n->as.program.functions = fns; + n->as.program.fn_count = count; + return n; +} + +AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount, + TypeKind ret, AstNode* body, int line, int col) { + NEW(alloc, AST_FUNCTION); + n->as.function.name = name; n->as.function.params = params; + n->as.function.param_count = pcount; n->as.function.return_type = ret; + n->as.function.body = body; + return n; +} + +AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col) { + NEW(alloc, AST_PARAMETER); + n->as.parameter.name = name; n->as.parameter.type = type; + return n; +} + +AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col) { + NEW(alloc, AST_BLOCK); + n->as.block.stmts = stmts; n->as.block.stmt_count = count; + return n; +} + +AstNode* ast_make_let(void* alloc, const char* name, AstNode* init, int line, int col) { + NEW(alloc, AST_LET_STMT); + n->as.let_stmt.name = name; n->as.let_stmt.init = init; + return n; +} + +AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col) { + NEW(alloc, AST_IF_STMT); + n->as.if_stmt.cond = cond; n->as.if_stmt.then_block = then_b; + n->as.if_stmt.else_block = else_b; + return n; +} + +AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col) { + NEW(alloc, AST_WHILE_STMT); + n->as.while_stmt.cond = cond; n->as.while_stmt.body = body; + return n; +} + +AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col) { + 
NEW(alloc, AST_RETURN_STMT); + n->as.return_stmt.expr = expr; + return n; +} + +AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col) { + NEW(alloc, AST_EXPR_STMT); + n->as.expr_stmt.expr = expr; + return n; +} + +AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col) { + NEW(alloc, AST_BINARY_EXPR); + n->as.binary.op = op; n->as.binary.left = left; n->as.binary.right = right; + return n; +} + +AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col) { + NEW(alloc, AST_UNARY_EXPR); + n->as.unary.op = op; n->as.unary.operand = operand; + return n; +} + +AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col) { + NEW(alloc, AST_CALL_EXPR); + n->as.call.name = name; n->as.call.args = args; n->as.call.arg_count = count; + return n; +} + +AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_I64; n->as.literal.i64_val = val; + n->type.kind = TYPE_I64; + return n; +} + +AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_F64; n->as.literal.f64_val = val; + n->type.kind = TYPE_F64; + return n; +} + +AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_BOOL; n->as.literal.bool_val = val; + n->type.kind = TYPE_BOOL; + return n; +} + +AstNode* ast_make_ident(void* alloc, const char* name, int line, int col) { + NEW(alloc, AST_IDENT_EXPR); + n->as.ident.name = name; + return n; +} +``` + +- [ ] **Step 3: 验证编译** + +```bash +cd "D:\Code\doing_exercises\programs\L Language\build" +mingw32-make -j4 +``` + +Expected: 编译通过(含 AST 模块)。 + +--- + +### Task 5: 语法分析器 — 表达式(Pratt Parser) + +**Files:** +- Create: `src/parser/parser.h`, `src/parser/parser.c` + +- [ ] **Step 1: 编写 
`src/parser/parser.h`** + +```c +#ifndef PARSER_H +#define PARSER_H + +#include "ast.h" +#include "token.h" +#include "error.h" + +// 解析 Token 数组,返回 Program 节点(内存来自 arena)。 +// 出错时 error 被填充并返回 NULL。 +AstNode* parse(Arena* a, const Token* tokens, size_t count, + const char* filename, ErrorInfo* error); + +#endif +``` + +- [ ] **Step 2: 实现解析器 `src/parser/parser.c`(第一部分:Pratt 表达式解析)** + +```c +#include "parser.h" +#include +#include + +typedef struct { + const Token* tokens; + size_t count; + size_t pos; + const char* filename; + Arena* arena; +} Parser; + +// === 向前看 === +static const Token* peek(const Parser* p) { return &p->tokens[p->pos]; } +static const Token* peek_n(const Parser* p, int n) { return &p->tokens[p->pos + n]; } +static const Token* advance(Parser* p) { return &p->tokens[p->pos++]; } +static bool match(Parser* p, TokenKind k) { + if (peek(p)->kind == k) { p->pos++; return true; } + return false; +} +static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) { + if (peek(p)->kind == k) return advance(p); + e->message = msg; e->filename = p->filename; + e->line = peek(p)->line; e->col = peek(p)->col; + return NULL; +} + +// === 运算符优先级定义 === +typedef enum { + PREC_NONE = 0, + PREC_ASSIGN = 10, + PREC_OR = 20, + PREC_AND = 30, + PREC_COMPARE = 40, // == != < > <= >= + PREC_TERM = 50, // + - + PREC_FACTOR = 60, // * / % + PREC_UNARY = 70, // - ! 
+ PREC_CALL = 80, +} Precedence; + +static Precedence tok_to_prec(TokenKind kind) { + switch (kind) { + case TOK_PIPE_PIPE: return PREC_OR; + case TOK_AND_AND: return PREC_AND; + case TOK_EQ_EQ: case TOK_BANG_EQ: + case TOK_LT: case TOK_GT: case TOK_LT_EQ: case TOK_GT_EQ: return PREC_COMPARE; + case TOK_PLUS: case TOK_MINUS: return PREC_TERM; + case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return PREC_FACTOR; + default: return PREC_NONE; + } +} + +static BinaryOp tok_to_binop(TokenKind kind) { + switch (kind) { + case TOK_PLUS: return OP_ADD; case TOK_MINUS: return OP_SUB; + case TOK_STAR: return OP_MUL; case TOK_SLASH: return OP_DIV; + case TOK_PERCENT: return OP_MOD; + case TOK_EQ_EQ: return OP_EQ; case TOK_BANG_EQ: return OP_NE; + case TOK_LT: return OP_LT; case TOK_GT: return OP_GT; + case TOK_LT_EQ: return OP_LE; case TOK_GT_EQ: return OP_GE; + case TOK_AND_AND: return OP_AND; case TOK_PIPE_PIPE: return OP_OR; + default: return OP_ADD; + } +} + +// 向前声明 +static AstNode* parse_expr(Parser* p, ErrorInfo* error); +static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error); +static AstNode* parse_block(Parser* p, ErrorInfo* error); + +// === 前缀解析:nil、前缀一元运算符、基本表达式 === +static AstNode* parse_unary(Parser* p, ErrorInfo* error) { + const Token* op = advance(p); + AstNode* operand = parse_expr_prec(p, PREC_UNARY, error); + if (!operand) return NULL; + BinaryOp uop = (op->kind == TOK_MINUS) ? 
OP_NEG : OP_NOT; + return ast_make_unary(p->arena, uop, operand, op->line, op->col); +} + +static AstNode* parse_group(Parser* p, ErrorInfo* error) { + advance(p); // 跳过 ( + AstNode* expr = parse_expr(p, error); + if (!expr) return NULL; + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + return expr; +} + +static AstNode* parse_literal(Parser* p) { + const Token* t = advance(p); + switch (t->kind) { + case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), t->line, t->col); + case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), t->line, t->col); + case TOK_TRUE: return ast_make_literal_bool(p->arena, true, t->line, t->col); + case TOK_FALSE: return ast_make_literal_bool(p->arena, false, t->line, t->col); + default: return NULL; + } +} + +static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) { + const Token* name = advance(p); + if (match(p, TOK_LPAREN)) { + // 函数调用 + AstNode* args[16]; int arg_count = 0; + while (peek(p)->kind != TOK_RPAREN && !error->message) { + args[arg_count] = parse_expr(p, error); + arg_count++; + if (peek(p)->kind == TOK_COMMA) advance(p); + } + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); + memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); + return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length), + arg_arr, arg_count, name->line, name->col); + } + return ast_make_ident(p->arena, + arena_strdup_impl(p->arena, name->start, name->length), + name->line, name->col); +} + +// === Pratt 主循环 === +static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) { + const Token* tok = peek(p); + AstNode* left = NULL; + + // 前缀解析 + if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) { + left = parse_unary(p, error); + } else if (tok->kind == TOK_LPAREN) { + left = parse_group(p, error); + } else if (tok->kind == TOK_INT_LIT || tok->kind == 
TOK_FLOAT_LIT || + tok->kind == TOK_TRUE || tok->kind == TOK_FALSE) { + left = parse_literal(p); + } else if (tok->kind == TOK_IDENT) { + left = parse_ident_or_call(p, error); + } else { + error->message = "无法识别的表达式"; error->filename = p->filename; + error->line = tok->line; error->col = tok->col; + return NULL; + } + if (!left) return NULL; + + // 中缀解析循环 + while (!error->message) { + TokenKind kind = peek(p)->kind; + Precedence prec = tok_to_prec(kind); + if (prec <= min_prec) break; + + const Token* op = advance(p); + AstNode* right = parse_expr_prec(p, prec, error); + if (!right) return NULL; + left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, op->line, op->col); + } + + return left; +} + +static AstNode* parse_expr(Parser* p, ErrorInfo* error) { + return parse_expr_prec(p, PREC_NONE, error); +} +``` + +- [ ] **Step 3: 解析器第二部分:语句解析** + +```c +// === 语句解析(续 parser.c)=== + +static bool is_type_token(TokenKind k) { + return k == TOK_I64 || k == TOK_F64 || k == TOK_BOOL || k == TOK_VOID; +} + +static TypeKind token_to_type(TokenKind k) { + switch (k) { case TOK_I64: return TYPE_I64; case TOK_F64: return TYPE_F64; + case TOK_BOOL: return TYPE_BOOL; default: return TYPE_VOID; } +} + +static AstNode* parse_statement(Parser* p, ErrorInfo* error); + +static AstNode* parse_block(Parser* p, ErrorInfo* error) { + const Token* open = peek(p); + if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) return NULL; + AstNode* stmts[256]; int count = 0; + while (peek(p)->kind != TOK_RBRACE && peek(p)->kind != TOK_EOF && !error->message) { + // 块级表达式作为最后一条语句 + if (peek(p)->kind == TOK_RBRACE) break; + AstNode* s = parse_statement(p, error); + if (!s) return NULL; + stmts[count++] = s; + } + if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL; + AstNode** arr = arena_alloc_impl(p->arena, count * sizeof(AstNode*)); + memcpy(arr, stmts, count * sizeof(AstNode*)); + return ast_make_block(p->arena, arr, count, open->line, open->col); +} + +static AstNode* 
parse_statement(Parser* p, ErrorInfo* error) { + const Token* t = peek(p); + + if (t->kind == TOK_LET) { + advance(p); + const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名"); + if (!name) return NULL; + if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL; + AstNode* init = parse_expr(p, error); + if (!init) return NULL; + if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL; + return ast_make_let(p->arena, + arena_strdup_impl(p->arena, name->start, name->length), + init, t->line, t->col); + } + + if (t->kind == TOK_IF) { + advance(p); + AstNode* cond = parse_expr(p, error); + if (!cond) return NULL; + AstNode* then_block = parse_block(p, error); + if (!then_block) return NULL; + AstNode* else_block = NULL; + if (match(p, TOK_ELSE)) { + if (peek(p)->kind == TOK_IF) { + else_block = parse_statement(p, error); + } else { + else_block = parse_block(p, error); + } + if (!else_block) return NULL; + } + return ast_make_if(p->arena, cond, then_block, else_block, t->line, t->col); + } + + if (t->kind == TOK_WHILE) { + advance(p); + AstNode* cond = parse_expr(p, error); + if (!cond) return NULL; + AstNode* body = parse_block(p, error); + if (!body) return NULL; + return ast_make_while(p->arena, cond, body, t->line, t->col); + } + + if (t->kind == TOK_RETURN) { + advance(p); + // void return + if (match(p, TOK_SEMICOLON)) { + return ast_make_return(p->arena, NULL, t->line, t->col); + } + AstNode* expr = parse_expr(p, error); + if (!expr) return NULL; + if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL; + return ast_make_return(p->arena, expr, t->line, t->col); + } + + // 表达式语句 + if (peek(p)->kind == TOK_IDENT && peek_n(p, 1)->kind == TOK_LPAREN) { + // 函数调用表达式语句 + AstNode* expr = parse_expr(p, error); + if (!expr) return NULL; + if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL; + return ast_make_expr_stmt(p->arena, expr, t->line, t->col); + } + + // 表达式语句(不常见的非函数调用表达式语句) + AstNode* expr = parse_expr(p, error); + if (!expr) return 
NULL; + if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL; + return ast_make_expr_stmt(p->arena, expr, t->line, t->col); +} + +// === 函数和程序解析 === + +static AstNode* parse_function(Parser* p, ErrorInfo* error) { + const Token* fn_tok = advance(p); // fn + const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名"); + if (!name) return NULL; + if (!expect(p, TOK_LPAREN, error, "缺少 '('")) return NULL; + + // 参数列表 + AstNode* params[64]; int pcount = 0; + while (peek(p)->kind != TOK_RPAREN && !error->message) { + const Token* pname = expect(p, TOK_IDENT, error, "参数名"); + if (!pname) return NULL; + if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL; + const Token* ptype = advance(p); + if (!is_type_token(ptype->kind)) { + error->message = "无效的参数类型"; error->filename = p->filename; + error->line = ptype->line; error->col = ptype->col; return NULL; + } + params[pcount++] = ast_make_parameter(p->arena, + arena_strdup_impl(p->arena, pname->start, pname->length), + token_to_type(ptype->kind), pname->line, pname->col); + if (match(p, TOK_COMMA)) continue; + } + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + + // 返回类型 + TypeKind ret = TYPE_VOID; + if (match(p, TOK_ARROW)) { + const Token* rt = advance(p); + if (!is_type_token(rt->kind)) { + error->message = "无效的返回类型"; error->filename = p->filename; + error->line = rt->line; error->col = rt->col; return NULL; + } + ret = token_to_type(rt->kind); + } + + AstNode* body = parse_block(p, error); + if (!body) return NULL; + + AstNode** parr = arena_alloc_impl(p->arena, pcount * sizeof(AstNode*)); + memcpy(parr, params, pcount * sizeof(AstNode*)); + return ast_make_function(p->arena, + arena_strdup_impl(p->arena, name->start, name->length), + parr, pcount, ret, body, fn_tok->line, fn_tok->col); +} + +AstNode* parse(Arena* a, const Token* tokens, size_t count, + const char* filename, ErrorInfo* error) { + Parser p = {.tokens = tokens, .count = count, .pos = 0, + .filename = filename, .arena = a}; + AstNode* 
functions[256]; int fn_count = 0; + while (peek(&p)->kind != TOK_EOF && !error->message) { + functions[fn_count++] = parse_function(&p, error); + } + if (error->message) return NULL; + AstNode** arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*)); + memcpy(arr, functions, fn_count * sizeof(AstNode*)); + return ast_make_program(a, arr, fn_count, 0, 0); +} +``` + +- [ ] **Step 4: 编写解析器测试 `test/test_parser.c`** + +```c +#include "test_utils.h" +#include "parser.h" +#include "lexer.h" +#include "arena.h" + +static AstNode* parse_string(const char* src, ErrorInfo* error) { + Arena a = arena_create(1); + size_t tcount; + Token* tokens = lex(&a, src, "test", &tcount, error); + if (!tokens) { arena_destroy(&a); return NULL; } + AstNode* ast = parse(&a, tokens, tcount, "test", error); + // Note: arena 必须保持存活直到 AST 不再需要 + return ast; +} + +void test_simple_function() { + ErrorInfo error = {0}; + AstNode* ast = parse_string("fn main() { return 42; }", &error); + ASSERT(ast != NULL); + ASSERT(ast->kind == AST_PROGRAM); + ASSERT(ast->as.program.fn_count == 1); + AstNode* fn = ast->as.program.functions[0]; + ASSERT(fn->kind == AST_FUNCTION); +} + +void test_arithmetic_expr() { + ErrorInfo error = {0}; + AstNode* ast = parse_string("fn main() { return 1 + 2 * 3; }", &error); + ASSERT(ast != NULL); + AstNode* body = ast->as.program.functions[0]->as.function.body; + AstNode* ret = body->as.block.stmts[0]; + ASSERT(ret->kind == AST_RETURN_STMT); + AstNode* expr = ret->as.return_stmt.expr; + ASSERT(expr->kind == AST_BINARY_EXPR); + ASSERT(expr->as.binary.op == OP_ADD); +} + +void test_if_statement() { + ErrorInfo error = {0}; + AstNode* ast = parse_string("fn main() { if true { return 1; } else { return 0; } }", &error); + ASSERT(ast != NULL); +} + +int main(void) { + TEST_RUN(test_simple_function); + TEST_RUN(test_arithmetic_expr); + TEST_RUN(test_if_statement); + return test_summary(); +} +``` + +- [ ] **Step 5: 构建并运行解析测试** + +```bash +cd "D:\Code\doing_exercises\programs\L 
Language\build" +mingw32-make -j4 +./l_lang_test.exe +``` + +Expected: 词法 3 个 + 解析 3 个 = 6 个测试 PASS。 + +--- + +### Task 6: 语义分析 — 符号表 + +**Files:** +- Create: `src/sema/symbol.h`, `src/sema/symbol.c` + +- [ ] **Step 1: 编写 `src/sema/symbol.h`** + +```c +#ifndef SYMBOL_H +#define SYMBOL_H + +#include "l_lang.h" +#include "ast.h" + +typedef enum { SYM_VARIABLE, SYM_PARAMETER, SYM_FUNCTION } SymbolKind; + +typedef struct Symbol { + const char* name; + SymbolKind kind; + TypeKind type; // 变量/参数的类型 + // 函数特有 + TypeKind return_type; + TypeKind* param_types; + size_t param_count; + // 链表(同一作用域内的下一个符号) + struct Symbol* next; +} Symbol; + +typedef struct Scope { + Symbol* head; // 符号链表头 + struct Scope* parent; // 上级作用域 +} Scope; + +// 创建新作用域(子作用域) +Scope* scope_new(void* alloc, Scope* parent); + +// 在当前作用域及其父作用域中查找符号 +Symbol* scope_lookup(const Scope* scope, const char* name); + +// 在当前作用域中插入符号(重复插入返回 NULL) +Symbol* scope_insert(Scope* scope, void* alloc, const char* name, + SymbolKind kind, TypeKind type); + +// 插入函数符号 +Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name, + TypeKind ret, TypeKind* pt, size_t pc); + +#endif +``` + +- [ ] **Step 2: 实现 `src/sema/symbol.c`** + +```c +#include "symbol.h" +#include + +Scope* scope_new(void* alloc, Scope* parent) { + Scope* s = (Scope*)arena_alloc_impl(alloc, sizeof(Scope)); + s->head = NULL; + s->parent = parent; + return s; +} + +Symbol* scope_lookup(const Scope* scope, const char* name) { + for (const Scope* s = scope; s; s = s->parent) { + for (Symbol* sym = s->head; sym; sym = sym->next) { + if (strcmp(sym->name, name) == 0) return sym; + } + } + return NULL; +} + +Symbol* scope_insert(Scope* scope, void* alloc, const char* name, + SymbolKind kind, TypeKind type) { + if (scope_lookup(scope, name)) return NULL; // 检查当前 scope 链 + Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol)); + sym->name = name; sym->kind = kind; sym->type = type; + sym->next = scope->head; + scope->head = sym; + return 
sym; +} + +Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name, + TypeKind ret, TypeKind* pt, size_t pc) { + if (scope_lookup(scope, name)) return NULL; + Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol)); + sym->name = name; sym->kind = SYM_FUNCTION; sym->type = TYPE_VOID; + sym->return_type = ret; sym->param_types = pt; sym->param_count = pc; + sym->next = scope->head; + scope->head = sym; + return sym; +} +``` + +--- + +### Task 7: 语义分析 — 类型推断和检查 + +**Files:** +- Create: `src/sema/sema.h`, `src/sema/sema.c` + +- [ ] **Step 1: 编写 `src/sema/sema.h`** + +```c +#ifndef SEMA_H +#define SEMA_H + +#include "ast.h" +#include "error.h" +#include "symbol.h" + +// 对 AST 进行语义分析(类型推断 + 类型检查) +// 为每个节点填充 type 字段,错误收集到 errors 列表中。 +// 参数 arena 用于作用域分配。 +void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena); + +#endif +``` + +- [ ] **Step 2: 实现语义分析 `src/sema/sema.c`** + +```c +#include "sema.h" +#include + +// === 类型关系 === +static TypeKind promote(TypeKind a, TypeKind b) { + if (a == TYPE_F64 || b == TYPE_F64) return TYPE_F64; + if (a == TYPE_I64 || b == TYPE_I64) return TYPE_I64; + if (a == TYPE_BOOL || b == TYPE_BOOL) return TYPE_BOOL; + return TYPE_ERROR; +} + +static bool is_numeric(TypeKind t) { return t == TYPE_I64 || t == TYPE_F64; } +static bool is_comparable(TypeKind a, TypeKind b) { return a == b; } + +// === 向前声明 === +static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a); + +// === 检查单个节点 === +static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) { + switch (node->kind) { + case AST_LITERAL_EXPR: + // 类型已在创建时设置,无需处理 + break; + + case AST_IDENT_EXPR: { + Symbol* sym = scope_lookup(scope, node->as.ident.name); + if (!sym) { + error_add(errors, "", node->line, node->col, + "未定义的变量 '%s'", node->as.ident.name); + node->type.kind = TYPE_ERROR; + } else { + node->type.kind = sym->type; + } + break; + } + + case AST_UNARY_EXPR: { + analyze_expr(node->as.unary.operand, 
scope, errors, a); + TypeKind inner = node->as.unary.operand->type.kind; + if (node->as.unary.op == OP_NEG && !is_numeric(inner)) { + error_add(errors, "", node->line, node->col, + "一元 '-' 只能用于数值类型"); + node->type.kind = TYPE_ERROR; + } else if (node->as.unary.op == OP_NOT && inner != TYPE_BOOL) { + error_add(errors, "", node->line, node->col, + "'!' 只能用于布尔类型"); + node->type.kind = TYPE_ERROR; + } else { + node->type.kind = inner; + } + break; + } + + case AST_BINARY_EXPR: { + analyze_expr(node->as.binary.left, scope, errors, a); + analyze_expr(node->as.binary.right, scope, errors, a); + TypeKind l = node->as.binary.left->type.kind; + TypeKind r = node->as.binary.right->type.kind; + + switch (node->as.binary.op) { + case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD: + if (!is_numeric(l) || !is_numeric(r)) { + error_add(errors, "", node->line, node->col, + "算术运算需要数值类型"); + node->type.kind = TYPE_ERROR; + } else { + node->type.kind = promote(l, r); + } + break; + case OP_EQ: case OP_NE: case OP_LT: case OP_GT: case OP_LE: case OP_GE: + if (!is_comparable(l, r)) { + error_add(errors, "", node->line, node->col, + "类型 '%s' 和 '%s' 无法比较", type_name(l), type_name(r)); + node->type.kind = TYPE_ERROR; + } else { + node->type.kind = TYPE_BOOL; + } + break; + case OP_AND: case OP_OR: + if (l != TYPE_BOOL || r != TYPE_BOOL) { + error_add(errors, "", node->line, node->col, + "逻辑运算需要布尔类型"); + node->type.kind = TYPE_ERROR; + } else { + node->type.kind = TYPE_BOOL; + } + break; + default: break; + } + break; + } + + case AST_CALL_EXPR: { + Symbol* sym = scope_lookup(scope, node->as.call.name); + if (!sym || sym->kind != SYM_FUNCTION) { + error_add(errors, "", node->line, node->col, + "未定义的函数 '%s'", node->as.call.name); + node->type.kind = TYPE_ERROR; + break; + } + if (node->as.call.arg_count != sym->param_count) { + error_add(errors, "", node->line, node->col, + "函数 '%s' 需要 %zu 个参数,但提供了 %zu 个", + node->as.call.name, sym->param_count, node->as.call.arg_count); + 
node->type.kind = TYPE_ERROR; + break; + } + for (size_t i = 0; i < node->as.call.arg_count; i++) { + analyze_expr(node->as.call.args[i], scope, errors, a); + if (node->as.call.args[i]->type.kind != sym->param_types[i]) { + error_add(errors, "", node->line, node->col, + "参数 %zu 类型不匹配: 期望 '%s',得到 '%s'", + i + 1, type_name(sym->param_types[i]), + type_name(node->as.call.args[i]->type.kind)); + } + } + node->type.kind = sym->return_type; + break; + } + + default: break; + } +} + +static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) { + if (!node) return; + + switch (node->kind) { + case AST_PROGRAM: + // 第一遍:收集所有函数签名 + for (size_t i = 0; i < node->as.program.fn_count; i++) { + AstNode* fn = node->as.program.functions[i]; + TypeKind* pts = (TypeKind*)arena_alloc_impl(a, fn->as.function.param_count * sizeof(TypeKind)); + for (size_t j = 0; j < fn->as.function.param_count; j++) { + pts[j] = fn->as.function.params[j]->as.parameter.type; + } + scope_insert_function(scope, a, fn->as.function.name, + fn->as.function.return_type, pts, + fn->as.function.param_count); + } + // 第二遍:分析每个函数体 + for (size_t i = 0; i < node->as.program.fn_count; i++) { + analyze_node(node->as.program.functions[i], scope, errors, a); + } + break; + + case AST_FUNCTION: { + Scope* fn_scope = scope_new(a, scope); + // 注册参数 + for (size_t i = 0; i < node->as.function.param_count; i++) { + AstNode* p = node->as.function.params[i]; + scope_insert(fn_scope, a, p->as.parameter.name, SYM_PARAMETER, p->as.parameter.type); + } + analyze_node(node->as.function.body, fn_scope, errors, a); + break; + } + + case AST_BLOCK: + for (size_t i = 0; i < node->as.block.stmt_count; i++) { + analyze_node(node->as.block.stmts[i], scope, errors, a); + } + break; + + case AST_LET_STMT: { + analyze_expr(node->as.let_stmt.init, scope, errors, a); + TypeKind inferred = node->as.let_stmt.init->type.kind; + node->type.kind = inferred; + if (!scope_insert(scope, a, node->as.let_stmt.name, SYM_VARIABLE, 
inferred)) { + error_add(errors, "", node->line, node->col, + "变量 '%s' 重复定义", node->as.let_stmt.name); + } + break; + } + + case AST_IF_STMT: + analyze_expr(node->as.if_stmt.cond, scope, errors, a); + if (node->as.if_stmt.cond->type.kind != TYPE_BOOL) { + error_add(errors, "", node->line, node->col, "if 条件必须是布尔类型"); + } + analyze_node(node->as.if_stmt.then_block, scope, errors, a); + if (node->as.if_stmt.else_block) { + analyze_node(node->as.if_stmt.else_block, scope, errors, a); + } + break; + + case AST_WHILE_STMT: + analyze_expr(node->as.while_stmt.cond, scope, errors, a); + if (node->as.while_stmt.cond->type.kind != TYPE_BOOL) { + error_add(errors, "", node->line, node->col, "while 条件必须是布尔类型"); + } + analyze_node(node->as.while_stmt.body, scope, errors, a); + break; + + case AST_RETURN_STMT: + if (node->as.return_stmt.expr) { + analyze_expr(node->as.return_stmt.expr, scope, errors, a); + node->type.kind = node->as.return_stmt.expr->type.kind; + } + break; + + case AST_EXPR_STMT: + analyze_expr(node->as.expr_stmt.expr, scope, errors, a); + break; + + default: + analyze_expr(node, scope, errors, a); + break; + } +} + +void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) { + Scope* global = scope_new(arena, NULL); + + // 注册内置函数 + TypeKind params_i64[] = {TYPE_I64}; + scope_insert_function(global, arena, "print_i64", TYPE_VOID, params_i64, 1); + TypeKind params_f64[] = {TYPE_F64}; + scope_insert_function(global, arena, "print_f64", TYPE_VOID, params_f64, 1); + TypeKind params_bool[] = {TYPE_BOOL}; + scope_insert_function(global, arena, "print_bool", TYPE_VOID, params_bool, 1); + + analyze_node(ast, global, errors, arena); +} +``` + +- [ ] **Step 3: 编写语义分析测试 `test/test_sema.c`** + +```c +#include "test_utils.h" +#include "parser.h" +#include "lexer.h" +#include "sema.h" +#include "arena.h" + +void test_type_error() { + Arena a = arena_create(1); + size_t tc; ErrorInfo lex_err = {0}; + Token* toks = lex(&a, "fn main() { let x = 1; let y = x + true; 
return; }", + "test", &tc, &lex_err); + ASSERT(toks != NULL); + ErrorInfo parse_err = {0}; + AstNode* ast = parse(&a, toks, tc, "test", &parse_err); + ASSERT(ast != NULL); + + ErrorList errors; error_init(&errors); + sema_analyze(ast, &errors, &a); + ASSERT(errors.count > 0); + arena_destroy(&a); +} + +void test_undefined_var() { + Arena a = arena_create(1); + size_t tc; ErrorInfo lex_err = {0}; + Token* toks = lex(&a, "fn main() { let x = y; return; }", "test", &tc, &lex_err); + ASSERT(toks != NULL); + ErrorInfo parse_err = {0}; + AstNode* ast = parse(&a, toks, tc, "test", &parse_err); + ASSERT(ast != NULL); + + ErrorList errors; error_init(&errors); + sema_analyze(ast, &errors, &a); + ASSERT(errors.count > 0); + arena_destroy(&a); +} + +int main(void) { + TEST_RUN(test_type_error); + TEST_RUN(test_undefined_var); + return test_summary(); +} +``` + +- [ ] **Step 4: 构建并运行语义测试** + +```bash +cd "D:\Code\doing_exercises\programs\L Language\build" +mingw32-make -j4 +./l_lang_test.exe +``` + +Expected: 词法 3 + 解析 3 + 语义 2 = 8 个测试 PASS。 + +--- + +### Task 8: LLVM 代码生成 — 基础设施和表达式 + +**Files:** +- Create: `src/codegen/codegen.h`, `src/codegen/codegen.c` + +- [ ] **Step 1: 编写 `src/codegen/codegen.h`** + +```c +#ifndef CODEGEN_H +#define CODEGEN_H + +#include "ast.h" + +// 生成 LLVM Module。模块已 verify,可直接 dump 或写入文件。 +// 出错时返回 NULL 并设置 *error_msg。 +LLVMModuleRef codegen_module(AstNode* ast, const char* module_name, + const char** error_msg); + +#endif +``` + +- [ ] **Step 2: 实现代码生成 `src/codegen/codegen.c`** + +```c +#include "codegen.h" +#include +#include +#include +#include +#include + +// === 内部状态 === +typedef struct { + LLVMModuleRef module; + LLVMBuilderRef builder; + // 符号表:变量名 → alloca 地址 + struct VarEntry { + const char* name; + LLVMValueRef alloca; + struct VarEntry* next; + } *var_table; + const char* error; + // 内置函数声明 + LLVMValueRef fn_print_i64; + LLVMValueRef fn_print_f64; + LLVMValueRef fn_print_bool; + // 已声明的所有 L 函数(名称→LLVMValue) + struct FnEntry { + const char* 
name; + LLVMValueRef fn; + TypeKind ret; + TypeKind* params; + size_t pc; + struct FnEntry* next; + } *fn_table; +} CgCtx; + +// === 类型映射 === +static LLVMTypeRef to_llvm_type(TypeKind kind) { + switch (kind) { + case TYPE_I64: return LLVMInt64Type(); + case TYPE_F64: return LLVMDoubleType(); + case TYPE_BOOL: return LLVMInt1Type(); + default: return LLVMVoidType(); + } +} + +static LLVMValueRef to_llvm_const(LLVMTypeRef ty, AstNode* lit) { + switch (lit->as.literal.lit_type) { + case TYPE_I64: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, true); + case TYPE_F64: return LLVMConstReal(ty, lit->as.literal.f64_val); + case TYPE_BOOL: return LLVMConstInt(ty, lit->as.literal.bool_val ? 1 : 0, false); + default: return NULL; + } +} + +// === 变量表 === +static LLVMValueRef find_var(CgCtx* ctx, const char* name) { + for (struct VarEntry* e = ctx->var_table; e; e = e->next) + if (strcmp(e->name, name) == 0) return e->alloca; + return NULL; +} + +static void add_var(CgCtx* ctx, const char* name, LLVMValueRef alloca) { + struct VarEntry* e = malloc(sizeof(*e)); + e->name = name; e->alloca = alloca; e->next = ctx->var_table; + ctx->var_table = e; +} + +// === 函数表 === +static LLVMValueRef find_fn(CgCtx* ctx, const char* name) { + for (struct FnEntry* e = ctx->fn_table; e; e = e->next) + if (strcmp(e->name, name) == 0) return e->fn; + return NULL; +} + +// === 向前声明 === +static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node); +static void codegen_stmt(CgCtx* ctx, AstNode* node); + +// === 注册内置函数 === +static void register_builtins(CgCtx* ctx) { + // print_i64 + LLVMTypeRef pi64_args[] = {LLVMInt64Type()}; + LLVMTypeRef pi64_ty = LLVMFunctionType(LLVMVoidType(), pi64_args, 1, false); + LLVMValueRef pi64 = LLVMAddFunction(ctx->module, "__builtin_print_i64", pi64_ty); + + // print_f64 + LLVMTypeRef pf64_args[] = {LLVMDoubleType()}; + LLVMTypeRef pf64_ty = LLVMFunctionType(LLVMVoidType(), pf64_args, 1, false); + LLVMValueRef pf64 = LLVMAddFunction(ctx->module, 
"__builtin_print_f64", pf64_ty); + + // print_bool + LLVMTypeRef pb_args[] = {LLVMInt1Type()}; + LLVMTypeRef pb_ty = LLVMFunctionType(LLVMVoidType(), pb_args, 1, false); + LLVMValueRef pb = LLVMAddFunction(ctx->module, "__builtin_print_bool", pb_ty); + + ctx->fn_print_i64 = pi64; + ctx->fn_print_f64 = pf64; + ctx->fn_print_bool = pb; +} + +// === 表达式代码生成 === +static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { + switch (node->kind) { + case AST_LITERAL_EXPR: + return to_llvm_const(to_llvm_type(node->type.kind), node); + + case AST_IDENT_EXPR: { + LLVMValueRef ptr = find_var(ctx, node->as.ident.name); + return LLVMBuildLoad2(ctx->builder, to_llvm_type(node->type.kind), ptr, "load"); + } + + case AST_UNARY_EXPR: { + LLVMValueRef operand = codegen_expr(ctx, node->as.unary.operand); + if (node->as.unary.op == OP_NEG) { + if (node->type.kind == TYPE_F64) + return LLVMBuildFNeg(ctx->builder, operand, "fneg"); + else + return LLVMBuildNeg(ctx->builder, operand, "ineg"); + } else { + return LLVMBuildNot(ctx->builder, operand, "not"); + } + } + + case AST_BINARY_EXPR: { + LLVMValueRef l = codegen_expr(ctx, node->as.binary.left); + LLVMValueRef r = codegen_expr(ctx, node->as.binary.right); + bool is_float = node->type.kind == TYPE_F64; + #define B(op_name, iop, fop) \ + if (is_float) return LLVMBuild##fop(ctx->builder, l, r, op_name); \ + else return LLVMBuild##iop(ctx->builder, l, r, op_name) + + switch (node->as.binary.op) { + case OP_ADD: B("add", Add, FAdd); + case OP_SUB: B("sub", Sub, FSub); + case OP_MUL: B("mul", Mul, FMul); + case OP_DIV: + if (is_float) return LLVMBuildFDiv(ctx->builder, l, r, "fdiv"); + else return LLVMBuildSDiv(ctx->builder, l, r, "sdiv"); + case OP_MOD: + return LLVMBuildSRem(ctx->builder, l, r, "srem"); + case OP_EQ: B("eq", ICmp, FCmp); + case OP_NE: B("ne", ICmp, FCmp); + case OP_LT: B("lt", ICmp, FCmp); + case OP_GT: B("gt", ICmp, FCmp); + case OP_LE: B("le", ICmp, FCmp); + case OP_GE: B("ge", ICmp, FCmp); + case OP_AND: + return 
LLVMBuildAnd(ctx->builder, l, r, "and"); + case OP_OR: + return LLVMBuildOr(ctx->builder, l, r, "or"); + default: return NULL; + } + #undef B + } + + case AST_CALL_EXPR: { + LLVMValueRef fn = find_fn(ctx, node->as.call.name); + LLVMValueRef args[32]; + for (size_t i = 0; i < node->as.call.arg_count; i++) { + args[i] = codegen_expr(ctx, node->as.call.args[i]); + } + return LLVMBuildCall2(ctx->builder, + LLVMGetReturnType(LLVMTypeOf(fn)) == LLVMVoidType() + ? LLVMVoidType() : LLVMGlobalGetValueType(fn), + fn, args, (unsigned)node->as.call.arg_count, "call"); + } + + default: return NULL; + } +} + +// === 语句代码生成 === +static void codegen_stmt(CgCtx* ctx, AstNode* node) { + switch (node->kind) { + case AST_LET_STMT: { + LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init); + LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder, + to_llvm_type(node->type.kind), node->as.let_stmt.name); + LLVMBuildStore(ctx->builder, init_val, alloca); + add_var(ctx, node->as.let_stmt.name, alloca); + break; + } + + case AST_EXPR_STMT: + codegen_expr(ctx, node->as.expr_stmt.expr); + break; + + case AST_RETURN_STMT: + if (node->as.return_stmt.expr) { + LLVMValueRef val = codegen_expr(ctx, node->as.return_stmt.expr); + LLVMBuildRet(ctx->builder, val); + } else { + LLVMBuildRetVoid(ctx->builder); + } + break; + + case AST_BLOCK: + for (size_t i = 0; i < node->as.block.stmt_count; i++) { + codegen_stmt(ctx, node->as.block.stmts[i]); + } + break; + + case AST_IF_STMT: { + LLVMValueRef cond = codegen_expr(ctx, node->as.if_stmt.cond); + LLVMBasicBlockRef then_bb = LLVMAppendBasicBlock( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "then"); + LLVMBasicBlockRef else_bb = node->as.if_stmt.else_block + ? 
LLVMAppendBasicBlock(LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "else") + : NULL; + LLVMBasicBlockRef merge_bb = LLVMAppendBasicBlock( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "if_merge"); + + if (else_bb) + LLVMBuildCondBr(ctx->builder, cond, then_bb, else_bb); + else + LLVMBuildCondBr(ctx->builder, cond, then_bb, merge_bb); + + LLVMPositionBuilderAtEnd(ctx->builder, then_bb); + codegen_stmt(ctx, node->as.if_stmt.then_block); + LLVMBuildBr(ctx->builder, merge_bb); + + if (else_bb) { + LLVMPositionBuilderAtEnd(ctx->builder, else_bb); + codegen_stmt(ctx, node->as.if_stmt.else_block); + LLVMBuildBr(ctx->builder, merge_bb); + } + + LLVMPositionBuilderAtEnd(ctx->builder, merge_bb); + break; + } + + case AST_WHILE_STMT: { + LLVMBasicBlockRef cond_bb = LLVMAppendBasicBlock( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "while_cond"); + LLVMBasicBlockRef body_bb = LLVMAppendBasicBlock( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "while_body"); + LLVMBasicBlockRef exit_bb = LLVMAppendBasicBlock( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "while_exit"); + + LLVMBuildBr(ctx->builder, cond_bb); + LLVMPositionBuilderAtEnd(ctx->builder, cond_bb); + LLVMValueRef cond = codegen_expr(ctx, node->as.while_stmt.cond); + LLVMBuildCondBr(ctx->builder, cond, body_bb, exit_bb); + + LLVMPositionBuilderAtEnd(ctx->builder, body_bb); + codegen_stmt(ctx, node->as.while_stmt.body); + LLVMBuildBr(ctx->builder, cond_bb); + + LLVMPositionBuilderAtEnd(ctx->builder, exit_bb); + break; + } + + default: break; + } +} + +// === 程序级代码生成 === +LLVMModuleRef codegen_module(AstNode* ast, const char* name, const char** error_msg) { + CgCtx ctx = {0}; + ctx.module = LLVMModuleCreateWithName(name); + ctx.builder = LLVMCreateBuilder(); + + register_builtins(&ctx); + + // 第一遍:声明所有 L 函数 + for (size_t i = 0; i < ast->as.program.fn_count; i++) { + AstNode* fn = ast->as.program.functions[i]; + LLVMTypeRef* ptypes = 
malloc(fn->as.function.param_count * sizeof(LLVMTypeRef)); + for (size_t j = 0; j < fn->as.function.param_count; j++) + ptypes[j] = to_llvm_type(fn->as.function.params[j]->as.parameter.type); + LLVMTypeRef fty = LLVMFunctionType(to_llvm_type(fn->as.function.return_type), + ptypes, (unsigned)fn->as.function.param_count, false); + LLVMValueRef lfn = LLVMAddFunction(ctx.module, fn->as.function.name, fty); + struct FnEntry* entry = malloc(sizeof(*entry)); + entry->name = fn->as.function.name; entry->fn = lfn; + entry->ret = fn->as.function.return_type; + entry->next = ctx.fn_table; + ctx.fn_table = entry; + free(ptypes); + } + + // 第二遍:生成各函数体 + for (size_t i = 0; i < ast->as.program.fn_count; i++) { + AstNode* fn = ast->as.program.functions[i]; + LLVMValueRef lfn = find_fn(&ctx, fn->as.function.name); + LLVMBasicBlockRef entry = LLVMAppendBasicBlock(lfn, "entry"); + LLVMPositionBuilderAtEnd(ctx.builder, entry); + + // 清空变量表 + ctx.var_table = NULL; + + // 注册参数变量 + for (size_t j = 0; j < fn->as.function.param_count; j++) { + LLVMValueRef param = LLVMGetParam(lfn, (unsigned)j); + LLVMValueRef alloca = LLVMBuildAlloca(ctx.builder, + to_llvm_type(fn->as.function.params[j]->as.parameter.type), + fn->as.function.params[j]->as.parameter.name); + LLVMBuildStore(ctx.builder, param, alloca); + add_var(&ctx, fn->as.function.params[j]->as.parameter.name, alloca); + } + + codegen_stmt(&ctx, fn->as.function.body); + } + + // 验证模块 + char* verify_err = NULL; + LLVMVerifyModule(ctx.module, LLVMPrintMessageAction, &verify_err); + if (verify_err) { + *error_msg = verify_err; + LLVMDisposeBuilder(ctx.builder); + LLVMDisposeModule(ctx.module); + return NULL; + } + + LLVMDisposeBuilder(ctx.builder); + return ctx.module; +} +``` + +- [ ] **Step 3: 更新 CMakeLists.txt 以链接 LLVM 所有必要库** + +- [ ] **Step 4: 更新 CMakeLists.txt 以链接 LLVM 所有必要库** + +```cmake +# 替换原先的 LLVM 链接为: +llvm_map_components_to_libnames(LLVM_LIBS core analysis native) +target_link_libraries(l_lang ${LLVM_LIBS}) 
// Read the whole file at `path` into a freshly malloc'ed buffer.
//
// path - file to read (opened in binary mode)
// size - out (may be NULL): number of bytes read, excluding the terminator
//
// Returns a NUL-terminated buffer the caller must free(), or NULL after
// printing a message to stderr when the file cannot be opened or read.
static char* read_file(const char* path, size_t* size) {
    FILE* f = fopen(path, "rb");
    if (!f) { fprintf(stderr, "无法打开文件: %s\n", path); return NULL; }

    char* buf = NULL;
    long len = -1;

    // Determine the length; ftell reports failure as -1.
    if (fseek(f, 0, SEEK_END) == 0) len = ftell(f);
    if (len < 0 || fseek(f, 0, SEEK_SET) != 0) goto fail;

    buf = malloc((size_t)len + 1);
    if (!buf) goto fail;
    // A short read would leave part of the buffer uninitialised.
    if (fread(buf, 1, (size_t)len, f) != (size_t)len) goto fail;
    buf[len] = '\0';

    fclose(f);
    if (size) *size = (size_t)len;
    return buf;

fail:
    fprintf(stderr, "读取文件失败: %s\n", path);
    free(buf);
    fclose(f);
    return NULL;
}

// Write the NUL-terminated string `data` to `path`, replacing any existing
// file. Returns true only if every byte was written and the file closed
// cleanly (fclose flushes, so its result matters for written files).
static bool write_file(const char* path, const char* data) {
    FILE* f = fopen(path, "w");
    if (!f) return false;
    bool ok = fputs(data, f) != EOF;
    if (fclose(f) != 0) ok = false;
    return ok;
}
词法分析 + size_t token_count; + Token* tokens = lex(&arena, source, input, &token_count, &error); + if (!tokens) { + fprintf(stderr, "词法错误: %s:%d:%d: %s\n", + error.filename, error.line, error.col, error.message); + free(source); arena_destroy(&arena); + return 1; + } + + // 4. 语法分析 + AstNode* ast = parse(&arena, tokens, token_count, input, &error); + if (!ast) { + fprintf(stderr, "语法错误: %s:%d:%d: %s\n", + error.filename, error.line, error.col, error.message); + free(source); arena_destroy(&arena); + return 1; + } + + // 5. 语义分析 + sema_analyze(ast, &error_list, &arena); + if (error_list.count > 0) { + error_print(&error_list); + free(source); arena_destroy(&arena); + return 1; + } + + // 6. LLVM IR 生成 + const char* codegen_error = NULL; + LLVMModuleRef module = codegen_module(ast, "l_module", &codegen_error); + if (!module) { + fprintf(stderr, "IR 生成错误: %s\n", codegen_error); + free(source); arena_destroy(&arena); + return 1; + } + + if (emit_ir) { + // 输出 LLVM IR 文本 + char* ir = LLVMPrintModuleToString(module); + char ir_path[512]; + snprintf(ir_path, sizeof(ir_path), "%s.ll", input); + write_file(ir_path, ir); + printf("IR 已输出到: %s\n", ir_path); + LLVMDisposeMessage(ir); + } else { + // 生成目标文件和可执行文件 + LLVMInitializeNativeTarget(); + LLVMInitializeNativeAsmPrinter(); + + char* triple = LLVMGetDefaultTargetTriple(); + LLVMTargetRef target; + char* target_error = NULL; + if (LLVMGetTargetFromTriple(triple, &target, &target_error)) { + fprintf(stderr, "目标平台错误: %s\n", target_error); + LLVMDisposeMessage(target_error); LLVMDisposeMessage(triple); + free(source); arena_destroy(&arena); LLVMDisposeModule(module); + return 1; + } + + LLVMTargetMachineRef tm = LLVMCreateTargetMachine( + target, triple, "generic", "", + LLVMCodeGenLevelDefault, LLVMRelocDefault, + LLVMCodeModelDefault); + LLVMDisposeMessage(triple); + + // 输出目标文件 + char obj_path[512]; + snprintf(obj_path, sizeof(obj_path), "%s.o", input); + char* obj_error = NULL; + if (LLVMTargetMachineEmitToFile(tm, module, 
obj_path, + LLVMObjectFile, &obj_error)) { + fprintf(stderr, "目标代码生成错误: %s\n", obj_error); + LLVMDisposeMessage(obj_error); + free(source); arena_destroy(&arena); + LLVMDisposeTargetMachine(tm); LLVMDisposeModule(module); + return 1; + } + + // 调用 clang 链接 + char cmd[1024]; + snprintf(cmd, sizeof(cmd), + "\"D:\\settings\\Language\\LLVM\\bin\\clang.exe\" \"%s\" -o \"%s\" -fuse-ld=lld", + obj_path, output); + int ret = system(cmd); + if (ret != 0) { + fprintf(stderr, "链接失败 (exit code %d)\n", ret); + } else { + printf("编译成功: %s\n", output); + } + LLVMDisposeTargetMachine(tm); + } + + // 清理 + LLVMDisposeModule(module); + free(source); + arena_destroy(&arena); + return 0; +} +``` + +- [ ] **Step 2: 编写第一个测试程序 `test/programs/01_arithmetic.l`** + +```rust +fn main() -> i64 { + let x = 1 + 2 * 3; + print_i64(x); + return 0; +} +``` + +- [ ] **Step 3: 构建并端到端测试** + +```bash +cd "D:\Code\doing_exercises\programs\L Language\build" +mingw32-make -j4 +./l_lang.exe ../test/programs/01_arithmetic.l -o test_out.exe +./test_out.exe +``` + +Expected: 输出 `7`。 + +--- + +### Task 10: 集成测试 — 全部语言特性 + +**Files:** +- Create: `test/programs/02_if_else.l` +- Create: `test/programs/03_while.l` +- Create: `test/programs/04_fib_recursive.l` +- Create: `test/programs/05_fib_iterative.l` + +- [ ] **Step 1: `test/programs/02_if_else.l` — if/else 控制流** + +```rust +fn main() -> i64 { + let x = 10; + if x > 5 { + print_i64(1); + } else { + print_i64(0); + } + return 0; +} +``` + +Expected output: `1` + +- [ ] **Step 2: `test/programs/03_while.l` — while 循环 + 比较运算** + +```rust +fn countdown(n: i64) -> i64 { + let remaining = n; + if remaining > 0 { + print_i64(remaining); + return countdown(remaining - 1); + } + return 0; +} + +fn main() -> i64 { + print_i64(100); + return countdown(5); +} +``` + +Expected output: `100` `5` `4` `3` `2` `1`(每行一个,测试函数递归 + if/else) + +- [ ] **Step 3: `test/programs/04_fib_recursive.l` — 斐波那契递归** + +```rust +fn fib(n: i64) -> i64 { + if n < 2 { + return n; + } + return 
fib(n - 1) + fib(n - 2); +} + +fn main() -> i64 { + let result = fib(10); + print_i64(result); + return 0; +} +``` + +Expected output: `55` + +- [ ] **Step 4: `test/programs/05_float.l` — 浮点运算 + 多函数** + +```rust +fn square(x: f64) -> f64 { + return x * x; +} + +fn add_floats(a: f64, b: f64) -> f64 { + return a + b; +} + +fn main() -> i64 { + let s = square(3.0); + let sum = add_floats(s, 4.0); + print_f64(sum); + return 0; +} +``` + +Expected output: `13.000000`(3.0² + 4.0 = 13.0,测试浮点运算 + 多函数调用) + +- [ ] **Step 5: 批量运行所有集成测试** + +```bash +cd "D:\Code\doing_exercises\programs\L Language" +for f in test/programs/01_arithmetic.l test/programs/02_if_else.l \ + test/programs/03_while.l test/programs/04_fib_recursive.l \ + test/programs/05_float.l; do + echo "=== $f ===" + ./build/l_lang.exe "$f" -o "./build/test_out.exe" 2>&1 && ./build/test_out.exe 2>&1 + echo "" +done +``` + +Expected: 5 个程序全部编译运行,输出正确——四则运算 `7`,if/else `1`,递归 `5 4 3 2 1`,斐波那契 `55`,浮点 `13.000000`。 + +--- + +### Task 11: 完成和文档 + +**Files:** +- Create: `README.md` + +- [ ] **Step 1: 编写 `README.md`** + +```markdown +# L Language + +一门用 C 语言实现的编译型编程语言,静态类型 + 类型推断,Rust 风格语法。 + +## 构建 + +```bash +mkdir build && cd build +cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:/settings/Language/LLVM" +mingw32-make -j4 +``` + +## 使用 + +```bash +./l_lang.exe <源文件.l> [-o <输出.exe>] [--emit-ir] +``` + +## 语言特性 (v0.1) + +- 类型: `i64`, `f64`, `bool`, `void` +- 控制流: `if/else`, `while` +- 函数: 递归 + 多参数 +- 变量: `let` 不可变声明,类型推断 +- 内置函数: `print_i64`, `print_f64`, `print_bool` + +## 示例 + +```rust +fn fib(n: i64) -> i64 { + if n < 2 { return n; } + return fib(n - 1) + fib(n - 2); +} + +fn main() -> i64 { + print_i64(fib(10)); + return 0; +} +``` +``` + +- [ ] **Step 2: 最终验证** + +```bash +cd "D:\Code\doing_exercises\programs\L Language\build" +# 清理重建 +rm -rf * +cmake .. 
#ifndef L_LANG_H
#define L_LANG_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// === Type system ===
typedef enum {
    TYPE_I64,
    TYPE_F64,
    TYPE_BOOL,
    TYPE_VOID,
    TYPE_UNKNOWN,   // not inferred yet
    TYPE_ERROR,     // type error
} TypeKind;

// Printable name of a type, for diagnostics. Every kind maps to a distinct,
// non-empty string so messages never show an empty type name.
static inline const char* type_name(TypeKind kind) {
    switch (kind) {
        case TYPE_I64:     return "i64";
        case TYPE_F64:     return "f64";
        case TYPE_BOOL:    return "bool";
        case TYPE_VOID:    return "void";
        case TYPE_UNKNOWN: return "<unknown>";
        case TYPE_ERROR:   return "<error>";
        default:           return "<invalid>";
    }
}

// === Forward declarations ===
typedef struct Token Token;
typedef struct AstNode AstNode;
typedef struct Scope Scope;
typedef struct Arena Arena;

// === Cross-module allocator interface ===
// Modules use the arena through void* to avoid circular header dependencies.
void* arena_alloc_impl(void* alloc, size_t size);
char* arena_strdup_impl(void* alloc, const char* src, size_t len);

#endif
k) \ + AstNode* n = (AstNode*)arena_alloc_impl(alloc, sizeof(AstNode)); \ + n->kind = (k); n->type.kind = TYPE_UNKNOWN; \ + n->line = line; n->col = col + +AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col) { + NEW(alloc, AST_PROGRAM); + n->as.program.functions = fns; + n->as.program.fn_count = count; + return n; +} + +AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount, + TypeKind ret, AstNode* body, int line, int col) { + NEW(alloc, AST_FUNCTION); + n->as.function.name = name; n->as.function.params = params; + n->as.function.param_count = pcount; n->as.function.return_type = ret; + n->as.function.body = body; + return n; +} + +AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col) { + NEW(alloc, AST_PARAMETER); + n->as.parameter.name = name; n->as.parameter.type = type; + return n; +} + +AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col) { + NEW(alloc, AST_BLOCK); + n->as.block.stmts = stmts; n->as.block.stmt_count = count; + return n; +} + +AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, AstNode* init, int line, int col) { + NEW(alloc, AST_LET_STMT); + n->as.let_stmt.name = name; n->as.let_stmt.annot_type = annot_type; + n->as.let_stmt.has_type_annot = has_type_annot; n->as.let_stmt.init = init; + return n; +} + +AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col) { + NEW(alloc, AST_IF_STMT); + n->as.if_stmt.cond = cond; n->as.if_stmt.then_block = then_b; + n->as.if_stmt.else_block = else_b; + return n; +} + +AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col) { + NEW(alloc, AST_WHILE_STMT); + n->as.while_stmt.cond = cond; n->as.while_stmt.body = body; + return n; +} + +AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col) { + NEW(alloc, AST_RETURN_STMT); + 
n->as.return_stmt.expr = expr; + return n; +} + +AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col) { + NEW(alloc, AST_EXPR_STMT); + n->as.expr_stmt.expr = expr; + return n; +} + +AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col) { + NEW(alloc, AST_BINARY_EXPR); + n->as.binary.op = op; n->as.binary.left = left; n->as.binary.right = right; + return n; +} + +AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col) { + NEW(alloc, AST_UNARY_EXPR); + n->as.unary.op = op; n->as.unary.operand = operand; + return n; +} + +AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col) { + NEW(alloc, AST_CALL_EXPR); + n->as.call.name = name; n->as.call.args = args; n->as.call.arg_count = count; + return n; +} + +AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_I64; n->as.literal.i64_val = val; + n->type.kind = TYPE_I64; + return n; +} + +AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_F64; n->as.literal.f64_val = val; + n->type.kind = TYPE_F64; + return n; +} + +AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_BOOL; n->as.literal.bool_val = val; + n->type.kind = TYPE_BOOL; + return n; +} + +AstNode* ast_make_ident(void* alloc, const char* name, int line, int col) { + NEW(alloc, AST_IDENT_EXPR); + n->as.ident.name = name; + return n; +} diff --git a/src/ast/ast.h b/src/ast/ast.h new file mode 100644 index 0000000..fe1dc43 --- /dev/null +++ b/src/ast/ast.h @@ -0,0 +1,96 @@ +#ifndef AST_H +#define AST_H + +#include "l_lang.h" +#include + +typedef enum { + AST_PROGRAM, + AST_FUNCTION, + AST_PARAMETER, + AST_BLOCK, + AST_LET_STMT, + AST_IF_STMT, + AST_WHILE_STMT, + 
AST_RETURN_STMT, + AST_EXPR_STMT, + AST_BINARY_EXPR, + AST_UNARY_EXPR, + AST_CALL_EXPR, + AST_LITERAL_EXPR, + AST_IDENT_EXPR, +} AstKind; + +typedef enum { + OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, + OP_EQ, OP_NE, OP_LT, OP_GT, OP_LE, OP_GE, + OP_AND, OP_OR, + OP_NEG, OP_NOT, +} BinaryOp; + +// 类型信息(语义分析阶段填充) +typedef struct { + TypeKind kind; +} TypeInfo; + +// AST 节点 +struct AstNode { + AstKind kind; + TypeInfo type; // 语义分析后填充,默认为 TYPE_UNKNOWN + int line; // 源文件行号 + int col; // 源文件列号 + + // 节点特有数据(按 kind 解释) + union { + // AST_PROGRAM + struct { struct AstNode** functions; size_t fn_count; } program; + // AST_FUNCTION + struct { const char* name; struct AstNode** params; size_t param_count; + TypeKind return_type; struct AstNode* body; } function; + // AST_PARAMETER + struct { const char* name; TypeKind type; } parameter; + // AST_BLOCK + struct { struct AstNode** stmts; size_t stmt_count; } block; + // AST_LET_STMT + struct { const char* name; TypeKind annot_type; bool has_type_annot; struct AstNode* init; } let_stmt; + // AST_IF_STMT + struct { struct AstNode* cond; struct AstNode* then_block; struct AstNode* else_block; } if_stmt; + // AST_WHILE_STMT + struct { struct AstNode* cond; struct AstNode* body; } while_stmt; + // AST_RETURN_STMT + struct { struct AstNode* expr; } return_stmt; + // AST_EXPR_STMT + struct { struct AstNode* expr; } expr_stmt; + // AST_BINARY_EXPR + struct { BinaryOp op; struct AstNode* left; struct AstNode* right; } binary; + // AST_UNARY_EXPR + struct { BinaryOp op; struct AstNode* operand; } unary; + // AST_CALL_EXPR + struct { const char* name; struct AstNode** args; size_t arg_count; } call; + // AST_LITERAL_EXPR + struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; }; } literal; + // AST_IDENT_EXPR + struct { const char* name; } ident; + } as; +}; + +// 创建节点的辅助函数(内存来自 arena,通过 void* 传递避免循环依赖) +AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col); +AstNode* 
ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount, + TypeKind ret, AstNode* body, int line, int col); +AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col); +AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col); +AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, AstNode* init, int line, int col); +AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col); +AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col); +AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col); +AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col); +AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col); +AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col); +AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col); +AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col); +AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col); +AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col); +AstNode* ast_make_ident(void* alloc, const char* name, int line, int col); + +#endif diff --git a/src/codegen/codegen.c b/src/codegen/codegen.c new file mode 100644 index 0000000..6ede560 --- /dev/null +++ b/src/codegen/codegen.c @@ -0,0 +1,383 @@ +#include "codegen.h" +#include +#include +#include +#include +#include + +// === 内部状态 === +typedef struct VarEntry { + const char* name; + LLVMValueRef alloca; + struct VarEntry* next; +} VarEntry; + +typedef struct FnEntry { + const char* name; + LLVMValueRef fn; + TypeKind ret; + TypeKind* params; + size_t pc; + struct FnEntry* next; +} FnEntry; + +typedef struct { + LLVMContextRef context; // LLVM 19+ 需要显式 Context + 
LLVMModuleRef module; + LLVMBuilderRef builder; + VarEntry* var_table; + const char* error; + FnEntry* fn_table; + // printf 运行时支持(内置 print 函数委托给 printf) + LLVMValueRef printf_fn; + LLVMTypeRef printf_ty; +} CgCtx; + +// === 类型映射(需要 Context)=== +static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) { + switch (kind) { + case TYPE_I64: return LLVMInt64TypeInContext(ctx->context); + case TYPE_F64: return LLVMDoubleTypeInContext(ctx->context); + case TYPE_BOOL: return LLVMInt1TypeInContext(ctx->context); + default: return LLVMVoidTypeInContext(ctx->context); + } +} + +static LLVMValueRef to_llvm_const(LLVMTypeRef ty, AstNode* lit) { + switch (lit->as.literal.lit_type) { + case TYPE_I64: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, true); + case TYPE_F64: return LLVMConstReal(ty, lit->as.literal.f64_val); + case TYPE_BOOL: return LLVMConstInt(ty, lit->as.literal.bool_val ? 1 : 0, false); + default: return NULL; + } +} + +// === 变量表 === +static LLVMValueRef find_var(CgCtx* ctx, const char* name) { + for (VarEntry* e = ctx->var_table; e; e = e->next) + if (strcmp(e->name, name) == 0) return e->alloca; + return NULL; +} + +static void add_var(CgCtx* ctx, const char* name, LLVMValueRef alloca) { + VarEntry* e = malloc(sizeof(*e)); + e->name = name; e->alloca = alloca; e->next = ctx->var_table; + ctx->var_table = e; +} + +// === 函数表 === +static LLVMValueRef find_fn(CgCtx* ctx, const char* name) { + for (FnEntry* e = ctx->fn_table; e; e = e->next) + if (strcmp(e->name, name) == 0) return e->fn; + return NULL; +} + +static void add_fn(CgCtx* ctx, const char* name, LLVMValueRef fn) { + FnEntry* e = malloc(sizeof(*e)); + e->name = name; e->fn = fn; + e->ret = TYPE_VOID; + e->params = NULL; + e->pc = 0; + e->next = ctx->fn_table; + ctx->fn_table = e; +} + +// === 向前声明 === +static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node); +static void codegen_stmt(CgCtx* ctx, AstNode* node); + +// === 表达式代码生成 === +static LLVMValueRef codegen_expr(CgCtx* 
ctx, AstNode* node) { + if (!node) return NULL; + + switch (node->kind) { + case AST_LITERAL_EXPR: + return to_llvm_const(to_llvm_type(ctx, node->type.kind), node); + + case AST_IDENT_EXPR: { + LLVMValueRef ptr = find_var(ctx, node->as.ident.name); + if (!ptr) return NULL; + return LLVMBuildLoad2(ctx->builder, to_llvm_type(ctx, node->type.kind), ptr, "load"); + } + + case AST_UNARY_EXPR: { + LLVMValueRef operand = codegen_expr(ctx, node->as.unary.operand); + if (!operand) return NULL; + if (node->as.unary.op == OP_NEG) { + if (node->type.kind == TYPE_F64) + return LLVMBuildFNeg(ctx->builder, operand, "fneg"); + else + return LLVMBuildNeg(ctx->builder, operand, "ineg"); + } else { + return LLVMBuildNot(ctx->builder, operand, "not"); + } + } + + case AST_BINARY_EXPR: { + LLVMValueRef l = codegen_expr(ctx, node->as.binary.left); + LLVMValueRef r = codegen_expr(ctx, node->as.binary.right); + if (!l || !r) return NULL; + bool is_float = (node->type.kind == TYPE_F64); + + switch (node->as.binary.op) { + case OP_ADD: + return is_float ? LLVMBuildFAdd(ctx->builder, l, r, "fadd") + : LLVMBuildAdd(ctx->builder, l, r, "iadd"); + case OP_SUB: + return is_float ? LLVMBuildFSub(ctx->builder, l, r, "fsub") + : LLVMBuildSub(ctx->builder, l, r, "isub"); + case OP_MUL: + return is_float ? LLVMBuildFMul(ctx->builder, l, r, "fmul") + : LLVMBuildMul(ctx->builder, l, r, "imul"); + case OP_DIV: + return is_float ? LLVMBuildFDiv(ctx->builder, l, r, "fdiv") + : LLVMBuildSDiv(ctx->builder, l, r, "sdiv"); + case OP_MOD: + return LLVMBuildSRem(ctx->builder, l, r, "srem"); + case OP_EQ: + return is_float ? LLVMBuildFCmp(ctx->builder, LLVMRealOEQ, l, r, "feq") + : LLVMBuildICmp(ctx->builder, LLVMIntEQ, l, r, "ieq"); + case OP_NE: + return is_float ? LLVMBuildFCmp(ctx->builder, LLVMRealONE, l, r, "fne") + : LLVMBuildICmp(ctx->builder, LLVMIntNE, l, r, "ine"); + case OP_LT: + return is_float ? 
LLVMBuildFCmp(ctx->builder, LLVMRealOLT, l, r, "flt") + : LLVMBuildICmp(ctx->builder, LLVMIntSLT, l, r, "ilt"); + case OP_GT: + return is_float ? LLVMBuildFCmp(ctx->builder, LLVMRealOGT, l, r, "fgt") + : LLVMBuildICmp(ctx->builder, LLVMIntSGT, l, r, "igt"); + case OP_LE: + return is_float ? LLVMBuildFCmp(ctx->builder, LLVMRealOLE, l, r, "fle") + : LLVMBuildICmp(ctx->builder, LLVMIntSLE, l, r, "ile"); + case OP_GE: + return is_float ? LLVMBuildFCmp(ctx->builder, LLVMRealOGE, l, r, "fge") + : LLVMBuildICmp(ctx->builder, LLVMIntSGE, l, r, "ige"); + case OP_AND: + return LLVMBuildAnd(ctx->builder, l, r, "and"); + case OP_OR: + return LLVMBuildOr(ctx->builder, l, r, "or"); + default: + return NULL; + } + } + + case AST_CALL_EXPR: { + // === 内置 print 函数:委托给 printf === + if (strcmp(node->as.call.name, "print_i64") == 0) { + LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]); + if (!arg) return NULL; + LLVMValueRef fmt = LLVMBuildGlobalStringPtr(ctx->builder, "%lld\n", "fmt_i64"); + LLVMValueRef printf_args[] = { fmt, arg }; + return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn, + printf_args, 2, ""); + } + if (strcmp(node->as.call.name, "print_f64") == 0) { + LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]); + if (!arg) return NULL; + LLVMValueRef fmt = LLVMBuildGlobalStringPtr(ctx->builder, "%f\n", "fmt_f64"); + LLVMValueRef printf_args[] = { fmt, arg }; + return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn, + printf_args, 2, ""); + } + if (strcmp(node->as.call.name, "print_bool") == 0) { + LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]); + if (!arg) return NULL; + // 将 bool 转为字符串:通过 select 在 "true\n" 和 "false\n" 之间选择 + LLVMValueRef c = LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, + LLVMConstInt(LLVMInt1TypeInContext(ctx->context), 1, false), "bool_cmp"); + LLVMValueRef true_str = LLVMBuildGlobalStringPtr(ctx->builder, "true\n", "true_str"); + LLVMValueRef false_str = LLVMBuildGlobalStringPtr(ctx->builder, 
"false\n", "false_str"); + LLVMValueRef selected = LLVMBuildSelect(ctx->builder, c, true_str, false_str, "bool_sel"); + return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn, + (LLVMValueRef[]){selected}, 1, ""); + } + + // === 常规函数调用 === + LLVMValueRef fn = find_fn(ctx, node->as.call.name); + if (!fn) return NULL; + LLVMValueRef args[16]; + for (size_t i = 0; i < node->as.call.arg_count; i++) { + args[i] = codegen_expr(ctx, node->as.call.args[i]); + if (!args[i]) return NULL; + } + LLVMTypeRef fn_ty = LLVMGlobalGetValueType(fn); + LLVMTypeRef ret_ty = LLVMGetReturnType(fn_ty); + return LLVMBuildCall2(ctx->builder, fn_ty, fn, + args, (unsigned)node->as.call.arg_count, + ret_ty == LLVMVoidTypeInContext(ctx->context) ? "" : "call"); + } + + default: + return NULL; + } +} + +// === 语句代码生成 === +static void codegen_stmt(CgCtx* ctx, AstNode* node) { + if (!node) return; + + switch (node->kind) { + case AST_LET_STMT: { + LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init); + if (!init_val) return; + LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder, + to_llvm_type(ctx, node->as.let_stmt.init->type.kind), node->as.let_stmt.name); + LLVMBuildStore(ctx->builder, init_val, alloca); + add_var(ctx, node->as.let_stmt.name, alloca); + break; + } + + case AST_EXPR_STMT: + codegen_expr(ctx, node->as.expr_stmt.expr); + break; + + case AST_RETURN_STMT: + if (node->as.return_stmt.expr) { + LLVMValueRef val = codegen_expr(ctx, node->as.return_stmt.expr); + if (val) LLVMBuildRet(ctx->builder, val); + } else { + LLVMBuildRetVoid(ctx->builder); + } + break; + + case AST_BLOCK: + for (size_t i = 0; i < node->as.block.stmt_count; i++) { + codegen_stmt(ctx, node->as.block.stmts[i]); + } + break; + + case AST_IF_STMT: { + LLVMValueRef cond = codegen_expr(ctx, node->as.if_stmt.cond); + if (!cond) return; + LLVMBasicBlockRef cur_bb = LLVMGetInsertBlock(ctx->builder); + LLVMValueRef cur_fn = LLVMGetBasicBlockParent(cur_bb); + LLVMBasicBlockRef then_bb = 
LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "then"); + LLVMBasicBlockRef else_bb = node->as.if_stmt.else_block + ? LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "else") : NULL; + LLVMBasicBlockRef merge_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "if_merge"); + + if (else_bb) + LLVMBuildCondBr(ctx->builder, cond, then_bb, else_bb); + else + LLVMBuildCondBr(ctx->builder, cond, then_bb, merge_bb); + + LLVMPositionBuilderAtEnd(ctx->builder, then_bb); + codegen_stmt(ctx, node->as.if_stmt.then_block); + if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx->builder))) + LLVMBuildBr(ctx->builder, merge_bb); + + if (else_bb) { + LLVMPositionBuilderAtEnd(ctx->builder, else_bb); + codegen_stmt(ctx, node->as.if_stmt.else_block); + if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx->builder))) + LLVMBuildBr(ctx->builder, merge_bb); + } + + LLVMPositionBuilderAtEnd(ctx->builder, merge_bb); + break; + } + + case AST_WHILE_STMT: { + LLVMBasicBlockRef cur_bb = LLVMGetInsertBlock(ctx->builder); + LLVMValueRef cur_fn = LLVMGetBasicBlockParent(cur_bb); + LLVMBasicBlockRef cond_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "while_cond"); + LLVMBasicBlockRef body_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "while_body"); + LLVMBasicBlockRef exit_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "while_exit"); + + LLVMBuildBr(ctx->builder, cond_bb); + LLVMPositionBuilderAtEnd(ctx->builder, cond_bb); + LLVMValueRef cond = codegen_expr(ctx, node->as.while_stmt.cond); + if (!cond) return; + LLVMBuildCondBr(ctx->builder, cond, body_bb, exit_bb); + + LLVMPositionBuilderAtEnd(ctx->builder, body_bb); + codegen_stmt(ctx, node->as.while_stmt.body); + if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx->builder))) + LLVMBuildBr(ctx->builder, cond_bb); + + LLVMPositionBuilderAtEnd(ctx->builder, exit_bb); + break; + } + + default: + break; + } +} + +// === 程序级代码生成 === +LLVMModuleRef codegen_module(AstNode* ast, const char* name, 
const char** error_msg) { + CgCtx ctx = {0}; + ctx.context = LLVMContextCreate(); + if (!ctx.context) { + *error_msg = "无法创建 LLVM Context"; + return NULL; + } + ctx.module = LLVMModuleCreateWithNameInContext(name, ctx.context); + ctx.builder = LLVMCreateBuilderInContext(ctx.context); + + // 声明 C 标准库 printf(内置 print 函数依赖它) + LLVMTypeRef printf_param_types[] = { + LLVMPointerType(LLVMInt8TypeInContext(ctx.context), 0) + }; + ctx.printf_ty = LLVMFunctionType( + LLVMInt32TypeInContext(ctx.context), printf_param_types, 1, true); + ctx.printf_fn = LLVMAddFunction(ctx.module, "printf", ctx.printf_ty); + + // 第一遍:声明所有 L 函数 + for (size_t i = 0; i < ast->as.program.fn_count; i++) { + AstNode* fn = ast->as.program.functions[i]; + LLVMTypeRef* ptypes = malloc(fn->as.function.param_count * sizeof(LLVMTypeRef)); + for (size_t j = 0; j < fn->as.function.param_count; j++) + ptypes[j] = to_llvm_type(&ctx, fn->as.function.params[j]->as.parameter.type); + LLVMTypeRef fty = LLVMFunctionType( + to_llvm_type(&ctx, fn->as.function.return_type), + ptypes, (unsigned)fn->as.function.param_count, false); + LLVMValueRef lfn = LLVMAddFunction(ctx.module, fn->as.function.name, fty); + add_fn(&ctx, fn->as.function.name, lfn); + free(ptypes); + } + + // 第二遍:生成函数体 + for (size_t i = 0; i < ast->as.program.fn_count; i++) { + AstNode* fn = ast->as.program.functions[i]; + LLVMValueRef lfn = find_fn(&ctx, fn->as.function.name); + LLVMBasicBlockRef entry = LLVMAppendBasicBlockInContext(ctx.context, lfn, "entry"); + LLVMPositionBuilderAtEnd(ctx.builder, entry); + + // 清空变量表(每个函数独立作用域) + ctx.var_table = NULL; + + // 将参数注册为变量 + for (size_t j = 0; j < fn->as.function.param_count; j++) { + LLVMValueRef param = LLVMGetParam(lfn, (unsigned)j); + LLVMValueRef alloca = LLVMBuildAlloca(ctx.builder, + to_llvm_type(&ctx, fn->as.function.params[j]->as.parameter.type), + fn->as.function.params[j]->as.parameter.name); + LLVMBuildStore(ctx.builder, param, alloca); + add_var(&ctx, 
fn->as.function.params[j]->as.parameter.name, alloca); + } + + codegen_stmt(&ctx, fn->as.function.body); + + // 确保函数有终止指令(terminator) + if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx.builder))) { + if (fn->as.function.return_type == TYPE_VOID) + LLVMBuildRetVoid(ctx.builder); + else + LLVMBuildRet(ctx.builder, + LLVMConstInt(to_llvm_type(&ctx, fn->as.function.return_type), 0, false)); + } + } + + // 验证模块(使用 ReturnStatus 以获取完整错误消息) + char* verify_err = NULL; + if (LLVMVerifyModule(ctx.module, LLVMReturnStatusAction, &verify_err)) { + *error_msg = verify_err ? verify_err : "模块验证失败(错误消息为 NULL)"; + LLVMDisposeBuilder(ctx.builder); + LLVMContextDispose(ctx.context); + return NULL; + } + + LLVMDisposeBuilder(ctx.builder); + return ctx.module; +} diff --git a/src/codegen/codegen.h b/src/codegen/codegen.h new file mode 100644 index 0000000..c8aaa4c --- /dev/null +++ b/src/codegen/codegen.h @@ -0,0 +1,12 @@ +#ifndef CODEGEN_H +#define CODEGEN_H + +#include "ast.h" +#include + +// 生成 LLVM Module。模块已 verify,可直接 dump 或写入文件。 +// 出错时返回 NULL 并设置 *error_msg。 +LLVMModuleRef codegen_module(AstNode* ast, const char* module_name, + const char** error_msg); + +#endif diff --git a/src/driver/error.c b/src/driver/error.c new file mode 100644 index 0000000..bce4b4e --- /dev/null +++ b/src/driver/error.c @@ -0,0 +1,49 @@ +#include "error.h" +#include +#include +#include +#include + +void error_init(ErrorList* list) { + list->capacity = 8; + list->errors = malloc(list->capacity * sizeof(ErrorInfo)); + list->count = 0; + if (!list->errors) list->capacity = 0; +} + +void error_add(ErrorList* list, const char* filename, int line, int col, const char* fmt, ...) 
{ + if (!list->errors) return; + if (list->count >= list->capacity) { + size_t new_cap = list->capacity * 2; + ErrorInfo* new_errs = realloc(list->errors, new_cap * sizeof(ErrorInfo)); + if (!new_errs) return; + list->errors = new_errs; + list->capacity = new_cap; + } + char buf[512]; + va_list args; + va_start(args, fmt); + int n = vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + if (n < 0) return; + char* msg = strdup(buf); + char* fname = strdup(filename); + if (!msg || !fname) { + free(msg); free(fname); + return; + } + list->errors[list->count++] = (ErrorInfo){ + .message = msg, + .filename = fname, + .line = line, + .col = col, + }; +} + +void error_print(const ErrorList* list) { + for (size_t i = 0; i < list->count; i++) { + const ErrorInfo* e = &list->errors[i]; + fprintf(stderr, "\033[1;31m错误:\033[0m %s:%d:%d: %s\n", + e->filename, e->line, e->col, e->message); + } +} diff --git a/src/driver/error.h b/src/driver/error.h new file mode 100644 index 0000000..3305fd6 --- /dev/null +++ b/src/driver/error.h @@ -0,0 +1,23 @@ +#ifndef ERROR_H +#define ERROR_H + +#include + +typedef struct { + const char* message; + const char* filename; + int line; + int col; +} ErrorInfo; + +typedef struct { + ErrorInfo* errors; + size_t count; + size_t capacity; +} ErrorList; + +void error_init(ErrorList* list); +void error_add(ErrorList* list, const char* filename, int line, int col, const char* fmt, ...); +void error_print(const ErrorList* list); + +#endif diff --git a/src/driver/main.c b/src/driver/main.c new file mode 100644 index 0000000..79bfcd1 --- /dev/null +++ b/src/driver/main.c @@ -0,0 +1,169 @@ +#include "l_lang.h" +#include "lexer.h" +#include "parser.h" +#include "sema.h" +#include "codegen.h" +#include "error.h" +#include "arena.h" + +#include +#include +#include +#include +#include +#include + +// 读取整个文件到内存 +static char* read_file(const char* path, size_t* size) { + FILE* f = fopen(path, "rb"); + if (!f) { fprintf(stderr, "无法打开文件: %s\n", path); return 
NULL; } + fseek(f, 0, SEEK_END); + *size = ftell(f); + fseek(f, 0, SEEK_SET); + char* buf = malloc(*size + 1); + if (!buf) { fclose(f); return NULL; } + fread(buf, 1, *size, f); + buf[*size] = '\0'; + fclose(f); + return buf; +} + +// 写入字符串到文件 +static bool write_file(const char* path, const char* data) { + FILE* f = fopen(path, "w"); + if (!f) return false; + fputs(data, f); + fclose(f); + return true; +} + +int main(int argc, char** argv) { + const char* input = NULL; + const char* output = "a.exe"; + bool emit_ir = false; + + // 解析命令行参数 + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--emit-ir") == 0) { emit_ir = true; } + else if (strcmp(argv[i], "-o") == 0 && i + 1 < argc) { output = argv[++i]; } + else if (argv[i][0] != '-') { input = argv[i]; } + } + + if (!input) { + fprintf(stderr, "用法: l_lang <文件.l> [-o <输出>] [--emit-ir]\n"); + return 1; + } + + // 1. 读取源文件 + size_t src_size; + char* source = read_file(input, &src_size); + if (!source) return 1; + + // 2. 初始化 + Arena arena = arena_create(8); // 8 MB + if (!arena.memory) { fprintf(stderr, "内存分配失败\n"); free(source); return 1; } + + ErrorInfo error = {0}; + ErrorList error_list; error_init(&error_list); + + // 3. 词法分析 + size_t token_count; + Token* tokens = lex(&arena, source, input, &token_count, &error); + if (!tokens) { + fprintf(stderr, "词法错误: %s:%d:%d: %s\n", + error.filename, error.line, error.col, error.message); + free(source); arena_destroy(&arena); + return 1; + } + + // 4. 语法分析 + AstNode* ast = parse(&arena, tokens, token_count, input, &error); + if (!ast) { + fprintf(stderr, "语法错误: %s:%d:%d: %s\n", + error.filename, error.line, error.col, error.message); + free(source); arena_destroy(&arena); + return 1; + } + + // 5. 语义分析 + sema_analyze(ast, &error_list, &arena); + if (error_list.count > 0) { + error_print(&error_list); + free(source); arena_destroy(&arena); + return 1; + } + + // 6. 
LLVM IR 生成 + const char* codegen_error = NULL; + LLVMModuleRef module = codegen_module(ast, "l_module", &codegen_error); + if (!module) { + fprintf(stderr, "IR 生成错误: %s\n", codegen_error); + free(source); arena_destroy(&arena); + return 1; + } + + if (emit_ir) { + // 输出 LLVM IR 文本 + char* ir = LLVMPrintModuleToString(module); + char ir_path[512]; + snprintf(ir_path, sizeof(ir_path), "%s.ll", input); + write_file(ir_path, ir); + printf("IR 已输出到: %s\n", ir_path); + LLVMDisposeMessage(ir); + } else { + // 初始化 X86 目标(LLVM-C.lib 中没有 InitializeAll 系列符号) + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86Target(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86AsmPrinter(); + LLVMInitializeX86AsmParser(); + + char* triple = LLVMGetDefaultTargetTriple(); + LLVMTargetRef target; + char* target_error = NULL; + if (LLVMGetTargetFromTriple(triple, &target, &target_error)) { + fprintf(stderr, "目标平台错误: %s\n", target_error); + LLVMDisposeMessage(target_error); LLVMDisposeMessage(triple); + free(source); arena_destroy(&arena); LLVMDisposeModule(module); + return 1; + } + + LLVMTargetMachineRef tm = LLVMCreateTargetMachine( + target, triple, "generic", "", + LLVMCodeGenLevelDefault, LLVMRelocDefault, + LLVMCodeModelDefault); + LLVMDisposeMessage(triple); + + // 输出目标文件 + char obj_path[512]; + snprintf(obj_path, sizeof(obj_path), "%s.o", input); + char* obj_error = NULL; + if (LLVMTargetMachineEmitToFile(tm, module, obj_path, + LLVMObjectFile, &obj_error)) { + fprintf(stderr, "目标代码生成错误: %s\n", obj_error); + LLVMDisposeMessage(obj_error); + free(source); arena_destroy(&arena); + LLVMDisposeTargetMachine(tm); LLVMDisposeModule(module); + return 1; + } + + // 调用 gcc 链接(MinGW 环境可用) + char cmd[1024]; + snprintf(cmd, sizeof(cmd), + "gcc \"%s\" -o \"%s\"", + obj_path, output); + int ret = system(cmd); + if (ret != 0) { + fprintf(stderr, "链接失败 (exit code %d)\n", ret); + } else { + printf("编译成功: %s\n", output); + } + LLVMDisposeTargetMachine(tm); + } + + // 清理 + LLVMDisposeModule(module); 
+ free(source); + arena_destroy(&arena); + return 0; +} diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c new file mode 100644 index 0000000..4b7807a --- /dev/null +++ b/src/lexer/lexer.c @@ -0,0 +1,127 @@ +#include "lexer.h" +#include +#include + +typedef struct { + const char* src; + const char* filename; + int pos; + int line; + int col; +} Lexer; + +static char peek(const Lexer* l) { return l->src[l->pos]; } +static char peek_next(const Lexer* l) { return l->src[l->pos + 1]; } +static void advance(Lexer* l) { + if (l->src[l->pos] == '\n') { l->line++; l->col = 1; } + else { l->col++; } + l->pos++; +} +static void skip_whitespace(Lexer* l) { + while (1) { + char c = peek(l); + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { advance(l); continue; } + if (c == '/' && peek_next(l) == '/') { + while (peek(l) != '\n' && peek(l) != '\0') advance(l); + continue; + } + if (c == '/' && peek_next(l) == '*') { + advance(l); advance(l); + while (peek(l) != '\0' && !(peek(l) == '*' && peek_next(l) == '/')) advance(l); + if (peek(l) != '\0') { advance(l); advance(l); } // skip */ + continue; + } + break; + } +} + +static Token make_token(Lexer* l, TokenKind kind, int start_pos, int len) { + Token t = {.kind = kind, .start = l->src + start_pos, + .length = len, .line = l->line, .col = l->col}; + return t; +} + +static Token lex_number(Lexer* l) { + int start = l->pos; + TokenKind kind = TOK_INT_LIT; + while (isdigit(peek(l))) advance(l); + if (peek(l) == '.') { + kind = TOK_FLOAT_LIT; advance(l); + while (isdigit(peek(l))) advance(l); + } + return make_token(l, kind, start, l->pos - start); +} + +static TokenKind check_keyword(const Token* tok) { + #define KW(s, k) if (tok->length == sizeof(s)-1 && memcmp(tok->start, s, sizeof(s)-1) == 0) return k + KW("fn", TOK_FN); KW("let", TOK_LET); + KW("if", TOK_IF); KW("else", TOK_ELSE); + KW("while", TOK_WHILE); KW("return", TOK_RETURN); + KW("i64", TOK_I64); KW("f64", TOK_F64); + KW("bool", TOK_BOOL); KW("void", TOK_VOID); + 
KW("true", TOK_TRUE); KW("false", TOK_FALSE);
+    #undef KW
+    return TOK_IDENT;
+}
+
+// Scans one run of [A-Za-z0-9_] characters and reclassifies it through check_keyword().
+static Token lex_ident_or_keyword(Lexer* l) {
+    int start = l->pos;
+    while (isalnum((unsigned char)peek(l)) || peek(l) == '_') advance(l);
+    Token t = make_token(l, TOK_IDENT, start, l->pos - start);
+    t.kind = check_keyword(&t);
+    return t;
+}
+
+// Tokenizes `source` into an arena-backed array that ends with TOK_EOF (contract in lexer.h).
+Token* lex(Arena* a, const char* source, const char* filename,
+           size_t* count, ErrorInfo* error) {
+    Lexer l = {.src = source, .filename = filename, .pos = 0, .line = 1, .col = 1};
+    *count = 0;  // stays 0 on every failure path
+    // Each branch below consumes at least one byte per token (assumes lex_number() does too), plus EOF.
+    size_t cap = strlen(source) + 1;
+    Token* tokens = arena_alloc(a, cap * sizeof(Token));
+    if (!tokens) { *error = (ErrorInfo){.message = "内存不足", .filename = filename}; return NULL; }
+    size_t idx = 0;
+
+    while (peek(&l) != '\0') {
+        skip_whitespace(&l);
+        if (peek(&l) == '\0') break;
+
+        int line = l.line, col = l.col;
+        unsigned char c = (unsigned char)peek(&l);  // <ctype.h> functions are undefined for negative char values
+        if (isdigit(c)) { tokens[idx++] = lex_number(&l); }
+        else if (isalpha(c) || c == '_') { tokens[idx++] = lex_ident_or_keyword(&l); }
+        else if (c == '+' && peek_next(&l) != '=') { tokens[idx++] = make_token(&l, TOK_PLUS, l.pos, 1); advance(&l); }
+        else if (c == '-' && peek_next(&l) != '>') { tokens[idx++] = make_token(&l, TOK_MINUS, l.pos, 1); advance(&l); }
+        else if (c == '-' && peek_next(&l) == '>') { tokens[idx++] = make_token(&l, TOK_ARROW, l.pos, 2); advance(&l); advance(&l); }
+        else if (c == '*') { tokens[idx++] = make_token(&l, TOK_STAR, l.pos, 1); advance(&l); }
+        else if (c == '/') { tokens[idx++] = make_token(&l, TOK_SLASH, l.pos, 1); advance(&l); }
+        else if (c == '%') { tokens[idx++] = make_token(&l, TOK_PERCENT, l.pos, 1); advance(&l); }
+        else if (c == '=' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_EQ_EQ, l.pos, 2); advance(&l); advance(&l); }
+        else if (c == '=') { tokens[idx++] = make_token(&l, TOK_ASSIGN, l.pos, 1); advance(&l); }
+        else if (c == '!' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_BANG_EQ, l.pos, 2); advance(&l); advance(&l); }
+        else if (c == '!') { tokens[idx++] = make_token(&l, TOK_BANG, l.pos, 1); advance(&l); }
+        else if (c == '<' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_LT_EQ, l.pos, 2); advance(&l); advance(&l); }
+        else if (c == '<') { tokens[idx++] = make_token(&l, TOK_LT, l.pos, 1); advance(&l); }
+        else if (c == '>' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_GT_EQ, l.pos, 2); advance(&l); advance(&l); }
+        else if (c == '>') { tokens[idx++] = make_token(&l, TOK_GT, l.pos, 1); advance(&l); }
+        else if (c == '&' && peek_next(&l) == '&') { tokens[idx++] = make_token(&l, TOK_AND_AND, l.pos, 2); advance(&l); advance(&l); }
+        else if (c == '|' && peek_next(&l) == '|') { tokens[idx++] = make_token(&l, TOK_PIPE_PIPE, l.pos, 2); advance(&l); advance(&l); }
+        else if (c == '(') { tokens[idx++] = make_token(&l, TOK_LPAREN, l.pos, 1); advance(&l); }
+        else if (c == ')') { tokens[idx++] = make_token(&l, TOK_RPAREN, l.pos, 1); advance(&l); }
+        else if (c == '{') { tokens[idx++] = make_token(&l, TOK_LBRACE, l.pos, 1); advance(&l); }
+        else if (c == '}') { tokens[idx++] = make_token(&l, TOK_RBRACE, l.pos, 1); advance(&l); }
+        else if (c == ',') { tokens[idx++] = make_token(&l, TOK_COMMA, l.pos, 1); advance(&l); }
+        else if (c == ':') { tokens[idx++] = make_token(&l, TOK_COLON, l.pos, 1); advance(&l); }
+        else if (c == ';') { tokens[idx++] = make_token(&l, TOK_SEMICOLON, l.pos, 1); advance(&l); }
+        else {
+            *error = (ErrorInfo){.message = "无法识别的字符",
+                                 .filename = filename, .line = line, .col = col};
+            return NULL;
+        }
+    }
+    tokens[idx++] = make_token(&l, TOK_EOF, l.pos, 0);
+    *count = idx;
+    return tokens;
+}
diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h
new file mode 100644
index 0000000..6f6956b
--- /dev/null
+++ b/src/lexer/lexer.h
@@ -0,0 +1,15 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "token.h"
+#include "arena.h"
+#include "error.h"
+
+// Tokenizes `source` and returns the token array (allocated in the arena).
+// The array ends with a TOK_EOF token and *count receives its length.
+// On failure (unrecognized character, arena exhausted) *error is filled in,
+// *count is set to 0 and NULL is returned.
+Token* lex(Arena* a, const char* source, const char* filename,
+           size_t* count, ErrorInfo* error);
+
+#endif
diff --git a/src/lexer/token.c b/src/lexer/token.c
new file mode 100644
index 0000000..49f3ea2
--- /dev/null
+++ b/src/lexer/token.c
@@ -0,0 +1,58 @@
+#include "token.h"
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Display name of each token kind, indexed by the TokenKind value.
+static const char* NAMES[] = {
+    [TOK_FN] = "fn", [TOK_LET] = "let", [TOK_IF] = "if",
+    [TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_RETURN] = "return",
+    [TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_VOID] = "void",
+    [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数",
+    [TOK_TRUE] = "true", [TOK_FALSE] = "false",
+    [TOK_IDENT] = "标识符",
+    [TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*",
+    [TOK_SLASH] = "/", [TOK_PERCENT] = "%",
+    [TOK_EQ_EQ] = "==", [TOK_BANG_EQ] = "!=",
+    [TOK_LT] = "<", [TOK_GT] = ">", [TOK_LT_EQ] = "<=", [TOK_GT_EQ] = ">=",
+    [TOK_AND_AND] = "&&", [TOK_PIPE_PIPE] = "||", [TOK_BANG] = "!",
+    [TOK_ARROW] = "->",
+    [TOK_LPAREN] = "(", [TOK_RPAREN] = ")",
+    [TOK_LBRACE] = "{", [TOK_RBRACE] = "}",
+    [TOK_COMMA] = ",", [TOK_COLON] = ":", [TOK_SEMICOLON] = ";",
+    [TOK_ASSIGN] = "=",
+    [TOK_EOF] = "EOF", [TOK_ERROR] = "错误",
+};
+
+// Returns the display name of `kind`, or "?" when `kind` lies outside the table
+// or has no entry, so callers never receive NULL and never read out of bounds.
+const char* tok_name(TokenKind kind) {
+    size_t n = sizeof NAMES / sizeof NAMES[0];
+    if ((size_t)kind >= n || !NAMES[kind]) return "?";
+    return NAMES[kind];
+}
+
+// True for the type keywords i64, f64, bool and void.
+bool tok_is_type(TokenKind kind) {
+    return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_VOID;
+}
+
+// Converts the token text to an integer with strtoll() (base 10).
+// Returns 0 when the text is empty or does not fit the 32-byte scratch buffer.
+int64_t tok_int_value(const Token* tok) {
+    if (tok->length <= 0 || tok->length >= 32) return 0;
+    char buf[32];
+    memcpy(buf, tok->start, tok->length);
+    buf[tok->length] = '\0';
+    return strtoll(buf, NULL, 10);
+}
+
+// Converts the token text to a double with strtod(); 0.0 when empty or 64+ bytes long.
+double tok_float_value(const Token* tok) {
+    if (tok->length <= 0 || tok->length >= 64) return 0.0;
+    char buf[64];
+    memcpy(buf, tok->start, tok->length);
+    buf[tok->length] = '\0';
+    return strtod(buf, NULL);
+}
diff --git a/src/lexer/token.h b/src/lexer/token.h
new file mode 100644
index 
0000000..470409e
--- /dev/null
+++ b/src/lexer/token.h
@@ -0,0 +1,45 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include "l_lang.h"
+
+// === Token kinds ===
+typedef enum {
+    // Keywords
+    TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_RETURN,
+    // Type keywords
+    TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID,
+    // Literals
+    TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE,
+    // Identifier
+    TOK_IDENT,
+    // Operators
+    TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
+    TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
+    TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG,
+    TOK_ARROW,
+    // Delimiters
+    TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE,
+    TOK_COMMA, TOK_COLON, TOK_SEMICOLON, TOK_ASSIGN,
+    // Special
+    TOK_EOF, TOK_ERROR,
+} TokenKind;
+
+// === Token struct ===
+struct Token {
+    TokenKind kind;
+    const char* start;    // points at the token's first byte in the source text
+    int length;           // length of the token text
+    int line;
+    int col;
+};
+
+// === Helpers ===
+const char* tok_name(TokenKind kind);
+bool tok_is_type(TokenKind kind);
+
+// Extract the literal value from a token
+int64_t tok_int_value(const Token* tok);
+double tok_float_value(const Token* tok);
+
+#endif
diff --git a/src/parser/parser.c b/src/parser/parser.c
new file mode 100644
index 0000000..0ebed43
--- /dev/null
+++ b/src/parser/parser.c
@@ -0,0 +1,366 @@
+#include "parser.h"
+#include <stdlib.h>
+#include <string.h>
+
+// Parser state: a cursor over the token array handed to parse().
+typedef struct {
+    const Token* tokens;
+    size_t count;
+    size_t pos;
+    const char* filename;
+    Arena* arena;
+} Parser;
+
+// Capacities of the stack buffers used while collecting child nodes.
+enum { MAX_CALL_ARGS = 16, MAX_PARAMS = 64, MAX_BLOCK_STMTS = 256 };
+
+// === Lookahead helpers ===
+static const Token* peek(const Parser* p) { return &p->tokens[p->pos]; }
+// Returns the current token and steps forward. Never steps past TOK_EOF, so
+// peek() stays inside the token array even after an error path consumed EOF.
+static const Token* advance(Parser* p) {
+    const Token* t = &p->tokens[p->pos];
+    if (t->kind != TOK_EOF) p->pos++;
+    return t;
+}
+// Consumes the current token only when it has kind `k`.
+static bool match(Parser* p, TokenKind k) {
+    if (peek(p)->kind == k) { p->pos++; return true; }
+    return false;
+}
+// Consumes and returns a token of kind `k`; otherwise records `msg` at the
+// current token's position in `e` and returns NULL.
+static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) {
+    if (peek(p)->kind == k) return advance(p);
+    e->message = msg; e->filename = p->filename;
+    e->line = peek(p)->line; e->col = peek(p)->col;
+    return NULL;
+}
+// Records `msg` at the current token's position; returns NULL so callers can `return fail_here(...)`.
+static AstNode* fail_here(Parser* p, ErrorInfo* e, const char* msg) {
+    e->message = msg; e->filename = p->filename;
+    e->line = peek(p)->line; e->col = peek(p)->col;
+    return NULL;
+}
+// Copies `n` node pointers into the arena. Returns NULL (with `e` filled in)
+// when the arena has no room, instead of handing a NULL pointer to memcpy().
+static AstNode** copy_nodes(Parser* p, AstNode** src, int n, ErrorInfo* e) {
+    AstNode** dst = arena_alloc_impl(p->arena, n * sizeof(AstNode*));
+    if (!dst) { fail_here(p, e, "内存不足"); return NULL; }
+    memcpy(dst, src, n * sizeof(AstNode*));
+    return dst;
+}
+
+// === Operator precedence levels ===
+typedef enum {
+    PREC_NONE = 0,
+    PREC_OR = 20,
+    PREC_AND = 30,
+    PREC_COMPARE = 40,
+    PREC_TERM = 50,
+    PREC_FACTOR = 60,
+    PREC_UNARY = 70,
+} Precedence;
+
+// Binding power of a binary operator token; PREC_NONE for every other token.
+static Precedence tok_to_prec(TokenKind kind) {
+    switch (kind) {
+        case TOK_PIPE_PIPE: return PREC_OR;
+        case TOK_AND_AND: return PREC_AND;
+        case TOK_EQ_EQ: case TOK_BANG_EQ:
+        case TOK_LT: case TOK_GT: case TOK_LT_EQ: case TOK_GT_EQ: return PREC_COMPARE;
+        case TOK_PLUS: case TOK_MINUS: return PREC_TERM;
+        case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return PREC_FACTOR;
+        default: return PREC_NONE;
+    }
+}
+
+// Maps an operator token to its BinaryOp. The only caller passes tokens whose
+// tok_to_prec() is above PREC_NONE, so the OP_ADD default is not reached there.
+static BinaryOp tok_to_binop(TokenKind kind) {
+    switch (kind) {
+        case TOK_PLUS: return OP_ADD; case TOK_MINUS: return OP_SUB;
+        case TOK_STAR: return OP_MUL; case TOK_SLASH: return OP_DIV;
+        case TOK_PERCENT: return OP_MOD;
+        case TOK_EQ_EQ: return OP_EQ; case TOK_BANG_EQ: return OP_NE;
+        case TOK_LT: return OP_LT; case TOK_GT: return OP_GT;
+        case TOK_LT_EQ: return OP_LE; case TOK_GT_EQ: return OP_GE;
+        case TOK_AND_AND: return OP_AND; case TOK_PIPE_PIPE: return OP_OR;
+        default: return OP_ADD;
+    }
+}
+
+// Forward declarations
+static AstNode* parse_expr(Parser* p, ErrorInfo* error);
+static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
+static AstNode* parse_block(Parser* p, ErrorInfo* error);
+
+// === Prefix parsers ===
+// Unary '-' / '!': the operand is parsed at PREC_UNARY.
+static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
+    const Token* op = advance(p);
+    AstNode* operand = parse_expr_prec(p, PREC_UNARY, error);
+    if (!operand) return NULL;
+    BinaryOp uop = (op->kind == TOK_MINUS) ? OP_NEG : OP_NOT;
+    return ast_make_unary(p->arena, uop, operand, op->line, op->col);
+}
+
+// Parenthesized expression: '(' expr ')'.
+static AstNode* parse_group(Parser* p, ErrorInfo* error) {
+    advance(p);  // skip '('
+    AstNode* expr = parse_expr(p, error);
+    if (!expr) return NULL;
+    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
+    return expr;
+}
+
+// Integer, float and bool literals.
+static AstNode* parse_literal(Parser* p) {
+    const Token* t = advance(p);
+    switch (t->kind) {
+        case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), t->line, t->col);
+        case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), t->line, t->col);
+        case TOK_TRUE: return ast_make_literal_bool(p->arena, true, t->line, t->col);
+        case TOK_FALSE: return ast_make_literal_bool(p->arena, false, t->line, t->col);
+        default: return NULL;
+    }
+}
+
+// Identifier reference, or a call when the identifier is followed by '('.
+static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
+    const Token* name = advance(p);
+    if (match(p, TOK_LPAREN)) {
+        // Function call
+        AstNode* args[MAX_CALL_ARGS]; int arg_count = 0;
+        while (peek(p)->kind != TOK_RPAREN && !error->message) {
+            if (arg_count >= MAX_CALL_ARGS) return fail_here(p, error, "函数参数过多");
+            args[arg_count] = parse_expr(p, error);
+            if (!args[arg_count]) return NULL;
+            arg_count++;
+            if (peek(p)->kind == TOK_COMMA) advance(p);
+            else break;
+        }
+        if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
+        AstNode** arg_arr = copy_nodes(p, args, arg_count, error);
+        if (!arg_arr) return NULL;
+        return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length),
+                             arg_arr, arg_count, name->line, name->col);
+    }
+    return ast_make_ident(p->arena,
+        arena_strdup_impl(p->arena, name->start, name->length),
+        name->line, name->col);
+}
+
+// === Pratt main loop ===
+// Parses a prefix expression, then folds in binary operators whose precedence
+// is strictly greater than `min_prec`, which makes them left-associative.
+static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
+    const Token* tok = peek(p);
+    AstNode* left = NULL;
+
+    // Prefix position
+    if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) {
+        left = parse_unary(p, error);
+    } else if (tok->kind == TOK_LPAREN) {
+        left = parse_group(p, error);
+    } else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT ||
+               tok->kind == TOK_TRUE || tok->kind == TOK_FALSE) {
+        left = parse_literal(p);
+    } else if (tok->kind == TOK_IDENT) {
+        left = parse_ident_or_call(p, error);
+    } else {
+        return fail_here(p, error, "无法识别的表达式");
+    }
+    if (!left) return NULL;
+
+    // Infix loop
+    while (!error->message) {
+        TokenKind kind = peek(p)->kind;
+        Precedence prec = tok_to_prec(kind);
+        if (prec <= min_prec) break;
+
+        const Token* op = advance(p);
+        AstNode* right = parse_expr_prec(p, prec, error);
+        if (!right) return NULL;
+        left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, op->line, op->col);
+    }
+
+    return left;
+}
+
+// Parses a full expression (lowest precedence).
+static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
+    return parse_expr_prec(p, PREC_NONE, error);
+}
+
+// === Type helpers ===
+static bool is_type_token(TokenKind k) {
+    return k == TOK_I64 || k == TOK_F64 || k == TOK_BOOL || k == TOK_VOID;
+}
+
+// Maps a type keyword to its TypeKind; any other token maps to TYPE_VOID.
+static TypeKind token_to_type(TokenKind k) {
+    switch (k) { case TOK_I64: return TYPE_I64; case TOK_F64: return TYPE_F64;
+                 case TOK_BOOL: return TYPE_BOOL; default: return TYPE_VOID; }
+}
+
+// === Statements ===
+static AstNode* parse_statement(Parser* p, ErrorInfo* error);
+
+// Block: '{' statement* '}'.
+static AstNode* parse_block(Parser* p, ErrorInfo* error) {
+    const Token* open = peek(p);
+    if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) return NULL;
+    AstNode* stmts[MAX_BLOCK_STMTS]; int count = 0;
+    while (peek(p)->kind != TOK_RBRACE && peek(p)->kind != TOK_EOF && !error->message) {
+        // Guard the fixed-size stack buffer against overflow.
+        if (count >= MAX_BLOCK_STMTS) return fail_here(p, error, "代码块语句过多");
+        AstNode* s = parse_statement(p, error);
+        if (!s) return NULL;
+        stmts[count++] = s;
+    }
+    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;
+    AstNode** arr = copy_nodes(p, stmts, count, error);
+    if (!arr) return NULL;
+    return ast_make_block(p->arena, arr, count, open->line, open->col);
+}
+
+// Statement: let / if / while / return, otherwise an expression statement.
+static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
+    const Token* t = peek(p);
+
+    if (t->kind == TOK_LET) {
+        advance(p);
+        const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名");
+        if (!name) return NULL;
+        // Optional type annotation
+        TypeKind annot_type = TYPE_UNKNOWN;
+        bool has_type_annot = false;
+        if (match(p, TOK_COLON)) {
+            const Token* type_tok = advance(p);
+            if (!is_type_token(type_tok->kind)) {
+                error->message = "无效的类型标注"; error->filename = p->filename;
+                error->line = type_tok->line; error->col = type_tok->col; return NULL;
+            }
+            annot_type = token_to_type(type_tok->kind);
+            has_type_annot = true;
+        }
+        if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
+        AstNode* init = parse_expr(p, error);
+        if (!init) return NULL;
+        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
+        return ast_make_let(p->arena,
+            arena_strdup_impl(p->arena, name->start, name->length),
+            annot_type, has_type_annot, init, t->line, t->col);
+    }
+
+    if (t->kind == TOK_IF) {
+        advance(p);
+        AstNode* cond = parse_expr(p, error);
+        if (!cond) return NULL;
+        AstNode* then_block = parse_block(p, error);
+        if (!then_block) return NULL;
+        AstNode* else_block = NULL;
+        if (match(p, TOK_ELSE)) {
+            if (peek(p)->kind == TOK_IF) {
+                else_block = parse_statement(p, error);  // 'else if' chain
+            } else {
+                else_block = parse_block(p, error);
+            }
+            if (!else_block) return NULL;
+        }
+        return ast_make_if(p->arena, cond, then_block, else_block, t->line, t->col);
+    }
+
+    if (t->kind == TOK_WHILE) {
+        advance(p);
+        AstNode* cond = parse_expr(p, error);
+        if (!cond) return NULL;
+        AstNode* body = parse_block(p, error);
+        if (!body) return NULL;
+        return ast_make_while(p->arena, cond, body, t->line, t->col);
+    }
+
+    if (t->kind == TOK_RETURN) {
+        advance(p);
+        if (match(p, TOK_SEMICOLON)) {
+            return ast_make_return(p->arena, NULL, t->line, t->col);
+        }
+        AstNode* expr = parse_expr(p, error);
+        if (!expr) return NULL;
+        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
+        return ast_make_return(p->arena, expr, t->line, t->col);
+    }
+
+    // Expression statement
+    AstNode* expr = parse_expr(p, error);
+    if (!expr) return NULL;
+    if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
+    return ast_make_expr_stmt(p->arena, expr, t->line, t->col);
+}
+
+// === Functions ===
+// Function: 'fn' name '(' [name ':' type {',' name ':' type}] ')' ['->' type] block.
+static AstNode* parse_function(Parser* p, ErrorInfo* error) {
+    // Require the 'fn' keyword instead of consuming whatever token happens to be here.
+    const Token* fn_tok = expect(p, TOK_FN, error, "应为 'fn'");
+    if (!fn_tok) return NULL;
+    const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名");
+    if (!name) return NULL;
+    if (!expect(p, TOK_LPAREN, error, "缺少 '('")) return NULL;
+
+    // Parameter list
+    AstNode* params[MAX_PARAMS]; int pcount = 0;
+    while (peek(p)->kind != TOK_RPAREN && !error->message) {
+        if (pcount >= MAX_PARAMS) return fail_here(p, error, "函数参数过多");
+        const Token* pname = expect(p, TOK_IDENT, error, "参数名");
+        if (!pname) return NULL;
+        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
+        const Token* ptype = advance(p);
+        if (!is_type_token(ptype->kind)) {
+            error->message = "无效的参数类型"; error->filename = p->filename;
+            error->line = ptype->line; error->col = ptype->col; return NULL;
+        }
+        params[pcount++] = ast_make_parameter(p->arena,
+            arena_strdup_impl(p->arena, pname->start, pname->length),
+            token_to_type(ptype->kind), pname->line, pname->col);
+        if (match(p, TOK_COMMA)) continue;
+        else break;
+    }
+    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
+
+    // Return type (stays TYPE_VOID when '->' is absent)
+    TypeKind ret = TYPE_VOID;
+    if (match(p, TOK_ARROW)) {
+        const Token* rt = advance(p);
+        if (!is_type_token(rt->kind)) {
+            error->message = "无效的返回类型"; error->filename = p->filename;
+            error->line = rt->line; error->col = rt->col; return NULL;
+        }
+        ret = token_to_type(rt->kind);
+    }
+
+    AstNode* body = parse_block(p, error);
+    if (!body) return NULL;
+
+    AstNode** parr = copy_nodes(p, params, pcount, error);
+    if (!parr) return NULL;
+    return ast_make_function(p->arena,
+        arena_strdup_impl(p->arena, name->start, name->length),
+        parr, pcount, ret, body, fn_tok->line, fn_tok->col);
+}
+
+// === 
Program entry point ===
+AstNode* parse(Arena* a, const Token* tokens, size_t count, const char* filename, ErrorInfo* error) {
+    Parser p = {.tokens = tokens, .count = count, .pos = 0, .filename = filename, .arena = a};
+    AstNode* functions[256]; int fn_count = 0;
+    while (!error->message && peek(&p)->kind != TOK_EOF) {  // error flag first: a failed parse may have consumed TOK_EOF
+        if (fn_count >= 256) { expect(&p, TOK_EOF, error, "函数数量过多"); break; }  // expect() cannot match here, so it records the error
+        functions[fn_count++] = parse_function(&p, error);
+    }
+    if (error->message) return NULL;
+    AstNode** arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*));
+    if (!arr) { error->message = "内存不足"; error->filename = filename; return NULL; }
+    memcpy(arr, functions, fn_count * sizeof(AstNode*));
+    return ast_make_program(a, arr, fn_count, 0, 0);
+}
diff --git a/src/parser/parser.h b/src/parser/parser.h
new file mode 100644
index 0000000..e535670
--- /dev/null
+++ b/src/parser/parser.h
@@ -0,0 +1,14 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "ast.h"
+#include "token.h"
+#include "error.h"
+
+// Parses the token array and returns the Program node (memory comes from the arena).
+// On failure `error` is filled in and NULL is returned. The caller must pass a
+// zero-initialized `error`: a non-NULL error->message stops parsing immediately.
+AstNode* parse(Arena* a, const Token* tokens, size_t count,
+               const char* filename, ErrorInfo* error);
+
+#endif
diff --git a/src/sema/sema.c b/src/sema/sema.c
new file mode 100644
index 0000000..2ff4074
--- /dev/null
+++ b/src/sema/sema.c
@@ -0,0 +1,284 @@
+#include "sema.h"
+#include <stddef.h>
+
+// === Type relations ===
+// Result type of mixing two operand types: f64 beats i64, which beats bool.
+static TypeKind promote(TypeKind a, TypeKind b) {
+    if (a == TYPE_F64 || b == TYPE_F64) return TYPE_F64;
+    if (a == TYPE_I64 || b == TYPE_I64) return TYPE_I64;
+    if (a == TYPE_BOOL || b == TYPE_BOOL) return TYPE_BOOL;
+    return TYPE_ERROR;
+}
+
+static bool is_numeric(TypeKind t) { return t == TYPE_I64 || t == TYPE_F64; }
+static bool is_comparable(TypeKind a, TypeKind b) { return a == b; }
+
+// === Forward declarations ===
+static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a);
+
+// === Expressions ===
+// Type-checks an expression and stores its type in node->type.kind
+// (TYPE_ERROR after a reported problem, which suppresses follow-up errors).
+static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
+    switch (node->kind) {
+        case AST_LITERAL_EXPR:
+            break;  // the type was set when the node was created
+
+        case AST_IDENT_EXPR: {
+            Symbol* sym = scope_lookup(scope, node->as.ident.name);
+            if (!sym) {
+                error_add(errors, "", node->line, node->col,
+                          "未定义的变量 '%s'", node->as.ident.name);
+                node->type.kind = TYPE_ERROR;
+            } else if (sym->kind == SYM_FUNCTION) {
+                error_add(errors, "", node->line, node->col,
+                          "'%s' 是函数,不能作为表达式使用", node->as.ident.name);
+                node->type.kind = TYPE_ERROR;
+            } else {
+                node->type.kind = sym->type;
+            }
+            break;
+        }
+
+        case AST_UNARY_EXPR: {
+            analyze_expr(node->as.unary.operand, scope, errors, a);
+            TypeKind inner = node->as.unary.operand->type.kind;
+            if (node->as.unary.op == OP_NEG) {
+                if (!is_numeric(inner)) {
+                    error_add(errors, "", node->line, node->col,
+                              "一元 '-' 只能用于数值类型");
+                    node->type.kind = TYPE_ERROR;
+                } else {
+                    node->type.kind = inner;
+                }
+            } else {  // OP_NOT
+                if (inner != TYPE_BOOL) {
+                    error_add(errors, "", node->line, node->col,
+                              "'!' 只能用于布尔类型,得到 '%s'", type_name(inner));
+                    node->type.kind = TYPE_ERROR;
+                } else {
+                    node->type.kind = TYPE_BOOL;
+                }
+            }
+            break;
+        }
+
+        case AST_BINARY_EXPR: {
+            analyze_expr(node->as.binary.left, scope, errors, a);
+            analyze_expr(node->as.binary.right, scope, errors, a);
+            TypeKind l = node->as.binary.left->type.kind;
+            TypeKind r = node->as.binary.right->type.kind;
+            if (l == TYPE_ERROR || r == TYPE_ERROR) { node->type.kind = TYPE_ERROR; break; }
+
+            switch (node->as.binary.op) {
+                case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
+                    if (!is_numeric(l) || !is_numeric(r)) {
+                        error_add(errors, "", node->line, node->col,
+                                  "算术运算需要数值类型");
+                        node->type.kind = TYPE_ERROR;
+                    } else {
+                        node->type.kind = promote(l, r);
+                    }
+                    break;
+                case OP_EQ: case OP_NE: case OP_LT: case OP_GT: case OP_LE: case OP_GE:
+                    if (!is_comparable(l, r)) {
+                        error_add(errors, "", node->line, node->col,
+                                  "类型 '%s' 和 '%s' 无法比较", type_name(l), type_name(r));
+                        node->type.kind = TYPE_ERROR;
+                    } else {
+                        node->type.kind = TYPE_BOOL;
+                    }
+                    break;
+                case OP_AND: case OP_OR:
+                    if (l != TYPE_BOOL || r != TYPE_BOOL) {
+                        error_add(errors, "", node->line, node->col,
+                                  "逻辑运算需要布尔类型");
+                        node->type.kind = TYPE_ERROR;
+                    } else {
+                        node->type.kind = TYPE_BOOL;
+                    }
+                    break;
+                default: break;
+            }
+            break;
+        }
+
+        case AST_CALL_EXPR: {
+            Symbol* sym = scope_lookup(scope, node->as.call.name);
+            if (!sym || sym->kind != SYM_FUNCTION) {
+                error_add(errors, "", node->line, node->col,
+                          "未定义的函数 '%s'", node->as.call.name);
+                node->type.kind = TYPE_ERROR;
+                // Still analyze the arguments: they may contain further errors
+                for (size_t i = 0; i < node->as.call.arg_count; i++) {
+                    analyze_expr(node->as.call.args[i], scope, errors, a);
+                }
+                break;
+            }
+            if (node->as.call.arg_count != sym->param_count) {
+                error_add(errors, "", node->line, node->col,
+                          "函数 '%s' 需要 %zu 个参数,但提供了 %zu 个",
+                          node->as.call.name, sym->param_count, node->as.call.arg_count);
+                node->type.kind = TYPE_ERROR;
+                // Analyze the arguments that were supplied even though the count is wrong
+                for (size_t i = 0; i < node->as.call.arg_count; i++) {
+                    analyze_expr(node->as.call.args[i], scope, errors, a);
+                }
+                break;
+            }
+            for (size_t i = 0; i < node->as.call.arg_count; i++) {
+                analyze_expr(node->as.call.args[i], scope, errors, a);
+                TypeKind actual = node->as.call.args[i]->type.kind;
+                TypeKind expected = sym->param_types[i];
+                if (actual != TYPE_ERROR && actual != expected) {
+                    error_add(errors, "", node->line, node->col,
+                              "参数 %zu 类型不匹配: 期望 '%s',得到 '%s'",
+                              i + 1, type_name(expected), type_name(actual));
+                }
+            }
+            node->type.kind = sym->return_type;
+            break;
+        }
+
+        default: break;
+    }
+}
+
+// Analyzes declarations and statements; any other node kind is treated as an expression.
+static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
+    if (!node) return;
+
+    switch (node->kind) {
+        case AST_PROGRAM:
+            // Pass 1: register every function signature, so calls may precede definitions
+            for (size_t i = 0; i < node->as.program.fn_count; i++) {
+                AstNode* fn = node->as.program.functions[i];
+                size_t pc = fn->as.function.param_count;
+                TypeKind* pts = (TypeKind*)arena_alloc_impl(a, pc * sizeof(TypeKind));
+                if (!pts) {
+                    error_add(errors, "", fn->line, fn->col, "内存不足");
+                    continue;
+                }
+                for (size_t j = 0; j < pc; j++) {
+                    pts[j] = fn->as.function.params[j]->as.parameter.type;
+                }
+                // NULL means the name is already taken (another function or a built-in)
+                if (!scope_insert_function(scope, a, fn->as.function.name,
+                                           fn->as.function.return_type, pts, pc)) {
+                    error_add(errors, "", fn->line, fn->col,
+                              "函数 '%s' 重复定义", fn->as.function.name);
+                }
+            }
+            // Pass 2: analyze each function body
+            for (size_t i = 0; i < node->as.program.fn_count; i++) {
+                analyze_node(node->as.program.functions[i], scope, errors, a);
+            }
+            break;
+
+        case AST_FUNCTION: {
+            Scope* fn_scope = scope_new(a, scope);
+            // Register the parameters; scope_insert() rejects a repeated name
+            for (size_t i = 0; i < node->as.function.param_count; i++) {
+                AstNode* p = node->as.function.params[i];
+                if (!scope_insert(fn_scope, a, p->as.parameter.name,
+                                  SYM_PARAMETER, p->as.parameter.type)) {
+                    error_add(errors, "", p->line, p->col,
+                              "参数 '%s' 重复定义", p->as.parameter.name);
+                }
+            }
+            analyze_node(node->as.function.body, fn_scope, errors, a);
+            break;
+        }
+
+        case AST_BLOCK:
+            // NOTE(review): statements share the enclosing scope, so a `let` inside an
+            // if/while body stays visible after the block - confirm this is intended.
+            for (size_t i = 0; i < node->as.block.stmt_count; i++) {
+                analyze_node(node->as.block.stmts[i], scope, errors, a);
+            }
+            break;
+
+        case AST_LET_STMT: {
+            analyze_expr(node->as.let_stmt.init, scope, errors, a);
+            TypeKind inferred = node->as.let_stmt.init->type.kind;
+            TypeKind var_type;
+
+            if (node->as.let_stmt.has_type_annot) {
+                // Use the explicit type annotation
+                var_type = node->as.let_stmt.annot_type;
+                if (inferred != TYPE_ERROR && inferred != var_type) {
+                    error_add(errors, "", node->line, node->col,
+                              "变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'",
+                              node->as.let_stmt.name, type_name(var_type), type_name(inferred));
+                }
+            } else {
+                // Type inference
+                if (inferred == TYPE_ERROR || inferred == TYPE_VOID) {
+                    error_add(errors, "", node->line, node->col,
+                              "无法从表达式推断变量 '%s' 的类型", node->as.let_stmt.name);
+                    break;
+                }
+                var_type = inferred;
+            }
+
+            node->type.kind = var_type;
+            if (!scope_insert(scope, a, node->as.let_stmt.name, SYM_VARIABLE, var_type)) {
+                error_add(errors, "", node->line, node->col,
+                          "变量 '%s' 重复定义", node->as.let_stmt.name);
+            }
+            break;
+        }
+
+        case AST_IF_STMT:
+            analyze_expr(node->as.if_stmt.cond, scope, errors, a);
+            if (node->as.if_stmt.cond->type.kind != TYPE_BOOL &&
+                node->as.if_stmt.cond->type.kind != TYPE_ERROR) {
+                error_add(errors, "", node->line, node->col, "if 条件必须是布尔类型");
+            }
+            analyze_node(node->as.if_stmt.then_block, scope, errors, a);
+            if (node->as.if_stmt.else_block) {
+                analyze_node(node->as.if_stmt.else_block, scope, errors, a);
+            }
+            break;
+
+        case AST_WHILE_STMT:
+            analyze_expr(node->as.while_stmt.cond, scope, errors, a);
+            if (node->as.while_stmt.cond->type.kind != TYPE_BOOL &&
+                node->as.while_stmt.cond->type.kind != TYPE_ERROR) {
+                error_add(errors, "", node->line, node->col, "while 条件必须是布尔类型");
+            }
+            analyze_node(node->as.while_stmt.body, scope, errors, a);
+            break;
+
+        case AST_RETURN_STMT:
+            // NOTE(review): the value is not compared with the function's declared return type here.
+            if (node->as.return_stmt.expr) {
+                analyze_expr(node->as.return_stmt.expr, scope, errors, a);
+                node->type.kind = node->as.return_stmt.expr->type.kind;
+            }
+            break;
+
+        case AST_EXPR_STMT:
+            analyze_expr(node->as.expr_stmt.expr, scope, errors, a);
+            break;
+
+        default:
+            analyze_expr(node, scope, errors, a);
+            break;
+    }
+}
+
+// Entry point: registers the built-in print functions in a fresh global scope, then analyzes `ast`.
+void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) {
+    Scope* global = scope_new(arena, NULL);
+
+    // Register the built-in functions
+    TypeKind params_i64[] = {TYPE_I64};
+    scope_insert_function(global, arena, "print_i64", TYPE_VOID, params_i64, 1);
+    TypeKind params_f64[] = {TYPE_F64};
+    scope_insert_function(global, arena, "print_f64", TYPE_VOID, params_f64, 1);
+    TypeKind params_bool[] = {TYPE_BOOL};
+    scope_insert_function(global, arena, "print_bool", TYPE_VOID, params_bool, 1);
+
+    analyze_node(ast, global, errors, arena);
+}
diff --git a/src/sema/sema.h b/src/sema/sema.h
new file mode 100644
index 0000000..f785c50
--- /dev/null
+++ b/src/sema/sema.h
@@ -0,0 +1,12 @@
+#ifndef SEMA_H
+#define SEMA_H
+
+#include "ast.h"
+#include "error.h"
+#include "symbol.h"
+
+// Runs semantic analysis (type inference + type checking) over the AST.
+// Fills in the type field of the nodes it visits and collects every problem in `errors`.
+void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena);
+
+#endif
diff --git a/src/sema/symbol.c b/src/sema/symbol.c
new file mode 100644
index 0000000..aeb67b7
--- /dev/null
+++ b/src/sema/symbol.c
@@ -0,0 +1,59 @@
+#include "symbol.h"
+#include "l_lang.h"
+#include <string.h>
+
+// Creates an empty scope chained to `parent`; returns NULL if the allocation fails.
+Scope* scope_new(void* alloc, Scope* parent) {
+    Scope* s = (Scope*)arena_alloc_impl(alloc, sizeof(Scope));
+    if (!s) return NULL;
+    s->head = NULL;
+    s->parent = parent;
+    return s;
+}
+
+// Looks `name` up in `scope` and then in each enclosing scope; NULL when absent.
+Symbol* scope_lookup(const Scope* scope, const char* name) {
+    for (const Scope* s = scope; s; s = s->parent) {
+        for (Symbol* sym = s->head; sym; sym = sym->next) {
+            if (strcmp(sym->name, name) == 0) return sym;
+        }
+    }
+    return NULL;
+}
+
+// Returns the symbol called `name` declared directly in `scope`, or NULL.
+static Symbol* find_local(const Scope* scope, const char* name) {
+    for (Symbol* sym = scope->head; sym; sym = sym->next) {
+        if (strcmp(sym->name, name) == 0) return sym;
+    }
+    return NULL;
+}
+
+// Adds a variable/parameter symbol to `scope`. Returns NULL when `scope` is NULL,
+// the name is already declared in this scope, or the allocation fails.
+Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
+                     SymbolKind kind, TypeKind type) {
+    if (!scope || find_local(scope, name)) return NULL;
+    Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol));
+    if (!sym) return NULL;
+    sym->name = name; sym->kind = kind; sym->type = type;
+    // Clear the function-only fields so they are never read uninitialized.
+    sym->return_type = TYPE_VOID; sym->param_types = NULL; sym->param_count = 0;
+    sym->next = scope->head;
+    scope->head = sym;
+    return sym;
+}
+
+// Adds a function symbol to `scope`; same NULL conditions as scope_insert().
+// `pt` is stored as-is (not copied), so it must outlive every use of the symbol.
+Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
+                              TypeKind ret, TypeKind* pt, size_t pc) {
+    if (!scope || find_local(scope, name)) return NULL;
+    Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol));
+    if (!sym) return NULL;
+    sym->name = name; sym->kind = SYM_FUNCTION; sym->type = TYPE_VOID;
+    sym->return_type = ret; sym->param_types = pt; sym->param_count = pc;
+    sym->next = scope->head;
+    scope->head = sym;
+    return sym;
+}
diff --git a/src/sema/symbol.h b/src/sema/symbol.h
new file mode 100644
index 0000000..72a36b0
--- /dev/null
+++ b/src/sema/symbol.h
@@ -0,0 +1,40 @@
+#ifndef SYMBOL_H
+#define SYMBOL_H
+
+#include "l_lang.h"
+#include "ast.h"
+
+typedef enum { SYM_VARIABLE, SYM_PARAMETER, SYM_FUNCTION } SymbolKind;
+
+typedef struct Symbol {
+    const char* name;
+    SymbolKind kind;
+    TypeKind type;          // type of a variable/parameter
+    // Function-only fields
+    TypeKind return_type;
+    TypeKind* param_types;
+    size_t param_count;
+    // Linked list: the next symbol in the same scope
+    struct Symbol* next;
+} Symbol;
+
+typedef struct Scope {
+    Symbol* head;           // head of the symbol list
+    struct Scope* parent;   // enclosing scope
+} Scope;
+
+// Create a new scope (a child of `parent`)
+Scope* scope_new(void* alloc, Scope* parent);
+
+// Look a symbol up in this scope and its enclosing scopes
+Symbol* scope_lookup(const Scope* scope, const char* name);
+
+// 
Insert a symbol into the current scope (returns NULL on a duplicate)
+Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
+                     SymbolKind kind, TypeKind type);
+
+// Insert a function symbol
+Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
+                              TypeKind ret, TypeKind* pt, size_t pc);
+
+#endif
diff --git a/src/util/arena.c b/src/util/arena.c
new file mode 100644
index 0000000..ce72df4
--- /dev/null
+++ b/src/util/arena.c
@@ -0,0 +1,56 @@
+#include "arena.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Creates an arena backed by a single malloc() block of `capacity_mb` MiB.
+// If the byte count would overflow size_t or malloc() fails, the result has
+// memory == NULL and capacity == 0, and arena_alloc() on it returns NULL.
+Arena arena_create(size_t capacity_mb) {
+    Arena a = {.memory = NULL, .capacity = 0, .offset = 0};
+    const size_t mib = (size_t)1024 * 1024;
+    if (capacity_mb > SIZE_MAX / mib) return a;
+    a.memory = (char*)malloc(capacity_mb * mib);
+    if (a.memory) a.capacity = capacity_mb * mib;
+    return a;
+}
+
+// Frees the backing block and resets the arena to its empty state.
+void arena_destroy(Arena* a) {
+    free(a->memory);
+    a->memory = NULL;
+    a->capacity = 0;
+    a->offset = 0;
+}
+
+// Bump-allocates `size` bytes, rounded up to a multiple of 8. Returns NULL when
+// the arena has no backing block or too little room is left. The checks are
+// ordered so that neither the rounding nor the bounds test can wrap around.
+void* arena_alloc(Arena* a, size_t size) {
+    if (!a->memory || size > SIZE_MAX - 7) return NULL;
+    size = (size + 7) & ~(size_t)7;  // 8-byte alignment
+    if (size > a->capacity - a->offset) return NULL;  // this function never lets offset exceed capacity
+    void* ptr = a->memory + a->offset;
+    a->offset += size;
+    return ptr;
+}
+
+// Copies the NUL-terminated string `src` into the arena; NULL when out of space.
+char* arena_strdup(Arena* a, const char* src) {
+    size_t len = strlen(src) + 1;
+    char* dst = arena_alloc(a, len);
+    if (dst) memcpy(dst, src, len);
+    return dst;
+}
+
+// === Cross-module allocator (void* interface, reused by parser/sema and other modules) ===
+void* arena_alloc_impl(void* alloc, size_t size) {
+    return arena_alloc((Arena*)alloc, size);
+}
+
+// Copies `len` bytes of `src` into the arena and appends a NUL terminator.
+char* arena_strdup_impl(void* alloc, const char* src, size_t len) {
+    char* dst = arena_alloc_impl(alloc, len + 1);
+    if (dst) { memcpy(dst, src, len); dst[len] = '\0'; }
+    return dst;
+}
diff --git a/src/util/arena.h b/src/util/arena.h
new file mode 100644
index 0000000..dd88c0f
--- /dev/null
+++ b/src/util/arena.h
@@ -0,0 +1,22 @@
+#ifndef ARENA_H
+#define ARENA_H
+
+#include <stddef.h>
+
+// Fixed-capacity bump allocator: one malloc() block, released all at once.
+typedef struct Arena {
+    char* memory;      // backing block (NULL when creation failed or after destroy)
+    size_t capacity;   // size of the block in bytes
+    size_t offset;     // bytes handed out so far
+} Arena;
+
+// Allocates a `capacity_mb` MiB arena; capacity is 0 if the allocation failed.
+Arena arena_create(size_t capacity_mb);
+// Frees the backing block; every pointer obtained from the arena becomes invalid.
+void arena_destroy(Arena* a);
+// Returns `size` bytes (rounded up to a multiple of 8), or NULL when out of space.
+void* arena_alloc(Arena* a, size_t size);
+// Copies the NUL-terminated string `src` into the arena; NULL when out of space.
+char* arena_strdup(Arena* a, const char* src);
+
+#endif
diff --git a/test/programs/01_arithmetic.l 
b/test/programs/01_arithmetic.l
new file mode 100644
index 0000000..7fc48a3
--- /dev/null
+++ b/test/programs/01_arithmetic.l
@@ -0,0 +1,5 @@
+fn main() -> i64 {
+    let x: i64 = 1 + 2 * 3;
+    print_i64(x);
+    return 0;
+}
diff --git a/test/programs/02_if_else.l b/test/programs/02_if_else.l
new file mode 100644
index 0000000..71e5fd8
--- /dev/null
+++ b/test/programs/02_if_else.l
@@ -0,0 +1,9 @@
+fn main() -> i64 {
+    let x: i64 = 10;
+    if x > 5 {
+        print_i64(1);
+    } else {
+        print_i64(0);
+    }
+    return 0;
+}
diff --git a/test/programs/03_recurse.l b/test/programs/03_recurse.l
new file mode 100644
index 0000000..af9b658
--- /dev/null
+++ b/test/programs/03_recurse.l
@@ -0,0 +1,11 @@
+fn countdown(n: i64) -> i64 {
+    if n > 0 {
+        print_i64(n);
+        return countdown(n - 1);
+    }
+    return 0;
+}
+
+fn main() -> i64 {
+    return countdown(5);
+}
diff --git a/test/programs/04_fib_recursive.l b/test/programs/04_fib_recursive.l
new file mode 100644
index 0000000..cc2dde5
--- /dev/null
+++ b/test/programs/04_fib_recursive.l
@@ -0,0 +1,12 @@
+fn fib(n: i64) -> i64 {
+    if n < 2 {
+        return n;
+    }
+    return fib(n - 1) + fib(n - 2);
+}
+
+fn main() -> i64 {
+    let result: i64 = fib(10);
+    print_i64(result);
+    return 0;
+}
diff --git a/test/programs/05_float.l b/test/programs/05_float.l
new file mode 100644
index 0000000..9bf2c9c
--- /dev/null
+++ b/test/programs/05_float.l
@@ -0,0 +1,14 @@
+fn square(x: f64) -> f64 {
+    return x * x;
+}
+
+fn add_floats(a: f64, b: f64) -> f64 {
+    return a + b;
+}
+
+fn main() -> i64 {
+    let s: f64 = square(3.0);
+    let sum: f64 = add_floats(s, 4.0);
+    print_f64(sum);
+    return 0;
+}
diff --git a/test/test_lexer.c b/test/test_lexer.c
new file mode 100644
index 0000000..29194e1
--- /dev/null
+++ b/test/test_lexer.c
@@ -0,0 +1,64 @@
+#include "test_utils.h"
+#include "lexer.h"
+#include "arena.h"
+
+// Lexes a one-line function and checks the first seven token kinds and the literal value.
+void test_simple_tokens(void) {
+    Arena a = arena_create(1);
+    const char* src = "fn main() { return 42; }";
+    size_t count = 0; ErrorInfo error = {0};
+    Token* tokens = lex(&a, src, "test", &count, &error);
+    ASSERT(tokens != NULL);
+    ASSERT(count >= 8);
+    // ASSERT only records a failure, so stop here rather than index a missing array.
+    if (!tokens || count < 8) { arena_destroy(&a); return; }
+    ASSERT(tokens[0].kind == TOK_FN);
+    ASSERT(tokens[1].kind == TOK_IDENT);
+    ASSERT(tokens[2].kind == TOK_LPAREN);
+    ASSERT(tokens[3].kind == TOK_RPAREN);
+    ASSERT(tokens[4].kind == TOK_LBRACE);
+    ASSERT(tokens[5].kind == TOK_RETURN);
+    ASSERT(tokens[6].kind == TOK_INT_LIT);
+    ASSERT(tok_int_value(&tokens[6]) == 42);
+    arena_destroy(&a);
+}
+
+// Every keyword must lex to its own token kind, followed by TOK_EOF.
+void test_keywords(void) {
+    Arena a = arena_create(1);
+    const char* src = "fn let if else while return i64 f64 bool void true false";
+    TokenKind expected[] = {TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE,
+        TOK_RETURN, TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID, TOK_TRUE, TOK_FALSE, TOK_EOF};
+    size_t n = sizeof expected / sizeof expected[0];
+    size_t count = 0; ErrorInfo error = {0};
+    Token* tokens = lex(&a, src, "test", &count, &error);
+    ASSERT(tokens != NULL);
+    ASSERT(count == n);
+    if (!tokens || count != n) { arena_destroy(&a); return; }
+    for (size_t i = 0; i < n; i++) ASSERT(tokens[i].kind == expected[i]);
+    arena_destroy(&a);
+}
+
+// Every operator, including the two-character ones, must lex to its own token kind.
+void test_operators(void) {
+    Arena a = arena_create(1);
+    const char* src = "+ - * / % == != < > <= >= && || ! ->";
+    TokenKind expected[] = {TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
+        TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
+        TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG, TOK_ARROW, TOK_EOF};
+    size_t n = sizeof expected / sizeof expected[0];
+    size_t count = 0; ErrorInfo error = {0};
+    Token* tokens = lex(&a, src, "test", &count, &error);
+    ASSERT(tokens != NULL);
+    ASSERT(count == n);
+    if (!tokens || count != n) { arena_destroy(&a); return; }
+    for (size_t i = 0; i < n; i++) ASSERT(tokens[i].kind == expected[i]);
+    arena_destroy(&a);
+}
+
+int main(void) {
+    TEST_RUN(test_simple_tokens);
+    TEST_RUN(test_keywords);
+    TEST_RUN(test_operators);
+    return test_summary();
+}
diff --git a/test/test_parser.c b/test/test_parser.c
new file mode 100644
index 0000000..31e0188
--- /dev/null
+++ b/test/test_parser.c
@@ -0,0 +1,73 @@
+#include "test_utils.h"
+#include "parser.h"
+#include "lexer.h"
+#include "arena.h"
+
+// Lexes and parses `src`; returns the Program node, or NULL on any failure.
+static AstNode* parse_string(const char* src) {
+    Arena* a = malloc(sizeof(Arena));
+    if (!a) return NULL;
+    *a = arena_create(1);
+    size_t tcount = 0;
+    ErrorInfo lex_err = {0};
+    Token* tokens = lex(a, src, "test", &tcount, &lex_err);
+    if (!tokens) { arena_destroy(a); free(a); return NULL; }
+    ErrorInfo parse_err = {0};
+    AstNode* ast = parse(a, tokens, tcount, "test", &parse_err);
+    if (!ast) { arena_destroy(a); free(a); return NULL; }
+    // NOTE: arena and tokens must stay alive for AST - leak intentionally in test
+    return ast;
+}
+
+void test_simple_function(void) {
+    AstNode* ast = parse_string("fn main() { return 42; }");
+    ASSERT(ast != NULL);
+    if (!ast) return;  // ASSERT does not abort; avoid dereferencing NULL below
+    ASSERT(ast->kind == AST_PROGRAM);
+    ASSERT(ast->as.program.fn_count == 1);
+    AstNode* fn = ast->as.program.functions[0];
+    ASSERT(fn->kind == AST_FUNCTION);
+}
+
+void test_arithmetic_expr(void) {
+    AstNode* ast = parse_string("fn main() { return 1 + 2 * 3; }");
+    ASSERT(ast != NULL);
+    if (!ast) return;
+    AstNode* body = ast->as.program.functions[0]->as.function.body;
+    AstNode* ret = body->as.block.stmts[0];
+    ASSERT(ret->kind == AST_RETURN_STMT);
+    AstNode* expr = ret->as.return_stmt.expr;
+    ASSERT(expr->kind == AST_BINARY_EXPR);
+    ASSERT(expr->as.binary.op == OP_ADD);
+    // 1 + (2 * 3): right should be *, left should be 1
+    ASSERT(expr->as.binary.right->kind == AST_BINARY_EXPR);
+    ASSERT(expr->as.binary.right->as.binary.op == OP_MUL);
+}
+
+void test_if_statement(void) {
+    AstNode* ast = parse_string("fn main() { if true { return 1; } else { return 0; } }");
+    ASSERT(ast != NULL);
+}
+
+void test_while_loop(void) {
+    AstNode* ast = parse_string("fn main() { while true { return; } }");
+    ASSERT(ast != NULL);
+}
+
+void test_function_with_params(void) {
+    AstNode* ast = parse_string("fn add(a: i64, b: i64) -> i64 { return a + b; }");
+    ASSERT(ast != NULL);
+    if (!ast) return;
+    AstNode* fn = ast->as.program.functions[0];
+    ASSERT(fn->as.function.param_count == 2);
+    ASSERT(fn->as.function.return_type == TYPE_I64);
+}
+
+int main(void) {
+    TEST_RUN(test_simple_function);
+    TEST_RUN(test_arithmetic_expr);
+    TEST_RUN(test_if_statement);
+    TEST_RUN(test_while_loop);
+    TEST_RUN(test_function_with_params);
+    return test_summary();
+}
diff --git a/test/test_sema.c b/test/test_sema.c
new file mode 100644
index 0000000..da0accf
--- /dev/null
+++ b/test/test_sema.c
@@ -0,0 +1,68 @@
+#include "test_utils.h"
+#include "parser.h"
+#include "lexer.h"
+#include "sema.h"
+#include "arena.h"
+
+// Adding a bool to an i64 must be reported as an error.
+void test_type_error(void) {
+    Arena a = arena_create(1);
+    size_t tc = 0; ErrorInfo lex_err = {0};
+    Token* toks = lex(&a, "fn main() { let x: i64 = 1; let y: i64 = x + true; return; }",
+                      "test", &tc, &lex_err);
+    ASSERT(toks != NULL);
+    if (!toks) { arena_destroy(&a); return; }  // ASSERT does not abort
+    ErrorInfo parse_err = {0};
+    AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
+    ASSERT(ast != NULL);
+    if (!ast) { arena_destroy(&a); return; }
+
+    ErrorList errors; error_init(&errors);
+    sema_analyze(ast, &errors, &a);
+    ASSERT(errors.count > 0);
+    arena_destroy(&a);
+}
+
+// Using an undeclared variable must be reported as an error.
+void test_undefined_var(void) {
+    Arena a = arena_create(1);
+    size_t tc = 0; ErrorInfo lex_err = {0};
+    Token* toks = lex(&a, "fn main() { let x: i64 = y; return; }", "test", &tc, &lex_err);
+    ASSERT(toks != NULL);
+    if (!toks) { arena_destroy(&a); return; }
+    ErrorInfo parse_err = {0};
+    AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
+    ASSERT(ast != NULL);
+    if (!ast) { arena_destroy(&a); return; }
+
+    ErrorList errors; error_init(&errors);
+    sema_analyze(ast, &errors, &a);
+    ASSERT(errors.count > 0);
+    arena_destroy(&a);
+}
+
+// A well-typed program that calls a built-in must produce no errors.
+void test_simple_ok(void) {
+    Arena a = arena_create(1);
+    size_t tc = 0; ErrorInfo lex_err = {0};
+    Token* toks = lex(&a, "fn main() { let x: i64 = 42; print_i64(x); return; }",
+                      "test", &tc, &lex_err);
+    ASSERT(toks != NULL);
+    if (!toks) { arena_destroy(&a); return; }
+    ErrorInfo parse_err = {0};
+    AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
+    ASSERT(ast != NULL);
+    if (!ast) { arena_destroy(&a); return; }
+
+    ErrorList errors; error_init(&errors);
+    sema_analyze(ast, &errors, &a);
+    ASSERT(errors.count == 0);
+    arena_destroy(&a);
+}
+
+int main(void) {
+    TEST_RUN(test_type_error);
+    TEST_RUN(test_undefined_var);
+    TEST_RUN(test_simple_ok);
+    return test_summary();
+}
diff --git a/test/test_utils.h b/test/test_utils.h
new file mode 100644
index 0000000..93db5a7
--- /dev/null
+++ b/test/test_utils.h
@@ -0,0 +1,31 @@
+#ifndef TEST_UTILS_H
+#define TEST_UTILS_H
+#include <stdio.h>
+#include <stdlib.h>
+
+// Check counters; `static`, so every translation unit that includes this header gets its own pair.
+static int tests_run_ = 0;
+static int tests_failed_ = 0;
+
+// Counts the check and logs a failure; it does NOT abort the running test.
+#define ASSERT(expr) do { \
+    tests_run_++; \
+    if (!(expr)) { \
+        fprintf(stderr, "\033[1;31mFAIL\033[0m %s:%d: %s\n", __FILE__, __LINE__, #expr); \
+        tests_failed_++; \
+    } \
+} while(0)
+
+// Announces and runs one test function.
+#define TEST_RUN(func) do { \
+    fprintf(stderr, " RUN %s\n", #func); \
+    func(); \
+} while(0)
+
+// Prints the totals; returns 1 if any check failed, else 0 (usable as the exit code).
+static inline int test_summary(void) {
+    fprintf(stderr, "\n%d tests, %d passed, %d failed\n",
+            tests_run_, tests_run_ - tests_failed_, tests_failed_);
+    return tests_failed_ > 0 ? 1 : 0;
+}
+#endif