feat: L Language v0.1 编译器完整实现
5 阶段编译流水线: 词法分析 → 语法分析(Pratt) → 语义分析(类型推断) → LLVM IR → .exe 模块: - lexer: 手写状态机, 40 种 Token, // 和 /* */ 注释 - parser: Pratt 表达式解析(9 级优先级) + 递归下降语句/函数 - ast: 14 种节点类型 + 工厂函数 - sema: 作用域链符号表 + 类型推断 + 类型检查 - codegen: AST → LLVM-C API, print_i64/f64/bool 内建 - driver: 命令行 + 流水线串联 + 错误报告 - util: Arena bump allocator (8MB) 测试: 65 单元测试(词法41+语法15+语义9) + 5 集成测试 全部通过 语言特性: i64/f64/bool/void, let不可变变量, if/else, while, 递归函数
This commit is contained in:
+37
@@ -0,0 +1,37 @@
|
||||
/bin/
|
||||
/build/
|
||||
/.idea/
|
||||
/.vscode/
|
||||
/.claude/
|
||||
/.trae/
|
||||
/.dist/
|
||||
*.iml
|
||||
|
||||
# 代码图索引
|
||||
/.codegraph/
|
||||
|
||||
# 编译产物
|
||||
*.o
|
||||
*.obj
|
||||
*.exe
|
||||
*.out
|
||||
*.a
|
||||
*.lib
|
||||
*.dll
|
||||
*.exp
|
||||
|
||||
# LLVM IR
|
||||
*.ll
|
||||
*.bc
|
||||
|
||||
# 临时文件
|
||||
*.tmp
|
||||
*.swp
|
||||
*~
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# 归档
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.7z
|
||||
@@ -0,0 +1,19 @@
|
||||
# Changelog
|
||||
|
||||
## 0.1.0 (2026-06-05)
|
||||
|
||||
### Added
|
||||
- 词法分析器:手写状态机,40 种 Token 类型,支持 `//` 和 `/* */` 注释
|
||||
- 语法分析器:Pratt 表达式解析(9 级优先级)+ 递归下降语句/函数解析
|
||||
- AST:14 种节点类型,工厂函数模式创建
|
||||
- 语义分析器:作用域链符号表 + 类型推断 + 类型检查
|
||||
- LLVM IR 代码生成:全 AST 节点覆盖,内建 `print_i64` / `print_f64` / `print_bool`
|
||||
- 驱动程序:命令行参数解析 + 编译流水线串联 + `--emit-ir` 调试模式
|
||||
- Arena bump allocator (8MB)
|
||||
- 错误报告:ANSI 红色高亮,文件名:行号:列号 格式
|
||||
- 类型系统:`i64` / `f64` / `bool` / `void`,`let` 不可变变量,类型推断
|
||||
- 控制流:`if` / `else`,`while` 循环,`return` 语句
|
||||
- 函数:多参数、递归、可选返回类型标注
|
||||
- 65 个单元测试 (词法 41 + 语法 15 + 语义 9)
|
||||
- 5 个集成测试 (算术、分支、递归、斐波那契、浮点)
|
||||
- CMake 构建系统,静态库 + 可执行文件 + 测试分离
|
||||
@@ -0,0 +1,184 @@
|
||||
# CLAUDE.md
|
||||
|
||||
## 项目概述
|
||||
|
||||
L Language v0.1 — 用 C17 实现的静态类型编译型编程语言,Rust 风格语法,LLVM 22.x 后端。经典 5 阶段流水线:词法 → 语法 → 语义 → IR → 可执行文件。
|
||||
|
||||
## 构建命令
|
||||
|
||||
```bash
|
||||
# 配置(仅首次)
|
||||
mkdir -p build && cd build
|
||||
cmake .. -G "MinGW Makefiles" -DLLVM_PREFIX="D:/settings/Language/LLVM"
|
||||
|
||||
# 编译
|
||||
mingw32-make -j4
|
||||
|
||||
# 编译单个目标
|
||||
mingw32-make l_lang
|
||||
mingw32-make l_lang_lib
|
||||
```
|
||||
|
||||
## 架构
|
||||
|
||||
```
|
||||
源文件(.l) → 词法分析 → 语法分析 → 语义分析 → IR 生成 → 可执行文件
|
||||
Token[] AstNode* 带类型AST LLVM Module .exe
|
||||
```
|
||||
|
||||
```
|
||||
L Language/
|
||||
├── include/
|
||||
│ └── l_lang.h 公共头文件 (TypeKind 枚举, 向前声明)
|
||||
├── src/
|
||||
│ ├── lexer/
|
||||
│ │ ├── token.h/c Token {kind, start, length, line, col}
|
||||
│ │ └── lexer.h/c 手写状态机,40 种 Token 类型
|
||||
│ ├── parser/
|
||||
│ │ └── parser.h/c Pratt 表达式 (9 级优先级) + 递归下降语句
|
||||
│ ├── ast/
|
||||
│ │ └── ast.h/c 14 种节点 (PROGRAM..IDENT_EXPR) + 工厂函数
|
||||
│ ├── sema/
|
||||
│ │ ├── symbol.h/c 作用域链 (Scope* parent 链表)
|
||||
│ │ └── sema.h/c 类型推断 + 类型检查 + 3 个内建函数注册
|
||||
│ ├── codegen/
|
||||
│ │ └── codegen.h/c AST → LLVM-C API → LLVMModuleRef
|
||||
│ ├── driver/
|
||||
│ │ ├── main.c 入口 + 命令行解析 + 流水线串联
|
||||
│ │ └── error.h/c ErrorInfo / ErrorList 错误报告
|
||||
│ └── util/
|
||||
│ └── arena.h/c Bump allocator (8MB, 8 字节对齐)
|
||||
├── test/
|
||||
│ ├── test_utils.h 断言宏 (ASSERT / TEST_RUN / test_summary)
|
||||
│ ├── test_lexer.c 词法测试 (41 tests)
|
||||
│ ├── test_parser.c 语法测试 (15 tests)
|
||||
│ ├── test_sema.c 语义测试 (9 tests)
|
||||
│ └── programs/ .l 集成测试 (5 个程序)
|
||||
├── docs/
|
||||
│ ├── PRD.md 产品需求文档
|
||||
│ └── superpowers/plans/ 实现计划
|
||||
├── CMakeLists.txt l_lang_lib (静态库) + l_lang (exe) + 测试
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## 核心 API 参考
|
||||
|
||||
### 词法分析
|
||||
|
||||
```c
|
||||
// lexer.h
|
||||
Token* lex(Arena* a, const char* source, const char* filename,
|
||||
size_t* count, ErrorInfo* error);
|
||||
// 返回: Token 数组(分配在 arena),出错返回 NULL
|
||||
// Token: {TokenKind kind, const char* start, int length, int line, int col}
|
||||
```
|
||||
|
||||
### 语法分析
|
||||
|
||||
```c
|
||||
// parser.h
|
||||
AstNode* parse(Arena* a, const Token* tokens, size_t count,
|
||||
const char* filename, ErrorInfo* error);
|
||||
// 返回: AST_PROGRAM 节点,出错返回 NULL
|
||||
// 支持: 所有语句 (let/if/while/return) + 表达式 (Pratt precedence climbing)
|
||||
```
|
||||
|
||||
### 语义分析
|
||||
|
||||
```c
|
||||
// sema.h
|
||||
void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena);
|
||||
// 副作用: AST 节点填充 type 字段, errors 收集类型错误
|
||||
// 内建: scope_insert_function(print_i64, print_f64, print_bool)
|
||||
```
|
||||
|
||||
### 代码生成
|
||||
|
||||
```c
|
||||
// codegen.h
|
||||
LLVMModuleRef codegen_module(AstNode* ast, const char* module_name,
|
||||
const char** error_msg);
|
||||
// 返回: 已验证的 LLVM Module,出错返回 NULL
|
||||
// 内建 print_* 函数生成对应的 printf 调用
|
||||
```
|
||||
|
||||
## 类型系统
|
||||
|
||||
| L 类型 | LLVM 类型 | C 常量创建 |
|
||||
|--------|-----------|-----------|
|
||||
| `i64` | `LLVMInt64Type()` | `LLVMConstInt(ty, val, true)` |
|
||||
| `f64` | `LLVMDoubleType()` | `LLVMConstReal(ty, val)` |
|
||||
| `bool` | `LLVMInt1Type()` | `LLVMConstInt(ty, val, false)` |
|
||||
| `void` | `LLVMVoidType()` | — |
|
||||
|
||||
类型推断规则:
|
||||
- 字面量:`42` → `i64`, `3.14` → `f64`, `true` → `bool`
|
||||
- `let x = expr` → 从 expr 推断
|
||||
- `let x: i64 = expr` → 显式标注优先
|
||||
- 算术运算:i64 + i64 → i64, i64 + f64 → f64 (提升)
|
||||
- 比较运算:返回 `bool`
|
||||
|
||||
## 运算符优先级
|
||||
|
||||
| 优先级 | 运算符 |
|
||||
|--------|--------|
|
||||
| 70 (最高) | `-` (一元负), `!` (一元非) |
|
||||
| 60 | `*` `/` `%` |
|
||||
| 50 | `+` `-` |
|
||||
| 40 | `==` `!=` `<` `>` `<=` `>=` |
|
||||
| 30 | `&&` |
|
||||
| 20 | `\|\|` |
|
||||
| 10 (最低) | — |
|
||||
|
||||
## 错误处理
|
||||
|
||||
| 阶段 | 策略 |
|
||||
|------|------|
|
||||
| 词法分析 | 首个非法字符 → 立即终止,返回 ErrorInfo |
|
||||
| 语法分析 | 首个语法错误 → 立即终止,返回 ErrorInfo |
|
||||
| 语义分析 | 收集所有类型错误到 ErrorList → 批量输出 (ANSI 红色) |
|
||||
| IR 生成 | LLVMVerifyModule → 返回 char* 错误消息 |
|
||||
| 链接 | system() 返回值检查 → 打印 exit code |
|
||||
| 分配失败 | arena_alloc 返回 NULL → 逐层检查 |
|
||||
|
||||
|
||||
## 测试
|
||||
|
||||
```bash
|
||||
# 单元测试 (每个 test_*.c 独立编译运行,各有自己的 main)
|
||||
./l_lang_lexer_test.exe # 41 个断言
|
||||
./l_lang_test.exe # 15 个断言
|
||||
./l_lang_sema_test.exe # 9 个断言
|
||||
|
||||
# 集成测试 (编译 .l → 运行 .exe → 检查输出)
|
||||
for f in ../test/programs/*.l; do
|
||||
echo "=== $f ==="
|
||||
./l_lang.exe "$f" -o /tmp/out.exe && /tmp/out.exe
|
||||
done
|
||||
```
|
||||
|
||||
## 关键约束
|
||||
|
||||
- **C17 标准**:`-Wall -Wextra -g`,零编译警告
|
||||
- **Arena 分配**:Token、AST、符号表全部从 arena 分配,无 malloc/free 散落
|
||||
- **LLVM 路径**:`D:\settings\Language\LLVM`,C API 头文件手动补充(v22.1.7 预编译包缺少部分头文件)
|
||||
- **链接器**:MinGW 环境用 **gcc** 链接(非 clang,避免 MSVC 依赖)
|
||||
- **Windows**:仅支持 Windows 11 + MinGW-w64
|
||||
- **错误消息**:中文,格式 `文件名:行号:列号: 描述`
|
||||
|
||||
## 已知限制 (v0.1)
|
||||
|
||||
- `let` 变量不可变(无 `mut`),循环计数器无法修改 — 迭代算法需递归实现
|
||||
- 无字符串类型(`print_*` 是编译器内建,非语言特性)
|
||||
- 无数组、结构体、枚举、泛型、trait
|
||||
- 无模块系统(所有函数在单文件)
|
||||
- 作用域未清理(同函数内变量名不可重用)
|
||||
- `main` 返回值未被 OS 使用(需 CRT 包装)
|
||||
|
||||
## 版本号升级清单
|
||||
|
||||
| 文件 | 字段 |
|
||||
|------|------|
|
||||
| `CMakeLists.txt` | `VERSION` 变量 |
|
||||
| `README.md` | badges |
|
||||
| `CHANGELOG.md` | 版本标题 |
|
||||
@@ -0,0 +1,94 @@
|
||||
# Build script for the L Language compiler.
#
# Targets:
#   l_lang_lib         static library with every source under src/ except the driver's main.c
#   l_lang             compiler executable (src/driver/main.c + l_lang_lib)
#   l_lang_test        parser unit tests
#   l_lang_lexer_test  lexer unit tests
#   l_lang_sema_test   semantic-analysis unit tests
cmake_minimum_required(VERSION 3.20)
project(l_lang VERSION 0.1.0 LANGUAGES C)

set(CMAKE_C_STANDARD 17)
set(CMAKE_C_STANDARD_REQUIRED ON)

# === LLVM configuration ===
set(LLVM_PREFIX "D:/settings/Language/LLVM" CACHE PATH "LLVM installation prefix")

# Prefer LLVM's own package configuration; fall back to hand-written paths
# under LLVM_PREFIX when no compatible package is found.
# NOTE(review): the project documentation targets LLVM 22.x while this call
# requests version 19. Whether an LLVM 22 package satisfies the request depends
# on LLVM's version-compatibility file — confirm which branch is actually taken
# on the reference machine before changing the requested version.
find_package(LLVM 19 QUIET CONFIG
    HINTS ${LLVM_PREFIX}/lib/cmake/llvm
          ${LLVM_PREFIX}/cmake
)

if(NOT LLVM_FOUND)
    message(STATUS "LLVM CONFIG not found — using manual LLVM paths")
    set(LLVM_FOUND TRUE)
    set(LLVM_INCLUDE_DIRS "${LLVM_PREFIX}/include")
    set(LLVM_LIBRARY_DIR "${LLVM_PREFIX}/lib")
    set(LLVM_LIBRARIES "${LLVM_PREFIX}/lib/LLVM-C.lib")
    # Remember that the manual fallback is in use.
    set(LLVM_MANUAL_MODE ON)
else()
    message(STATUS "LLVM found: ${LLVM_DIR}")
    message(STATUS "LLVM includes: ${LLVM_INCLUDE_DIRS}")
    message(STATUS "LLVM available libs: ${LLVM_AVAILABLE_LIBS}")
    set(LLVM_MANUAL_MODE OFF)
endif()

# === Include directories shared by the library and everything linking it ===
set(L_LANG_INCLUDE_DIRS
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    ${LLVM_INCLUDE_DIRS}
)
foreach(module util lexer ast parser sema codegen driver)
    list(APPEND L_LANG_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src/${module})
endforeach()

# === Compiler library (without main.c so the tests can reuse it) ===
# Paths are anchored at CMAKE_CURRENT_SOURCE_DIR so main.c is still excluded
# when this project is added as a sub-directory of another build.
file(GLOB_RECURSE L_LANG_LIB_SOURCES CONFIGURE_DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")
list(REMOVE_ITEM L_LANG_LIB_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/driver/main.c")

add_library(l_lang_lib STATIC ${L_LANG_LIB_SOURCES})
# PUBLIC: every target that links l_lang_lib inherits the same include path.
target_include_directories(l_lang_lib PUBLIC ${L_LANG_INCLUDE_DIRS})
target_compile_options(l_lang_lib PRIVATE -Wall -Wextra -g)

# === Compiler executable ===
add_executable(l_lang src/driver/main.c)
target_link_libraries(l_lang PRIVATE l_lang_lib)

# === Link LLVM (through the library) ===
if(LLVM_MANUAL_MODE)
    target_link_libraries(l_lang_lib PUBLIC ${LLVM_LIBRARIES})
    message(STATUS "Linking LLVM manually: ${LLVM_LIBRARIES}")
else()
    # NOTE(review): links the name `LLVM` exactly as before; confirm that the
    # installed package exports a target or library with that name.
    target_link_libraries(l_lang_lib PUBLIC LLVM)

    # LLVM_DEFINITIONS is a single space-separated string; split it into a
    # list before handing it to target_compile_definitions.
    separate_arguments(L_LANG_LLVM_DEFINITIONS NATIVE_COMMAND "${LLVM_DEFINITIONS}")
    if(L_LANG_LLVM_DEFINITIONS)
        target_compile_definitions(l_lang_lib PRIVATE ${L_LANG_LLVM_DEFINITIONS})
    endif()
endif()

# === Unit tests (each test file has its own main and builds on its own) ===
enable_testing()

# Builds one unit-test executable from a single source file and registers it
# with CTest under the same name.
function(l_lang_add_unit_test target source)
    add_executable(${target} ${source})
    target_link_libraries(${target} PRIVATE l_lang_lib)
    target_include_directories(${target} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test)
    add_test(NAME ${target} COMMAND ${target})
endfunction()

l_lang_add_unit_test(l_lang_test       test/test_parser.c)  # parser tests
l_lang_add_unit_test(l_lang_lexer_test test/test_lexer.c)   # lexer tests
l_lang_add_unit_test(l_lang_sema_test  test/test_sema.c)    # sema tests
|
||||
@@ -0,0 +1,36 @@
|
||||
# 贡献者行为准则
|
||||
|
||||
## 我们的承诺
|
||||
|
||||
为了营造一个开放和友好的环境,我们作为贡献者和维护者承诺:无论年龄、体型、残障、种族、性别认同和表达、经验水平、国籍、个人外貌、宗教、性取向或身份,参与本项目不会受到骚扰。
|
||||
|
||||
## 我们的标准
|
||||
|
||||
有助于创造积极环境的行为包括:
|
||||
|
||||
- 使用友好和包容的语言
|
||||
- 尊重不同的观点和经验
|
||||
- 优雅地接受建设性批评
|
||||
- 关注对社区最有利的事情
|
||||
- 对其他社区成员表示同理心
|
||||
|
||||
不可接受的行为包括:
|
||||
|
||||
- 使用性暗示语言或图像以及不受欢迎的性关注
|
||||
- 侮辱/贬损性评论以及人身攻击或政治攻击
|
||||
- 公开或私下的骚扰
|
||||
- 未经明确许可发布他人的私人信息
|
||||
|
||||
## 我们的责任
|
||||
|
||||
项目维护者有责任澄清可接受行为的标准,并应对任何不可接受的行为采取适当和公平的纠正措施。
|
||||
|
||||
## 范围
|
||||
|
||||
本行为准则适用于项目空间和代表项目的公共空间。
|
||||
|
||||
## 执行
|
||||
|
||||
可通过 GitHub Issues 或直接联系维护者报告辱骂、骚扰或其他不可接受的行为。所有投诉将被审查和调查,并将产生被认为必要且适合情况的回应。
|
||||
|
||||
本行为准则改编自 [Contributor Covenant](https://www.contributor-covenant.org) 2.1 版。
|
||||
@@ -0,0 +1,79 @@
|
||||
# 贡献指南
|
||||
|
||||
## 本地开发环境
|
||||
|
||||
- **GCC** 14.x+ (MinGW-w64)
|
||||
- **CMake** 3.20+
|
||||
- **LLVM** 22.x(需要 C API 库和头文件,路径见 CLAUDE.md)
|
||||
|
||||
## 开发流程
|
||||
|
||||
1. Fork 本仓库
|
||||
2. `git clone <你的 fork>`
|
||||
3. `git checkout -b feature/xxx`
|
||||
4. 开发 + 测试
|
||||
5. `git commit`(遵循约定式提交格式)
|
||||
6. `git push`
|
||||
7. 提交 Pull Request
|
||||
|
||||
## 运行测试
|
||||
|
||||
```bash
|
||||
cd build
|
||||
|
||||
# 单元测试
|
||||
./l_lang_lexer_test.exe # 词法分析
|
||||
./l_lang_test.exe # 语法分析
|
||||
./l_lang_sema_test.exe # 语义分析
|
||||
|
||||
# 集成测试
|
||||
for f in ../test/programs/*.l; do
|
||||
./l_lang.exe "$f" -o /tmp/out.exe && /tmp/out.exe
|
||||
done
|
||||
```
|
||||
|
||||
## 代码规范
|
||||
|
||||
### C 代码
|
||||
|
||||
- C17 标准,`-Wall -Wextra -g` 零警告
|
||||
- 注释用中文
|
||||
- 内存统一使用 Arena bump allocator,不在局部函数内 malloc/free
|
||||
- 错误信息格式:`文件名:行号:列号: 描述`
|
||||
- 函数聚焦(< 100 行),文件内聚(< 500 行)
|
||||
|
||||
### 编译器架构约定
|
||||
|
||||
- 每个编译阶段独立模块,通过头文件声明公共接口
|
||||
- 新增 AST 节点需同步更新 `sema.c` 和 `codegen.c`
|
||||
- 新增 Token 类型需同步更新 `lexer.c` 和 `NAMES[]` 数组
|
||||
- 新增内建函数需在 `sema.c`(注册签名)和 `codegen.c`(生成 IR)两处实现
|
||||
|
||||
## 提交格式
|
||||
|
||||
```
|
||||
<类型>: <描述>
|
||||
```
|
||||
|
||||
类型:`feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `perf`
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
src/
|
||||
├── lexer/ 词法分析器
|
||||
├── parser/ 语法分析器
|
||||
├── ast/ AST 定义
|
||||
├── sema/ 语义分析
|
||||
├── codegen/ LLVM IR 生成
|
||||
├── driver/ 主入口 + 错误报告
|
||||
└── util/ 内存池
|
||||
test/ 测试
|
||||
docs/ 文档
|
||||
```
|
||||
|
||||
## 开始贡献前
|
||||
|
||||
- 大改动建议先开 Issue 讨论
|
||||
- 新语言特性需要在 `test/programs/` 添加对应的集成测试
|
||||
- 不要引入编译警告
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 刘航宇 (LHY0125)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,238 @@
|
||||
<div align="center">
<h1>L Language</h1>
<p>用 C17 实现的静态类型编译型编程语言</p>
</div>
|
||||
|
||||
<p align="center">
|
||||
<img src="https://img.shields.io/badge/version-0.1.0-blue" alt="version">
|
||||
<img src="https://img.shields.io/badge/C-17-555555" alt="C">
|
||||
<img src="https://img.shields.io/badge/LLVM-22.1.7-4B8BBE" alt="LLVM">
|
||||
<img src="https://img.shields.io/badge/GCC-15.x-darkgreen" alt="GCC">
|
||||
<img src="https://img.shields.io/badge/tests-70%20passed-brightgreen" alt="tests">
|
||||
<img src="https://img.shields.io/badge/license-MIT-green" alt="license">
|
||||
</p>
|
||||
|
||||
---
|
||||
|
||||
## 简介
|
||||
|
||||
L Language 是一门学习型编译语言,手写词法分析、递归下降 + Pratt 解析、语义分析和 LLVM IR 代码生成,最终生成原生可执行文件。语法借鉴 Rust,类型系统支持类型推断。
|
||||
|
||||
```rust
|
||||
fn fib(n: i64) -> i64 {
|
||||
if n < 2 { return n; }
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
|
||||
fn main() -> i64 {
|
||||
print_i64(fib(10)); // 输出 55
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
## 架构
|
||||
|
||||
```
|
||||
源码(.l) → 词法分析(Token) → 语法分析(AST) → 语义分析(类型标注) → LLVM IR → 可执行文件
|
||||
```
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph 前端["编译器前端"]
|
||||
Lexer[词法分析器<br/>手写状态机<br/>40 种 Token]
|
||||
Parser[语法分析器<br/>递归下降 + Pratt<br/>14 种 AST 节点]
|
||||
Sema[语义分析器<br/>作用域链 + 类型推断<br/>类型检查 + 错误收集]
|
||||
end
|
||||
|
||||
subgraph 后端["编译器后端"]
|
||||
Codegen[LLVM IR 生成<br/>AST → LLVM-C API<br/>内建 print 函数]
|
||||
Link[链接器<br/>gcc<br/>生成 .exe]
|
||||
end
|
||||
|
||||
subgraph 运行时["运行时支持"]
|
||||
Builtins[内建函数<br/>print_i64 / print_f64<br/>print_bool → printf]
|
||||
end
|
||||
|
||||
Source[源码 .l] --> Lexer
|
||||
Lexer --> Parser
|
||||
Parser --> Sema
|
||||
Sema --> Codegen
|
||||
Codegen --> Link
|
||||
Link --> Exe[可执行文件 .exe]
|
||||
Builtins -.-> Codegen
|
||||
```
|
||||
|
||||
### 模块职责
|
||||
|
||||
| 模块 | 输入 | 输出 | 核心结构 |
|
||||
|------|------|------|----------|
|
||||
| `lexer/` | `char*` 源码 | `Token[]` | `Token` {kind, start, length, line, col} |
|
||||
| `parser/` | `Token[]` | `AstNode*` | 14 种节点 (Program..IdentExpr) |
|
||||
| `ast/` | — | 工厂函数 | `AstNode` {kind, type, as{union}} |
|
||||
| `sema/` | `AstNode*` | 类型标注 | `Scope` 作用域链 + `Symbol` 符号表 |
|
||||
| `codegen/` | `AstNode*` | `LLVMModuleRef` | `CgCtx` {module, builder, var_table} |
|
||||
| `driver/` | 命令行参数 | exit code | 流水线串联 + 错误报告 |
|
||||
|
||||
## 功能 (v0.1)
|
||||
|
||||
### 类型系统
|
||||
|
||||
| 类型 | 关键字 | 说明 |
|
||||
|------|--------|------|
|
||||
| 64 位有符号整数 | `i64` | `42`, `-7` |
|
||||
| 64 位浮点数 | `f64` | `3.14`, `-0.5` |
|
||||
| 布尔值 | `bool` | `true`, `false` |
|
||||
| 无返回值 | `void` | 函数不返回值时使用 |
|
||||
|
||||
- `let` 不可变变量,支持可选类型标注和类型推断
|
||||
- 类型在编译时完全确定,无隐式转换(除 `i64` → `f64` 自动提升)
|
||||
|
||||
### 控制流
|
||||
|
||||
- `if` / `else` 条件分支(支持 `else if` 链)
|
||||
- `while` 循环
|
||||
- `return` 提前返回(可选带表达式)
|
||||
|
||||
### 函数
|
||||
|
||||
- 多参数,显式返回类型(可省略,默认 `void`)
|
||||
- 递归调用
|
||||
- 内建函数:`print_i64`, `print_f64`, `print_bool`
|
||||
|
||||
## 安装
|
||||
|
||||
### 依赖
|
||||
|
||||
- **GCC** 14.x+ (MinGW-w64,已在 15.x 上验证)
|
||||
- **CMake** ≥ 3.20
|
||||
- **LLVM** 22.x(C API 库 + 头文件)
|
||||
|
||||
### 从源码构建
|
||||
|
||||
```bash
|
||||
git clone <repo-url>
|
||||
cd "L Language"
|
||||
mkdir build && cd build
|
||||
cmake .. -G "MinGW Makefiles" -DLLVM_PREFIX="D:/settings/Language/LLVM"
|
||||
mingw32-make -j4
|
||||
```
|
||||
|
||||
生成 `l_lang.exe`。
|
||||
|
||||
## 使用
|
||||
|
||||
```bash
|
||||
# 编译并运行
|
||||
./l_lang.exe example.l -o example.exe
|
||||
./example.exe
|
||||
|
||||
# 查看生成的 LLVM IR
|
||||
./l_lang.exe example.l --emit-ir
|
||||
```
|
||||
|
||||
## 开发
|
||||
|
||||
```bash
|
||||
# 构建
|
||||
cd build && mingw32-make -j4
|
||||
|
||||
# 运行全部测试 (65 单元 + 5 集成)
|
||||
./l_lang_lexer_test.exe # 词法分析 (41 tests)
|
||||
./l_lang_test.exe # 语法分析 (15 tests)
|
||||
./l_lang_sema_test.exe # 语义分析 (9 tests)
|
||||
|
||||
# 集成测试
|
||||
for f in ../test/programs/*.l; do
|
||||
./l_lang.exe "$f" -o out.exe && ./out.exe
|
||||
done
|
||||
```
|
||||
|
||||
### 技术栈
|
||||
|
||||
| 层 | 技术 |
|
||||
|----|------|
|
||||
| 实现语言 | C17 (GCC 15.x) |
|
||||
| 构建系统 | CMake 3.20+ |
|
||||
| IR 后端 | LLVM 22.1.7 C API |
|
||||
| 链接器 | gcc (MinGW-w64) |
|
||||
| 内存管理 | Arena bump allocator |
|
||||
| 测试框架 | 手写断言宏 (ASSERT / TEST_RUN / test_summary) |
|
||||
|
||||
### 项目结构
|
||||
|
||||
```
|
||||
include/l_lang.h # 公共类型定义 (TypeKind, 向前声明)
|
||||
src/
|
||||
├── lexer/ # 词法分析器
|
||||
│ ├── token.h/c # Token 类型 + 工具函数
|
||||
│ └── lexer.h/c # 状态机 lex()
|
||||
├── parser/
|
||||
│ └── parser.h/c # Pratt 表达式 + 递归下降 parse()
|
||||
├── ast/
|
||||
│ └── ast.h/c # 14 种节点定义 + 创建函数
|
||||
├── sema/
|
||||
│ ├── symbol.h/c # 作用域链 (查/插)
|
||||
│ └── sema.h/c # 类型推断 + 检查 sema_analyze()
|
||||
├── codegen/
|
||||
│ └── codegen.h/c # AST → LLVM IR codegen_module()
|
||||
├── driver/
|
||||
│ ├── main.c # 入口 + 命令行 + 流水线串联
|
||||
│ └── error.h/c # 错误报告 (ErrorInfo / ErrorList)
|
||||
└── util/
|
||||
└── arena.h/c # Bump allocator (8MB)
|
||||
test/
|
||||
├── test_utils.h # 断言宏
|
||||
├── test_lexer.c # 词法测试 (41 tests)
|
||||
├── test_parser.c # 语法测试 (15 tests)
|
||||
├── test_sema.c # 语义测试 (9 tests)
|
||||
└── programs/ # 集成测试 (.l 源文件)
|
||||
docs/
|
||||
├── PRD.md # 产品需求文档
|
||||
└── superpowers/plans/ # 实现计划
|
||||
```
|
||||
|
||||
## 错误处理
|
||||
|
||||
| 阶段 | 策略 |
|
||||
|------|------|
|
||||
| 词法分析 | 首个非法字符即终止,报告 文件名:行:列: 错误信息 |
|
||||
| 语法分析 | 首个语法错误即终止,报告期望 vs 实际 |
|
||||
| 语义分析 | 收集所有类型错误后批量输出,红色 ANSI 高亮 |
|
||||
| IR 生成 | LLVMVerifyModule 验证失败 → 输出 LLVM 诊断 |
|
||||
| 链接 | system() 返回值检查,失败时打印 exit code |
|
||||
|
||||
## 贡献
|
||||
|
||||
欢迎提交 Issue 和 Pull Request。
|
||||
|
||||
### 本地开发环境
|
||||
|
||||
- GCC 14.x+ (MinGW-w64)
|
||||
- CMake 3.20+
|
||||
- LLVM 22.x(需要 C API 库和头文件)
|
||||
|
||||
### 代码规范
|
||||
|
||||
- C17 标准,`-Wall -Wextra -g` 零警告
|
||||
- 注释用中文
|
||||
- Arena 内存池贯穿全流水线,不在局部函数内 malloc
|
||||
- 错误信息格式:`文件名:行:列: 描述`
|
||||
- 提交格式:`<类型>: <描述>`(feat/fix/refactor/docs/test/chore)
|
||||
|
||||
## 版本号升级清单
|
||||
|
||||
版本号需在 **3 个地方** 手动修改:
|
||||
|
||||
| 文件 | 字段 | 说明 |
|
||||
|------|------|------|
|
||||
| `CMakeLists.txt` | `VERSION` 变量 | CMake `project()` |
|
||||
| `README.md` | badges | 文档徽章 |
|
||||
| `CHANGELOG.md` | 版本标题 | 变更日志 |
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT License
|
||||
|
||||
## 作者
|
||||
|
||||
[刘航宇](https://github.com/LHY0125) — 河南理工大学人工智能协会
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
# 安全策略
|
||||
|
||||
## 报告漏洞
|
||||
|
||||
如果你发现安全漏洞,请**不要**在公开 Issue 中报告。请通过以下方式私下报告:
|
||||
|
||||
- GitHub: 在 [Security Advisories](https://github.com/LHY0125/l-language/security/advisories) 页面提交
|
||||
- 邮件: 联系项目维护者
|
||||
|
||||
我们会在 **48 小时内**确认收到报告,并在 7 天内提供初步评估和修复计划。
|
||||
|
||||
## 安全最佳实践
|
||||
|
||||
### 作为用户
|
||||
|
||||
- 仅从 [Releases](https://github.com/LHY0125/l-language/releases) 页面下载编译好的二进制文件
|
||||
- 编译 `.l` 源文件时,会在当前目录生成 `.o` 和 `.exe` 文件
|
||||
- 不要编译不可信来源的 `.l` 源文件
|
||||
|
||||
### 作为开发者
|
||||
|
||||
- 永远不要在源代码中硬编码密钥或凭据
|
||||
- 所有外部输入(源文件)在系统边界验证
|
||||
- Arena 分配失败时逐层返回 NULL,不做静默回退
|
||||
- 调用 `vsnprintf` 后检查返回值,正确处理缓冲区不足导致的截断
|
||||
- `strtoll` / `strtod` 解析 Token 值前检查长度边界
|
||||
|
||||
## 已知限制
|
||||
|
||||
- v0.1 不支持沙箱或权限控制 — 编译出的可执行文件具有当前用户的所有权限
|
||||
- `print_*` 内建函数直接调用 `printf`,format string 为硬编码常量,无注入风险
|
||||
- 不支持文件 I/O、网络、外部 FFI 调用(v0.1 语言能力有限,攻击面极小)
|
||||
|
||||
## 支持版本
|
||||
|
||||
| 版本 | 支持状态 |
|
||||
|------|----------|
|
||||
| v0.1.x | 活跃开发中 |
|
||||
|
||||
> v0.1 处于早期开发阶段,API 和语言语法可能发生破坏性变更。生产环境请勿使用。
|
||||
+405
@@ -0,0 +1,405 @@
|
||||
# L Language PRD(产品需求文档)
|
||||
|
||||
> 版本: v0.1 | 日期: 2026-06-04 | 作者: 刘航宇 (AI 辅助)
|
||||
|
||||
---
|
||||
|
||||
## 1. 项目概述
|
||||
|
||||
### 1.1 一句话描述
|
||||
|
||||
用 C 语言实现一门静态类型、Rust 风格语法、多范式混合的编译型编程语言 "L Language"。
|
||||
|
||||
### 1.2 目标
|
||||
|
||||
| | 短期(v0.1) | 远期 |
|
||||
|---|-------------|------|
|
||||
| 定位 | 学习编译器全流程 | 真正能用的通用编程语言 |
|
||||
| 能力 | 计算器级 — 基本类型、算术、if/while、函数 | 模块、泛型、trait、所有权等 |
|
||||
| 标准 | 跑通全流水线就是胜利 | 自举 |
|
||||
|
||||
### 1.3 非目标(v0.1 不做)
|
||||
|
||||
- 字符串类型(只有字面量用于 `print`)
|
||||
- 数组 / 切片 / 结构体
|
||||
- 模块系统和多文件编译
|
||||
- 泛型、trait、模式匹配
|
||||
- 任何标准库
|
||||
- 垃圾回收或自动内存管理
|
||||
|
||||
---
|
||||
|
||||
## 2. 语言规范(v0.1)
|
||||
|
||||
### 2.1 类型系统
|
||||
|
||||
| 类型 | 关键字 | 占位 | 示例 |
|
||||
|------|--------|------|------|
|
||||
| 有符号 64 位整数 | `i64` | 64 bit | `42`、`-7` |
|
||||
| 64 位浮点数 | `f64` | 64 bit | `3.14`、`-0.5` |
|
||||
| 布尔值 | `bool` | 1 bit | `true`、`false` |
|
||||
| 无返回值 | `void` | — | 函数不返回值时使用 |
|
||||
|
||||
类型推断规则:
|
||||
- `let` 声明时从初始化表达式推断类型,无需显式标注
|
||||
- 函数参数和返回值必须显式标注类型
|
||||
- 变量一旦推断出类型就固定(强类型、静态类型)
|
||||
|
||||
### 2.2 语法(EBNF 摘要)
|
||||
|
||||
```ebnf
|
||||
program = { function }
|
||||
function = "fn" IDENT "(" [params] ")" ["->" type] block
|
||||
params = param { "," param }
|
||||
param = IDENT ":" type
|
||||
type = "i64" | "f64" | "bool" | "void"
|
||||
|
||||
block = "{" { statement } [expression] "}"
|
||||
statement = let_stmt | if_stmt | while_stmt | return_stmt | expr_stmt
|
||||
let_stmt = "let" IDENT [":" type] "=" expression ";" (* 变量不可变,无赋值语句;类型标注可选 *)
|
||||
if_stmt = "if" expression block ["else" (if_stmt | block)]
|
||||
while_stmt = "while" expression block
|
||||
return_stmt = "return" [expression] ";"
|
||||
expr_stmt = expression ";"
|
||||
|
||||
expression = logical_or
|
||||
logical_or = logical_and { "||" logical_and }
|
||||
logical_and = comparison { "&&" comparison }
|
||||
comparison = term { ("==" | "!=" | "<" | ">" | "<=" | ">=") term }
|
||||
term = factor { ("+" | "-") factor }
|
||||
factor = unary { ("*" | "/" | "%") unary }
|
||||
unary = ("-" | "!") unary | primary
|
||||
primary = NUMBER | BOOL | IDENT | call | "(" expression ")"
|
||||
call = IDENT "(" [args] ")"
|
||||
args = expression { "," expression }
|
||||
```
|
||||
|
||||
### 2.3 内置函数(编译器提供,非语言特性)
|
||||
|
||||
| 函数 | 说明 |
|
||||
|------|------|
|
||||
| `print_i64(x: i64) -> void` | 打印整数并换行 |
|
||||
| `print_f64(x: f64) -> void` | 打印浮点数并换行 |
|
||||
| `print_bool(x: bool) -> void` | 打印布尔值并换行 |
|
||||
|
||||
### 2.4 示例程序
|
||||
|
||||
```rust
|
||||
fn fib(n: i64) -> i64 {
|
||||
if n < 2 {
|
||||
return n;
|
||||
}
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
|
||||
fn main() -> i64 {
|
||||
let result = fib(10);
|
||||
print_i64(result); // 输出: 55
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. 编译器架构
|
||||
|
||||
### 3.1 整体流水线
|
||||
|
||||
```
|
||||
源文件(.l) ──▶ 词法分析 ──▶ 语法分析 ──▶ 语义分析 ──▶ IR生成 ──▶ 可执行文件(.exe)
|
||||
Token流 AST 带类型AST LLVM Module 机器码
|
||||
```
|
||||
|
||||
### 3.2 各阶段输入输出
|
||||
|
||||
| 阶段 | 输入 | 输出 | 关键数据结构 |
|
||||
|------|------|------|-------------|
|
||||
| 词法分析 | `char*` 源码 | Token 数组 | `Token`(类型 + 行号 + 列号 + 值)|
|
||||
| 语法分析 | Token 数组 | AST 根节点 | `AstNode`(递归树,每个节点有类型枚举 + 子节点)|
|
||||
| 语义分析 | AST 根节点 | 带类型标注的 AST | 在 `AstNode` 上附加 `TypeInfo` |
|
||||
| IR 生成 | 带类型 AST | LLVM Module | `LLVMModuleRef`、`LLVMValueRef` 等 |
|
||||
| 代码生成 | LLVM Module | `.exe` 可执行文件 | LLVM 的 `LLVMTargetMachineEmitToFile` |
|
||||
|
||||
### 3.3 错误处理策略
|
||||
|
||||
- 词法/语法错误:打印 `文件:行号:列号: 错误信息` 后立即终止
|
||||
- 语义错误(类型不匹配等):收集当前阶段所有错误后统一输出,再终止
|
||||
- 不尝试错误恢复,不做增量编译
|
||||
|
||||
---
|
||||
|
||||
## 4. 模块详细设计
|
||||
|
||||
### 4.1 词法分析器(Lexer)
|
||||
|
||||
**职责**:将源代码字符串转换为 Token 流
|
||||
|
||||
**Token 类型清单**:
|
||||
|
||||
| 类别 | Token |
|
||||
|------|-------|
|
||||
| 关键字 | `fn` `let` `if` `else` `while` `return` `true` `false` |
|
||||
| 类型 | `i64` `f64` `bool` `void` |
|
||||
| 字面量 | 整数、浮点数 |
|
||||
| 标识符 | 用户定义的变量名、函数名 |
|
||||
| 运算符/分隔 | `+` `-` `*` `/` `%` `==` `!=` `<` `>` `<=` `>=` `&&` `||` `!` `=` `->` `(` `)` `{` `}` `,` `:` `;` |
|
||||
|
||||
**实现要点**:
|
||||
- 手写状态机,不依赖 flex/lex
|
||||
- 跳过空白(空格、`\t`、`\r`、`\n`)和注释(`//` 行注释 + `/* */` 块注释)
|
||||
- 每个 Token 记录行号和列号,用于错误报告
|
||||
- 关键字通过哈希表或完美哈希识别
|
||||
|
||||
**关键函数签名**:
|
||||
```c
|
||||
Token* lex(const char* source, size_t* token_count, ErrorInfo* error);
|
||||
```
|
||||
|
||||
### 4.2 语法分析器(Parser)
|
||||
|
||||
**职责**:将 Token 流转换为抽象语法树
|
||||
|
||||
**实现方式**:手写递归下降解析器(Pratt parsing 处理表达式)
|
||||
|
||||
**AST 节点类型**:
|
||||
|
||||
```
|
||||
Program — 程序根节点,包含多个函数
|
||||
Function — 函数定义(名称、参数列表、返回类型、函数体)
|
||||
Parameter — 函数参数(名称、类型)
|
||||
Block — 代码块,包含语句列表
|
||||
LetStmt — let 声明(不可变变量)
|
||||
IfStmt — if 语句(条件、then块、可选的else块)
|
||||
WhileStmt — while 循环
|
||||
ReturnStmt — return 语句
|
||||
BinaryExpr — 二元运算(运算符 + 左右操作数)
|
||||
UnaryExpr — 一元运算(-、!)
|
||||
CallExpr — 函数调用
|
||||
LiteralExpr — 字面量(整数、浮点、布尔)
|
||||
IdentifierExpr — 标识符引用
|
||||
```
|
||||
|
||||
**关键函数签名**:
|
||||
```c
|
||||
AstNode* parse(const Token* tokens, size_t token_count, ErrorInfo* error);
|
||||
```
|
||||
|
||||
### 4.3 语义分析器(Sema / Semantic Analyzer)
|
||||
|
||||
**职责**:类型推断和类型检查
|
||||
|
||||
**核心工作**:
|
||||
1. **符号表管理** — 作用域栈(全局作用域 → 函数作用域 → 块作用域)
|
||||
2. **类型推断** — 从 `let x = 42` 推断出 `x: i64`
|
||||
3. **类型检查** — `if`/`while` 条件必须是 `bool`;二元运算两边类型必须一致
|
||||
4. **隐式类型转换** — 整数可自动提升为浮点数(`i64` → `f64`)
|
||||
5. **函数签名检查** — 调用时参数数量和类型必须匹配声明
|
||||
6. **未定义检查** — 所有引用的标识符必须在作用域内已定义
|
||||
|
||||
**数据结构**:
|
||||
```c
|
||||
typedef struct {
|
||||
const char* name; // 符号名称
|
||||
TypeKind type; // 推断出的类型
|
||||
SymbolKind kind; // 变量 / 参数 / 函数
|
||||
// 函数符号额外信息
|
||||
TypeKind return_type;
|
||||
TypeKind* param_types;
|
||||
size_t param_count;
|
||||
} Symbol;
|
||||
|
||||
typedef struct Scope {
|
||||
Symbol* symbols; // 当前作用域的符号表
|
||||
size_t count;
|
||||
struct Scope* parent; // 上级作用域
|
||||
} Scope;
|
||||
```
|
||||
|
||||
**关键函数签名**:
|
||||
```c
|
||||
void analyze(AstNode* ast, ErrorList* errors);
|
||||
```
|
||||
|
||||
### 4.4 LLVM IR 生成器(Codegen)
|
||||
|
||||
**职责**:遍历带类型的 AST,调用 LLVM-C API 生成 LLVM IR
|
||||
|
||||
**类型映射**:
|
||||
|
||||
| L 类型 | LLVM 类型 |
|
||||
|--------|-----------|
|
||||
| `i64` | `LLVMInt64Type()` |
|
||||
| `f64` | `LLVMDoubleType()` |
|
||||
| `bool` | `LLVMInt1Type()` |
|
||||
| `void` | `LLVMVoidType()` |
|
||||
|
||||
**各 AST 节点的生成策略**:
|
||||
|
||||
| AST 节点 | IR 生成策略 |
|
||||
|----------|------------|
|
||||
| `Function` | 创建 `LLVMAddFunction`,分配 entry BB,生成函数体 |
|
||||
| `Block` | 顺序生成每条语句/表达式 |
|
||||
| `LetStmt` | `alloca` 分配栈空间,计算初始化表达式,`store` |
|
||||
| `BinaryExpr` | 生成左右操作数,按运算符选 `LLVMBuildAdd`/`LLVMBuildSub`/... |
|
||||
| `IfStmt` | 创建 3 个 BB: then/else/merge,`LLVMBuildCondBr` |
|
||||
| `WhileStmt` | 创建 cond/body/merge 三个 BB,`LLVMBuildCondBr` + `LLVMBuildBr` |
|
||||
| `CallExpr` | 查找函数,`LLVMBuildCall2` |
|
||||
| `ReturnStmt` | `LLVMBuildRet` |
|
||||
| `LiteralExpr` | `LLVMConstInt`/`LLVMConstReal` |
|
||||
| 标识符读取 | 从 `alloca` 地址 `LLVMBuildLoad2` |
|
||||
|
||||
**内置函数实现**:`print_i64`/`print_f64`/`print_bool` 在编译器内部用 C `printf` 实现,生成时直接映射到 LLVM IR 调用 `printf`。
|
||||
|
||||
**关键函数签名**:
|
||||
```c
|
||||
LLVMModuleRef codegen(AstNode* ast, const char* module_name);
|
||||
```
|
||||
|
||||
### 4.5 驱动层(Driver)
|
||||
|
||||
**职责**:串联各阶段,处理命令行参数
|
||||
|
||||
```
|
||||
l_lang.exe <source.l> [-o <output>] [--emit-ir]
|
||||
--emit-ir 输出 LLVM IR 文本(.ll),不生成可执行文件
|
||||
-o <file> 指定输出文件名(默认 a.exe)
|
||||
```
|
||||
|
||||
**流程**:
|
||||
1. 读取源文件到内存
|
||||
2. 调用 `lex()` → 检查词法错误
|
||||
3. 调用 `parse()` → 检查语法错误
|
||||
4. 调用 `analyze()` → 检查语义错误
|
||||
5. 调用 `codegen()` → 生成 LLVM Module
|
||||
6. 调用 `LLVMTargetMachineEmitToFile()` → 输出目标文件
|
||||
7. 调用系统链接器(`clang` 或 `gcc`)→ 生成可执行文件
|
||||
|
||||
---
|
||||
|
||||
## 5. 开发阶段划分
|
||||
|
||||
### Phase 1:基础设施(预计 2-3 天)
|
||||
|
||||
- [ ] CMake 构建系统(查找 LLVM、配置编译选项)
|
||||
- [ ] 词法分析器:完整的 Token 识别 + 注释跳过
|
||||
- [ ] 单元测试框架搭建(CUnit 或手写断言宏)
|
||||
- [ ] 错误报告基础设施(行号/列号 + 彩色输出)
|
||||
|
||||
### Phase 2:表达式计算(预计 2-3 天)
|
||||
|
||||
- [ ] AST 数据结构定义
|
||||
- [ ] Pratt 表达式解析器(算术、比较、逻辑)
|
||||
- [ ] 字面量 + 一元运算 + 二元运算的 IR 生成
|
||||
- [ ] 生成第一个可执行文件:`print_i64(1 + 2 * 3)`
|
||||
|
||||
### Phase 3:变量和控制流(预计 3-4 天)
|
||||
|
||||
- [ ] `let` 声明 + 标识符引用
|
||||
- [ ] 语义分析:符号表 + 类型推断
|
||||
- [ ] `if` / `else` 语句
|
||||
- [ ] `while` 循环
|
||||
- [ ] `return` 语句
|
||||
|
||||
### Phase 4:函数(预计 3-4 天)
|
||||
|
||||
- [ ] 函数定义解析(参数 + 返回类型)
|
||||
- [ ] 函数调用 IR 生成
|
||||
- [ ] 作用域链管理
|
||||
- [ ] 完整的斐波那契程序跑通
|
||||
|
||||
### Phase 5:集成验证(预计 1-2 天)
|
||||
|
||||
- [ ] 端到端测试(多个 `.l` 程序编译运行验证结果)
|
||||
- [ ] README 文档
|
||||
- [ ] 编译错误信息完善
|
||||
|
||||
**总预计工期**:约 2-3 周(每天投入 2-4 小时)
|
||||
|
||||
---
|
||||
|
||||
## 6. 技术依赖
|
||||
|
||||
| 依赖 | 版本 | 用途 |
|
||||
|------|------|------|
|
||||
| C 编译器 | GCC 14.x (MinGW) | 编译编译器自身 |
|
||||
| CMake | ≥ 3.20 | 构建系统 |
|
||||
| LLVM | 22.x(v0.1 实际使用 22.1.7) | IR 生成 + 目标代码输出 |
|
||||
| 操作系统 | Windows 11 | 开发和运行 |
|
||||
|
||||
LLVM 安装路径:`D:\settings\Language\LLVM`
|
||||
|
||||
---
|
||||
|
||||
## 7. 目录结构
|
||||
|
||||
```
|
||||
L Language/
|
||||
├── docs/
|
||||
│ └── PRD.md 本文档
|
||||
├── src/
|
||||
│ ├── lexer/
|
||||
│ │ ├── lexer.c 词法分析器主逻辑
|
||||
│ │ ├── token.c Token 数据结构
|
||||
│ │ └── lexer.h
|
||||
│ ├── parser/
|
||||
│ │ ├── parser.c 递归下降 + Pratt 解析
|
||||
│ │ └── parser.h
|
||||
│ ├── ast/
|
||||
│ │ ├── ast.c AST 节点创建/销毁
|
||||
│ │ └── ast.h
|
||||
│ ├── sema/
|
||||
│ │ ├── sema.c 语义分析 + 类型检查
|
||||
│ │ ├── symbol.c 符号表管理
|
||||
│ │ └── sema.h
|
||||
│ ├── codegen/
|
||||
│ │ ├── codegen.c LLVM IR 生成
|
||||
│ │ └── codegen.h
|
||||
│ ├── driver/
|
||||
│ │ ├── main.c 入口 + 命令行参数
|
||||
│ │ └── error.c 错误报告
|
||||
│ └── util/
|
||||
│ └── arena.c 内存池(简化内存管理)
|
||||
├── include/
|
||||
│ └── l_lang.h 公共头文件(类型定义等)
|
||||
├── test/
|
||||
│ ├── test_lexer.c
|
||||
│ ├── test_parser.c
|
||||
│ ├── test_sema.c
|
||||
│ ├── test_codegen.c
|
||||
│ └── programs/ .l 测试程序
|
||||
│ ├── hello.l
|
||||
│ ├── fib.l
|
||||
│ └── ...
|
||||
├── CMakeLists.txt
|
||||
└── README.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. 成功标准
|
||||
|
||||
v0.1 完成的判定标准:
|
||||
|
||||
1. 斐波那契程序(递归 + 循环两个版本)编译并输出正确结果
|
||||
2. 至少 3 个不同算例编译运行通过
|
||||
3. 以下语法元素均有覆盖:
|
||||
- [x] `let` 变量声明和类型推断
|
||||
- [x] 算术运算(`+` `-` `*` `/` `%`)
|
||||
- [x] 比较运算(`==` `!=` `<` `>` `<=` `>=`)
|
||||
- [x] 逻辑运算(`&&` `||` `!`)
|
||||
- [x] `if` / `else` 控制流
|
||||
- [x] `while` 循环
|
||||
- [x] 函数定义和调用
|
||||
- [x] 递归
|
||||
4. 类型错误能被正确检测并给出可读的错误信息
|
||||
|
||||
---
|
||||
|
||||
## 9. 风险与缓解
|
||||
|
||||
| 风险 | 概率 | 缓解措施 |
|
||||
|------|------|----------|
|
||||
| LLVM-C API 复杂度过高 | 中 | 先用 LLVM 官方 Kaleidoscope 教程预热,理解核心 API |
|
||||
| 类型推断实现困难 | 中 | v0.1 只做最简单的 "从初始化表达式推断",不涉及 Hindley-Milner 或泛型 |
|
||||
| 递归函数 IR 栈管理出错 | 中 | 所有变量用 `alloca`(栈分配),LLVM 的 `mem2reg` pass 自动优化 |
|
||||
| Windows/MinGW + LLVM 兼容问题 | 低 | 提前验证 LLVM 安装和 CMake 能找到 LLVM |
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,38 @@
|
||||
#ifndef L_LANG_H
|
||||
#define L_LANG_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// === 类型系统 ===
|
||||
typedef enum {
    TYPE_I64,
    TYPE_F64,
    TYPE_BOOL,
    TYPE_VOID,
    TYPE_UNKNOWN,   // not inferred yet
    TYPE_ERROR,     // type error
} TypeKind;

/*
 * Returns the source-level spelling of a type kind.
 *
 * Only the four concrete types have a spelling; every other value
 * (TYPE_UNKNOWN, TYPE_ERROR, or anything out of range) maps to "<unknown>".
 * The returned pointer refers to a string literal and must not be freed.
 */
static inline const char* type_name(TypeKind kind) {
    if (kind == TYPE_I64)  return "i64";
    if (kind == TYPE_F64)  return "f64";
    if (kind == TYPE_BOOL) return "bool";
    if (kind == TYPE_VOID) return "void";
    return "<unknown>";
}
|
||||
|
||||
// === 向前声明 ===
|
||||
typedef struct Token Token;
|
||||
typedef struct AstNode AstNode;
|
||||
typedef struct Scope Scope;
|
||||
typedef struct Arena Arena;
|
||||
|
||||
// === 跨模块分配器接口(避免循环依赖,各模块通过 void* 使用 arena)===
|
||||
void* arena_alloc_impl(void* alloc, size_t size);
|
||||
char* arena_strdup_impl(void* alloc, const char* src, size_t len);
|
||||
|
||||
#endif
|
||||
+113
@@ -0,0 +1,113 @@
|
||||
#include "ast.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
// 使用宏简化节点创建
|
||||
#define NEW(alloc, k) \
|
||||
AstNode* n = (AstNode*)arena_alloc_impl(alloc, sizeof(AstNode)); \
|
||||
n->kind = (k); n->type.kind = TYPE_UNKNOWN; \
|
||||
n->line = line; n->col = col
|
||||
|
||||
AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col) {
|
||||
NEW(alloc, AST_PROGRAM);
|
||||
n->as.program.functions = fns;
|
||||
n->as.program.fn_count = count;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount,
|
||||
TypeKind ret, AstNode* body, int line, int col) {
|
||||
NEW(alloc, AST_FUNCTION);
|
||||
n->as.function.name = name; n->as.function.params = params;
|
||||
n->as.function.param_count = pcount; n->as.function.return_type = ret;
|
||||
n->as.function.body = body;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col) {
|
||||
NEW(alloc, AST_PARAMETER);
|
||||
n->as.parameter.name = name; n->as.parameter.type = type;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col) {
|
||||
NEW(alloc, AST_BLOCK);
|
||||
n->as.block.stmts = stmts; n->as.block.stmt_count = count;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, AstNode* init, int line, int col) {
|
||||
NEW(alloc, AST_LET_STMT);
|
||||
n->as.let_stmt.name = name; n->as.let_stmt.annot_type = annot_type;
|
||||
n->as.let_stmt.has_type_annot = has_type_annot; n->as.let_stmt.init = init;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col) {
|
||||
NEW(alloc, AST_IF_STMT);
|
||||
n->as.if_stmt.cond = cond; n->as.if_stmt.then_block = then_b;
|
||||
n->as.if_stmt.else_block = else_b;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col) {
|
||||
NEW(alloc, AST_WHILE_STMT);
|
||||
n->as.while_stmt.cond = cond; n->as.while_stmt.body = body;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col) {
|
||||
NEW(alloc, AST_RETURN_STMT);
|
||||
n->as.return_stmt.expr = expr;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col) {
|
||||
NEW(alloc, AST_EXPR_STMT);
|
||||
n->as.expr_stmt.expr = expr;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col) {
|
||||
NEW(alloc, AST_BINARY_EXPR);
|
||||
n->as.binary.op = op; n->as.binary.left = left; n->as.binary.right = right;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col) {
|
||||
NEW(alloc, AST_UNARY_EXPR);
|
||||
n->as.unary.op = op; n->as.unary.operand = operand;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col) {
|
||||
NEW(alloc, AST_CALL_EXPR);
|
||||
n->as.call.name = name; n->as.call.args = args; n->as.call.arg_count = count;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col) {
|
||||
NEW(alloc, AST_LITERAL_EXPR);
|
||||
n->as.literal.lit_type = TYPE_I64; n->as.literal.i64_val = val;
|
||||
n->type.kind = TYPE_I64;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col) {
|
||||
NEW(alloc, AST_LITERAL_EXPR);
|
||||
n->as.literal.lit_type = TYPE_F64; n->as.literal.f64_val = val;
|
||||
n->type.kind = TYPE_F64;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col) {
|
||||
NEW(alloc, AST_LITERAL_EXPR);
|
||||
n->as.literal.lit_type = TYPE_BOOL; n->as.literal.bool_val = val;
|
||||
n->type.kind = TYPE_BOOL;
|
||||
return n;
|
||||
}
|
||||
|
||||
AstNode* ast_make_ident(void* alloc, const char* name, int line, int col) {
|
||||
NEW(alloc, AST_IDENT_EXPR);
|
||||
n->as.ident.name = name;
|
||||
return n;
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
#ifndef AST_H
|
||||
#define AST_H
|
||||
|
||||
#include "l_lang.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/* Discriminator for AstNode: selects which member of `as` is valid. */
typedef enum {
    /* top level */
    AST_PROGRAM = 0,
    AST_FUNCTION,
    AST_PARAMETER,
    /* statements */
    AST_BLOCK,
    AST_LET_STMT,
    AST_IF_STMT,
    AST_WHILE_STMT,
    AST_RETURN_STMT,
    AST_EXPR_STMT,
    /* expressions */
    AST_BINARY_EXPR,
    AST_UNARY_EXPR,
    AST_CALL_EXPR,
    AST_LITERAL_EXPR,
    AST_IDENT_EXPR,
} AstKind;
|
||||
|
||||
/* Operator tags. Despite the name, the enum also carries the two unary
 * operators (OP_NEG, OP_NOT), which are stored in unary nodes. */
typedef enum {
    /* arithmetic */
    OP_ADD = 0,
    OP_SUB,
    OP_MUL,
    OP_DIV,
    OP_MOD,
    /* comparison */
    OP_EQ,
    OP_NE,
    OP_LT,
    OP_GT,
    OP_LE,
    OP_GE,
    /* logical */
    OP_AND,
    OP_OR,
    /* unary */
    OP_NEG,
    OP_NOT,
} BinaryOp;
|
||||
|
||||
// 类型信息(语义分析阶段填充)
|
||||
typedef struct {
|
||||
TypeKind kind;
|
||||
} TypeInfo;
|
||||
|
||||
// AST 节点
|
||||
struct AstNode {
|
||||
AstKind kind;
|
||||
TypeInfo type; // 语义分析后填充,默认为 TYPE_UNKNOWN
|
||||
int line; // 源文件行号
|
||||
int col; // 源文件列号
|
||||
|
||||
// 节点特有数据(按 kind 解释)
|
||||
union {
|
||||
// AST_PROGRAM
|
||||
struct { struct AstNode** functions; size_t fn_count; } program;
|
||||
// AST_FUNCTION
|
||||
struct { const char* name; struct AstNode** params; size_t param_count;
|
||||
TypeKind return_type; struct AstNode* body; } function;
|
||||
// AST_PARAMETER
|
||||
struct { const char* name; TypeKind type; } parameter;
|
||||
// AST_BLOCK
|
||||
struct { struct AstNode** stmts; size_t stmt_count; } block;
|
||||
// AST_LET_STMT
|
||||
struct { const char* name; TypeKind annot_type; bool has_type_annot; struct AstNode* init; } let_stmt;
|
||||
// AST_IF_STMT
|
||||
struct { struct AstNode* cond; struct AstNode* then_block; struct AstNode* else_block; } if_stmt;
|
||||
// AST_WHILE_STMT
|
||||
struct { struct AstNode* cond; struct AstNode* body; } while_stmt;
|
||||
// AST_RETURN_STMT
|
||||
struct { struct AstNode* expr; } return_stmt;
|
||||
// AST_EXPR_STMT
|
||||
struct { struct AstNode* expr; } expr_stmt;
|
||||
// AST_BINARY_EXPR
|
||||
struct { BinaryOp op; struct AstNode* left; struct AstNode* right; } binary;
|
||||
// AST_UNARY_EXPR
|
||||
struct { BinaryOp op; struct AstNode* operand; } unary;
|
||||
// AST_CALL_EXPR
|
||||
struct { const char* name; struct AstNode** args; size_t arg_count; } call;
|
||||
// AST_LITERAL_EXPR
|
||||
struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; }; } literal;
|
||||
// AST_IDENT_EXPR
|
||||
struct { const char* name; } ident;
|
||||
} as;
|
||||
};
|
||||
|
||||
// 创建节点的辅助函数(内存来自 arena,通过 void* 传递避免循环依赖)
|
||||
AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col);
|
||||
AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount,
|
||||
TypeKind ret, AstNode* body, int line, int col);
|
||||
AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col);
|
||||
AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col);
|
||||
AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, AstNode* init, int line, int col);
|
||||
AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col);
|
||||
AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col);
|
||||
AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col);
|
||||
AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col);
|
||||
AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col);
|
||||
AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col);
|
||||
AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col);
|
||||
AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col);
|
||||
AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col);
|
||||
AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col);
|
||||
AstNode* ast_make_ident(void* alloc, const char* name, int line, int col);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,383 @@
|
||||
#include "codegen.h"
|
||||
#include <llvm-c/Analysis.h>
|
||||
#include <llvm-c/Types.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// === 内部状态 ===
|
||||
typedef struct VarEntry {
|
||||
const char* name;
|
||||
LLVMValueRef alloca;
|
||||
struct VarEntry* next;
|
||||
} VarEntry;
|
||||
|
||||
typedef struct FnEntry {
|
||||
const char* name;
|
||||
LLVMValueRef fn;
|
||||
TypeKind ret;
|
||||
TypeKind* params;
|
||||
size_t pc;
|
||||
struct FnEntry* next;
|
||||
} FnEntry;
|
||||
|
||||
typedef struct {
|
||||
LLVMContextRef context; // LLVM 19+ 需要显式 Context
|
||||
LLVMModuleRef module;
|
||||
LLVMBuilderRef builder;
|
||||
VarEntry* var_table;
|
||||
const char* error;
|
||||
FnEntry* fn_table;
|
||||
// printf 运行时支持(内置 print 函数委托给 printf)
|
||||
LLVMValueRef printf_fn;
|
||||
LLVMTypeRef printf_ty;
|
||||
} CgCtx;
|
||||
|
||||
// === 类型映射(需要 Context)===
|
||||
/*
 * Maps a language type to the LLVM type used to represent it:
 * i64 -> i64, f64 -> double, bool -> i1. Every other kind (void, unknown,
 * error) maps to LLVM void. All types are created in ctx->context.
 */
static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) {
    LLVMContextRef c = ctx->context;

    if (kind == TYPE_I64)  return LLVMInt64TypeInContext(c);
    if (kind == TYPE_F64)  return LLVMDoubleTypeInContext(c);
    if (kind == TYPE_BOOL) return LLVMInt1TypeInContext(c);
    return LLVMVoidTypeInContext(c);
}
|
||||
|
||||
/*
 * Builds the LLVM constant for a literal node, using `ty` as the constant's
 * type. The value member that is read is chosen by the literal's lit_type.
 * Returns NULL for a literal whose lit_type is not i64, f64 or bool.
 */
static LLVMValueRef to_llvm_const(LLVMTypeRef ty, AstNode* lit) {
    TypeKind which = lit->as.literal.lit_type;

    if (which == TYPE_I64) {
        // i64 literals are sign-extended
        unsigned long long bits = (unsigned long long)lit->as.literal.i64_val;
        return LLVMConstInt(ty, bits, true);
    }
    if (which == TYPE_F64) {
        return LLVMConstReal(ty, lit->as.literal.f64_val);
    }
    if (which == TYPE_BOOL) {
        return LLVMConstInt(ty, lit->as.literal.bool_val ? 1 : 0, false);
    }
    return NULL;
}
|
||||
|
||||
// === 变量表 ===
|
||||
/*
 * Looks up a variable by name in the current variable table.
 * The list is searched from the head, so the most recently added entry with
 * that name wins. Returns its stack slot, or NULL if no entry matches.
 */
static LLVMValueRef find_var(CgCtx* ctx, const char* name) {
    VarEntry* cur = ctx->var_table;
    while (cur != NULL) {
        if (strcmp(cur->name, name) == 0) {
            return cur->alloca;
        }
        cur = cur->next;
    }
    return NULL;
}
|
||||
|
||||
/*
 * Registers a variable by pushing a new entry onto the head of the variable
 * table. `name` is stored by pointer, not copied.
 *
 * If the entry cannot be allocated the table is left unchanged and the
 * failure is recorded in ctx->error (unless an earlier error is already
 * recorded) instead of dereferencing NULL.
 */
static void add_var(CgCtx* ctx, const char* name, LLVMValueRef alloca) {
    VarEntry* e = malloc(sizeof *e);
    if (!e) {
        if (!ctx->error) ctx->error = "内存分配失败";
        return;
    }
    e->name = name;
    e->alloca = alloca;
    e->next = ctx->var_table;
    ctx->var_table = e;
}
|
||||
|
||||
// === 函数表 ===
|
||||
/*
 * Looks up a declared function by name in the function table.
 * Returns the LLVM function value of the first matching entry (searching
 * from the head of the list), or NULL if there is none.
 */
static LLVMValueRef find_fn(CgCtx* ctx, const char* name) {
    FnEntry* cur = ctx->fn_table;
    while (cur != NULL) {
        if (strcmp(cur->name, name) == 0) {
            return cur->fn;
        }
        cur = cur->next;
    }
    return NULL;
}
|
||||
|
||||
/*
 * Registers a declared function by pushing a new entry onto the head of the
 * function table. `name` is stored by pointer, not copied. The signature
 * fields (ret / params / pc) are initialised to "void, no parameters".
 *
 * If the entry cannot be allocated the table is left unchanged and the
 * failure is recorded in ctx->error (unless an earlier error is already
 * recorded) instead of dereferencing NULL.
 */
static void add_fn(CgCtx* ctx, const char* name, LLVMValueRef fn) {
    FnEntry* e = malloc(sizeof *e);
    if (!e) {
        if (!ctx->error) ctx->error = "内存分配失败";
        return;
    }
    e->name = name;
    e->fn = fn;
    e->ret = TYPE_VOID;
    e->params = NULL;
    e->pc = 0;
    e->next = ctx->fn_table;
    ctx->fn_table = e;
}
|
||||
|
||||
// === 向前声明 ===
|
||||
static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node);
|
||||
static void codegen_stmt(CgCtx* ctx, AstNode* node);
|
||||
|
||||
// === 表达式代码生成 ===
|
||||
// True when the LLVM value `v` has type double.
static bool cg_is_f64(LLVMValueRef v) {
    return LLVMGetTypeKind(LLVMTypeOf(v)) == LLVMDoubleTypeKind;
}

// Records the first code generation error in ctx->error and returns NULL.
static LLVMValueRef cg_fail(CgCtx* ctx, const char* msg) {
    if (!ctx->error) ctx->error = msg;
    return NULL;
}

// Unary expression: OP_NEG negates (float or integer), anything else is a
// bitwise NOT, which on an i1 operand is logical negation.
static LLVMValueRef cg_unary(CgCtx* ctx, AstNode* node) {
    LLVMValueRef operand = codegen_expr(ctx, node->as.unary.operand);
    if (!operand) return NULL;

    if (node->as.unary.op != OP_NEG)
        return LLVMBuildNot(ctx->builder, operand, "not");

    return cg_is_f64(operand)
        ? LLVMBuildFNeg(ctx->builder, operand, "fneg")
        : LLVMBuildNeg(ctx->builder, operand, "ineg");
}

// Binary expression. Both operands are always evaluated, left first.
static LLVMValueRef cg_binary(CgCtx* ctx, AstNode* node) {
    LLVMValueRef l = codegen_expr(ctx, node->as.binary.left);
    LLVMValueRef r = codegen_expr(ctx, node->as.binary.right);
    if (!l || !r) return NULL;

    LLVMBuilderRef b = ctx->builder;
    // The instruction family must follow the OPERAND type. The result type of
    // a comparison is bool, so deciding on node->type would select integer
    // compares for f64 operands.
    bool is_float = cg_is_f64(l);

    switch (node->as.binary.op) {
        case OP_ADD:
            return is_float ? LLVMBuildFAdd(b, l, r, "fadd")
                            : LLVMBuildAdd(b, l, r, "iadd");
        case OP_SUB:
            return is_float ? LLVMBuildFSub(b, l, r, "fsub")
                            : LLVMBuildSub(b, l, r, "isub");
        case OP_MUL:
            return is_float ? LLVMBuildFMul(b, l, r, "fmul")
                            : LLVMBuildMul(b, l, r, "imul");
        case OP_DIV:
            return is_float ? LLVMBuildFDiv(b, l, r, "fdiv")
                            : LLVMBuildSDiv(b, l, r, "sdiv");
        case OP_MOD:
            // srem is only defined for integers; doubles need frem
            return is_float ? LLVMBuildFRem(b, l, r, "frem")
                            : LLVMBuildSRem(b, l, r, "srem");
        case OP_EQ:
            return is_float ? LLVMBuildFCmp(b, LLVMRealOEQ, l, r, "feq")
                            : LLVMBuildICmp(b, LLVMIntEQ, l, r, "ieq");
        case OP_NE:
            // Unordered compare, so that NaN != NaN is true (IEEE 754
            // semantics); the ordered ONE predicate would yield false.
            return is_float ? LLVMBuildFCmp(b, LLVMRealUNE, l, r, "fne")
                            : LLVMBuildICmp(b, LLVMIntNE, l, r, "ine");
        case OP_LT:
            return is_float ? LLVMBuildFCmp(b, LLVMRealOLT, l, r, "flt")
                            : LLVMBuildICmp(b, LLVMIntSLT, l, r, "ilt");
        case OP_GT:
            return is_float ? LLVMBuildFCmp(b, LLVMRealOGT, l, r, "fgt")
                            : LLVMBuildICmp(b, LLVMIntSGT, l, r, "igt");
        case OP_LE:
            return is_float ? LLVMBuildFCmp(b, LLVMRealOLE, l, r, "fle")
                            : LLVMBuildICmp(b, LLVMIntSLE, l, r, "ile");
        case OP_GE:
            return is_float ? LLVMBuildFCmp(b, LLVMRealOGE, l, r, "fge")
                            : LLVMBuildICmp(b, LLVMIntSGE, l, r, "ige");
        // NOTE(review): && and || are lowered to plain and/or, so the right
        // operand is evaluated even when the left one already decides the
        // result. Confirm whether the language wants short-circuit evaluation.
        case OP_AND:
            return LLVMBuildAnd(b, l, r, "and");
        case OP_OR:
            return LLVMBuildOr(b, l, r, "or");
        default:
            return NULL;
    }
}

// Call expression: the three print built-ins are lowered to printf, every
// other name is looked up in the function table.
static LLVMValueRef cg_call(CgCtx* ctx, AstNode* node) {
    const char* name = node->as.call.name;
    size_t argc = node->as.call.arg_count;

    bool is_print_i64 = strcmp(name, "print_i64") == 0;
    bool is_print_f64 = !is_print_i64 && strcmp(name, "print_f64") == 0;
    bool is_print_bool = !is_print_i64 && !is_print_f64 && strcmp(name, "print_bool") == 0;

    // === Built-in print functions: delegate to printf ===
    if (is_print_i64 || is_print_f64 || is_print_bool) {
        // The built-ins read args[0]; refuse a call that has no argument
        if (argc < 1) return cg_fail(ctx, "内建 print 函数缺少参数");

        LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]);
        if (!arg) return NULL;

        if (is_print_bool) {
            // Turn the bool into text by selecting between "true\n" and
            // "false\n"; the selected string is passed as printf's format.
            LLVMValueRef c = LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
                LLVMConstInt(LLVMInt1TypeInContext(ctx->context), 1, false), "bool_cmp");
            LLVMValueRef true_str = LLVMBuildGlobalStringPtr(ctx->builder, "true\n", "true_str");
            LLVMValueRef false_str = LLVMBuildGlobalStringPtr(ctx->builder, "false\n", "false_str");
            LLVMValueRef selected = LLVMBuildSelect(ctx->builder, c, true_str, false_str, "bool_sel");
            return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn,
                                  (LLVMValueRef[]){selected}, 1, "");
        }

        LLVMValueRef fmt = is_print_i64
            ? LLVMBuildGlobalStringPtr(ctx->builder, "%lld\n", "fmt_i64")
            : LLVMBuildGlobalStringPtr(ctx->builder, "%f\n", "fmt_f64");
        LLVMValueRef printf_args[] = { fmt, arg };
        return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn,
                              printf_args, 2, "");
    }

    // === Regular function call ===
    LLVMValueRef fn = find_fn(ctx, name);
    if (!fn) return cg_fail(ctx, "调用了未声明的函数");

    // Heap buffer sized to the call, so any number of arguments is accepted
    LLVMValueRef* args = NULL;
    if (argc > 0) {
        args = malloc(argc * sizeof *args);
        if (!args) return cg_fail(ctx, "内存分配失败");
    }
    for (size_t i = 0; i < argc; i++) {
        args[i] = codegen_expr(ctx, node->as.call.args[i]);
        if (!args[i]) {
            free(args);
            return NULL;
        }
    }

    LLVMTypeRef fn_ty = LLVMGlobalGetValueType(fn);
    LLVMTypeRef ret_ty = LLVMGetReturnType(fn_ty);
    // A void call must not be given a result name
    LLVMValueRef call = LLVMBuildCall2(ctx->builder, fn_ty, fn,
        args, (unsigned)argc,
        ret_ty == LLVMVoidTypeInContext(ctx->context) ? "" : "call");
    free(args);
    return call;
}

/*
 * Generates LLVM IR for an expression at the builder's current position.
 *
 * Returns the value of the expression, or NULL when `node` is NULL, is not an
 * expression kind, or a sub-expression could not be generated. Definite
 * errors (unknown variable or function, missing built-in argument, allocation
 * failure) are additionally recorded in ctx->error.
 */
static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
    if (!node) return NULL;

    switch (node->kind) {
        case AST_LITERAL_EXPR:
            return to_llvm_const(to_llvm_type(ctx, node->type.kind), node);

        case AST_IDENT_EXPR: {
            LLVMValueRef ptr = find_var(ctx, node->as.ident.name);
            if (!ptr) return cg_fail(ctx, "使用了未定义的变量");
            return LLVMBuildLoad2(ctx->builder, to_llvm_type(ctx, node->type.kind), ptr, "load");
        }

        case AST_UNARY_EXPR:
            return cg_unary(ctx, node);

        case AST_BINARY_EXPR:
            return cg_binary(ctx, node);

        case AST_CALL_EXPR:
            return cg_call(ctx, node);

        default:
            return NULL;
    }
}
|
||||
|
||||
// === 语句代码生成 ===
|
||||
static void codegen_stmt(CgCtx* ctx, AstNode* node) {
|
||||
if (!node) return;
|
||||
|
||||
switch (node->kind) {
|
||||
case AST_LET_STMT: {
|
||||
LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init);
|
||||
if (!init_val) return;
|
||||
LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder,
|
||||
to_llvm_type(ctx, node->as.let_stmt.init->type.kind), node->as.let_stmt.name);
|
||||
LLVMBuildStore(ctx->builder, init_val, alloca);
|
||||
add_var(ctx, node->as.let_stmt.name, alloca);
|
||||
break;
|
||||
}
|
||||
|
||||
case AST_EXPR_STMT:
|
||||
codegen_expr(ctx, node->as.expr_stmt.expr);
|
||||
break;
|
||||
|
||||
case AST_RETURN_STMT:
|
||||
if (node->as.return_stmt.expr) {
|
||||
LLVMValueRef val = codegen_expr(ctx, node->as.return_stmt.expr);
|
||||
if (val) LLVMBuildRet(ctx->builder, val);
|
||||
} else {
|
||||
LLVMBuildRetVoid(ctx->builder);
|
||||
}
|
||||
break;
|
||||
|
||||
case AST_BLOCK:
|
||||
for (size_t i = 0; i < node->as.block.stmt_count; i++) {
|
||||
codegen_stmt(ctx, node->as.block.stmts[i]);
|
||||
}
|
||||
break;
|
||||
|
||||
case AST_IF_STMT: {
|
||||
LLVMValueRef cond = codegen_expr(ctx, node->as.if_stmt.cond);
|
||||
if (!cond) return;
|
||||
LLVMBasicBlockRef cur_bb = LLVMGetInsertBlock(ctx->builder);
|
||||
LLVMValueRef cur_fn = LLVMGetBasicBlockParent(cur_bb);
|
||||
LLVMBasicBlockRef then_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "then");
|
||||
LLVMBasicBlockRef else_bb = node->as.if_stmt.else_block
|
||||
? LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "else") : NULL;
|
||||
LLVMBasicBlockRef merge_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "if_merge");
|
||||
|
||||
if (else_bb)
|
||||
LLVMBuildCondBr(ctx->builder, cond, then_bb, else_bb);
|
||||
else
|
||||
LLVMBuildCondBr(ctx->builder, cond, then_bb, merge_bb);
|
||||
|
||||
LLVMPositionBuilderAtEnd(ctx->builder, then_bb);
|
||||
codegen_stmt(ctx, node->as.if_stmt.then_block);
|
||||
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx->builder)))
|
||||
LLVMBuildBr(ctx->builder, merge_bb);
|
||||
|
||||
if (else_bb) {
|
||||
LLVMPositionBuilderAtEnd(ctx->builder, else_bb);
|
||||
codegen_stmt(ctx, node->as.if_stmt.else_block);
|
||||
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx->builder)))
|
||||
LLVMBuildBr(ctx->builder, merge_bb);
|
||||
}
|
||||
|
||||
LLVMPositionBuilderAtEnd(ctx->builder, merge_bb);
|
||||
break;
|
||||
}
|
||||
|
||||
case AST_WHILE_STMT: {
|
||||
LLVMBasicBlockRef cur_bb = LLVMGetInsertBlock(ctx->builder);
|
||||
LLVMValueRef cur_fn = LLVMGetBasicBlockParent(cur_bb);
|
||||
LLVMBasicBlockRef cond_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "while_cond");
|
||||
LLVMBasicBlockRef body_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "while_body");
|
||||
LLVMBasicBlockRef exit_bb = LLVMAppendBasicBlockInContext(ctx->context, cur_fn, "while_exit");
|
||||
|
||||
LLVMBuildBr(ctx->builder, cond_bb);
|
||||
LLVMPositionBuilderAtEnd(ctx->builder, cond_bb);
|
||||
LLVMValueRef cond = codegen_expr(ctx, node->as.while_stmt.cond);
|
||||
if (!cond) return;
|
||||
LLVMBuildCondBr(ctx->builder, cond, body_bb, exit_bb);
|
||||
|
||||
LLVMPositionBuilderAtEnd(ctx->builder, body_bb);
|
||||
codegen_stmt(ctx, node->as.while_stmt.body);
|
||||
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx->builder)))
|
||||
LLVMBuildBr(ctx->builder, cond_bb);
|
||||
|
||||
LLVMPositionBuilderAtEnd(ctx->builder, exit_bb);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// === 程序级代码生成 ===
|
||||
LLVMModuleRef codegen_module(AstNode* ast, const char* name, const char** error_msg) {
|
||||
CgCtx ctx = {0};
|
||||
ctx.context = LLVMContextCreate();
|
||||
if (!ctx.context) {
|
||||
*error_msg = "无法创建 LLVM Context";
|
||||
return NULL;
|
||||
}
|
||||
ctx.module = LLVMModuleCreateWithNameInContext(name, ctx.context);
|
||||
ctx.builder = LLVMCreateBuilderInContext(ctx.context);
|
||||
|
||||
// 声明 C 标准库 printf(内置 print 函数依赖它)
|
||||
LLVMTypeRef printf_param_types[] = {
|
||||
LLVMPointerType(LLVMInt8TypeInContext(ctx.context), 0)
|
||||
};
|
||||
ctx.printf_ty = LLVMFunctionType(
|
||||
LLVMInt32TypeInContext(ctx.context), printf_param_types, 1, true);
|
||||
ctx.printf_fn = LLVMAddFunction(ctx.module, "printf", ctx.printf_ty);
|
||||
|
||||
// 第一遍:声明所有 L 函数
|
||||
for (size_t i = 0; i < ast->as.program.fn_count; i++) {
|
||||
AstNode* fn = ast->as.program.functions[i];
|
||||
LLVMTypeRef* ptypes = malloc(fn->as.function.param_count * sizeof(LLVMTypeRef));
|
||||
for (size_t j = 0; j < fn->as.function.param_count; j++)
|
||||
ptypes[j] = to_llvm_type(&ctx, fn->as.function.params[j]->as.parameter.type);
|
||||
LLVMTypeRef fty = LLVMFunctionType(
|
||||
to_llvm_type(&ctx, fn->as.function.return_type),
|
||||
ptypes, (unsigned)fn->as.function.param_count, false);
|
||||
LLVMValueRef lfn = LLVMAddFunction(ctx.module, fn->as.function.name, fty);
|
||||
add_fn(&ctx, fn->as.function.name, lfn);
|
||||
free(ptypes);
|
||||
}
|
||||
|
||||
// 第二遍:生成函数体
|
||||
for (size_t i = 0; i < ast->as.program.fn_count; i++) {
|
||||
AstNode* fn = ast->as.program.functions[i];
|
||||
LLVMValueRef lfn = find_fn(&ctx, fn->as.function.name);
|
||||
LLVMBasicBlockRef entry = LLVMAppendBasicBlockInContext(ctx.context, lfn, "entry");
|
||||
LLVMPositionBuilderAtEnd(ctx.builder, entry);
|
||||
|
||||
// 清空变量表(每个函数独立作用域)
|
||||
ctx.var_table = NULL;
|
||||
|
||||
// 将参数注册为变量
|
||||
for (size_t j = 0; j < fn->as.function.param_count; j++) {
|
||||
LLVMValueRef param = LLVMGetParam(lfn, (unsigned)j);
|
||||
LLVMValueRef alloca = LLVMBuildAlloca(ctx.builder,
|
||||
to_llvm_type(&ctx, fn->as.function.params[j]->as.parameter.type),
|
||||
fn->as.function.params[j]->as.parameter.name);
|
||||
LLVMBuildStore(ctx.builder, param, alloca);
|
||||
add_var(&ctx, fn->as.function.params[j]->as.parameter.name, alloca);
|
||||
}
|
||||
|
||||
codegen_stmt(&ctx, fn->as.function.body);
|
||||
|
||||
// 确保函数有终止指令(terminator)
|
||||
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(ctx.builder))) {
|
||||
if (fn->as.function.return_type == TYPE_VOID)
|
||||
LLVMBuildRetVoid(ctx.builder);
|
||||
else
|
||||
LLVMBuildRet(ctx.builder,
|
||||
LLVMConstInt(to_llvm_type(&ctx, fn->as.function.return_type), 0, false));
|
||||
}
|
||||
}
|
||||
|
||||
// 验证模块(使用 ReturnStatus 以获取完整错误消息)
|
||||
char* verify_err = NULL;
|
||||
if (LLVMVerifyModule(ctx.module, LLVMReturnStatusAction, &verify_err)) {
|
||||
*error_msg = verify_err ? verify_err : "模块验证失败(错误消息为 NULL)";
|
||||
LLVMDisposeBuilder(ctx.builder);
|
||||
LLVMContextDispose(ctx.context);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
LLVMDisposeBuilder(ctx.builder);
|
||||
return ctx.module;
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
#ifndef CODEGEN_H
|
||||
#define CODEGEN_H
|
||||
|
||||
#include "ast.h"
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
// 生成 LLVM Module。模块已 verify,可直接 dump 或写入文件。
|
||||
// 出错时返回 NULL 并设置 *error_msg。
|
||||
LLVMModuleRef codegen_module(AstNode* ast, const char* module_name,
|
||||
const char** error_msg);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,49 @@
|
||||
#include "error.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Prepares an empty error list with room for eight entries.
 * If the initial allocation fails the list is left with a NULL array and a
 * capacity of zero.
 */
void error_init(ErrorList* list) {
    enum { INITIAL_CAPACITY = 8 };

    list->count = 0;
    list->capacity = INITIAL_CAPACITY;
    list->errors = malloc(list->capacity * sizeof(ErrorInfo));
    if (list->errors == NULL) {
        list->capacity = 0;
    }
}
|
||||
|
||||
/*
 * Appends a printf-style formatted error to the list.
 *
 * The formatted message is limited to a 512-byte buffer. Both the message and
 * the file name are duplicated, so the caller's strings need not outlive the
 * call. The error is dropped silently when the list has no storage, when the
 * array cannot grow, when formatting fails, or when a duplicate cannot be
 * allocated.
 */
void error_add(ErrorList* list, const char* filename, int line, int col, const char* fmt, ...) {
    if (list->errors == NULL) return;

    // Double the array when it is full
    if (list->count >= list->capacity) {
        size_t doubled = list->capacity * 2;
        ErrorInfo* grown = realloc(list->errors, doubled * sizeof(ErrorInfo));
        if (grown == NULL) return;
        list->errors = grown;
        list->capacity = doubled;
    }

    char text[512];
    va_list ap;
    va_start(ap, fmt);
    int written = vsnprintf(text, sizeof(text), fmt, ap);
    va_end(ap);
    if (written < 0) return;

    char* msg_copy = strdup(text);
    char* file_copy = strdup(filename);
    if (msg_copy == NULL || file_copy == NULL) {
        free(msg_copy);
        free(file_copy);
        return;
    }

    ErrorInfo* slot = &list->errors[list->count];
    slot->message = msg_copy;
    slot->filename = file_copy;
    slot->line = line;
    slot->col = col;
    list->count++;
}
|
||||
|
||||
void error_print(const ErrorList* list) {
|
||||
for (size_t i = 0; i < list->count; i++) {
|
||||
const ErrorInfo* e = &list->errors[i];
|
||||
fprintf(stderr, "\033[1;31m错误:\033[0m %s:%d:%d: %s\n",
|
||||
e->filename, e->line, e->col, e->message);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
#ifndef ERROR_H
|
||||
#define ERROR_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* One diagnostic: message text plus the source location it refers to. */
typedef struct {
    const char* message;
    const char* filename;
    int line;
    int col;
} ErrorInfo;

/* Growable array of diagnostics. */
typedef struct {
    ErrorInfo* errors;   /* array of entries; may be NULL */
    size_t count;        /* number of entries in use */
    size_t capacity;     /* number of entries allocated */
} ErrorList;
|
||||
|
||||
void error_init(ErrorList* list);
|
||||
void error_add(ErrorList* list, const char* filename, int line, int col, const char* fmt, ...);
|
||||
void error_print(const ErrorList* list);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,169 @@
|
||||
#include "l_lang.h"
|
||||
#include "lexer.h"
|
||||
#include "parser.h"
|
||||
#include "sema.h"
|
||||
#include "codegen.h"
|
||||
#include "error.h"
|
||||
#include "arena.h"
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm-c/TargetMachine.h>
|
||||
#include <llvm-c/Target.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Reads a whole file into memory.
 *
 * path  File to read; opened in binary mode.
 * size  On success receives the number of bytes read (excluding the
 *       terminator). Not modified on failure.
 *
 * Returns a malloc'd, NUL-terminated buffer that the caller must free, or
 * NULL when the file cannot be opened, measured, allocated for, or read
 * completely. A message is printed to stderr in every failure case.
 */
static char* read_file(const char* path, size_t* size) {
    FILE* f = fopen(path, "rb");
    if (!f) { fprintf(stderr, "无法打开文件: %s\n", path); return NULL; }

    char* buf = NULL;
    long len = -1;

    // ftell reports failure as -1; converting that to size_t unchecked would
    // turn it into a huge length.
    if (fseek(f, 0, SEEK_END) == 0)
        len = ftell(f);
    if (len < 0 || fseek(f, 0, SEEK_SET) != 0)
        goto fail;

    buf = malloc((size_t)len + 1);
    if (!buf) goto fail;

    // A short read would otherwise leave uninitialised bytes in the buffer
    if (fread(buf, 1, (size_t)len, f) != (size_t)len)
        goto fail;

    buf[len] = '\0';
    fclose(f);
    *size = (size_t)len;
    return buf;

fail:
    fprintf(stderr, "无法读取文件: %s\n", path);
    free(buf);
    fclose(f);
    return NULL;
}
|
||||
|
||||
/*
 * Writes a NUL-terminated string to a file, replacing any existing content.
 * The file is opened in text mode.
 *
 * Returns true only when the file was opened, the data was written and the
 * stream was closed without error; a failed fclose means buffered data may
 * not have reached the file.
 */
static bool write_file(const char* path, const char* data) {
    FILE* f = fopen(path, "w");
    if (!f) return false;

    bool ok = fputs(data, f) != EOF;
    if (fclose(f) != 0) ok = false;
    return ok;
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
const char* input = NULL;
|
||||
const char* output = "a.exe";
|
||||
bool emit_ir = false;
|
||||
|
||||
// 解析命令行参数
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "--emit-ir") == 0) { emit_ir = true; }
|
||||
else if (strcmp(argv[i], "-o") == 0 && i + 1 < argc) { output = argv[++i]; }
|
||||
else if (argv[i][0] != '-') { input = argv[i]; }
|
||||
}
|
||||
|
||||
if (!input) {
|
||||
fprintf(stderr, "用法: l_lang <文件.l> [-o <输出>] [--emit-ir]\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// 1. 读取源文件
|
||||
size_t src_size;
|
||||
char* source = read_file(input, &src_size);
|
||||
if (!source) return 1;
|
||||
|
||||
// 2. 初始化
|
||||
Arena arena = arena_create(8); // 8 MB
|
||||
if (!arena.memory) { fprintf(stderr, "内存分配失败\n"); free(source); return 1; }
|
||||
|
||||
ErrorInfo error = {0};
|
||||
ErrorList error_list; error_init(&error_list);
|
||||
|
||||
// 3. 词法分析
|
||||
size_t token_count;
|
||||
Token* tokens = lex(&arena, source, input, &token_count, &error);
|
||||
if (!tokens) {
|
||||
fprintf(stderr, "词法错误: %s:%d:%d: %s\n",
|
||||
error.filename, error.line, error.col, error.message);
|
||||
free(source); arena_destroy(&arena);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// 4. 语法分析
|
||||
AstNode* ast = parse(&arena, tokens, token_count, input, &error);
|
||||
if (!ast) {
|
||||
fprintf(stderr, "语法错误: %s:%d:%d: %s\n",
|
||||
error.filename, error.line, error.col, error.message);
|
||||
free(source); arena_destroy(&arena);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// 5. 语义分析
|
||||
sema_analyze(ast, &error_list, &arena);
|
||||
if (error_list.count > 0) {
|
||||
error_print(&error_list);
|
||||
free(source); arena_destroy(&arena);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// 6. LLVM IR 生成
|
||||
const char* codegen_error = NULL;
|
||||
LLVMModuleRef module = codegen_module(ast, "l_module", &codegen_error);
|
||||
if (!module) {
|
||||
fprintf(stderr, "IR 生成错误: %s\n", codegen_error);
|
||||
free(source); arena_destroy(&arena);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (emit_ir) {
|
||||
// 输出 LLVM IR 文本
|
||||
char* ir = LLVMPrintModuleToString(module);
|
||||
char ir_path[512];
|
||||
snprintf(ir_path, sizeof(ir_path), "%s.ll", input);
|
||||
write_file(ir_path, ir);
|
||||
printf("IR 已输出到: %s\n", ir_path);
|
||||
LLVMDisposeMessage(ir);
|
||||
} else {
|
||||
// 初始化 X86 目标(LLVM-C.lib 中没有 InitializeAll 系列符号)
|
||||
LLVMInitializeX86TargetInfo();
|
||||
LLVMInitializeX86Target();
|
||||
LLVMInitializeX86TargetMC();
|
||||
LLVMInitializeX86AsmPrinter();
|
||||
LLVMInitializeX86AsmParser();
|
||||
|
||||
char* triple = LLVMGetDefaultTargetTriple();
|
||||
LLVMTargetRef target;
|
||||
char* target_error = NULL;
|
||||
if (LLVMGetTargetFromTriple(triple, &target, &target_error)) {
|
||||
fprintf(stderr, "目标平台错误: %s\n", target_error);
|
||||
LLVMDisposeMessage(target_error); LLVMDisposeMessage(triple);
|
||||
free(source); arena_destroy(&arena); LLVMDisposeModule(module);
|
||||
return 1;
|
||||
}
|
||||
|
||||
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
|
||||
target, triple, "generic", "",
|
||||
LLVMCodeGenLevelDefault, LLVMRelocDefault,
|
||||
LLVMCodeModelDefault);
|
||||
LLVMDisposeMessage(triple);
|
||||
|
||||
// 输出目标文件
|
||||
char obj_path[512];
|
||||
snprintf(obj_path, sizeof(obj_path), "%s.o", input);
|
||||
char* obj_error = NULL;
|
||||
if (LLVMTargetMachineEmitToFile(tm, module, obj_path,
|
||||
LLVMObjectFile, &obj_error)) {
|
||||
fprintf(stderr, "目标代码生成错误: %s\n", obj_error);
|
||||
LLVMDisposeMessage(obj_error);
|
||||
free(source); arena_destroy(&arena);
|
||||
LLVMDisposeTargetMachine(tm); LLVMDisposeModule(module);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// 调用 gcc 链接(MinGW 环境可用)
|
||||
char cmd[1024];
|
||||
snprintf(cmd, sizeof(cmd),
|
||||
"gcc \"%s\" -o \"%s\"",
|
||||
obj_path, output);
|
||||
int ret = system(cmd);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "链接失败 (exit code %d)\n", ret);
|
||||
} else {
|
||||
printf("编译成功: %s\n", output);
|
||||
}
|
||||
LLVMDisposeTargetMachine(tm);
|
||||
}
|
||||
|
||||
// 清理
|
||||
LLVMDisposeModule(module);
|
||||
free(source);
|
||||
arena_destroy(&arena);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
#include "lexer.h"
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
// Lexer state: a cursor over a NUL-terminated source buffer together with
// the line/column of the character under the cursor.
typedef struct {
    const char* src;       // NUL-terminated source text (not owned)
    const char* filename;  // name used in diagnostics (not owned)
    int pos;               // byte offset of the cursor into src
    int line;              // line of the cursor; bumped on every '\n'
    int col;               // column of the cursor, counted in bytes; reset to 1 after '\n'
} Lexer;

// Returns the character under the cursor ('\0' at end of input).
static char peek(const Lexer* l) { return l->src[l->pos]; }

// Returns the character after the cursor. When the cursor already sits on
// the terminator there is no next character, so '\0' is returned instead of
// reading one byte past the end of the buffer.
static char peek_next(const Lexer* l) {
    return l->src[l->pos] != '\0' ? l->src[l->pos + 1] : '\0';
}

// Moves the cursor forward by one character, keeping line/col in sync.
// At end of input this is a no-op so the cursor can never leave the buffer.
static void advance(Lexer* l) {
    char current = l->src[l->pos];
    if (current == '\0') {
        return;
    }
    if (current == '\n') {
        l->line++;
        l->col = 1;
    } else {
        l->col++;
    }
    l->pos++;
}
|
||||
// Skips blanks, `//` line comments and `/* */` block comments, leaving the
// cursor on the first character that belongs to a token (or on '\0').
// An unterminated block comment simply runs to end of input.
static void skip_whitespace(Lexer* l) {
    for (;;) {
        switch (peek(l)) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
            advance(l);
            continue;
        case '/':
            break;      // possibly a comment; decided below
        default:
            return;
        }

        char after = peek_next(l);
        if (after == '/') {
            // Line comment: stop on the newline, which the next round eats.
            for (char ch = peek(l); ch != '\n' && ch != '\0'; ch = peek(l)) {
                advance(l);
            }
        } else if (after == '*') {
            advance(l);
            advance(l);
            for (;;) {
                char ch = peek(l);
                if (ch == '\0') {
                    break;
                }
                if (ch == '*' && peek_next(l) == '/') {
                    advance(l);
                    advance(l);
                    break;
                }
                advance(l);
            }
        } else {
            return;     // a lone '/' is the division operator
        }
    }
}
|
||||
|
||||
// Builds a token of `kind` covering `len` bytes of source starting at byte
// offset `start_pos`. The token's line/col are copied from the lexer's
// current cursor position at the time of the call.
static Token make_token(Lexer* l, TokenKind kind, int start_pos, int len) {
    Token tok;
    tok.kind = kind;
    tok.start = &l->src[start_pos];
    tok.length = len;
    tok.line = l->line;
    tok.col = l->col;
    return tok;
}
|
||||
|
||||
// Scans an integer or floating-point literal starting at the cursor.
// A run of digits is TOK_INT_LIT; if it is followed by '.', the dot and any
// following digits are consumed and the token becomes TOK_FLOAT_LIT.
//
// The returned token carries the line/column of its FIRST character. The
// position is captured before scanning because make_token() reads the
// cursor position, which by then points past the literal.
static Token lex_number(Lexer* l) {
    int start = l->pos;
    int start_line = l->line;
    int start_col = l->col;
    TokenKind kind = TOK_INT_LIT;

    // <ctype.h> functions require a value representable as unsigned char;
    // a plain (possibly signed) char holding a byte >= 0x80 would be UB.
    while (isdigit((unsigned char)peek(l))) {
        advance(l);
    }
    if (peek(l) == '.') {
        kind = TOK_FLOAT_LIT;
        advance(l);
        while (isdigit((unsigned char)peek(l))) {
            advance(l);
        }
    }

    Token tok = make_token(l, kind, start, l->pos - start);
    tok.line = start_line;
    tok.col = start_col;
    return tok;
}
|
||||
|
||||
static TokenKind check_keyword(const Token* tok) {
|
||||
#define KW(s, k) if (tok->length == sizeof(s)-1 && memcmp(tok->start, s, sizeof(s)-1) == 0) return k
|
||||
KW("fn", TOK_FN); KW("let", TOK_LET);
|
||||
KW("if", TOK_IF); KW("else", TOK_ELSE);
|
||||
KW("while", TOK_WHILE); KW("return", TOK_RETURN);
|
||||
KW("i64", TOK_I64); KW("f64", TOK_F64);
|
||||
KW("bool", TOK_BOOL); KW("void", TOK_VOID);
|
||||
KW("true", TOK_TRUE); KW("false", TOK_FALSE);
|
||||
#undef KW
|
||||
return TOK_IDENT;
|
||||
}
|
||||
|
||||
// Scans an identifier ([A-Za-z0-9_]+ starting at the cursor) and classifies
// it as a keyword or TOK_IDENT via check_keyword().
//
// The returned token carries the line/column of its FIRST character. The
// position is captured before scanning because make_token() reads the
// cursor position, which by then points past the identifier.
static Token lex_ident_or_keyword(Lexer* l) {
    int start = l->pos;
    int start_line = l->line;
    int start_col = l->col;

    for (;;) {
        // Cast first: passing a negative char to isalnum() is undefined.
        unsigned char ch = (unsigned char)peek(l);
        if (!isalnum(ch) && ch != '_') {
            break;
        }
        advance(l);
    }

    Token t = make_token(l, TOK_IDENT, start, l->pos - start);
    t.line = start_line;
    t.col = start_col;
    t.kind = check_keyword(&t);
    return t;
}
|
||||
|
||||
// Tokenizes `source` into an arena-allocated array that ends with TOK_EOF.
//
//   a        - arena the token array is allocated from
//   source   - NUL-terminated program text; tokens point into it, so it
//              must outlive the returned array
//   filename - recorded in `error` for diagnostics
//   count    - receives the number of tokens including the trailing
//              TOK_EOF, or 0 on failure
//   error    - filled in when an unrecognized character is encountered
//
// Returns the token array, or NULL on allocation failure or lexical error.
// Every token is stamped with the line/column of its first character.
Token* lex(Arena* a, const char* source, const char* filename,
           size_t* count, ErrorInfo* error) {
    Lexer l = {.src = source, .filename = filename, .pos = 0, .line = 1, .col = 1};

    // Every token consumes at least one source byte, so strlen(source)
    // tokens plus the trailing EOF is a hard upper bound. (A smaller guess
    // would be overrun by dense input such as "((((((".)
    size_t cap = strlen(source) + 1;
    Token* tokens = arena_alloc(a, cap * sizeof(Token));
    if (!tokens) { *count = 0; return NULL; }
    size_t idx = 0;

    for (;;) {
        skip_whitespace(&l);
        char c = peek(&l);
        if (c == '\0') break;

        // Remember where the token starts; the cursor moves while scanning.
        int line = l.line, col = l.col;
        // <ctype.h> needs an unsigned char value; bytes >= 0x80 held in a
        // signed char would otherwise be undefined behavior.
        unsigned char uc = (unsigned char)c;
        Token tok;

        if (isdigit(uc)) {
            tok = lex_number(&l);
        } else if (isalpha(uc) || c == '_') {
            tok = lex_ident_or_keyword(&l);
        } else {
            char next = peek_next(&l);   // safe: c is not the terminator
            TokenKind kind = TOK_ERROR;  // sentinel: no operator matched
            int len = 1;

            switch (c) {
            // '+' directly followed by '=' is rejected, as before.
            case '+': if (next != '=') kind = TOK_PLUS; break;
            case '-':
                if (next == '>') { kind = TOK_ARROW; len = 2; }
                else             { kind = TOK_MINUS; }
                break;
            case '*': kind = TOK_STAR;    break;
            case '/': kind = TOK_SLASH;   break;
            case '%': kind = TOK_PERCENT; break;
            case '=':
                if (next == '=') { kind = TOK_EQ_EQ; len = 2; }
                else             { kind = TOK_ASSIGN; }
                break;
            case '!':
                if (next == '=') { kind = TOK_BANG_EQ; len = 2; }
                else             { kind = TOK_BANG; }
                break;
            case '<':
                if (next == '=') { kind = TOK_LT_EQ; len = 2; }
                else             { kind = TOK_LT; }
                break;
            case '>':
                if (next == '=') { kind = TOK_GT_EQ; len = 2; }
                else             { kind = TOK_GT; }
                break;
            // A single '&' or '|' is not an operator in this language.
            case '&': if (next == '&') { kind = TOK_AND_AND;   len = 2; } break;
            case '|': if (next == '|') { kind = TOK_PIPE_PIPE; len = 2; } break;
            case '(': kind = TOK_LPAREN;    break;
            case ')': kind = TOK_RPAREN;    break;
            case '{': kind = TOK_LBRACE;    break;
            case '}': kind = TOK_RBRACE;    break;
            case ',': kind = TOK_COMMA;     break;
            case ':': kind = TOK_COLON;     break;
            case ';': kind = TOK_SEMICOLON; break;
            default: break;
            }

            if (kind == TOK_ERROR) {
                *error = (ErrorInfo){
                    .message = "无法识别的字符",
                    .filename = filename, .line = line, .col = col
                };
                *count = 0;
                return NULL;
            }

            tok = make_token(&l, kind, l.pos, len);
            for (int i = 0; i < len; i++) advance(&l);
        }

        tok.line = line;
        tok.col = col;
        tokens[idx++] = tok;
    }

    tokens[idx++] = make_token(&l, TOK_EOF, l.pos, 0);
    *count = idx;
    return tokens;
}
|
||||
@@ -0,0 +1,13 @@
|
||||
#ifndef LEXER_H
|
||||
#define LEXER_H
|
||||
|
||||
#include "token.h"
|
||||
#include "arena.h"
|
||||
#include "error.h"
|
||||
|
||||
// Tokenizes `source` and returns the Token array (allocated in arena `a`);
// *count receives the number of tokens.
|
||||
// On a lexical error, `error` is filled in and NULL is returned.
|
||||
Token* lex(Arena* a, const char* source, const char* filename,
|
||||
size_t* count, ErrorInfo* error);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,49 @@
|
||||
#include "token.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
static const char* NAMES[] = {
|
||||
[TOK_FN] = "fn", [TOK_LET] = "let", [TOK_IF] = "if",
|
||||
[TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_RETURN] = "return",
|
||||
[TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_VOID] = "void",
|
||||
[TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数",
|
||||
[TOK_TRUE] = "true", [TOK_FALSE] = "false",
|
||||
[TOK_IDENT] = "标识符",
|
||||
[TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*",
|
||||
[TOK_SLASH] = "/", [TOK_PERCENT] = "%",
|
||||
[TOK_EQ_EQ] = "==", [TOK_BANG_EQ] = "!=",
|
||||
[TOK_LT] = "<", [TOK_GT] = ">", [TOK_LT_EQ] = "<=", [TOK_GT_EQ] = ">=",
|
||||
[TOK_AND_AND] = "&&", [TOK_PIPE_PIPE] = "||", [TOK_BANG] = "!",
|
||||
[TOK_ARROW] = "->",
|
||||
[TOK_LPAREN] = "(", [TOK_RPAREN] = ")",
|
||||
[TOK_LBRACE] = "{", [TOK_RBRACE] = "}",
|
||||
[TOK_COMMA] = ",", [TOK_COLON] = ":", [TOK_SEMICOLON] = ";",
|
||||
[TOK_ASSIGN] = "=",
|
||||
[TOK_EOF] = "EOF", [TOK_ERROR] = "错误",
|
||||
};
|
||||
|
||||
const char* tok_name(TokenKind kind) {
|
||||
return NAMES[kind];
|
||||
}
|
||||
|
||||
// Reports whether `kind` is one of the type keywords: i64, f64, bool, void.
bool tok_is_type(TokenKind kind) {
    switch (kind) {
    case TOK_I64:
    case TOK_F64:
    case TOK_BOOL:
    case TOK_VOID:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
int64_t tok_int_value(const Token* tok) {
|
||||
if (tok->length <= 0 || tok->length >= 32) return 0;
|
||||
char buf[32];
|
||||
memcpy(buf, tok->start, tok->length);
|
||||
buf[tok->length] = '\0';
|
||||
return strtoll(buf, NULL, 10);
|
||||
}
|
||||
|
||||
double tok_float_value(const Token* tok) {
|
||||
if (tok->length <= 0 || tok->length >= 64) return 0.0;
|
||||
char buf[64];
|
||||
memcpy(buf, tok->start, tok->length);
|
||||
buf[tok->length] = '\0';
|
||||
return strtod(buf, NULL);
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
#ifndef TOKEN_H
|
||||
#define TOKEN_H
|
||||
|
||||
#include "l_lang.h"
|
||||
|
||||
// === Token kind enumeration ===
// Enumerators are implicitly numbered from 0 in declaration order.
|
||||
typedef enum {
|
||||
// Keywords
|
||||
TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_RETURN,
|
||||
// Type keywords
|
||||
TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID,
|
||||
// Literals
|
||||
TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE,
|
||||
// Identifiers
|
||||
TOK_IDENT,
|
||||
// Operators
|
||||
TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
|
||||
TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
|
||||
TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG,
|
||||
TOK_ARROW,
|
||||
// Delimiters
|
||||
TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE,
|
||||
TOK_COMMA, TOK_COLON, TOK_SEMICOLON, TOK_ASSIGN,
|
||||
// Special
|
||||
TOK_EOF, TOK_ERROR,
|
||||
} TokenKind;
|
||||
|
||||
// === Token structure ===
// A token is a non-owning slice of the source text plus a source position.
|
||||
struct Token {
|
||||
TokenKind kind;
|
||||
const char* start; // points at the token's first character in the source
|
||||
int length; // length of the token text
|
||||
int line; // source line associated with the token
|
||||
int col; // source column associated with the token
|
||||
};
|
||||
|
||||
// === Utility functions ===
|
||||
// Returns a display name for `kind`.
const char* tok_name(TokenKind kind);
|
||||
// True when `kind` is a type keyword (i64, f64, bool, void).
bool tok_is_type(TokenKind kind);
|
||||
|
||||
// Extract the value from a Token
|
||||
int64_t tok_int_value(const Token* tok);
|
||||
double tok_float_value(const Token* tok);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,325 @@
|
||||
#include "parser.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Parser state: a cursor over the token array produced by the lexer.
typedef struct {
|
||||
const Token* tokens; // token array being parsed (not owned)
|
||||
size_t count; // number of entries in `tokens`
|
||||
size_t pos; // index of the current (not yet consumed) token
|
||||
const char* filename; // copied into ErrorInfo when an error is reported
|
||||
Arena* arena; // arena that AST nodes and copied strings are allocated from
|
||||
} Parser;
|
||||
|
||||
// === 向前看 ===
|
||||
static const Token* peek(const Parser* p) { return &p->tokens[p->pos]; }
|
||||
static const Token* advance(Parser* p) { return &p->tokens[p->pos++]; }
|
||||
static bool match(Parser* p, TokenKind k) {
|
||||
if (peek(p)->kind == k) { p->pos++; return true; }
|
||||
return false;
|
||||
}
|
||||
static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) {
|
||||
if (peek(p)->kind == k) return advance(p);
|
||||
e->message = msg; e->filename = p->filename;
|
||||
e->line = peek(p)->line; e->col = peek(p)->col;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// === Operator precedence levels ===
// Larger values bind tighter. PREC_NONE is what tok_to_prec() returns for
// tokens that are not binary operators.
|
||||
typedef enum {
|
||||
PREC_NONE = 0,
|
||||
PREC_OR = 20, // ||
|
||||
PREC_AND = 30, // &&
|
||||
PREC_COMPARE = 40, // == != < > <= >=
|
||||
PREC_TERM = 50, // + -
|
||||
PREC_FACTOR = 60, // * / %
|
||||
PREC_UNARY = 70, // operand of prefix - and !
|
||||
} Precedence;
|
||||
|
||||
// Returns the binding power of `kind` when used as a binary operator, or
// PREC_NONE if the token is not a binary operator.
static Precedence tok_to_prec(TokenKind kind) {
    if (kind == TOK_PIPE_PIPE) {
        return PREC_OR;
    }
    if (kind == TOK_AND_AND) {
        return PREC_AND;
    }
    if (kind == TOK_EQ_EQ || kind == TOK_BANG_EQ ||
        kind == TOK_LT || kind == TOK_GT ||
        kind == TOK_LT_EQ || kind == TOK_GT_EQ) {
        return PREC_COMPARE;
    }
    if (kind == TOK_PLUS || kind == TOK_MINUS) {
        return PREC_TERM;
    }
    if (kind == TOK_STAR || kind == TOK_SLASH || kind == TOK_PERCENT) {
        return PREC_FACTOR;
    }
    return PREC_NONE;
}
|
||||
|
||||
// Translates an operator token into the corresponding AST binary operator.
// Tokens that are not binary operators fall back to OP_ADD.
static BinaryOp tok_to_binop(TokenKind kind) {
    BinaryOp op = OP_ADD;  // fallback for anything not listed below
    switch (kind) {
    case TOK_PLUS:      op = OP_ADD; break;
    case TOK_MINUS:     op = OP_SUB; break;
    case TOK_STAR:      op = OP_MUL; break;
    case TOK_SLASH:     op = OP_DIV; break;
    case TOK_PERCENT:   op = OP_MOD; break;
    case TOK_EQ_EQ:     op = OP_EQ;  break;
    case TOK_BANG_EQ:   op = OP_NE;  break;
    case TOK_LT:        op = OP_LT;  break;
    case TOK_GT:        op = OP_GT;  break;
    case TOK_LT_EQ:     op = OP_LE;  break;
    case TOK_GT_EQ:     op = OP_GE;  break;
    case TOK_AND_AND:   op = OP_AND; break;
    case TOK_PIPE_PIPE: op = OP_OR;  break;
    default:            break;
    }
    return op;
}
|
||||
|
||||
// 向前声明
|
||||
static AstNode* parse_expr(Parser* p, ErrorInfo* error);
|
||||
static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
|
||||
static AstNode* parse_block(Parser* p, ErrorInfo* error);
|
||||
|
||||
// === 前缀解析 ===
|
||||
// Parses a prefix expression. The caller guarantees the current token is
// '-' or '!'; the operand is parsed at PREC_UNARY so that no binary
// operator is absorbed into it.
static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
    const Token* op_tok = advance(p);
    AstNode* operand = parse_expr_prec(p, PREC_UNARY, error);
    if (operand == NULL) {
        return NULL;
    }

    BinaryOp uop;
    if (op_tok->kind == TOK_MINUS) {
        uop = OP_NEG;
    } else {
        uop = OP_NOT;
    }
    return ast_make_unary(p->arena, uop, operand, op_tok->line, op_tok->col);
}
|
||||
|
||||
// Parses a parenthesized expression. The caller guarantees the current
// token is '('. No extra AST node is created for the parentheses: the inner
// expression is returned directly.
static AstNode* parse_group(Parser* p, ErrorInfo* error) {
    advance(p);  // consume '('
    AstNode* inner = parse_expr(p, error);
    if (inner != NULL && expect(p, TOK_RPAREN, error, "缺少 ')'") != NULL) {
        return inner;
    }
    return NULL;
}
|
||||
|
||||
// Consumes the current token and turns it into a literal node (integer,
// float or boolean). Returns NULL if the token is not a literal.
static AstNode* parse_literal(Parser* p) {
    const Token* lit = advance(p);

    if (lit->kind == TOK_INT_LIT) {
        return ast_make_literal_i64(p->arena, tok_int_value(lit), lit->line, lit->col);
    }
    if (lit->kind == TOK_FLOAT_LIT) {
        return ast_make_literal_f64(p->arena, tok_float_value(lit), lit->line, lit->col);
    }
    if (lit->kind == TOK_TRUE || lit->kind == TOK_FALSE) {
        return ast_make_literal_bool(p->arena, lit->kind == TOK_TRUE, lit->line, lit->col);
    }
    return NULL;
}
|
||||
|
||||
// Parses an identifier and, when it is immediately followed by '(', a call
// with a comma-separated argument list (at most MAX_CALL_ARGS arguments; a
// trailing comma before ')' is accepted).
//
// Returns an identifier or call node, or NULL with `error` filled in.
static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
    enum { MAX_CALL_ARGS = 16 };
    const Token* name = advance(p);

    if (!match(p, TOK_LPAREN)) {
        return ast_make_ident(p->arena,
                              arena_strdup_impl(p->arena, name->start, name->length),
                              name->line, name->col);
    }

    // Function call: collect arguments on the stack, then copy to the arena.
    AstNode* args[MAX_CALL_ARGS];
    int arg_count = 0;
    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        if (arg_count >= MAX_CALL_ARGS) {
            error->message = "函数参数过多"; error->filename = p->filename;
            error->line = peek(p)->line; error->col = peek(p)->col;
            return NULL;
        }
        args[arg_count] = parse_expr(p, error);
        if (!args[arg_count]) return NULL;
        arg_count++;
        if (peek(p)->kind == TOK_COMMA) advance(p);
        else break;
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;

    AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
    if (arg_count > 0) {
        // Only a non-empty list needs real storage; report exhaustion
        // instead of handing a NULL destination to memcpy().
        if (!arg_arr) {
            error->message = "内存不足"; error->filename = p->filename;
            error->line = name->line; error->col = name->col;
            return NULL;
        }
        memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
    }
    return ast_make_call(p->arena,
                         arena_strdup_impl(p->arena, name->start, name->length),
                         arg_arr, arg_count, name->line, name->col);
}
|
||||
|
||||
// === Pratt 主循环 ===
|
||||
// Pratt parser core. Parses one prefix expression, then keeps folding in
// binary operators whose precedence is strictly greater than `min_prec`.
// The right operand is parsed with the operator's own precedence as the new
// minimum, which makes operators of equal precedence left-associative.
//
// Returns the expression node, or NULL with `error` filled in.
static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
    const Token* first = peek(p);
    AstNode* lhs;

    // Prefix position: pick the sub-parser from the leading token.
    switch (first->kind) {
    case TOK_MINUS:
    case TOK_BANG:
        lhs = parse_unary(p, error);
        break;
    case TOK_LPAREN:
        lhs = parse_group(p, error);
        break;
    case TOK_INT_LIT:
    case TOK_FLOAT_LIT:
    case TOK_TRUE:
    case TOK_FALSE:
        lhs = parse_literal(p);
        break;
    case TOK_IDENT:
        lhs = parse_ident_or_call(p, error);
        break;
    default:
        error->message = "无法识别的表达式";
        error->filename = p->filename;
        error->line = first->line;
        error->col = first->col;
        return NULL;
    }
    if (lhs == NULL) {
        return NULL;
    }

    // Infix position: extend the left operand while operators bind tightly enough.
    while (error->message == NULL) {
        TokenKind op_kind = peek(p)->kind;
        Precedence op_prec = tok_to_prec(op_kind);
        if (op_prec <= min_prec) {
            break;
        }

        const Token* op_tok = advance(p);
        AstNode* rhs = parse_expr_prec(p, op_prec, error);
        if (rhs == NULL) {
            return NULL;
        }
        lhs = ast_make_binary(p->arena, tok_to_binop(op_kind), lhs, rhs,
                              op_tok->line, op_tok->col);
    }

    return lhs;
}
|
||||
|
||||
// Parses a complete expression: the Pratt loop is entered with the lowest
// minimum precedence (PREC_NONE), so every binary operator is accepted.
static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
|
||||
return parse_expr_prec(p, PREC_NONE, error);
|
||||
}
|
||||
|
||||
// === 类型工具 ===
|
||||
// Reports whether `k` is a type keyword (i64, f64, bool, void).
// Delegates to tok_is_type() from token.h so the parser and the token
// module cannot disagree about which keywords name a type.
static bool is_type_token(TokenKind k) {
    return tok_is_type(k);
}
|
||||
|
||||
// Maps a type keyword token to its TypeKind. Anything that is not i64, f64
// or bool (including TOK_VOID itself) maps to TYPE_VOID.
static TypeKind token_to_type(TokenKind k) {
    if (k == TOK_I64) {
        return TYPE_I64;
    }
    if (k == TOK_F64) {
        return TYPE_F64;
    }
    if (k == TOK_BOOL) {
        return TYPE_BOOL;
    }
    return TYPE_VOID;
}
|
||||
|
||||
// === 语句解析 ===
|
||||
static AstNode* parse_statement(Parser* p, ErrorInfo* error);
|
||||
|
||||
// Parses `{ statement* }` and returns a block node positioned at the
// opening brace.
//
// Statements are collected in a fixed stack buffer before being copied to
// the arena; a block with more than MAX_BLOCK_STMTS statements is rejected
// with an error instead of writing past the end of that buffer.
// Returns NULL with `error` filled in on any failure.
static AstNode* parse_block(Parser* p, ErrorInfo* error) {
    enum { MAX_BLOCK_STMTS = 256 };
    const Token* open = peek(p);
    if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) return NULL;

    AstNode* stmts[MAX_BLOCK_STMTS];
    int count = 0;
    while (peek(p)->kind != TOK_RBRACE && peek(p)->kind != TOK_EOF && !error->message) {
        if (count >= MAX_BLOCK_STMTS) {
            error->message = "代码块中语句过多"; error->filename = p->filename;
            error->line = peek(p)->line; error->col = peek(p)->col;
            return NULL;
        }
        AstNode* s = parse_statement(p, error);
        if (!s) return NULL;
        stmts[count++] = s;
    }
    // Reaching EOF without '}' is reported here.
    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;

    AstNode** arr = arena_alloc_impl(p->arena, count * sizeof(AstNode*));
    if (count > 0) {
        // Only a non-empty block needs real storage; report exhaustion
        // instead of handing a NULL destination to memcpy().
        if (!arr) {
            error->message = "内存不足"; error->filename = p->filename;
            error->line = open->line; error->col = open->col;
            return NULL;
        }
        memcpy(arr, stmts, count * sizeof(AstNode*));
    }
    return ast_make_block(p->arena, arr, count, open->line, open->col);
}
|
||||
|
||||
static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
|
||||
const Token* t = peek(p);
|
||||
|
||||
if (t->kind == TOK_LET) {
|
||||
advance(p);
|
||||
const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名");
|
||||
if (!name) return NULL;
|
||||
// 可选的类型标注
|
||||
TypeKind annot_type = TYPE_UNKNOWN;
|
||||
bool has_type_annot = false;
|
||||
if (match(p, TOK_COLON)) {
|
||||
const Token* type_tok = advance(p);
|
||||
if (!is_type_token(type_tok->kind)) {
|
||||
error->message = "无效的类型标注"; error->filename = p->filename;
|
||||
error->line = type_tok->line; error->col = type_tok->col; return NULL;
|
||||
}
|
||||
annot_type = token_to_type(type_tok->kind);
|
||||
has_type_annot = true;
|
||||
}
|
||||
if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
|
||||
AstNode* init = parse_expr(p, error);
|
||||
if (!init) return NULL;
|
||||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||||
return ast_make_let(p->arena,
|
||||
arena_strdup_impl(p->arena, name->start, name->length),
|
||||
annot_type, has_type_annot, init, t->line, t->col);
|
||||
}
|
||||
|
||||
if (t->kind == TOK_IF) {
|
||||
advance(p);
|
||||
AstNode* cond = parse_expr(p, error);
|
||||
if (!cond) return NULL;
|
||||
AstNode* then_block = parse_block(p, error);
|
||||
if (!then_block) return NULL;
|
||||
AstNode* else_block = NULL;
|
||||
if (match(p, TOK_ELSE)) {
|
||||
if (peek(p)->kind == TOK_IF) {
|
||||
else_block = parse_statement(p, error);
|
||||
} else {
|
||||
else_block = parse_block(p, error);
|
||||
}
|
||||
if (!else_block) return NULL;
|
||||
}
|
||||
return ast_make_if(p->arena, cond, then_block, else_block, t->line, t->col);
|
||||
}
|
||||
|
||||
if (t->kind == TOK_WHILE) {
|
||||
advance(p);
|
||||
AstNode* cond = parse_expr(p, error);
|
||||
if (!cond) return NULL;
|
||||
AstNode* body = parse_block(p, error);
|
||||
if (!body) return NULL;
|
||||
return ast_make_while(p->arena, cond, body, t->line, t->col);
|
||||
}
|
||||
|
||||
if (t->kind == TOK_RETURN) {
|
||||
advance(p);
|
||||
if (match(p, TOK_SEMICOLON)) {
|
||||
return ast_make_return(p->arena, NULL, t->line, t->col);
|
||||
}
|
||||
AstNode* expr = parse_expr(p, error);
|
||||
if (!expr) return NULL;
|
||||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||||
return ast_make_return(p->arena, expr, t->line, t->col);
|
||||
}
|
||||
|
||||
// 表达式语句
|
||||
AstNode* expr = parse_expr(p, error);
|
||||
if (!expr) return NULL;
|
||||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||||
return ast_make_expr_stmt(p->arena, expr, t->line, t->col);
|
||||
}
|
||||
|
||||
// === 函数解析 ===
|
||||
// Parses `fn NAME ( [NAME : TYPE {, NAME : TYPE}] ) [-> TYPE] BLOCK`.
//
// - The leading token must really be `fn`; any other token is reported as
//   an error rather than being silently consumed as if it were `fn`.
// - At most MAX_PARAMS parameters are accepted; more is an error instead of
//   a write past the end of the stack buffer.
// - A missing `-> TYPE` means the return type is TYPE_VOID.
//
// Returns the function node, or NULL with `error` filled in.
static AstNode* parse_function(Parser* p, ErrorInfo* error) {
    enum { MAX_PARAMS = 64 };

    const Token* fn_tok = expect(p, TOK_FN, error, "顶层只能出现函数定义,应为 'fn'");
    if (!fn_tok) return NULL;
    const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名");
    if (!name) return NULL;
    if (!expect(p, TOK_LPAREN, error, "缺少 '('")) return NULL;

    // Parameter list.
    AstNode* params[MAX_PARAMS];
    int pcount = 0;
    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        if (pcount >= MAX_PARAMS) {
            error->message = "函数参数过多"; error->filename = p->filename;
            error->line = peek(p)->line; error->col = peek(p)->col;
            return NULL;
        }
        const Token* pname = expect(p, TOK_IDENT, error, "参数名");
        if (!pname) return NULL;
        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
        const Token* ptype = advance(p);
        if (!is_type_token(ptype->kind)) {
            error->message = "无效的参数类型"; error->filename = p->filename;
            error->line = ptype->line; error->col = ptype->col;
            return NULL;
        }
        params[pcount++] = ast_make_parameter(p->arena,
            arena_strdup_impl(p->arena, pname->start, pname->length),
            token_to_type(ptype->kind), pname->line, pname->col);
        if (!match(p, TOK_COMMA)) break;
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;

    // Return type.
    TypeKind ret = TYPE_VOID;
    if (match(p, TOK_ARROW)) {
        const Token* rt = advance(p);
        if (!is_type_token(rt->kind)) {
            error->message = "无效的返回类型"; error->filename = p->filename;
            error->line = rt->line; error->col = rt->col;
            return NULL;
        }
        ret = token_to_type(rt->kind);
    }

    AstNode* body = parse_block(p, error);
    if (!body) return NULL;

    AstNode** parr = arena_alloc_impl(p->arena, pcount * sizeof(AstNode*));
    if (pcount > 0) {
        // Only a non-empty list needs real storage; report exhaustion
        // instead of handing a NULL destination to memcpy().
        if (!parr) {
            error->message = "内存不足"; error->filename = p->filename;
            error->line = fn_tok->line; error->col = fn_tok->col;
            return NULL;
        }
        memcpy(parr, params, pcount * sizeof(AstNode*));
    }
    return ast_make_function(p->arena,
        arena_strdup_impl(p->arena, name->start, name->length),
        parr, pcount, ret, body, fn_tok->line, fn_tok->col);
}
|
||||
|
||||
// === 程序入口 ===
|
||||
// === Program entry point ===
//
// Parses a whole token stream (which must end with TOK_EOF) as a sequence
// of function definitions and returns the program node.
//
// `error->message` must be NULL on entry: it doubles as the "an error has
// occurred" flag throughout the parser. At most MAX_FUNCTIONS functions are
// accepted; more is reported as an error instead of overflowing the stack
// buffer. Returns NULL with `error` filled in on failure.
AstNode* parse(Arena* a, const Token* tokens, size_t count,
               const char* filename, ErrorInfo* error) {
    enum { MAX_FUNCTIONS = 256 };
    Parser p = {.tokens = tokens, .count = count, .pos = 0,
                .filename = filename, .arena = a};

    AstNode* functions[MAX_FUNCTIONS];
    int fn_count = 0;
    while (peek(&p)->kind != TOK_EOF && !error->message) {
        if (fn_count >= MAX_FUNCTIONS) {
            error->message = "函数定义过多"; error->filename = filename;
            error->line = peek(&p)->line; error->col = peek(&p)->col;
            return NULL;
        }
        const Token* at = peek(&p);
        AstNode* fn = parse_function(&p, error);
        if (!fn) {
            // Never store a NULL function node; make sure the caller gets a
            // diagnostic even if the failing step did not set one.
            if (!error->message) {
                error->message = "解析失败"; error->filename = filename;
                error->line = at->line; error->col = at->col;
            }
            return NULL;
        }
        functions[fn_count++] = fn;
    }
    if (error->message) return NULL;

    AstNode** arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*));
    if (fn_count > 0) {
        if (!arr) {
            error->message = "内存不足"; error->filename = filename;
            error->line = 0; error->col = 0;
            return NULL;
        }
        memcpy(arr, functions, fn_count * sizeof(AstNode*));
    }
    return ast_make_program(a, arr, fn_count, 0, 0);
}
|
||||
@@ -0,0 +1,13 @@
|
||||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
#include "ast.h"
|
||||
#include "token.h"
|
||||
#include "error.h"
|
||||
|
||||
// Parses the Token array and returns the Program node (memory comes from the arena).
|
||||
// On failure, `error` is filled in and NULL is returned.
|
||||
AstNode* parse(Arena* a, const Token* tokens, size_t count,
|
||||
const char* filename, ErrorInfo* error);
|
||||
|
||||
#endif
|
||||
+264
@@ -0,0 +1,264 @@
|
||||
#include "sema.h"
|
||||
#include <string.h>
|
||||
|
||||
// === 类型关系 ===
|
||||
// Returns the result type of combining operands of types `a` and `b`:
// the first of f64, i64, bool that either operand has, or TYPE_ERROR when
// neither operand is one of those three.
static TypeKind promote(TypeKind a, TypeKind b) {
    static const TypeKind by_rank[] = {TYPE_F64, TYPE_I64, TYPE_BOOL};

    for (size_t i = 0; i < sizeof by_rank / sizeof by_rank[0]; i++) {
        if (a == by_rank[i] || b == by_rank[i]) {
            return by_rank[i];
        }
    }
    return TYPE_ERROR;
}
|
||||
|
||||
// True when `t` is one of the numeric types (i64 or f64).
static bool is_numeric(TypeKind t) { return t == TYPE_I64 || t == TYPE_F64; }
|
||||
// Two operands may be compared only when their types are identical; mixed
// i64/f64 comparisons are therefore rejected by this check.
static bool is_comparable(TypeKind a, TypeKind b) { return a == b; }
|
||||
|
||||
// === 向前声明 ===
|
||||
static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a);
|
||||
|
||||
// === 检查表达式 ===
|
||||
// Type-checks an expression node and stores its type in node->type.kind.
//
// Sub-expressions are analyzed first. A problem is reported once, at the
// node where it originates, and that node becomes TYPE_ERROR; parents that
// see a TYPE_ERROR operand propagate it WITHOUT adding another message.
// (The unary case used to report a second, misleading error on top of an
// operand that had already failed.)
static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    switch (node->kind) {
    case AST_LITERAL_EXPR:
        // Nothing to do: per the original note, a literal's type is set
        // when the node is created.
        break;

    case AST_IDENT_EXPR: {
        Symbol* sym = scope_lookup(scope, node->as.ident.name);
        if (!sym) {
            error_add(errors, "<sema>", node->line, node->col,
                      "未定义的变量 '%s'", node->as.ident.name);
            node->type.kind = TYPE_ERROR;
        } else if (sym->kind == SYM_FUNCTION) {
            error_add(errors, "<sema>", node->line, node->col,
                      "'%s' 是函数,不能作为表达式使用", node->as.ident.name);
            node->type.kind = TYPE_ERROR;
        } else {
            node->type.kind = sym->type;
        }
        break;
    }

    case AST_UNARY_EXPR: {
        analyze_expr(node->as.unary.operand, scope, errors, a);
        TypeKind inner = node->as.unary.operand->type.kind;

        // The operand already failed: propagate silently, matching how
        // binary expressions and if/while conditions treat TYPE_ERROR.
        if (inner == TYPE_ERROR) {
            node->type.kind = TYPE_ERROR;
            break;
        }

        if (node->as.unary.op == OP_NEG) {
            if (!is_numeric(inner)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "一元 '-' 只能用于数值类型");
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = inner;
            }
        } else {  // OP_NOT
            if (inner != TYPE_BOOL) {
                error_add(errors, "<sema>", node->line, node->col,
                          "'!' 只能用于布尔类型,得到 '%s'", type_name(inner));
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = TYPE_BOOL;
            }
        }
        break;
    }

    case AST_BINARY_EXPR: {
        analyze_expr(node->as.binary.left, scope, errors, a);
        analyze_expr(node->as.binary.right, scope, errors, a);
        TypeKind l = node->as.binary.left->type.kind;
        TypeKind r = node->as.binary.right->type.kind;
        if (l == TYPE_ERROR || r == TYPE_ERROR) {
            node->type.kind = TYPE_ERROR;
            break;
        }

        switch (node->as.binary.op) {
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
            if (!is_numeric(l) || !is_numeric(r)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "算术运算需要数值类型");
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = promote(l, r);
            }
            break;
        case OP_EQ: case OP_NE: case OP_LT: case OP_GT: case OP_LE: case OP_GE:
            if (!is_comparable(l, r)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "类型 '%s' 和 '%s' 无法比较", type_name(l), type_name(r));
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = TYPE_BOOL;
            }
            break;
        case OP_AND: case OP_OR:
            if (l != TYPE_BOOL || r != TYPE_BOOL) {
                error_add(errors, "<sema>", node->line, node->col,
                          "逻辑运算需要布尔类型");
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = TYPE_BOOL;
            }
            break;
        default:
            break;
        }
        break;
    }

    case AST_CALL_EXPR: {
        Symbol* sym = scope_lookup(scope, node->as.call.name);
        bool callable = true;

        if (!sym || sym->kind != SYM_FUNCTION) {
            error_add(errors, "<sema>", node->line, node->col,
                      "未定义的函数 '%s'", node->as.call.name);
            callable = false;
        } else if (node->as.call.arg_count != sym->param_count) {
            error_add(errors, "<sema>", node->line, node->col,
                      "函数 '%s' 需要 %zu 个参数,但提供了 %zu 个",
                      node->as.call.name, sym->param_count, node->as.call.arg_count);
            callable = false;
        }

        if (!callable) {
            node->type.kind = TYPE_ERROR;
            // Still analyze the arguments: they may contain further,
            // independent errors worth reporting.
            for (size_t i = 0; i < node->as.call.arg_count; i++) {
                analyze_expr(node->as.call.args[i], scope, errors, a);
            }
            break;
        }

        for (size_t i = 0; i < node->as.call.arg_count; i++) {
            analyze_expr(node->as.call.args[i], scope, errors, a);
            TypeKind actual = node->as.call.args[i]->type.kind;
            TypeKind expected = sym->param_types[i];
            if (actual != TYPE_ERROR && actual != expected) {
                error_add(errors, "<sema>", node->line, node->col,
                          "参数 %zu 类型不匹配: 期望 '%s',得到 '%s'",
                          i + 1, type_name(expected), type_name(actual));
            }
        }
        // A mismatched argument does not poison the call: its type is
        // still the declared return type.
        node->type.kind = sym->return_type;
        break;
    }

    default:
        break;
    }
}
|
||||
|
||||
// Analyzes a program, function, block or statement node; any other node
// kind is forwarded to analyze_expr(). A NULL node is ignored.
//
// NOTE(review): blocks are analyzed in the scope they are given (only
// functions open a new scope), and `return` expressions are not compared
// with the enclosing function's declared return type. Both are preserved
// from the original; confirm against codegen before changing.
static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    if (!node) return;

    switch (node->kind) {
    case AST_PROGRAM:
        // Pass 1: register every function signature, so calls may refer to
        // functions defined later in the file.
        for (size_t i = 0; i < node->as.program.fn_count; i++) {
            AstNode* fn = node->as.program.functions[i];
            size_t param_count = fn->as.function.param_count;
            TypeKind* pts = (TypeKind*)arena_alloc_impl(a, param_count * sizeof(TypeKind));
            for (size_t j = 0; j < param_count; j++) {
                pts[j] = fn->as.function.params[j]->as.parameter.type;
            }
            scope_insert_function(scope, a, fn->as.function.name,
                                  fn->as.function.return_type, pts, param_count);
        }
        // Pass 2: analyze each function body.
        for (size_t i = 0; i < node->as.program.fn_count; i++) {
            analyze_node(node->as.program.functions[i], scope, errors, a);
        }
        break;

    case AST_FUNCTION: {
        Scope* fn_scope = scope_new(a, scope);
        // Register the parameters in the function's own scope.
        for (size_t i = 0; i < node->as.function.param_count; i++) {
            AstNode* p = node->as.function.params[i];
            scope_insert(fn_scope, a, p->as.parameter.name, SYM_PARAMETER,
                         p->as.parameter.type);
        }
        analyze_node(node->as.function.body, fn_scope, errors, a);
        break;
    }

    case AST_BLOCK:
        for (size_t i = 0; i < node->as.block.stmt_count; i++) {
            analyze_node(node->as.block.stmts[i], scope, errors, a);
        }
        break;

    case AST_LET_STMT: {
        analyze_expr(node->as.let_stmt.init, scope, errors, a);
        TypeKind inferred = node->as.let_stmt.init->type.kind;
        TypeKind var_type;

        if (node->as.let_stmt.has_type_annot) {
            // The explicit annotation wins; a mismatch is reported but the
            // variable is still registered with the annotated type.
            var_type = node->as.let_stmt.annot_type;
            if (inferred != TYPE_ERROR && inferred != var_type) {
                error_add(errors, "<sema>", node->line, node->col,
                          "变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'",
                          node->as.let_stmt.name, type_name(var_type), type_name(inferred));
            }
        } else if (inferred == TYPE_VOID) {
            // A void expression has no value to bind.
            error_add(errors, "<sema>", node->line, node->col,
                      "无法从表达式推断变量 '%s' 的类型", node->as.let_stmt.name);
            break;
        } else {
            // Type inference. If the initializer already failed
            // (TYPE_ERROR) its error has been reported; register the
            // variable with TYPE_ERROR so later uses propagate silently
            // instead of producing "cannot infer" and "undefined variable"
            // follow-up errors.
            var_type = inferred;
        }

        node->type.kind = var_type;
        if (!scope_insert(scope, a, node->as.let_stmt.name, SYM_VARIABLE, var_type)) {
            error_add(errors, "<sema>", node->line, node->col,
                      "变量 '%s' 重复定义", node->as.let_stmt.name);
        }
        break;
    }

    case AST_IF_STMT: {
        analyze_expr(node->as.if_stmt.cond, scope, errors, a);
        TypeKind cond_type = node->as.if_stmt.cond->type.kind;
        if (cond_type != TYPE_BOOL && cond_type != TYPE_ERROR) {
            error_add(errors, "<sema>", node->line, node->col, "if 条件必须是布尔类型");
        }
        analyze_node(node->as.if_stmt.then_block, scope, errors, a);
        if (node->as.if_stmt.else_block) {
            analyze_node(node->as.if_stmt.else_block, scope, errors, a);
        }
        break;
    }

    case AST_WHILE_STMT: {
        analyze_expr(node->as.while_stmt.cond, scope, errors, a);
        TypeKind cond_type = node->as.while_stmt.cond->type.kind;
        if (cond_type != TYPE_BOOL && cond_type != TYPE_ERROR) {
            error_add(errors, "<sema>", node->line, node->col, "while 条件必须是布尔类型");
        }
        analyze_node(node->as.while_stmt.body, scope, errors, a);
        break;
    }

    case AST_RETURN_STMT:
        // The statement takes the type of its expression; a bare `return;`
        // leaves the node's type untouched.
        if (node->as.return_stmt.expr) {
            analyze_expr(node->as.return_stmt.expr, scope, errors, a);
            node->type.kind = node->as.return_stmt.expr->type.kind;
        }
        break;

    case AST_EXPR_STMT:
        analyze_expr(node->as.expr_stmt.expr, scope, errors, a);
        break;

    default:
        analyze_expr(node, scope, errors, a);
        break;
    }
}
|
||||
|
||||
// Runs semantic analysis over `ast`.
//
// Creates the global scope, registers the builtin functions print_i64,
// print_f64 and print_bool (each taking one argument and returning void),
// then hands the tree to analyze_node(). Diagnostics are appended to `errors`;
// every allocation comes from `arena`.
void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) {
    // scope_insert_function() stores the parameter-array pointer it is given
    // instead of copying it, and the symbols live in the arena. The arrays
    // therefore need static storage so the pointers stay valid after return.
    static TypeKind params_i64[]  = {TYPE_I64};
    static TypeKind params_f64[]  = {TYPE_F64};
    static TypeKind params_bool[] = {TYPE_BOOL};

    Scope* global = scope_new(arena, NULL);
    if (!global) {
        // Without a global scope nothing can be analyzed; report it rather
        // than dereferencing NULL below.
        error_add(errors, "<sema>", 0, 0, "内存不足:无法创建全局作用域");
        return;
    }

    // Register the builtin functions.
    scope_insert_function(global, arena, "print_i64", TYPE_VOID, params_i64, 1);
    scope_insert_function(global, arena, "print_f64", TYPE_VOID, params_f64, 1);
    scope_insert_function(global, arena, "print_bool", TYPE_VOID, params_bool, 1);

    analyze_node(ast, global, errors, arena);
}
|
||||
@@ -0,0 +1,12 @@
|
||||
#ifndef SEMA_H
|
||||
#define SEMA_H
|
||||
|
||||
#include "ast.h"
|
||||
#include "error.h"
|
||||
#include "symbol.h"
|
||||
|
||||
// 对 AST 进行语义分析(类型推断 + 类型检查)
|
||||
// 为每个节点填充 type 字段,错误收集到 errors 列表中。
|
||||
void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
#include "symbol.h"
|
||||
#include "l_lang.h"
|
||||
#include <string.h>
|
||||
|
||||
// Creates an empty scope whose enclosing scope is `parent` (NULL for the
// outermost scope). The scope is allocated through arena_alloc_impl(alloc, ...).
// Returns NULL when that allocation fails.
Scope* scope_new(void* alloc, Scope* parent) {
    Scope* s = (Scope*)arena_alloc_impl(alloc, sizeof *s);
    if (!s) return NULL;  // allocator returned nothing; do not write through NULL
    s->head = NULL;
    s->parent = parent;
    return s;
}
|
||||
|
||||
Symbol* scope_lookup(const Scope* scope, const char* name) {
|
||||
for (const Scope* s = scope; s; s = s->parent) {
|
||||
for (Symbol* sym = s->head; sym; sym = sym->next) {
|
||||
if (strcmp(sym->name, name) == 0) return sym;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
|
||||
SymbolKind kind, TypeKind type) {
|
||||
if (scope->head) {
|
||||
for (Symbol* sym = scope->head; sym; sym = sym->next) {
|
||||
if (strcmp(sym->name, name) == 0) return NULL;
|
||||
}
|
||||
}
|
||||
Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol));
|
||||
sym->name = name; sym->kind = kind; sym->type = type;
|
||||
sym->next = scope->head;
|
||||
scope->head = sym;
|
||||
return sym;
|
||||
}
|
||||
|
||||
// Declares a function symbol in `scope` with return type `ret` and `pc`
// parameter types read from `pt`.
//
// Both `name` and `pt` are stored by pointer, not copied: the caller must keep
// them alive for as long as the symbol can be looked up. Returns the new
// symbol, or NULL when the name already exists in this scope or the
// allocation fails.
Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
                              TypeKind ret, TypeKind* pt, size_t pc) {
    for (Symbol* sym = scope->head; sym; sym = sym->next) {
        if (strcmp(sym->name, name) == 0) return NULL;  // duplicate in this scope
    }

    Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof *sym);
    if (!sym) return NULL;

    sym->name = name;
    sym->kind = SYM_FUNCTION;
    sym->type = TYPE_VOID;  // value type is unused for functions; see return_type
    sym->return_type = ret;
    sym->param_types = pt;
    sym->param_count = pc;

    // Push onto the front of the scope's list.
    sym->next = scope->head;
    scope->head = sym;
    return sym;
}
|
||||
@@ -0,0 +1,40 @@
|
||||
#ifndef SYMBOL_H
|
||||
#define SYMBOL_H
|
||||
|
||||
#include "l_lang.h"
|
||||
#include "ast.h"
|
||||
|
||||
// Category of a symbol-table entry: a variable, a parameter, or a function.
typedef enum { SYM_VARIABLE, SYM_PARAMETER, SYM_FUNCTION } SymbolKind;
|
||||
|
||||
// One named entry in a scope. Entries of the same scope are chained through
// `next` into a singly linked list.
typedef struct Symbol {
|
||||
const char* name;
|
||||
SymbolKind kind;
|
||||
TypeKind type; // type of the variable/parameter
|
||||
// Function-specific fields
|
||||
TypeKind return_type;
|
||||
TypeKind* param_types;
|
||||
size_t param_count;
|
||||
// Linked list (next symbol in the same scope)
|
||||
struct Symbol* next;
|
||||
} Symbol;
|
||||
|
||||
// A lexical scope: its own symbol list plus a link to the enclosing scope.
typedef struct Scope {
|
||||
Symbol* head; // head of the symbol list
|
||||
struct Scope* parent; // enclosing (parent) scope
|
||||
} Scope;
|
||||
|
||||
// 创建新作用域(子作用域)
|
||||
Scope* scope_new(void* alloc, Scope* parent);
|
||||
|
||||
// 在当前作用域及其父作用域中查找符号
|
||||
Symbol* scope_lookup(const Scope* scope, const char* name);
|
||||
|
||||
// 在当前作用域中插入符号(重复插入返回 NULL)
|
||||
Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
|
||||
SymbolKind kind, TypeKind type);
|
||||
|
||||
// 插入函数符号
|
||||
Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
|
||||
TypeKind ret, TypeKind* pt, size_t pc);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,45 @@
|
||||
#include "arena.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
Arena arena_create(size_t capacity_mb) {
|
||||
Arena a;
|
||||
a.capacity = capacity_mb * 1024 * 1024;
|
||||
a.memory = (char*)malloc(a.capacity);
|
||||
a.offset = 0;
|
||||
if (!a.memory) a.capacity = 0;
|
||||
return a;
|
||||
}
|
||||
|
||||
// Releases the arena's buffer and resets the arena to the empty state
// (no memory, zero capacity, zero offset). Every pointer previously handed
// out by this arena becomes invalid.
void arena_destroy(Arena* a) {
    free(a->memory);
    *a = (Arena){ .memory = NULL, .capacity = 0, .offset = 0 };
}
|
||||
|
||||
// Bump-allocates `size` bytes from the arena, rounding the request up to a
// multiple of 8 so that successive allocations stay 8-byte aligned relative
// to the buffer start.
//
// Returns NULL when the arena has no buffer, when the request does not fit in
// the remaining space, or when rounding the size up would overflow size_t.
// The memory is not zeroed.
void* arena_alloc(Arena* a, size_t size) {
    const size_t align_mask = 7;

    if (!a->memory) return NULL;
    // (size + 7) must not wrap, or a huge request would turn into a tiny one.
    if (size > (size_t)-1 - align_mask) return NULL;
    size = (size + align_mask) & ~align_mask;  // round up to 8 bytes

    // Compare against the space left rather than computing offset + size,
    // which could itself wrap. offset never exceeds capacity.
    if (size > a->capacity - a->offset) return NULL;

    void* ptr = a->memory + a->offset;
    a->offset += size;
    return ptr;
}
|
||||
|
||||
// Copies the NUL-terminated string `src` (terminator included) into the arena.
// Returns the copy, or NULL when `src` is NULL or the arena cannot satisfy
// the allocation.
char* arena_strdup(Arena* a, const char* src) {
    if (!src) return NULL;  // strlen(NULL) would be undefined behavior

    size_t len = strlen(src) + 1;
    char* dst = arena_alloc(a, len);
    if (dst) memcpy(dst, src, len);
    return dst;
}
|
||||
|
||||
// === 跨模块分配器(void* 接口,供 parser/sema 等模块复用)===
|
||||
void* arena_alloc_impl(void* alloc, size_t size) {
|
||||
return arena_alloc((Arena*)alloc, size);
|
||||
}
|
||||
|
||||
// Copies the first `len` bytes of `src` into memory obtained from
// arena_alloc_impl(alloc, ...) and appends a NUL terminator. `src` does not
// need to be NUL-terminated itself.
//
// Returns the new string, or NULL when the allocation fails, when `len + 1`
// would overflow size_t, or when `src` is NULL while `len` is non-zero.
char* arena_strdup_impl(void* alloc, const char* src, size_t len) {
    if (len == (size_t)-1) return NULL;   // len + 1 would wrap to 0
    if (!src && len > 0) return NULL;     // nothing to copy from

    char* dst = arena_alloc_impl(alloc, len + 1);
    if (dst) {
        if (len > 0) memcpy(dst, src, len);  // skip memcpy for the empty case
        dst[len] = '\0';
    }
    return dst;
}
|
||||
@@ -0,0 +1,17 @@
|
||||
#ifndef ARENA_H
|
||||
#define ARENA_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
// Bump allocator state: one buffer (`memory`) of `capacity` bytes, of which
// the first `offset` bytes have already been handed out.
typedef struct Arena {
|
||||
char* memory;
|
||||
size_t capacity;
|
||||
size_t offset;
|
||||
} Arena;
|
||||
|
||||
Arena arena_create(size_t capacity_mb);
|
||||
void arena_destroy(Arena* a);
|
||||
void* arena_alloc(Arena* a, size_t size);
|
||||
char* arena_strdup(Arena* a, const char* src);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,5 @@
|
||||
// Arithmetic fixture: binds x to 1 + 2 * 3, prints it with print_i64, returns 0.
fn main() -> i64 {
|
||||
let x: i64 = 1 + 2 * 3;
|
||||
print_i64(x);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
// Branch fixture: prints 1 when x > 5 and 0 otherwise (x is 10), then returns 0.
fn main() -> i64 {
|
||||
let x: i64 = 10;
|
||||
if x > 5 {
|
||||
print_i64(1);
|
||||
} else {
|
||||
print_i64(0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
// Recursion fixture: while n > 0, prints n and recurses with n - 1; returns 0 once n is not positive.
fn countdown(n: i64) -> i64 {
|
||||
if n > 0 {
|
||||
print_i64(n);
|
||||
return countdown(n - 1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Entry point: returns the result of countdown(5).
fn main() -> i64 {
|
||||
return countdown(5);
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
// Fibonacci fixture: returns n itself when n < 2, otherwise fib(n - 1) + fib(n - 2).
fn fib(n: i64) -> i64 {
|
||||
if n < 2 {
|
||||
return n;
|
||||
}
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
|
||||
// Entry point: prints fib(10) with print_i64 and returns 0.
fn main() -> i64 {
|
||||
let result: i64 = fib(10);
|
||||
print_i64(result);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
// Floating-point fixture helper: returns x * x.
fn square(x: f64) -> f64 {
|
||||
return x * x;
|
||||
}
|
||||
|
||||
// Floating-point fixture helper: returns a + b.
fn add_floats(a: f64, b: f64) -> f64 {
|
||||
return a + b;
|
||||
}
|
||||
|
||||
// Entry point: prints add_floats(square(3.0), 4.0) with print_f64 and returns 0.
fn main() -> i64 {
|
||||
let s: f64 = square(3.0);
|
||||
let sum: f64 = add_floats(s, 4.0);
|
||||
print_f64(sum);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
#include "test_utils.h"
|
||||
#include "lexer.h"
|
||||
#include "arena.h"
|
||||
|
||||
void test_simple_tokens() {
|
||||
Arena a = arena_create(1);
|
||||
const char* src = "fn main() { return 42; }";
|
||||
size_t count; ErrorInfo error = {0};
|
||||
Token* tokens = lex(&a, src, "test", &count, &error);
|
||||
ASSERT(tokens != NULL);
|
||||
ASSERT(count >= 8);
|
||||
ASSERT(tokens[0].kind == TOK_FN);
|
||||
ASSERT(tokens[1].kind == TOK_IDENT);
|
||||
ASSERT(tokens[2].kind == TOK_LPAREN);
|
||||
ASSERT(tokens[3].kind == TOK_RPAREN);
|
||||
ASSERT(tokens[4].kind == TOK_LBRACE);
|
||||
ASSERT(tokens[5].kind == TOK_RETURN);
|
||||
ASSERT(tokens[6].kind == TOK_INT_LIT);
|
||||
ASSERT(tok_int_value(&tokens[6]) == 42);
|
||||
arena_destroy(&a);
|
||||
}
|
||||
|
||||
void test_keywords() {
|
||||
Arena a = arena_create(1);
|
||||
const char* src = "fn let if else while return i64 f64 bool void true false";
|
||||
TokenKind expected[] = {TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE,
|
||||
TOK_RETURN, TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID, TOK_TRUE, TOK_FALSE, TOK_EOF};
|
||||
size_t count; ErrorInfo error = {0};
|
||||
Token* tokens = lex(&a, src, "test", &count, &error);
|
||||
ASSERT(tokens != NULL);
|
||||
for (int i = 0; i < 13; i++) ASSERT(tokens[i].kind == expected[i]);
|
||||
arena_destroy(&a);
|
||||
}
|
||||
|
||||
void test_operators() {
|
||||
Arena a = arena_create(1);
|
||||
const char* src = "+ - * / % == != < > <= >= && || ! ->";
|
||||
TokenKind expected[] = {TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
|
||||
TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
|
||||
TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG, TOK_ARROW, TOK_EOF};
|
||||
size_t count; ErrorInfo error = {0};
|
||||
Token* tokens = lex(&a, src, "test", &count, &error);
|
||||
ASSERT(tokens != NULL);
|
||||
for (int i = 0; i < 16; i++) ASSERT(tokens[i].kind == expected[i]);
|
||||
arena_destroy(&a);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
TEST_RUN(test_simple_tokens);
|
||||
TEST_RUN(test_keywords);
|
||||
TEST_RUN(test_operators);
|
||||
return test_summary();
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
#include "test_utils.h"
|
||||
#include "parser.h"
|
||||
#include "lexer.h"
|
||||
#include "arena.h"
|
||||
|
||||
static AstNode* parse_string(const char* src) {
|
||||
Arena* a = malloc(sizeof(Arena));
|
||||
*a = arena_create(1);
|
||||
size_t tcount;
|
||||
ErrorInfo lex_err = {0};
|
||||
Token* tokens = lex(a, src, "test", &tcount, &lex_err);
|
||||
if (!tokens) { arena_destroy(a); free(a); return NULL; }
|
||||
ErrorInfo parse_err = {0};
|
||||
AstNode* ast = parse(a, tokens, tcount, "test", &parse_err);
|
||||
if (!ast) { arena_destroy(a); free(a); return NULL; }
|
||||
// NOTE: arena and tokens must stay alive for AST - leak intentionally in test
|
||||
return ast;
|
||||
}
|
||||
|
||||
void test_simple_function() {
|
||||
AstNode* ast = parse_string("fn main() { return 42; }");
|
||||
ASSERT(ast != NULL);
|
||||
ASSERT(ast->kind == AST_PROGRAM);
|
||||
ASSERT(ast->as.program.fn_count == 1);
|
||||
AstNode* fn = ast->as.program.functions[0];
|
||||
ASSERT(fn->kind == AST_FUNCTION);
|
||||
}
|
||||
|
||||
void test_arithmetic_expr() {
|
||||
AstNode* ast = parse_string("fn main() { return 1 + 2 * 3; }");
|
||||
ASSERT(ast != NULL);
|
||||
AstNode* body = ast->as.program.functions[0]->as.function.body;
|
||||
AstNode* ret = body->as.block.stmts[0];
|
||||
ASSERT(ret->kind == AST_RETURN_STMT);
|
||||
AstNode* expr = ret->as.return_stmt.expr;
|
||||
ASSERT(expr->kind == AST_BINARY_EXPR);
|
||||
ASSERT(expr->as.binary.op == OP_ADD);
|
||||
// 1 + (2 * 3): right should be *, left should be 1
|
||||
ASSERT(expr->as.binary.right->kind == AST_BINARY_EXPR);
|
||||
ASSERT(expr->as.binary.right->as.binary.op == OP_MUL);
|
||||
}
|
||||
|
||||
void test_if_statement() {
|
||||
AstNode* ast = parse_string("fn main() { if true { return 1; } else { return 0; } }");
|
||||
ASSERT(ast != NULL);
|
||||
}
|
||||
|
||||
void test_while_loop() {
|
||||
AstNode* ast = parse_string("fn main() { while true { return; } }");
|
||||
ASSERT(ast != NULL);
|
||||
}
|
||||
|
||||
void test_function_with_params() {
|
||||
AstNode* ast = parse_string("fn add(a: i64, b: i64) -> i64 { return a + b; }");
|
||||
ASSERT(ast != NULL);
|
||||
AstNode* fn = ast->as.program.functions[0];
|
||||
ASSERT(fn->as.function.param_count == 2);
|
||||
ASSERT(fn->as.function.return_type == TYPE_I64);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
TEST_RUN(test_simple_function);
|
||||
TEST_RUN(test_arithmetic_expr);
|
||||
TEST_RUN(test_if_statement);
|
||||
TEST_RUN(test_while_loop);
|
||||
TEST_RUN(test_function_with_params);
|
||||
return test_summary();
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
#include "test_utils.h"
|
||||
#include "parser.h"
|
||||
#include "lexer.h"
|
||||
#include "sema.h"
|
||||
#include "arena.h"
|
||||
|
||||
void test_type_error() {
|
||||
Arena a = arena_create(1);
|
||||
size_t tc; ErrorInfo lex_err = {0};
|
||||
Token* toks = lex(&a, "fn main() { let x: i64 = 1; let y: i64 = x + true; return; }",
|
||||
"test", &tc, &lex_err);
|
||||
ASSERT(toks != NULL);
|
||||
ErrorInfo parse_err = {0};
|
||||
AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
|
||||
ASSERT(ast != NULL);
|
||||
|
||||
ErrorList errors; error_init(&errors);
|
||||
sema_analyze(ast, &errors, &a);
|
||||
ASSERT(errors.count > 0);
|
||||
arena_destroy(&a);
|
||||
}
|
||||
|
||||
void test_undefined_var() {
|
||||
Arena a = arena_create(1);
|
||||
size_t tc; ErrorInfo lex_err = {0};
|
||||
Token* toks = lex(&a, "fn main() { let x: i64 = y; return; }", "test", &tc, &lex_err);
|
||||
ASSERT(toks != NULL);
|
||||
ErrorInfo parse_err = {0};
|
||||
AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
|
||||
ASSERT(ast != NULL);
|
||||
|
||||
ErrorList errors; error_init(&errors);
|
||||
sema_analyze(ast, &errors, &a);
|
||||
ASSERT(errors.count > 0);
|
||||
arena_destroy(&a);
|
||||
}
|
||||
|
||||
void test_simple_ok() {
|
||||
Arena a = arena_create(1);
|
||||
size_t tc; ErrorInfo lex_err = {0};
|
||||
Token* toks = lex(&a, "fn main() { let x: i64 = 42; print_i64(x); return; }",
|
||||
"test", &tc, &lex_err);
|
||||
ASSERT(toks != NULL);
|
||||
ErrorInfo parse_err = {0};
|
||||
AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
|
||||
ASSERT(ast != NULL);
|
||||
|
||||
ErrorList errors; error_init(&errors);
|
||||
sema_analyze(ast, &errors, &a);
|
||||
ASSERT(errors.count == 0);
|
||||
arena_destroy(&a);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
TEST_RUN(test_type_error);
|
||||
TEST_RUN(test_undefined_var);
|
||||
TEST_RUN(test_simple_ok);
|
||||
return test_summary();
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
#ifndef TEST_UTILS_H
|
||||
#define TEST_UTILS_H
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Per-translation-unit counters (static, so each test file that includes this
// header gets its own pair). ASSERT increments _tests_run on every evaluation
// and _tests_failed on every failing one.
static int _tests_run = 0;
|
||||
static int _tests_failed = 0;
|
||||
|
||||
// Non-fatal assertion: counts the check, and when `expr` is false prints a
// FAIL line (file, line, expression text) to stderr and counts a failure.
// Execution continues after a failed check, so callers must not rely on it
// to guard later dereferences.
#define ASSERT(expr) do { \
|
||||
_tests_run++; \
|
||||
if (!(expr)) { \
|
||||
fprintf(stderr, "\033[1;31mFAIL\033[0m %s:%d: %s\n", __FILE__, __LINE__, #expr); \
|
||||
_tests_failed++; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
// Prints the name of the test function to stderr, then calls it with no
// arguments.
#define TEST_RUN(func) do { \
|
||||
fprintf(stderr, " RUN %s\n", #func); \
|
||||
func(); \
|
||||
} while(0)
|
||||
|
||||
static inline int test_summary(void) {
|
||||
fprintf(stderr, "\n%d tests, %d passed, %d failed\n",
|
||||
_tests_run, _tests_run - _tests_failed, _tests_failed);
|
||||
return _tests_failed > 0 ? 1 : 0;
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user