Compare commits

...

4 Commits

20 changed files with 525 additions and 47 deletions
+3
View File
@@ -35,3 +35,6 @@ Thumbs.db
*.zip *.zip
*.tar.gz *.tar.gz
*.7z *.7z
# 环境变量文件
.env
+142
View File
@@ -0,0 +1,142 @@
# AGENTS.md
本文件为 L Language 项目的 Codex 上下文文件,定义项目架构、构建命令、核心 API 和开发约定。
## 项目概述
L Language v0.5 — C17 实现的静态类型编译型语言,Rust 风格语法,LLVM 22.x 后端。5 阶段流水线:词法 → 语法 → 语义 → IR → 可执行文件。38 单元测试 + 23 集成程序。
## 目录结构
```
├── include/l_lang.h TypeKind(10), SourceLoc 公共头文件
├── src/
│ ├── lexer/ {token,lexer} 手写状态机, 50 Token 类型
│ ├── parser/ parser.c 递归下降 + Pratt, 825 行
│ ├── ast/ ast.{c,h} 25 种 AST 节点 + 工厂函数
│ ├── sema/ {symbol,sema} 作用域链 + 类型推断 + impl mangle
│ ├── codegen/{codegen,target} LLVM-C API → 目标代码, 824 行
│ ├── driver/ {main,error} 入口 + 命令行解析 + 错误报告
│ └── util/ arena.c Bump allocator (8MB)
├── test/
│ ├── test_{lexer,parser,sema,codegen}.c 单元测试 (38)
│ └── programs/*.l 集成测试 (23)
└── docs/
├── PRD.md 产品需求文档 (v0.1)
├── analysis/ 架构分析报告
└── architecture-improvements.md
```
## 构建命令
```powershell
cd build
cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:/settings/Language/LLVM"
mingw32-make -j4
mingw32-make l_lang # 仅编译器
```
## 编译流水线
```
源码(.l) → Lexer → Parser → Sema → Codegen → Target(obj) → GCC 链接(.exe)
50 Tok 25 AST 类型标注 LLVM IR .o 可执行文件
```
## 核心 API
```c
// lexer.h — 词法分析
Token* lex(Arena* a, const char* source, const char* filename,
size_t* count, ErrorInfo* error);
// parser.h — 语法分析
AstNode* parse(Arena* a, const Token* tokens, size_t count,
const char* filename, ErrorInfo* error);
// sema.h — 语义分析
void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena);
// codegen.h — IR 生成
LLVMModuleRef codegen_module(AstNode* ast, const char* module_name,
const char** error_msg);
```
## 类型系统
| L 类型 | LLVM 类型 | TypeKind |
|--------|-----------|----------|
| i64 | LLVMInt64Type() | TYPE_I64 |
| f64 | LLVMDoubleType() | TYPE_F64 |
| bool | LLVMInt1Type() | TYPE_BOOL |
| str | LLVMInt8PtrType() | TYPE_STR |
| void | LLVMVoidType() | TYPE_VOID |
| struct | LLVMStructType() | TYPE_STRUCT |
| enum | (i64 常量) | TYPE_ENUM |
| [T; N] | LLVMArrayType() | TYPE_ARRAY |
## 运算符优先级 (Pratt)
| 优先级 | 运算符 |
|--------|--------|
| 70 (最高) | `-`(一元负) `!` |
| 60 | `*` `/` `%` |
| 50 | `+` `-` |
| 40 | `==` `!=` `<` `>` `<=` `>=` |
| 30 | `&&` |
| 20 | `\|\|` |
## 关键架构决策
| 决策 | 说明 |
|------|------|
| impl mangle | sema 将 `impl S { fn f }` 改名为 `S$f`,codegen 零修改 |
| match 脱糖 | parser 将 match 转为 let+if-else 链,sema/codegen 完全复用 |
| for 脱糖 | for 转为 let mut+while+assign |
| 复合赋值脱糖 | `x += 1` → `x = x + 1` |
| parse_type_expr 统一 | 所有类型标注经同一函数解析 |
| RAII cleanup_list | 作用域级自动 free,dynamic resize |
## 版本状态
| 指标 | v0.5 |
|------|------|
| 实现代码 | ~3,336 行 |
| Token 类型 | 50 |
| AST 节点 | 25 |
| TypeKind | 10 |
| SymbolKind | 5 (VAR/PARAM/FN/STRUCT/ENUM) |
| P0 完成度 | 4/4 (100%) |
| P1 完成度 | 6/6 (100%) |
| 已知技术债务 | 10 项 |
## 开发约定
- **语言标准**: C17,`-Wall -Wextra -g`,零警告
- **内存管理**: Token/AST/符号表均在 arena 分配,禁止 malloc/free 散落
- **错误消息**: 中文,格式 `文件名:行号:列号: 描述`
- **平台**: Windows 11 + MinGW-w64,链接器用 gcc(非 clang)
- **LLVM 路径**: `D:\settings\Language\LLVM`,v22.1.7,C API
- **去糖优先**: 复杂语法优先在 parser 层去糖为简单原语,减少 sema/codegen 改动
- **新增功能流程**: lexer token → ast 节点 → parser 解析 → sema 检查 → codegen 生成 → 测试
## 测试
```powershell
./l_lang_lexer_test.exe # 3 测试
./l_lang_test.exe # 5 测试
./l_lang_sema_test.exe # 21 测试
./l_lang_codegen_test.exe # 9 测试
# 集成测试
Get-ChildItem test/programs/*.l | ForEach-Object {
./l_lang.exe $_.FullName -o test.exe
./test.exe
}
```
## 当前技术债务 (优先级排序)
1. **高**: analyze_expr 膨胀 (350+ 行),match/sema 无单元测试
2. **中**: parser.c 825 行单文件,codegen.c 824 行单文件,match 脱糖在 parser 非独立 pass,`[Point; N]` 未实现
3. **低**: TypeKind 耦合 (改 7+ 文件),AST Visitor 缺失,CHANGELOG 未更新 v0.4/v0.5,LLVM 22 无 mem2reg C API
+4 -1
View File
@@ -81,9 +81,12 @@ graph TB
| 类型 | 语法 | 示例 | | 类型 | 语法 | 示例 |
|------|------|------| |------|------|------|
| 32 位有符号整数 | `i32` | `100` |
| 64 位有符号整数 | `i64` | `42`, `-7` | | 64 位有符号整数 | `i64` | `42`, `-7` |
| 64 位无符号整数 | `u64` | `999` |
| 64 位浮点数 | `f64` | `3.14` | | 64 位浮点数 | `f64` | `3.14` |
| 布尔值 | `bool` | `true`, `false` | | 布尔值 | `bool` | `true`, `false` |
| 字符 | `char` | `'A'` |
| 字符串 | `str` | `"hello"` | | 字符串 | `str` | `"hello"` |
| 结构体 | `struct` | `Point { x: i64, y: i64 }` | | 结构体 | `struct` | `Point { x: i64, y: i64 }` |
| 枚举 | `enum` | `Color { Red, Green, Blue }` | | 枚举 | `enum` | `Color { Red, Green, Blue }` |
@@ -92,7 +95,7 @@ graph TB
| 类型别名 | `type` | `type Meters = i64;` | | 类型别名 | `type` | `type Meters = i64;` |
- `let` 不可变 + `var` 可变,类型推断 - `let` 不可变 + `var` 可变,类型推断
- `i64` → `f64` 自动提升 - `i32` → `i64` → `f64` 自动加宽,`char` 可隐式转为整数
### 控制流 ### 控制流
+6
View File
@@ -7,9 +7,12 @@
// === 类型系统 === // === 类型系统 ===
typedef enum { typedef enum {
TYPE_I32,
TYPE_I64, TYPE_I64,
TYPE_U64,
TYPE_F64, TYPE_F64,
TYPE_BOOL, TYPE_BOOL,
TYPE_CHAR,
TYPE_STR, TYPE_STR,
TYPE_VOID, TYPE_VOID,
TYPE_STRUCT, // 结构体类型 TYPE_STRUCT, // 结构体类型
@@ -21,9 +24,12 @@ typedef enum {
static inline const char* type_name(TypeKind kind) { static inline const char* type_name(TypeKind kind) {
switch (kind) { switch (kind) {
case TYPE_I32: return "i32";
case TYPE_I64: return "i64"; case TYPE_I64: return "i64";
case TYPE_U64: return "u64";
case TYPE_F64: return "f64"; case TYPE_F64: return "f64";
case TYPE_BOOL: return "bool"; case TYPE_BOOL: return "bool";
case TYPE_CHAR: return "char";
case TYPE_STR: return "str"; case TYPE_STR: return "str";
case TYPE_VOID: return "void"; case TYPE_VOID: return "void";
case TYPE_STRUCT: return "struct"; case TYPE_STRUCT: return "struct";
+11 -3
View File
@@ -110,9 +110,9 @@ AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, SourceLoc lo
return n; return n;
} }
AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, SourceLoc loc) { AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, const char** arg_names, size_t count, SourceLoc loc) {
NEW(alloc, AST_CALL_EXPR); NEW(alloc, AST_CALL_EXPR);
n->as.call.name = name; n->as.call.args = args; n->as.call.arg_count = count; n->as.call.name = name; n->as.call.args = args; n->as.call.arg_names = arg_names; n->as.call.arg_count = count;
return n; return n;
} }
@@ -137,6 +137,13 @@ AstNode* ast_make_literal_bool(void* alloc, bool val, SourceLoc loc) {
return n; return n;
} }
/*
 * Build a literal-expression node that represents a character literal.
 *
 * alloc: allocator handle handed to the NEW macro.
 * val:   character code; stored in the literal's i64_val slot.
 * loc:   source location; not referenced directly in this body
 *        (presumably consumed by NEW -- confirm against the macro).
 *
 * Returns the node `n` produced by NEW, tagged TYPE_CHAR both as its
 * literal type and as its node type.
 */
AstNode* ast_make_literal_char(void* alloc, int val, SourceLoc loc) {
    NEW(alloc, AST_LITERAL_EXPR);

    /* Node-level type tag. */
    n->type.kind = TYPE_CHAR;

    /* Literal payload: kind tag plus the character code. */
    n->as.literal.lit_type = TYPE_CHAR;
    n->as.literal.i64_val = val;

    return n;
}
AstNode* ast_make_literal_str(void* alloc, const char* val, SourceLoc loc) { AstNode* ast_make_literal_str(void* alloc, const char* val, SourceLoc loc) {
NEW(alloc, AST_LITERAL_EXPR); NEW(alloc, AST_LITERAL_EXPR);
n->as.literal.lit_type = TYPE_STR; n->as.literal.str_val = val; n->as.literal.lit_type = TYPE_STR; n->as.literal.str_val = val;
@@ -235,11 +242,12 @@ AstNode* ast_make_impl_block(void* alloc, const char* struct_name, AstNode** met
} }
AstNode* ast_make_method_call(void* alloc, AstNode* receiver, const char* method, AstNode* ast_make_method_call(void* alloc, AstNode* receiver, const char* method,
AstNode** args, size_t count, SourceLoc loc) { AstNode** args, const char** arg_names, size_t count, SourceLoc loc) {
NEW(alloc, AST_METHOD_CALL); NEW(alloc, AST_METHOD_CALL);
n->as.method_call.receiver = receiver; n->as.method_call.receiver = receiver;
n->as.method_call.method_name = method; n->as.method_call.method_name = method;
n->as.method_call.args = args; n->as.method_call.args = args;
n->as.method_call.arg_names = arg_names;
n->as.method_call.arg_count = count; n->as.method_call.arg_count = count;
return n; return n;
} }
+5 -4
View File
@@ -89,7 +89,7 @@ struct AstNode {
// AST_UNARY_EXPR // AST_UNARY_EXPR
struct { BinaryOp op; struct AstNode* operand; } unary; struct { BinaryOp op; struct AstNode* operand; } unary;
// AST_CALL_EXPR // AST_CALL_EXPR
struct { const char* name; struct AstNode** args; size_t arg_count; } call; struct { const char* name; struct AstNode** args; const char** arg_names; size_t arg_count; } call;
// AST_LITERAL_EXPR // AST_LITERAL_EXPR
struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; const char* str_val; }; } literal; struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; const char* str_val; }; } literal;
// AST_IDENT_EXPR // AST_IDENT_EXPR
@@ -114,7 +114,7 @@ struct AstNode {
// AST_IMPL_BLOCK // AST_IMPL_BLOCK
struct { const char* struct_name; struct AstNode** methods; size_t method_count; } impl_block; struct { const char* struct_name; struct AstNode** methods; size_t method_count; } impl_block;
// AST_METHOD_CALL // AST_METHOD_CALL
struct { struct AstNode* receiver; const char* method_name; struct AstNode** args; size_t arg_count; } method_call; struct { struct AstNode* receiver; const char* method_name; struct AstNode** args; const char** arg_names; size_t arg_count; } method_call;
} as; } as;
}; };
@@ -138,10 +138,11 @@ AstNode* ast_make_return(void* alloc, AstNode* expr, SourceLoc loc);
AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, SourceLoc loc); AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, SourceLoc loc);
AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, SourceLoc loc); AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, SourceLoc loc);
AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, SourceLoc loc); AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, SourceLoc loc);
AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, SourceLoc loc); AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, const char** arg_names, size_t count, SourceLoc loc);
AstNode* ast_make_literal_i64(void* alloc, int64_t val, SourceLoc loc); AstNode* ast_make_literal_i64(void* alloc, int64_t val, SourceLoc loc);
AstNode* ast_make_literal_f64(void* alloc, double val, SourceLoc loc); AstNode* ast_make_literal_f64(void* alloc, double val, SourceLoc loc);
AstNode* ast_make_literal_bool(void* alloc, bool val, SourceLoc loc); AstNode* ast_make_literal_bool(void* alloc, bool val, SourceLoc loc);
AstNode* ast_make_literal_char(void* alloc, int val, SourceLoc loc);
AstNode* ast_make_literal_str(void* alloc, const char* val, SourceLoc loc); AstNode* ast_make_literal_str(void* alloc, const char* val, SourceLoc loc);
AstNode* ast_make_ident(void* alloc, const char* name, SourceLoc loc); AstNode* ast_make_ident(void* alloc, const char* name, SourceLoc loc);
AstNode* ast_make_struct_decl(void* alloc, const char* name, AstNode** fields, size_t count, SourceLoc loc); AstNode* ast_make_struct_decl(void* alloc, const char* name, AstNode** fields, size_t count, SourceLoc loc);
@@ -154,6 +155,6 @@ AstNode* ast_make_enum_variant(void* alloc, const char* enum_name, const char* v
AstNode* ast_make_index_expr(void* alloc, AstNode* array, AstNode* index, SourceLoc loc); AstNode* ast_make_index_expr(void* alloc, AstNode* array, AstNode* index, SourceLoc loc);
AstNode* ast_make_array_assign(void* alloc, const char* name, AstNode* index, AstNode* value, SourceLoc loc); AstNode* ast_make_array_assign(void* alloc, const char* name, AstNode* index, AstNode* value, SourceLoc loc);
AstNode* ast_make_impl_block(void* alloc, const char* struct_name, AstNode** methods, size_t count, SourceLoc loc); AstNode* ast_make_impl_block(void* alloc, const char* struct_name, AstNode** methods, size_t count, SourceLoc loc);
AstNode* ast_make_method_call(void* alloc, AstNode* receiver, const char* method, AstNode** args, size_t count, SourceLoc loc); AstNode* ast_make_method_call(void* alloc, AstNode* receiver, const char* method, AstNode** args, const char** arg_names, size_t count, SourceLoc loc);
#endif #endif
+26
View File
@@ -59,9 +59,12 @@ typedef struct {
// === 类型映射(需要 Context)=== // === 类型映射(需要 Context)===
static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) { static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) {
switch (kind) { switch (kind) {
case TYPE_I32: return LLVMInt32TypeInContext(ctx->context);
case TYPE_I64: return LLVMInt64TypeInContext(ctx->context); case TYPE_I64: return LLVMInt64TypeInContext(ctx->context);
case TYPE_U64: return LLVMInt64TypeInContext(ctx->context);
case TYPE_F64: return LLVMDoubleTypeInContext(ctx->context); case TYPE_F64: return LLVMDoubleTypeInContext(ctx->context);
case TYPE_BOOL: return LLVMInt1TypeInContext(ctx->context); case TYPE_BOOL: return LLVMInt1TypeInContext(ctx->context);
case TYPE_CHAR: return LLVMInt8TypeInContext(ctx->context);
case TYPE_STR: return LLVMPointerType(LLVMInt8TypeInContext(ctx->context), 0); case TYPE_STR: return LLVMPointerType(LLVMInt8TypeInContext(ctx->context), 0);
case TYPE_STRUCT: case TYPE_STRUCT:
case TYPE_ENUM: return LLVMInt64TypeInContext(ctx->context); case TYPE_ENUM: return LLVMInt64TypeInContext(ctx->context);
@@ -73,7 +76,10 @@ static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) {
static LLVMValueRef to_llvm_const(LLVMTypeRef ty, AstNode* lit) { static LLVMValueRef to_llvm_const(LLVMTypeRef ty, AstNode* lit) {
switch (lit->as.literal.lit_type) { switch (lit->as.literal.lit_type) {
case TYPE_I32:
case TYPE_I64: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, true); case TYPE_I64: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, true);
case TYPE_U64: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, false);
case TYPE_CHAR: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, false);
case TYPE_F64: return LLVMConstReal(ty, lit->as.literal.f64_val); case TYPE_F64: return LLVMConstReal(ty, lit->as.literal.f64_val);
case TYPE_BOOL: return LLVMConstInt(ty, lit->as.literal.bool_val ? 1 : 0, false); case TYPE_BOOL: return LLVMConstInt(ty, lit->as.literal.bool_val ? 1 : 0, false);
default: return NULL; default: return NULL;
@@ -127,6 +133,18 @@ static LLVMTypeRef find_struct_type(CgCtx* ctx, const char* name) {
return NULL; return NULL;
} }
// Coerce an integer value to the target LLVM integer type.
//
// ctx:     codegen context; its builder emits the cast instruction.
// val:     the integer value to convert.
// from_ty: LLVM type of `val`; must be an integer type.
// to_ty:   destination LLVM type; must be an integer type.
//
// Returns `val` unchanged when no width change is needed, a sign-extended
// value when widening, or a truncated value when narrowing.
//
// NOTE(review): widening always sign-extends; no zero-extension is ever
// emitted, so a source value meant to be unsigned is widened as if it were
// signed. Fixing that needs signedness information this signature does not
// carry.
static LLVMValueRef coerce_int(CgCtx* ctx, LLVMValueRef val,
                               LLVMTypeRef from_ty, LLVMTypeRef to_ty) {
    if (from_ty == to_ty) return val;

    // LLVMGetIntTypeWidth returns unsigned; keep the widths unsigned.
    unsigned from_w = LLVMGetIntTypeWidth(from_ty);
    unsigned to_w = LLVMGetIntTypeWidth(to_ty);

    // Distinct type handles of equal width: a same-width trunc is invalid
    // IR, so there is no cast to emit.
    if (from_w == to_w) return val;

    if (from_w < to_w) {
        return LLVMBuildSExt(ctx->builder, val, to_ty, "sext");
    }
    return LLVMBuildTrunc(ctx->builder, val, to_ty, "trunc");
}
// 从 TypeInfo 生成 LLVM 类型(支持数组、结构体等复合类型) // 从 TypeInfo 生成 LLVM 类型(支持数组、结构体等复合类型)
static LLVMTypeRef type_info_to_llvm(CgCtx* ctx, const TypeInfo* ti) { static LLVMTypeRef type_info_to_llvm(CgCtx* ctx, const TypeInfo* ti) {
switch (ti->kind) { switch (ti->kind) {
@@ -271,6 +289,8 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
if (strcmp(node->as.call.name, "print_i64") == 0) { if (strcmp(node->as.call.name, "print_i64") == 0) {
LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]); LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]);
if (!arg) return NULL; if (!arg) return NULL;
LLVMTypeRef i64_ty = LLVMInt64TypeInContext(ctx->context);
arg = coerce_int(ctx, arg, LLVMTypeOf(arg), i64_ty);
LLVMValueRef fmt = LLVMBuildGlobalStringPtr(ctx->builder, "%lld\n", "fmt_i64"); LLVMValueRef fmt = LLVMBuildGlobalStringPtr(ctx->builder, "%lld\n", "fmt_i64");
LLVMValueRef printf_args[] = { fmt, arg }; LLVMValueRef printf_args[] = { fmt, arg };
return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn, return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn,
@@ -492,6 +512,12 @@ static void codegen_stmt(CgCtx* ctx, AstNode* node) {
// 尝试生成 init 值;数组类型可能 init 失败 (自引用占位符) // 尝试生成 init 值;数组类型可能 init 失败 (自引用占位符)
LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init); LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init);
if (init_val) { if (init_val) {
// 若 init LLVM 类型与 alloca 类型不同,强制转换(如 i64→i32)
LLVMTypeRef init_ty = LLVMTypeOf(init_val);
if (init_ty != var_type && LLVMGetTypeKind(init_ty) == LLVMIntegerTypeKind
&& LLVMGetTypeKind(var_type) == LLVMIntegerTypeKind) {
init_val = coerce_int(ctx, init_val, init_ty, var_type);
}
LLVMBuildStore(ctx->builder, init_val, alloca); LLVMBuildStore(ctx->builder, init_val, alloca);
} else if (node->type.kind == TYPE_ARRAY) { } else if (node->type.kind == TYPE_ARRAY) {
// 数组声明: init 失败是预期的 (自引用), 存储零初始化 // 数组声明: init 失败是预期的 (自引用), 存储零初始化
+19 -4
View File
@@ -56,13 +56,14 @@ static TokenKind check_keyword(const Token* tok) {
#define KW(s, k) if (tok->length == sizeof(s)-1 && memcmp(tok->start, s, sizeof(s)-1) == 0) return k #define KW(s, k) if (tok->length == sizeof(s)-1 && memcmp(tok->start, s, sizeof(s)-1) == 0) return k
KW("fn", TOK_FN); KW("let", TOK_LET); KW("fn", TOK_FN); KW("let", TOK_LET);
KW("var", TOK_VAR); KW("var", TOK_VAR);
KW("if", TOK_IF); KW("else", TOK_ELSE); KW("if", TOK_IF); KW("else", TOK_ELSE); KW("guard", TOK_GUARD);
KW("while", TOK_WHILE); KW("for", TOK_FOR); KW("in", TOK_IN); KW("while", TOK_WHILE); KW("for", TOK_FOR); KW("in", TOK_IN);
KW("to", TOK_TO); KW("to", TOK_TO);
KW("return", TOK_RETURN); KW("return", TOK_RETURN);
KW("i64", TOK_I64); KW("f64", TOK_F64); KW("i32", TOK_I32); KW("i64", TOK_I64);
KW("bool", TOK_BOOL); KW("str", TOK_STR); KW("u64", TOK_U64); KW("f64", TOK_F64);
KW("void", TOK_VOID); KW("bool", TOK_BOOL); KW("char", TOK_CHAR);
KW("str", TOK_STR); KW("void", TOK_VOID);
KW("struct", TOK_STRUCT); KW("type", TOK_TYPE); KW("struct", TOK_STRUCT); KW("type", TOK_TYPE);
KW("enum", TOK_ENUM); KW("extend", TOK_EXTEND); KW("match", TOK_MATCH); KW("enum", TOK_ENUM); KW("extend", TOK_EXTEND); KW("match", TOK_MATCH);
KW("_", TOK_UNDERSCORE); KW("_", TOK_UNDERSCORE);
@@ -101,6 +102,19 @@ Token* lex(Arena* a, const char* source, const char* filename,
char c = peek(&l); char c = peek(&l);
if (isdigit(c)) { tokens[idx++] = lex_number(&l); } if (isdigit(c)) { tokens[idx++] = lex_number(&l); }
else if (c == '\'') {
advance(&l); // 跳过开头的 '
int char_start = l.pos;
if (peek(&l) == '\\') advance(&l); // 转义字符: \n \t \\ \'
advance(&l); // 跳过字符内容
if (peek(&l) != '\'') {
*error = (ErrorInfo){.message="未闭合的字符字面量", .filename=filename, .line=line, .col=col};
return NULL;
}
int char_len = l.pos - char_start;
advance(&l); // 跳过结尾的 '
tokens[idx++] = make_token(&l, TOK_CHAR_LIT, char_start, char_len);
}
else if (c == '"') { else if (c == '"') {
advance(&l); // 跳过开头的 " advance(&l); // 跳过开头的 "
int start = l.pos; int start = l.pos;
@@ -137,6 +151,7 @@ Token* lex(Arena* a, const char* source, const char* filename,
else if (c == '>' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_GT_EQ, l.pos, 2); advance(&l); advance(&l); } else if (c == '>' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_GT_EQ, l.pos, 2); advance(&l); advance(&l); }
else if (c == '>') { tokens[idx++] = make_token(&l, TOK_GT, l.pos, 1); advance(&l); } else if (c == '>') { tokens[idx++] = make_token(&l, TOK_GT, l.pos, 1); advance(&l); }
else if (c == '&' && peek_next(&l) == '&') { tokens[idx++] = make_token(&l, TOK_AND_AND, l.pos, 2); advance(&l); advance(&l); } else if (c == '&' && peek_next(&l) == '&') { tokens[idx++] = make_token(&l, TOK_AND_AND, l.pos, 2); advance(&l); advance(&l); }
else if (c == '|' && peek_next(&l) == '>') { tokens[idx++] = make_token(&l, TOK_PIPE, l.pos, 2); advance(&l); advance(&l); }
else if (c == '|' && peek_next(&l) == '|') { tokens[idx++] = make_token(&l, TOK_PIPE_PIPE, l.pos, 2); advance(&l); advance(&l); } else if (c == '|' && peek_next(&l) == '|') { tokens[idx++] = make_token(&l, TOK_PIPE_PIPE, l.pos, 2); advance(&l); advance(&l); }
else if (c == '.') { tokens[idx++] = make_token(&l, TOK_DOT, l.pos, 1); advance(&l); } else if (c == '.') { tokens[idx++] = make_token(&l, TOK_DOT, l.pos, 1); advance(&l); }
else if (c == '[') { tokens[idx++] = make_token(&l, TOK_LBRACKET, l.pos, 1); advance(&l); } else if (c == '[') { tokens[idx++] = make_token(&l, TOK_LBRACKET, l.pos, 1); advance(&l); }
+9 -5
View File
@@ -5,19 +5,21 @@
#include <inttypes.h> #include <inttypes.h>
static const char* NAMES[] = { static const char* NAMES[] = {
[TOK_FN] = "fn", [TOK_LET] = "let", [TOK_VAR] = "var", [TOK_IF] = "if", [TOK_FN] = "fn", [TOK_LET] = "let", [TOK_VAR] = "var", [TOK_IF] = "if", [TOK_GUARD] = "guard",
[TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_FOR] = "for", [TOK_IN] = "in", [TOK_RETURN] = "return", [TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_FOR] = "for", [TOK_IN] = "in", [TOK_RETURN] = "return",
[TOK_STRUCT] = "struct", [TOK_TYPE] = "type", [TOK_ENUM] = "enum", [TOK_EXTEND] = "extend", [TOK_STRUCT] = "struct", [TOK_TYPE] = "type", [TOK_ENUM] = "enum", [TOK_EXTEND] = "extend",
[TOK_MATCH] = "match", [TOK_MATCH] = "match",
[TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_STR] = "str", [TOK_VOID] = "void", [TOK_I32] = "i32", [TOK_I64] = "i64", [TOK_U64] = "u64", [TOK_F64] = "f64",
[TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", [TOK_STR_LIT] = "字符串", [TOK_BOOL] = "bool", [TOK_CHAR] = "char", [TOK_STR] = "str", [TOK_VOID] = "void",
[TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数",
[TOK_CHAR_LIT] = "字符", [TOK_STR_LIT] = "字符串",
[TOK_TRUE] = "true", [TOK_FALSE] = "false", [TOK_TRUE] = "true", [TOK_FALSE] = "false",
[TOK_IDENT] = "标识符", [TOK_UNDERSCORE] = "_", [TOK_IDENT] = "标识符", [TOK_UNDERSCORE] = "_",
[TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*", [TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*",
[TOK_SLASH] = "/", [TOK_PERCENT] = "%", [TOK_SLASH] = "/", [TOK_PERCENT] = "%",
[TOK_EQ_EQ] = "==", [TOK_BANG_EQ] = "!=", [TOK_EQ_EQ] = "==", [TOK_BANG_EQ] = "!=",
[TOK_LT] = "<", [TOK_GT] = ">", [TOK_LT_EQ] = "<=", [TOK_GT_EQ] = ">=", [TOK_LT] = "<", [TOK_GT] = ">", [TOK_LT_EQ] = "<=", [TOK_GT_EQ] = ">=",
[TOK_AND_AND] = "&&", [TOK_PIPE_PIPE] = "||", [TOK_BANG] = "!", [TOK_AND_AND] = "&&", [TOK_PIPE_PIPE] = "||", [TOK_PIPE] = "|>", [TOK_BANG] = "!",
[TOK_ARROW] = "->", [TOK_TO] = "to", [TOK_MATCH_ARROW] = "=>", [TOK_ARROW] = "->", [TOK_TO] = "to", [TOK_MATCH_ARROW] = "=>",
[TOK_PLUS_EQ] = "+=", [TOK_MINUS_EQ] = "-=", [TOK_STAR_EQ] = "*=", [TOK_SLASH_EQ] = "/=", [TOK_PLUS_EQ] = "+=", [TOK_MINUS_EQ] = "-=", [TOK_STAR_EQ] = "*=", [TOK_SLASH_EQ] = "/=",
[TOK_LPAREN] = "(", [TOK_RPAREN] = ")", [TOK_LPAREN] = "(", [TOK_RPAREN] = ")",
@@ -34,7 +36,9 @@ const char* tok_name(TokenKind kind) {
} }
bool tok_is_type(TokenKind kind) { bool tok_is_type(TokenKind kind) {
return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_STR || kind == TOK_VOID; return kind == TOK_I32 || kind == TOK_I64 || kind == TOK_U64
|| kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_CHAR
|| kind == TOK_STR || kind == TOK_VOID;
} }
int64_t tok_int_value(const Token* tok) { int64_t tok_int_value(const Token* tok) {
+4 -4
View File
@@ -6,18 +6,18 @@
// === Token 类型枚举 === // === Token 类型枚举 ===
typedef enum { typedef enum {
// 关键字 // 关键字
TOK_FN, TOK_LET, TOK_VAR, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_FOR, TOK_IN, TOK_RETURN, TOK_FN, TOK_LET, TOK_VAR, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_FOR, TOK_IN, TOK_RETURN, TOK_GUARD,
TOK_STRUCT, TOK_TYPE, TOK_ENUM, TOK_EXTEND, TOK_MATCH, TOK_STRUCT, TOK_TYPE, TOK_ENUM, TOK_EXTEND, TOK_MATCH,
// 类型关键字 // 类型关键字
TOK_I64, TOK_F64, TOK_BOOL, TOK_STR, TOK_VOID, TOK_I32, TOK_I64, TOK_U64, TOK_F64, TOK_BOOL, TOK_CHAR, TOK_STR, TOK_VOID,
// 字面量 // 字面量
TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE, TOK_STR_LIT, TOK_INT_LIT, TOK_FLOAT_LIT, TOK_CHAR_LIT, TOK_TRUE, TOK_FALSE, TOK_STR_LIT,
// 标识符 // 标识符
TOK_IDENT, TOK_UNDERSCORE, TOK_IDENT, TOK_UNDERSCORE,
// 运算符 // 运算符
TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT, TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ, TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG, TOK_AND_AND, TOK_PIPE_PIPE, TOK_PIPE, TOK_BANG,
TOK_ARROW, TOK_TO, TOK_MATCH_ARROW, TOK_ARROW, TOK_TO, TOK_MATCH_ARROW,
TOK_PLUS_EQ, TOK_MINUS_EQ, TOK_STAR_EQ, TOK_SLASH_EQ, TOK_PLUS_EQ, TOK_MINUS_EQ, TOK_STAR_EQ, TOK_SLASH_EQ,
// 分隔符 // 分隔符
+128 -9
View File
@@ -31,6 +31,7 @@ static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg
// === 运算符优先级定义 === // === 运算符优先级定义 ===
typedef enum { typedef enum {
PREC_NONE = 0, PREC_NONE = 0,
PREC_PIPE = 10,
PREC_OR = 20, PREC_OR = 20,
PREC_AND = 30, PREC_AND = 30,
PREC_COMPARE = 40, PREC_COMPARE = 40,
@@ -89,17 +90,59 @@ static AstNode* parse_group(Parser* p, ErrorInfo* error) {
return expr; return expr;
} }
static AstNode* parse_literal(Parser* p) { static AstNode* parse_literal(Parser* p, ErrorInfo* error) {
const Token* t = advance(p); const Token* t = advance(p);
switch (t->kind) { switch (t->kind) {
case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), tok_loc(t)); case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), tok_loc(t));
case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), tok_loc(t)); case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), tok_loc(t));
case TOK_CHAR_LIT: {
int64_t val = 0;
if (t->length >= 2 && t->start[0] == '\\') {
switch (t->start[1]) {
case 'n': val = '\n'; break;
case 't': val = '\t'; break;
case '\\': val = '\\'; break;
case '\'': val = '\''; break;
default: val = t->start[1]; break;
}
} else {
val = (unsigned char)t->start[0];
}
return ast_make_literal_char(p->arena, (int)val, tok_loc(t));
}
case TOK_TRUE: return ast_make_literal_bool(p->arena, true, tok_loc(t)); case TOK_TRUE: return ast_make_literal_bool(p->arena, true, tok_loc(t));
case TOK_FALSE: return ast_make_literal_bool(p->arena, false, tok_loc(t)); case TOK_FALSE: return ast_make_literal_bool(p->arena, false, tok_loc(t));
case TOK_STR_LIT: { case TOK_STR_LIT: {
char* str = arena_alloc_impl(p->arena, t->length + 1); char* str = arena_alloc_impl(p->arena, t->length + 1);
memcpy(str, t->start, t->length); memcpy(str, t->start, t->length);
str[t->length] = '\0'; str[t->length] = '\0';
// 字符串插值: "Hello, \(name)!" → "Hello, " + name + "!"
char* interp = strstr(str, "\\(");
if (interp) {
*interp = '\0'; // 截断前半部分
char* pre = str;
char* expr_start = interp + 2; // 跳过 \(
char* close = strchr(expr_start, ')');
if (!close) {
error->message = "字符串插值缺少 ')'"; error->filename = p->filename;
error->line = t->line; error->col = t->col; return NULL;
}
*close = '\0';
char* post = close + 1;
// 生成: pre + expr + post
AstNode* result = ast_make_literal_str(p->arena,
arena_strdup_impl(p->arena, pre, strlen(pre)), tok_loc(t));
// 将插值表达式按标识符解析
AstNode* expr = ast_make_ident(p->arena,
arena_strdup_impl(p->arena, expr_start, strlen(expr_start)), tok_loc(t));
result = ast_make_binary(p->arena, OP_ADD, result, expr, tok_loc(t));
if (post[0] != '\0') {
AstNode* post_str = ast_make_literal_str(p->arena,
arena_strdup_impl(p->arena, post, strlen(post)), tok_loc(t));
result = ast_make_binary(p->arena, OP_ADD, result, post_str, tok_loc(t));
}
return result;
}
return ast_make_literal_str(p->arena, str, tok_loc(t)); return ast_make_literal_str(p->arena, str, tok_loc(t));
} }
default: return NULL; default: return NULL;
@@ -170,12 +213,25 @@ static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
// 函数调用: name(...) // 函数调用: name(...)
if (match(p, TOK_LPAREN)) { if (match(p, TOK_LPAREN)) {
AstNode* args[16]; int arg_count = 0; AstNode* args[16]; const char* arg_names[16]; int arg_count = 0;
bool seen_named = false;
while (peek(p)->kind != TOK_RPAREN && !error->message) { while (peek(p)->kind != TOK_RPAREN && !error->message) {
if (arg_count >= 16) { if (arg_count >= 16) {
error->message = "函数参数过多"; error->filename = p->filename; error->message = "函数参数过多"; error->filename = p->filename;
error->line = peek(p)->line; error->col = peek(p)->col; return NULL; error->line = peek(p)->line; error->col = peek(p)->col; return NULL;
} }
// 命名参数: name: expr
if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) {
const Token* aname = advance(p); advance(p); // 跳过标识符和 ':'
arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length);
seen_named = true;
} else {
if (seen_named) {
error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename;
error->line = peek(p)->line; error->col = peek(p)->col; return NULL;
}
arg_names[arg_count] = NULL;
}
args[arg_count] = parse_expr(p, error); args[arg_count] = parse_expr(p, error);
if (!args[arg_count]) return NULL; if (!args[arg_count]) return NULL;
arg_count++; arg_count++;
@@ -185,8 +241,11 @@ static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
const char** name_arr = seen_named
? memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*))
: NULL;
return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length), return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length),
arg_arr, arg_count, tok_loc(name)); arg_arr, name_arr, arg_count, tok_loc(name));
} }
return ast_make_ident(p->arena, return ast_make_ident(p->arena,
arena_strdup_impl(p->arena, name->start, name->length), arena_strdup_impl(p->arena, name->start, name->length),
@@ -204,9 +263,10 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error
} else if (tok->kind == TOK_LPAREN) { } else if (tok->kind == TOK_LPAREN) {
left = parse_group(p, error); left = parse_group(p, error);
} else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT || } else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT ||
tok->kind == TOK_CHAR_LIT ||
tok->kind == TOK_TRUE || tok->kind == TOK_FALSE || tok->kind == TOK_TRUE || tok->kind == TOK_FALSE ||
tok->kind == TOK_STR_LIT) { tok->kind == TOK_STR_LIT) {
left = parse_literal(p); left = parse_literal(p, error);
} else if (tok->kind == TOK_IDENT) { } else if (tok->kind == TOK_IDENT) {
left = parse_ident_or_call(p, error); left = parse_ident_or_call(p, error);
} else { } else {
@@ -220,6 +280,33 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error
while (!error->message) { while (!error->message) {
TokenKind kind = peek(p)->kind; TokenKind kind = peek(p)->kind;
// 管道: expr |> func(args...) → func(args..., expr)
if (kind == TOK_PIPE) {
Precedence prec = PREC_PIPE;
if (prec <= min_prec) break;
const Token* op = advance(p);
// RHS 必须是函数调用(不带管道时解析)
AstNode* right = parse_expr_prec(p, prec, error);
if (!right) return NULL;
if (right->kind != AST_CALL_EXPR) {
error->message = "管道右侧必须是函数调用"; error->filename = p->filename;
error->line = op->line; error->col = op->col;
return NULL;
}
// 将 left 作为第一个参数插入(F#/Elixir 风格)
if (right->as.call.arg_count >= 16) {
error->message = "管道参数过多"; error->filename = p->filename;
error->line = op->line; error->col = op->col; return NULL;
}
AstNode** new_args = arena_alloc_impl(p->arena, (right->as.call.arg_count + 1) * sizeof(AstNode*));
new_args[0] = left;
memcpy(new_args + 1, right->as.call.args, right->as.call.arg_count * sizeof(AstNode*));
right->as.call.args = new_args;
right->as.call.arg_count++;
left = right;
continue;
}
// 后置字段访问: expr.field 或 expr.method(args) // 后置字段访问: expr.field 或 expr.method(args)
if (kind == TOK_DOT) { if (kind == TOK_DOT) {
advance(p); // 跳过 '.' advance(p); // 跳过 '.'
@@ -229,9 +316,18 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error
// 方法调用: expr.method(args) // 方法调用: expr.method(args)
if (peek(p)->kind == TOK_LPAREN) { if (peek(p)->kind == TOK_LPAREN) {
advance(p); // 跳过 '(' advance(p); // 跳过 '('
AstNode* args[16]; int arg_count = 0; AstNode* args[16]; const char* arg_names[16]; int arg_count = 0;
bool seen_named = false;
while (peek(p)->kind != TOK_RPAREN && !error->message) { while (peek(p)->kind != TOK_RPAREN && !error->message) {
if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) {
const Token* aname = advance(p); advance(p);
arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length);
seen_named = true;
} else {
if (seen_named) { error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
arg_names[arg_count] = NULL;
}
args[arg_count] = parse_expr(p, error); args[arg_count] = parse_expr(p, error);
if (!args[arg_count]) return NULL; if (!args[arg_count]) return NULL;
arg_count++; arg_count++;
@@ -240,7 +336,10 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
left = ast_make_method_call(p->arena, left, member_name, arg_arr, arg_count, tok_loc(field)); const char** name_arr = seen_named
? memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*))
: NULL;
left = ast_make_method_call(p->arena, left, member_name, arg_arr, name_arr, arg_count, tok_loc(field));
} else { } else {
left = ast_make_field_access(p->arena, left, member_name, tok_loc(field)); left = ast_make_field_access(p->arena, left, member_name, tok_loc(field));
} }
@@ -276,9 +375,16 @@ static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
// === 类型工具 === // === 类型工具 ===
static TypeKind token_to_type(TokenKind k) { static TypeKind token_to_type(TokenKind k) {
switch (k) { case TOK_I64: return TYPE_I64; case TOK_F64: return TYPE_F64; switch (k) {
case TOK_BOOL: return TYPE_BOOL; case TOK_STR: return TYPE_STR; case TOK_I32: return TYPE_I32;
default: return TYPE_VOID; } case TOK_I64: return TYPE_I64;
case TOK_U64: return TYPE_U64;
case TOK_F64: return TYPE_F64;
case TOK_BOOL: return TYPE_BOOL;
case TOK_CHAR: return TYPE_CHAR;
case TOK_STR: return TYPE_STR;
default: return TYPE_VOID;
}
} }
// === 类型表达式解析(内置类型/结构体名/数组类型)=== // === 类型表达式解析(内置类型/结构体名/数组类型)===
@@ -598,6 +704,19 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
return parse_match_stmt(p, error); return parse_match_stmt(p, error);
} }
if (t->kind == TOK_GUARD) {
// guard expr else { ... } → if !(expr) { ... }
const Token* guard_tok = advance(p);
AstNode* cond = parse_expr(p, error);
if (!cond) return NULL;
if (!expect(p, TOK_ELSE, error, "guard 缺少 'else'")) return NULL;
AstNode* body = parse_block(p, error);
if (!body) return NULL;
// 去糖: if !cond { body }
AstNode* not_cond = ast_make_unary(p->arena, OP_NOT, cond, tok_loc(guard_tok));
return ast_make_if(p->arena, not_cond, body, NULL, tok_loc(guard_tok));
}
if (t->kind == TOK_RETURN) { if (t->kind == TOK_RETURN) {
advance(p); advance(p);
if (match(p, TOK_SEMICOLON)) { if (match(p, TOK_SEMICOLON)) {
+99 -9
View File
@@ -10,13 +10,38 @@ static TypeKind promote(TypeKind a, TypeKind b) {
// 枚举在算术运算中视为 i64 // 枚举在算术运算中视为 i64
if (a == TYPE_ENUM) a = TYPE_I64; if (a == TYPE_ENUM) a = TYPE_I64;
if (b == TYPE_ENUM) b = TYPE_I64; if (b == TYPE_ENUM) b = TYPE_I64;
// char 在算术中提升为 i32
if (a == TYPE_CHAR) a = TYPE_I32;
if (b == TYPE_CHAR) b = TYPE_I32;
if (a == TYPE_F64 || b == TYPE_F64) return TYPE_F64; if (a == TYPE_F64 || b == TYPE_F64) return TYPE_F64;
if (a == TYPE_I64 || b == TYPE_I64) return TYPE_I64; if (a == TYPE_I64 || b == TYPE_I64) return TYPE_I64;
if (a == TYPE_U64 || b == TYPE_U64) return TYPE_U64;
if (a == TYPE_I32 || b == TYPE_I32) return TYPE_I32;
if (a == TYPE_BOOL || b == TYPE_BOOL) return TYPE_BOOL; if (a == TYPE_BOOL || b == TYPE_BOOL) return TYPE_BOOL;
return TYPE_ERROR; return TYPE_ERROR;
} }
static bool is_numeric(TypeKind t) { return t == TYPE_I64 || t == TYPE_F64 || t == TYPE_ENUM; } static bool is_numeric(TypeKind t) {
return t == TYPE_I32 || t == TYPE_I64 || t == TYPE_U64
|| t == TYPE_F64 || t == TYPE_CHAR || t == TYPE_ENUM;
}
// Reports whether a value of kind `from` may be implicitly converted to `to`.
//
// Identical kinds always convert. Otherwise TYPE_ENUM is treated as TYPE_I64
// on both sides and exactly these conversions are accepted:
//   char -> i32, i64, u64, f64
//   i32  -> i64, f64
//   i64  -> f64
//   u64  -> i64, f64
// Every other pair is rejected. In particular i64 -> u64 (signed to unsigned)
// is NOT implicit.
//
// NOTE(review): the equality test runs before enums are normalized, so
// enum -> i64 and i64 -> enum are reported as not convertible by this
// function — confirm that this is intended.
static bool can_implicit_convert(TypeKind from, TypeKind to) {
    if (from == to) return true;

    // Enums take part in conversions as i64.
    if (from == TYPE_ENUM) from = TYPE_I64;
    if (to == TYPE_ENUM) to = TYPE_I64;

    // char converts to every integer kind and to f64.
    if (from == TYPE_CHAR) return to == TYPE_I32 || to == TYPE_I64 || to == TYPE_U64 || to == TYPE_F64;
    // i32 may only be widened.
    if (from == TYPE_I32) return to == TYPE_I64 || to == TYPE_F64;
    // i64 converts to f64 only; it never becomes u64 implicitly.
    if (from == TYPE_I64) return to == TYPE_F64;
    // u64 converts to i64 or f64 (one direction only: i64 -> u64 is rejected above).
    if (from == TYPE_U64) return to == TYPE_F64 || to == TYPE_I64;

    return false;
}
static bool is_comparable(TypeKind a, TypeKind b) { static bool is_comparable(TypeKind a, TypeKind b) {
if (a == b) return true; if (a == b) return true;
// 枚举可以参与整数比较 // 枚举可以参与整数比较
@@ -166,6 +191,34 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena*
} }
break; break;
} }
// 命名参数重排序: 将命名 arg 按参数名映射到正确位置
if (node->as.call.arg_names) {
AstNode* reordered[16] = {0};
for (size_t i = 0; i < node->as.call.arg_count; i++) {
if (node->as.call.arg_names[i]) {
// 查找参数名匹配
bool found = false;
for (size_t j = 0; j < sym->param_count; j++) {
if (sym->param_names && sym->param_names[j] &&
strcmp(node->as.call.arg_names[i], sym->param_names[j]) == 0) {
reordered[j] = node->as.call.args[i];
found = true; break;
}
}
if (!found) {
error_add(errors, "<sema>", node->loc.line, node->loc.col,
"函数 '%s' 没有名为 '%s' 的参数",
node->as.call.name, node->as.call.arg_names[i]);
node->type.kind = TYPE_ERROR; return;
}
} else {
// 位置参数保持原位
reordered[i] = node->as.call.args[i];
}
}
// 填充未指定的命名参数(用 NULL 跳过,后续检查会报错)
memcpy(node->as.call.args, reordered, node->as.call.arg_count * sizeof(AstNode*));
}
for (size_t i = 0; i < node->as.call.arg_count; i++) { for (size_t i = 0; i < node->as.call.arg_count; i++) {
analyze_expr(node->as.call.args[i], scope, errors, a); analyze_expr(node->as.call.args[i], scope, errors, a);
TypeKind actual = node->as.call.args[i]->type.kind; TypeKind actual = node->as.call.args[i]->type.kind;
@@ -184,7 +237,10 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena*
actual_name ? actual_name : type_name(actual)); actual_name ? actual_name : type_name(actual));
} }
} else if (actual != expected && } else if (actual != expected &&
!(expected == TYPE_I64 && actual == TYPE_ENUM)) { !(expected == TYPE_I64 && actual == TYPE_ENUM) &&
!can_implicit_convert(actual, expected) &&
!(actual == TYPE_I64 && node->as.call.args[i]->kind == AST_LITERAL_EXPR
&& (expected == TYPE_I32 || expected == TYPE_U64 || expected == TYPE_CHAR))) {
error_add(errors, "<sema>", node->loc.line, node->loc.col, error_add(errors, "<sema>", node->loc.line, node->loc.col,
"参数 %zu 类型不匹配: 期望 '%s',得到 '%s'", "参数 %zu 类型不匹配: 期望 '%s',得到 '%s'",
i + 1, type_name(expected), type_name(actual)); i + 1, type_name(expected), type_name(actual));
@@ -374,13 +430,39 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena*
node->as.method_call.arg_count); node->as.method_call.arg_count);
node->type.kind = TYPE_ERROR; break; node->type.kind = TYPE_ERROR; break;
} }
// 命名参数重排序(同 CALL_EXPR 逻辑)
if (node->as.method_call.arg_names) {
AstNode* reordered[16] = {0};
for (size_t i = 0; i < node->as.method_call.arg_count; i++) {
if (node->as.method_call.arg_names[i]) {
bool found = false;
for (size_t j = 1; j < sym->param_count; j++) { // skip self
if (sym->param_names && sym->param_names[j] &&
strcmp(node->as.method_call.arg_names[i], sym->param_names[j]) == 0) {
reordered[j - 1] = node->as.method_call.args[i];
found = true; break;
}
}
if (!found) {
error_add(errors, "<sema>", node->loc.line, node->loc.col,
"方法 '%s' 没有名为 '%s' 的参数",
node->as.method_call.method_name, node->as.method_call.arg_names[i]);
node->type.kind = TYPE_ERROR; return;
}
} else {
reordered[i] = node->as.method_call.args[i];
}
}
memcpy(node->as.method_call.args, reordered, node->as.method_call.arg_count * sizeof(AstNode*));
}
// 对每个参数进行类型检查(跳过 self 参数,即 sym->param_types[0] 是 self 的类型) // 对每个参数进行类型检查(跳过 self 参数,即 sym->param_types[0] 是 self 的类型)
for (size_t i = 0; i < node->as.method_call.arg_count; i++) { for (size_t i = 0; i < node->as.method_call.arg_count; i++) {
analyze_expr(node->as.method_call.args[i], scope, errors, a); analyze_expr(node->as.method_call.args[i], scope, errors, a);
TypeKind actual = node->as.method_call.args[i]->type.kind; TypeKind actual = node->as.method_call.args[i]->type.kind;
TypeKind expected = sym->param_types[i + 1]; TypeKind expected = sym->param_types[i + 1];
if (actual != TYPE_ERROR && actual != expected && if (actual != TYPE_ERROR && actual != expected &&
!(expected == TYPE_I64 && actual == TYPE_ENUM)) { !(expected == TYPE_I64 && actual == TYPE_ENUM) &&
!can_implicit_convert(actual, expected)) {
if (expected == TYPE_STRUCT) { if (expected == TYPE_STRUCT) {
// 结构体类型参数:比较具体类型名 // 结构体类型参数:比较具体类型名
const char* actual_name = node->as.method_call.args[i]->type.struct_name; const char* actual_name = node->as.method_call.args[i]->type.struct_name;
@@ -501,10 +583,12 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
for (size_t i = 0; i < node->as.program.fn_count; i++) { for (size_t i = 0; i < node->as.program.fn_count; i++) {
AstNode* fn = node->as.program.functions[i]; AstNode* fn = node->as.program.functions[i];
TypeKind* pts = (TypeKind*)arena_alloc_impl(a, fn->as.function.param_count * sizeof(TypeKind)); TypeKind* pts = (TypeKind*)arena_alloc_impl(a, fn->as.function.param_count * sizeof(TypeKind));
const char** pnames = (const char**)arena_alloc_impl(a, fn->as.function.param_count * sizeof(const char*));
const char** pstruct_names = (const char**)arena_alloc_impl(a, fn->as.function.param_count * sizeof(const char*)); const char** pstruct_names = (const char**)arena_alloc_impl(a, fn->as.function.param_count * sizeof(const char*));
for (size_t j = 0; j < fn->as.function.param_count; j++) { for (size_t j = 0; j < fn->as.function.param_count; j++) {
TypeKind pt = fn->as.function.params[j]->as.parameter.type; TypeKind pt = fn->as.function.params[j]->as.parameter.type;
const char* psn = fn->as.function.params[j]->as.parameter.struct_type_name; const char* psn = fn->as.function.params[j]->as.parameter.struct_type_name;
const char* pn = fn->as.function.params[j]->as.parameter.name;
// 解析参数类型的别名 // 解析参数类型的别名
if (psn) { if (psn) {
Symbol* as = scope_lookup(scope, psn); Symbol* as = scope_lookup(scope, psn);
@@ -514,6 +598,7 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
} }
} }
pts[j] = pt; pts[j] = pt;
pnames[j] = pn;
pstruct_names[j] = psn; pstruct_names[j] = psn;
} }
// 解析返回类型的别名 // 解析返回类型的别名
@@ -528,7 +613,7 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
} }
scope_insert_function(scope, a, fn->as.function.name, scope_insert_function(scope, a, fn->as.function.name,
ret_t, ret_sn, ret_t, ret_sn,
pts, pstruct_names, pts, pnames, pstruct_names,
fn->as.function.param_count); fn->as.function.param_count);
} }
// 第三遍:分析每个函数体 // 第三遍:分析每个函数体
@@ -618,7 +703,12 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
} else { } else {
var_type = node->as.let_stmt.annot_type; var_type = node->as.let_stmt.annot_type;
} }
if (inferred != TYPE_ERROR && inferred != var_type) { bool literal_to_int = (inferred == TYPE_I64
&& node->as.let_stmt.init->kind == AST_LITERAL_EXPR
&& (var_type == TYPE_I32 || var_type == TYPE_U64 || var_type == TYPE_CHAR));
if (inferred != TYPE_ERROR && inferred != var_type
&& !can_implicit_convert(inferred, var_type)
&& !literal_to_int) {
error_add(errors, "<sema>", node->loc.line, node->loc.col, error_add(errors, "<sema>", node->loc.line, node->loc.col,
"变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'", "变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'",
node->as.let_stmt.name, node->as.let_stmt.name,
@@ -806,13 +896,13 @@ void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) {
// 注册内置函数 // 注册内置函数
TypeKind params_i64[] = {TYPE_I64}; TypeKind params_i64[] = {TYPE_I64};
scope_insert_function(global_scope, arena, "print_i64", TYPE_VOID, NULL, params_i64, NULL, 1); scope_insert_function(global_scope, arena, "print_i64", TYPE_VOID, NULL, params_i64, NULL, NULL, 1);
TypeKind params_f64[] = {TYPE_F64}; TypeKind params_f64[] = {TYPE_F64};
scope_insert_function(global_scope, arena, "print_f64", TYPE_VOID, NULL, params_f64, NULL, 1); scope_insert_function(global_scope, arena, "print_f64", TYPE_VOID, NULL, params_f64, NULL, NULL, 1);
TypeKind params_bool[] = {TYPE_BOOL}; TypeKind params_bool[] = {TYPE_BOOL};
scope_insert_function(global_scope, arena, "print_bool", TYPE_VOID, NULL, params_bool, NULL, 1); scope_insert_function(global_scope, arena, "print_bool", TYPE_VOID, NULL, params_bool, NULL, NULL, 1);
TypeKind params_str[] = {TYPE_STR}; TypeKind params_str[] = {TYPE_STR};
scope_insert_function(global_scope, arena, "print_str", TYPE_VOID, NULL, params_str, NULL, 1); scope_insert_function(global_scope, arena, "print_str", TYPE_VOID, NULL, params_str, NULL, NULL, 1);
analyze_node(ast, global_scope, errors, arena); analyze_node(ast, global_scope, errors, arena);
} }
+6 -4
View File
@@ -30,7 +30,7 @@ Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
if (!sym) return NULL; if (!sym) return NULL;
sym->name = name; sym->kind = kind; sym->type = type; sym->name = name; sym->kind = kind; sym->type = type;
sym->is_mut = false; sym->return_type = TYPE_VOID; sym->is_mut = false; sym->return_type = TYPE_VOID;
sym->param_types = NULL; sym->param_count = 0; sym->param_types = NULL; sym->param_names = NULL; sym->param_count = 0;
sym->struct_field_names = NULL; sym->struct_field_names = NULL;
sym->struct_field_types = NULL; sym->struct_field_types = NULL;
sym->struct_field_count = 0; sym->struct_field_count = 0;
@@ -46,7 +46,8 @@ Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name, Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
TypeKind ret, const char* ret_struct_name, TypeKind ret, const char* ret_struct_name,
TypeKind* pt, const char** pstruct_names, size_t pc) { TypeKind* pt, const char** pnames,
const char** pstruct_names, size_t pc) {
if (scope->head) { if (scope->head) {
for (Symbol* sym = scope->head; sym; sym = sym->next) { for (Symbol* sym = scope->head; sym; sym = sym->next) {
if (strcmp(sym->name, name) == 0) return NULL; if (strcmp(sym->name, name) == 0) return NULL;
@@ -58,6 +59,7 @@ Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
sym->return_type = ret; sym->return_type = ret;
sym->return_struct_type_name = ret_struct_name; sym->return_struct_type_name = ret_struct_name;
sym->param_types = pt; sym->param_types = pt;
sym->param_names = pnames;
sym->param_struct_names = pstruct_names; sym->param_struct_names = pstruct_names;
sym->param_count = pc; sym->param_count = pc;
sym->struct_field_names = NULL; sym->struct_field_names = NULL;
@@ -85,7 +87,7 @@ Symbol* scope_insert_struct(Scope* scope, void* alloc, const char* name,
if (!sym) return NULL; if (!sym) return NULL;
sym->name = name; sym->kind = SYM_STRUCT; sym->type = TYPE_STRUCT; sym->name = name; sym->kind = SYM_STRUCT; sym->type = TYPE_STRUCT;
sym->is_mut = false; sym->return_type = TYPE_VOID; sym->is_mut = false; sym->return_type = TYPE_VOID;
sym->param_types = NULL; sym->param_count = 0; sym->param_types = NULL; sym->param_names = NULL; sym->param_count = 0;
sym->struct_field_names = fnames; sym->struct_field_names = fnames;
sym->struct_field_types = ftypes; sym->struct_field_types = ftypes;
sym->struct_field_struct_names = fstruct_names; sym->struct_field_struct_names = fstruct_names;
@@ -130,7 +132,7 @@ Symbol* scope_insert_enum(Scope* scope, void* alloc, const char* name,
if (!sym) return NULL; if (!sym) return NULL;
sym->name = name; sym->kind = SYM_ENUM; sym->type = TYPE_ENUM; sym->name = name; sym->kind = SYM_ENUM; sym->type = TYPE_ENUM;
sym->is_mut = false; sym->return_type = TYPE_VOID; sym->is_mut = false; sym->return_type = TYPE_VOID;
sym->param_types = NULL; sym->param_count = 0; sym->param_types = NULL; sym->param_names = NULL; sym->param_count = 0;
sym->struct_field_names = vnames; sym->struct_field_names = vnames;
sym->struct_field_types = NULL; sym->struct_field_types = NULL;
sym->struct_field_struct_names = NULL; sym->struct_field_struct_names = NULL;
+3 -1
View File
@@ -15,6 +15,7 @@ typedef struct Symbol {
TypeKind return_type; TypeKind return_type;
const char* return_struct_type_name; // 返回类型为 struct 时的类型名 const char* return_struct_type_name; // 返回类型为 struct 时的类型名
TypeKind* param_types; TypeKind* param_types;
const char** param_names; // 参数名(用于命名参数匹配)
const char** param_struct_names; // 参数为 struct 时的类型名 const char** param_struct_names; // 参数为 struct 时的类型名
size_t param_count; size_t param_count;
// 结构体特有(SYM_STRUCT // 结构体特有(SYM_STRUCT
@@ -52,7 +53,8 @@ Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
// 插入函数符号 // 插入函数符号
Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name, Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
TypeKind ret, const char* ret_struct_name, TypeKind ret, const char* ret_struct_name,
TypeKind* pt, const char** pstruct_names, size_t pc); TypeKind* pt, const char** pnames,
const char** pstruct_names, size_t pc);
// 插入结构体符号 // 插入结构体符号
Symbol* scope_insert_struct(Scope* scope, void* alloc, const char* name, Symbol* scope_insert_struct(Scope* scope, void* alloc, const char* name,
+9
View File
@@ -0,0 +1,9 @@
// Test fixture: initializes i32, u64 and char variables and passes each one
// to print_i64, which takes an i64 parameter.
fn main() -> i64 {
let a: i32 = 100; // i64 literal → i32 variable (implicit conversion)
let b: u64 = 999; // i64 literal → u64 variable (implicit conversion)
let c: char = 'A'; // character literal
print_i64(a); // 100 (i32 is widened to i64 automatically)
print_i64(c); // 65 (char is widened to i64 automatically)
print_i64(b); // 999 (u64 passed to an i64 parameter — allowed)
return 0;
}
+11
View File
@@ -0,0 +1,11 @@
// Returns the absolute value of x.
fn abs(x: i64) -> i64 {
// The else block runs only when the guard condition (x >= 0) is false.
guard x >= 0 else { return -x; }
return x;
}
// Test fixture: prints abs() of a positive, a negative and a zero argument;
// the trailing comments give the expected output.
fn main() -> i64 {
print_i64(abs(10)); // 10
print_i64(abs(-5)); // 5
print_i64(abs(0)); // 0
return 0;
}
+17
View File
@@ -0,0 +1,17 @@
// Prints x, y, w and h in that order (one print_i64 call each) and returns 0.
// Nothing is drawn; the printed order makes it visible which value was bound
// to which parameter.
fn draw_rect(x: i64, y: i64, w: i64, h: i64) -> i64 {
print_i64(x);
print_i64(y);
print_i64(w);
print_i64(h);
return 0;
}
// Test fixture: calls draw_rect once with positional arguments and once with
// named arguments given in a different order than the parameter list.
fn main() -> i64 {
// Positional arguments
draw_rect(0, 0, 100, 200);
// Named arguments (any order)
draw_rect(w: 10, h: 20, x: 1, y: 2);
return 0;
}
+14
View File
@@ -0,0 +1,14 @@
// Returns x multiplied by 2.
fn double(x: i64) -> i64 {
return x * 2;
}
// Returns the sum of a and b.
fn add(a: i64, b: i64) -> i64 {
return a + b;
}
// Test fixture: chains two calls with the pipe operator and prints the result.
fn main() -> i64 {
// Pipe: 10 |> double() |> add(5) → add(double(10), 5) = 25
let result = 10 |> double() |> add(5);
print_i64(result); // 25
return 0;
}
+6
View File
@@ -0,0 +1,6 @@
// Test fixture: builds a greeting string that embeds \(name) and prints it.
// The expected output noted below implies that \(name) is replaced by the
// value of `name`.
fn main() -> i64 {
let name = "World";
let greeting = "Hello, \(name)!";
print_str(greeting); // "Hello, World!"
return 0;
}
+3 -3
View File
@@ -258,7 +258,7 @@ void test_codegen_enum() {
AstNode* c_ident = ast_make_ident(&a, "c", loc_at(1, 1)); AstNode* c_ident = ast_make_ident(&a, "c", loc_at(1, 1));
c_ident->type.kind = TYPE_ENUM; c_ident->type.kind = TYPE_ENUM;
AstNode* args[] = { c_ident }; AstNode* args[] = { c_ident };
AstNode* print_call = ast_make_call(&a, "print_i64", args, 1, loc_at(1, 1)); AstNode* print_call = ast_make_call(&a, "print_i64", args, NULL, 1, loc_at(1, 1));
/* return 0; */ /* return 0; */
AstNode* ret = ast_make_return(&a, ast_make_literal_i64(&a, 0, loc_at(1, 1)), loc_at(1, 1)); AstNode* ret = ast_make_return(&a, ast_make_literal_i64(&a, 0, loc_at(1, 1)), loc_at(1, 1));
@@ -323,7 +323,7 @@ void test_codegen_array() {
// print_i64(arr[0]); // print_i64(arr[0]);
AstNode* args[] = { idx_expr }; AstNode* args[] = { idx_expr };
AstNode* print_call = ast_make_call(&a, "print_i64", args, 1, loc_at(1, 1)); AstNode* print_call = ast_make_call(&a, "print_i64", args, NULL, 1, loc_at(1, 1));
// return 0; // return 0;
AstNode* ret = ast_make_return(&a, ast_make_literal_i64(&a, 0, loc_at(1, 1)), loc_at(1, 1)); AstNode* ret = ast_make_return(&a, ast_make_literal_i64(&a, 0, loc_at(1, 1)), loc_at(1, 1));
@@ -393,7 +393,7 @@ void test_codegen_method_call() {
AstNode* p_ident = ast_make_ident(&a, "p", loc_at(1, 1)); AstNode* p_ident = ast_make_ident(&a, "p", loc_at(1, 1));
p_ident->type.kind = TYPE_STRUCT; p_ident->type.kind = TYPE_STRUCT;
p_ident->type.struct_name = "Point"; p_ident->type.struct_name = "Point";
AstNode* method_call = ast_make_method_call(&a, p_ident, "get_x", NULL, 0, loc_at(1, 1)); AstNode* method_call = ast_make_method_call(&a, p_ident, "get_x", NULL, NULL, 0, loc_at(1, 1));
method_call->type.kind = TYPE_I64; method_call->type.kind = TYPE_I64;
AstNode* ret_main = ast_make_return(&a, method_call, loc_at(1, 1)); AstNode* ret_main = ast_make_return(&a, method_call, loc_at(1, 1));