From 90d081c3fd56a33dd34d1f9a062387c02ea01e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Sat, 6 Jun 2026 18:57:07 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20P0=E5=AE=A1=E6=9F=A5=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=20P2=E8=A1=A5=E5=BC=BA=20=E2=80=94=20parser.c=20=E6=8B=86?= =?UTF-8?q?=E5=88=86=20+=20=E6=B5=8B=E8=AF=95=E6=89=A9=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P0-③: parser.c 1211行 → parser.c 662行 + expr.c 498行 + parse_internal.h 71行 - expr.c: 表达式解析 (Pratt主循环/字面量/标识符/类型/运算符) - parser.c: 语句/声明/程序入口 (block/match/let/if/while/for/guard/fn/parse) - parse_internal.h: 共享 Parser struct + 内联辅助 + 向前声明 P2-①: parser 测试 5函数→20函数, 15断言→54断言 - 新增: struct声明、字面量类型、优先级链、guard去糖、命名参数 - 新增: 字段访问、方法调用、match、enum声明、for去糖、管道 - 新增: 类型别名、trait声明、数组类型、if表达式 Co-Authored-By: Claude Opus 4.7 --- src/parser/expr.c | 498 +++++++++++++++++++++++++++++++ src/parser/parse_internal.h | 71 +++++ src/parser/parser.c | 567 +----------------------------------- test/test_parser.c | 160 +++++++++- 4 files changed, 736 insertions(+), 560 deletions(-) create mode 100644 src/parser/expr.c create mode 100644 src/parser/parse_internal.h diff --git a/src/parser/expr.c b/src/parser/expr.c new file mode 100644 index 0000000..53e08b0 --- /dev/null +++ b/src/parser/expr.c @@ -0,0 +1,498 @@ +#include "parse_internal.h" +#include +#include + +// Recursion depth counter (one shared definition for the whole program) +int parse_depth = 0; + +// === Operator token → Precedence mapping (PREC_NONE for non-operators) === +Precedence tok_to_prec(TokenKind kind) { + switch (kind) { + case TOK_PIPE_PIPE: return PREC_OR; + case TOK_AND_AND: return PREC_AND; + case TOK_EQ_EQ: case TOK_BANG_EQ: + case TOK_LT: case TOK_GT: case TOK_LT_EQ: case TOK_GT_EQ: return PREC_COMPARE; + case TOK_PLUS: case TOK_MINUS: return PREC_TERM; + case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return PREC_FACTOR; + default: return PREC_NONE; + } +} + +// === Operator token → BinaryOp mapping (any other token falls back to OP_ADD) === +BinaryOp tok_to_binop(TokenKind kind) { + switch (kind) {
+ case TOK_PLUS: return OP_ADD; case TOK_MINUS: return OP_SUB; + case TOK_STAR: return OP_MUL; case TOK_SLASH: return OP_DIV; + case TOK_PERCENT: return OP_MOD; + case TOK_EQ_EQ: return OP_EQ; case TOK_BANG_EQ: return OP_NE; + case TOK_LT: return OP_LT; case TOK_GT: return OP_GT; + case TOK_LT_EQ: return OP_LE; case TOK_GT_EQ: return OP_GE; + case TOK_AND_AND: return OP_AND; case TOK_PIPE_PIPE: return OP_OR; + default: return OP_ADD; + } +} + +// === Prefix parsing === +AstNode* parse_unary(Parser* p, ErrorInfo* error) { + const Token* op = advance(p); + AstNode* operand = parse_expr_prec(p, PREC_UNARY, error); + if (!operand) return NULL; + BinaryOp uop = (op->kind == TOK_MINUS) ? OP_NEG : OP_NOT; + return ast_make_unary(p->arena, uop, operand, tok_loc(op)); +} + +AstNode* parse_group(Parser* p, ErrorInfo* error) { + advance(p); // skip ( + AstNode* expr = parse_expr(p, error); + if (!expr) return NULL; + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + return expr; +} + +AstNode* parse_literal(Parser* p, ErrorInfo* error) { + const Token* t = advance(p); + switch (t->kind) { + case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), tok_loc(t)); + case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), tok_loc(t)); + case TOK_CHAR_LIT: { + int64_t val = 0; + if (t->length >= 2 && t->start[0] == '\\') { + switch (t->start[1]) { + case 'n': val = '\n'; break; + case 't': val = '\t'; break; + case '\\': val = '\\'; break; + case '\'': val = '\''; break; + default: val = t->start[1]; break; + } + } else { + val = (unsigned char)t->start[0]; + } + return ast_make_literal_char(p->arena, (int)val, tok_loc(t)); + } + case TOK_TRUE: return ast_make_literal_bool(p->arena, true, tok_loc(t)); + case TOK_FALSE: return ast_make_literal_bool(p->arena, false, tok_loc(t)); + case TOK_STR_LIT: { + char* str = arena_alloc_impl(p->arena, t->length + 1); + memcpy(str, t->start, t->length); + str[t->length] = '\0'; + // String interpolation: "Hello, \(name)!"
→ "Hello, " + name + "!" + char* interp = strstr(str, "\\("); + if (interp) { + *interp = '\0'; // terminate the text that precedes the interpolation + char* pre = str; + char* expr_start = interp + 2; // skip \( + char* close = strchr(expr_start, ')'); + if (!close) { + error->message = "字符串插值缺少 ')'"; error->filename = p->filename; + error->line = t->line; error->col = t->col; return NULL; + } + *close = '\0'; + char* post = close + 1; + // Build: pre + expr + post + AstNode* result = ast_make_literal_str(p->arena, + arena_strdup_impl(p->arena, pre, strlen(pre)), tok_loc(t)); + // The interpolated text becomes a single identifier node; only the first \(...) in the string is expanded, the rest stays literal text + AstNode* expr = ast_make_ident(p->arena, + arena_strdup_impl(p->arena, expr_start, strlen(expr_start)), tok_loc(t)); + result = ast_make_binary(p->arena, OP_ADD, result, expr, tok_loc(t)); + if (post[0] != '\0') { + AstNode* post_str = ast_make_literal_str(p->arena, + arena_strdup_impl(p->arena, post, strlen(post)), tok_loc(t)); + result = ast_make_binary(p->arena, OP_ADD, result, post_str, tok_loc(t)); + } + return result; + } + return ast_make_literal_str(p->arena, str, tok_loc(t)); + } + default: return NULL; + } +} + +// === Struct initializer parsing: Name { field: val, ...
} === +AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error) { + advance(p); // skip '{' + const char* fnames[32]; + AstNode* fvals[32]; + int fcount = 0; + + while (peek(p)->kind != TOK_RBRACE && !error->message) { + if (fcount >= 32) { error->message = "结构体初始化字段过多 (最多32)"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } + const Token* fname = expect(p, TOK_IDENT, error, "字段名"); + if (!fname) return NULL; + if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL; + AstNode* val = parse_expr(p, error); + if (!val) return NULL; + + fnames[fcount] = arena_strdup_impl(p->arena, fname->start, fname->length); + fvals[fcount] = val; + fcount++; + + if (peek(p)->kind == TOK_COMMA) advance(p); + else break; + } + if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL; + + const char** n_arr = arena_alloc_impl(p->arena, fcount * sizeof(const char*)); + memcpy(n_arr, fnames, fcount * sizeof(const char*)); + AstNode** v_arr = arena_alloc_impl(p->arena, fcount * sizeof(AstNode*)); + memcpy(v_arr, fvals, fcount * sizeof(AstNode*)); + + return ast_make_struct_init(p->arena, + arena_strdup_impl(p->arena, name->start, name->length), + n_arr, v_arr, fcount, tok_loc(name)); +} + +// === Identifier / function call / struct initializer === +AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) { + const Token* name = advance(p); + + // Enum variant or module function: Name::Variant or Name::fn + if (peek(p)->kind == TOK_COLON_COLON) { + advance(p); // skip :: + const Token* variant = expect(p, TOK_IDENT, error, "枚举变体名"); + if (!variant) return NULL; + // Name::fn or Name::Variant or Name::Variant(payload) + if (peek(p)->kind == TOK_LPAREN) { + // Look-ahead probe: a ',' or ':' at the top nesting level inside the parentheses selects the function-call path; otherwise the parentheses are parsed as a single-expression enum payload + size_t probe = p->pos + 1; + int paren_depth = 1; + bool has_comma = false, has_named = false; + while (paren_depth > 0 && p->tokens[probe].kind != TOK_EOF) { + if (p->tokens[probe].kind == TOK_LPAREN) paren_depth++; + else if (p->tokens[probe].kind == TOK_RPAREN) {
paren_depth--; if (paren_depth == 0) break; } + else if (paren_depth == 1 && p->tokens[probe].kind == TOK_COMMA) has_comma = true; + else if (paren_depth == 1 && p->tokens[probe].kind == TOK_COLON) has_named = true; + probe++; + } + if (has_comma || has_named) { + // Module function call: Name::fn(a, b) or Name::fn(x: 1) + advance(p); // skip '(' + AstNode* args[16]; const char* arg_names[16]; int arg_count = 0; + bool seen_named = false; + while (peek(p)->kind != TOK_RPAREN && !error->message) { + if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } + if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) { + const Token* aname = advance(p); advance(p); + arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length); + seen_named = true; + } else { + if (seen_named) { error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } + arg_names[arg_count] = NULL; + } + args[arg_count] = parse_expr(p, error); + if (!args[arg_count]) return NULL; + arg_count++; + if (peek(p)->kind == TOK_COMMA) advance(p); else break; + } + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); + memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); + const char** name_arr = seen_named + ?
memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*)) + : NULL; + char* full_name = arena_alloc_impl(p->arena, name->length + variant->length + 4); + sprintf(full_name, "%.*s::%.*s", name->length, name->start, variant->length, variant->start); + return ast_make_call(p->arena, full_name, arg_arr, name_arr, arg_count, tok_loc(name)); + } + } + // Enum payload: Name::Variant or Name::Variant(expr) + AstNode* payload = NULL; + if (match(p, TOK_LPAREN)) { + payload = parse_expr(p, error); + if (!payload) return NULL; + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + } + return ast_make_enum_variant(p->arena, + arena_strdup_impl(p->arena, name->start, name->length), + arena_strdup_impl(p->arena, variant->start, variant->length), + payload, tok_loc(name)); + } + + // Struct initializer: Name { field: val, ... } (taken only when '{' is followed by IDENT then ':') + if (peek(p)->kind == TOK_LBRACE) { + const Token* after_brace = &p->tokens[p->pos + 1]; + if (after_brace->kind == TOK_IDENT) { + const Token* after_fname = &p->tokens[p->pos + 2]; + if (after_fname->kind == TOK_COLON) { + return parse_struct_init(p, name, error); + } + } + } + + // Function call: name(...)
+ if (match(p, TOK_LPAREN)) { + AstNode* args[16]; const char* arg_names[16]; int arg_count = 0; + bool seen_named = false; + while (peek(p)->kind != TOK_RPAREN && !error->message) { + if (arg_count >= 16) { + error->message = "函数参数过多"; error->filename = p->filename; + error->line = peek(p)->line; error->col = peek(p)->col; return NULL; + } + // Named argument: name: expr (once one is seen, a later positional argument is rejected) + if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) { + const Token* aname = advance(p); advance(p); // skip the identifier and ':' + arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length); + seen_named = true; + } else { + if (seen_named) { + error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; + error->line = peek(p)->line; error->col = peek(p)->col; return NULL; + } + arg_names[arg_count] = NULL; + } + args[arg_count] = parse_expr(p, error); + if (!args[arg_count]) return NULL; + arg_count++; + if (peek(p)->kind == TOK_COMMA) advance(p); + else break; + } + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); + memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); + const char** name_arr = seen_named + ?
memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*)) + : NULL; + return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length), + arg_arr, name_arr, arg_count, tok_loc(name)); + } + return ast_make_ident(p->arena, + arena_strdup_impl(p->arena, name->start, name->length), + tok_loc(name)); +} + +// === Pratt main loop === +AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) { + const Token* tok = peek(p); + AstNode* left = NULL; + + // Prefix parsing + if (tok->kind == TOK_IF) { + const Token* if_tok = advance(p); + // if let: if let Pattern = expr { then } else { else } → desugared into let + if + if (peek(p)->kind == TOK_LET) { + advance(p); // skip let + // Parse the pattern: Enum::Variant or Enum::Variant(var) + AstNode* pattern = parse_expr(p, error); // the pattern goes through the general expression parser + if (!pattern) return NULL; + if (!expect(p, TOK_ASSIGN, error, "if let 缺少 '='")) return NULL; + AstNode* match_expr = parse_expr(p, error); + if (!match_expr) return NULL; + AstNode* then_block = parse_block(p, error); + if (!then_block) return NULL; + AstNode* else_block = NULL; + if (match(p, TOK_ELSE)) { + if (peek(p)->kind == TOK_IF) + else_block = parse_expr_prec(p, min_prec, error); + else + else_block = parse_block(p, error); + if (!else_block) return NULL; + } + // Desugar: { let __iflet_N = expr; if __iflet_N == pattern { then } else { else } } where N comes from a function-static counter + static int iflet_counter = 0; + char vname_buf[32]; + snprintf(vname_buf, sizeof(vname_buf), "__iflet_%d", iflet_counter++); + const char* vname = arena_strdup_impl(p->arena, vname_buf, strlen(vname_buf)); + AstNode* let_stmt = ast_make_let(p->arena, + vname, TYPE_UNKNOWN, + false, false, match_expr, NULL, 0, NULL, 0, tok_loc(if_tok)); + AstNode* cond = ast_make_binary(p->arena, OP_EQ, + ast_make_ident(p->arena, vname, tok_loc(if_tok)), + pattern, tok_loc(if_tok)); + AstNode* if_stmt = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok)); + AstNode* stmts[2] = { let_stmt, if_stmt }; +
AstNode** arr = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); + memcpy(arr, stmts, 2 * sizeof(AstNode*)); + left = ast_make_block(p->arena, arr, 2, tok_loc(if_tok)); + } else { + // if-expr: if cond { then } else { else } ("else if" recurses into parse_expr_prec) + AstNode* cond = parse_expr(p, error); + if (!cond) return NULL; + AstNode* then_block = parse_block(p, error); + if (!then_block) return NULL; + AstNode* else_block = NULL; + if (match(p, TOK_ELSE)) { + if (peek(p)->kind == TOK_IF) + else_block = parse_expr_prec(p, min_prec, error); + else + else_block = parse_block(p, error); + if (!else_block) return NULL; + } + left = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok)); + } + } else if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) { + left = parse_unary(p, error); + } else if (tok->kind == TOK_LPAREN) { + left = parse_group(p, error); + } else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT || + tok->kind == TOK_CHAR_LIT || + tok->kind == TOK_TRUE || tok->kind == TOK_FALSE || + tok->kind == TOK_STR_LIT) { + left = parse_literal(p, error); + } else if (tok->kind == TOK_IDENT) { + left = parse_ident_or_call(p, error); + } else { + error->message = "无法识别的表达式"; error->filename = p->filename; + error->line = tok->line; error->col = tok->col; + return NULL; + } + if (!left) return NULL; + + // Infix / postfix parsing loop + while (!error->message) { + TokenKind kind = peek(p)->kind; + + // Pipe: expr |> func(args...)
→ func(args..., expr) + if (kind == TOK_PIPE) { + Precedence prec = PREC_PIPE; + if (prec <= min_prec) break; + const Token* op = advance(p); + // RHS must be a function call; it is parsed at pipe precedence, so a following pipe is left for this loop + AstNode* right = parse_expr_prec(p, prec, error); + if (!right) return NULL; + if (right->kind != AST_CALL_EXPR) { + error->message = "管道右侧必须是函数调用"; error->filename = p->filename; + error->line = op->line; error->col = op->col; + return NULL; + } + // Insert left as the FIRST argument (F#/Elixir style): the result is func(expr, args...), not func(args..., expr) as the pipe header comment above states. NOTE(review): only args and arg_count are updated here; if the call node also stores per-argument names they are not shifted - confirm + if (right->as.call.arg_count >= 16) { + error->message = "管道参数过多"; error->filename = p->filename; + error->line = op->line; error->col = op->col; return NULL; + } + AstNode** new_args = arena_alloc_impl(p->arena, (right->as.call.arg_count + 1) * sizeof(AstNode*)); + new_args[0] = left; + memcpy(new_args + 1, right->as.call.args, right->as.call.arg_count * sizeof(AstNode*)); + right->as.call.args = new_args; + right->as.call.arg_count++; + left = right; + continue; + } + + // Postfix member access: expr.field or expr.method(args) + if (kind == TOK_DOT) { + advance(p); // skip '.'
+ const Token* field = expect(p, TOK_IDENT, error, "缺少字段名"); + if (!field) return NULL; + const char* member_name = arena_strdup_impl(p->arena, field->start, field->length); + // Method call: expr.method(args) (same argument rules as a plain call: at most 16, named after positional) + if (peek(p)->kind == TOK_LPAREN) { + advance(p); // skip '(' + AstNode* args[16]; const char* arg_names[16]; int arg_count = 0; + bool seen_named = false; + while (peek(p)->kind != TOK_RPAREN && !error->message) { + if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } + if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) { + const Token* aname = advance(p); advance(p); + arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length); + seen_named = true; + } else { + if (seen_named) { error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } + arg_names[arg_count] = NULL; + } + args[arg_count] = parse_expr(p, error); + if (!args[arg_count]) return NULL; + arg_count++; + if (peek(p)->kind == TOK_COMMA) advance(p); else break; + } + if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; + AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); + memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); + const char** name_arr = seen_named + ?
memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*)) + : NULL; + left = ast_make_method_call(p->arena, left, member_name, arg_arr, name_arr, arg_count, tok_loc(field)); + } else { + left = ast_make_field_access(p->arena, left, member_name, tok_loc(field)); + } + continue; + } + + // Postfix indexing: expr[expr] + if (kind == TOK_LBRACKET) { + const Token* lbrack = advance(p); // skip '[' + AstNode* index = parse_expr(p, error); + if (!index) return NULL; + if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL; + left = ast_make_index_expr(p->arena, left, index, tok_loc(lbrack)); + continue; + } + + // Infix operator: stop unless it binds tighter than min_prec; recursing with the same prec makes binary operators left-associative + Precedence prec = tok_to_prec(kind); + if (prec <= min_prec) break; + + const Token* op = advance(p); + AstNode* right = parse_expr_prec(p, prec, error); + if (!right) return NULL; + left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, tok_loc(op)); + } + + return left; +} + +AstNode* parse_expr(Parser* p, ErrorInfo* error) { + return parse_expr_prec(p, PREC_NONE, error); +} + +// === Type utilities (tokens that are not a built-in type map to TYPE_VOID) === +TypeKind token_to_type(TokenKind k) { + switch (k) { + case TOK_I32: return TYPE_I32; + case TOK_I64: return TYPE_I64; + case TOK_U64: return TYPE_U64; + case TOK_F64: return TYPE_F64; + case TOK_BOOL: return TYPE_BOOL; + case TOK_CHAR: return TYPE_CHAR; + case TOK_STR: return TYPE_STR; + default: return TYPE_VOID; + } +} + +// === Type expression parsing (built-in types / struct names / array types) === +// Arrays use a postfix syntax: Type[N] +TypeInfo parse_type_expr(Parser* p, ErrorInfo* error) { + const Token* t = peek(p); + TypeInfo ti = {0}; + + // Self type (refers to the implementer's own type inside a trait); recorded as a struct named "Self" + if (t->kind == TOK_SELF) { + advance(p); + ti.kind = TYPE_STRUCT; + ti.struct_name = "Self"; + return ti; + } + + // Parse the base type + if (tok_is_type(t->kind)) { + advance(p); + ti.kind = token_to_type(t->kind); + } else if (t->kind == TOK_IDENT) { + advance(p); + ti.kind = TYPE_STRUCT; + ti.struct_name = arena_strdup_impl(p->arena, t->start, t->length); + } else { + error->message = "无效的类型";
error->filename = p->filename; + error->line = t->line; error->col = t->col; + ti.kind = TYPE_ERROR; + return ti; + } + + // Postfix array dimension: Type[N] → TYPE_ARRAY (a single dimension is parsed; N must be an integer literal) + if (peek(p)->kind == TOK_LBRACKET) { + advance(p); // skip '[' + const Token* size_tok = expect(p, TOK_INT_LIT, error, "数组大小必须是整数常量"); + if (!size_tok) { ti.kind = TYPE_ERROR; return ti; } + int64_t size = tok_int_value(size_tok); + if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) { + ti.kind = TYPE_ERROR; return ti; + } + TypeInfo arr_ti = {0}; + arr_ti.kind = TYPE_ARRAY; + arr_ti.element_type = ti.kind; + arr_ti.element_struct_name = ti.struct_name; + arr_ti.array_size = size; + return arr_ti; + } + + return ti; +} diff --git a/src/parser/parse_internal.h b/src/parser/parse_internal.h new file mode 100644 index 0000000..0c8ac45 --- /dev/null +++ b/src/parser/parse_internal.h @@ -0,0 +1,71 @@ +#ifndef PARSE_INTERNAL_H +#define PARSE_INTERNAL_H + +#include "parser.h" +#include "lexer.h" +#include "ast.h" +#include "arena.h" +#include "l_lang.h" + +// === Parser state === +typedef struct { + const Token* tokens; + size_t count; + size_t pos; + const char* filename; + Arena* arena; +} Parser; + +// Recursion depth limit +extern int parse_depth; +#define MAX_PARSE_DEPTH 1000 + +// === Inline helpers (pos is not bounds-checked against count in these helpers) === +static inline const Token* peek(const Parser* p) { return &p->tokens[p->pos]; } +static inline const Token* advance(Parser* p) { return &p->tokens[p->pos++]; } +static inline bool match(Parser* p, TokenKind k) { + if (peek(p)->kind == k) { p->pos++; return true; } + return false; +} +static inline const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) { + if (peek(p)->kind == k) return advance(p); + e->message = msg; e->filename = p->filename; + e->line = peek(p)->line; e->col = peek(p)->col; + return NULL; +} + +// === Precedence levels === +typedef enum { + PREC_NONE = 0, + PREC_PIPE = 10, + PREC_OR = 20, + PREC_AND = 30, + PREC_COMPARE = 40, + PREC_TERM = 50, + PREC_FACTOR = 60, + PREC_UNARY = 70, + PREC_POSTFIX = 80, +} Precedence; +
+// === Forward declarations === +AstNode* parse(Arena* a, const Token* tokens, size_t count, + const char* filename, ErrorInfo* error); +AstNode* parse_expr(Parser* p, ErrorInfo* error); +AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error); +AstNode* parse_block(Parser* p, ErrorInfo* error); +AstNode* parse_statement(Parser* p, ErrorInfo* error); +AstNode* parse_function(Parser* p, bool is_pub, ErrorInfo* error); +AstNode* parse_struct_decl(Parser* p, ErrorInfo* error); +TypeInfo parse_type_expr(Parser* p, ErrorInfo* error); + +// === Expression-layer functions (called from parser.c) === +Precedence tok_to_prec(TokenKind kind); +BinaryOp tok_to_binop(TokenKind kind); +TypeKind token_to_type(TokenKind k); +AstNode* parse_unary(Parser* p, ErrorInfo* error); +AstNode* parse_group(Parser* p, ErrorInfo* error); +AstNode* parse_literal(Parser* p, ErrorInfo* error); +AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error); +AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error); + +#endif diff --git a/src/parser/parser.c b/src/parser/parser.c index 9fb5313..28d5036 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -1,549 +1,10 @@ -#include "parser.h" -#include "lexer.h" +#include "parse_internal.h" #include #include #include -typedef struct { - const Token* tokens; - size_t count; - size_t pos; - const char* filename; - Arena* arena; -} Parser; - -// === 递归深度限制 === -static int parse_depth = 0; -#define MAX_PARSE_DEPTH 1000 - -// === 向前看 === -static const Token* peek(const Parser* p) { return &p->tokens[p->pos]; } -static const Token* advance(Parser* p) { return &p->tokens[p->pos++]; } -static bool match(Parser* p, TokenKind k) { - if (peek(p)->kind == k) { p->pos++; return true; } - return false; -} -static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) { - if (peek(p)->kind == k) return advance(p); - e->message = msg; e->filename = p->filename; - e->line = peek(p)->line; e->col = peek(p)->col; - return NULL; -} - -// === 运算符优先级定义 ===
-typedef enum { - PREC_NONE = 0, - PREC_PIPE = 10, - PREC_OR = 20, - PREC_AND = 30, - PREC_COMPARE = 40, - PREC_TERM = 50, - PREC_FACTOR = 60, - PREC_UNARY = 70, - PREC_POSTFIX = 80, // .field, call() -} Precedence; - -static Precedence tok_to_prec(TokenKind kind) { - switch (kind) { - case TOK_PIPE_PIPE: return PREC_OR; - case TOK_AND_AND: return PREC_AND; - case TOK_EQ_EQ: case TOK_BANG_EQ: - case TOK_LT: case TOK_GT: case TOK_LT_EQ: case TOK_GT_EQ: return PREC_COMPARE; - case TOK_PLUS: case TOK_MINUS: return PREC_TERM; - case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return PREC_FACTOR; - default: return PREC_NONE; - } -} - -static BinaryOp tok_to_binop(TokenKind kind) { - switch (kind) { - case TOK_PLUS: return OP_ADD; case TOK_MINUS: return OP_SUB; - case TOK_STAR: return OP_MUL; case TOK_SLASH: return OP_DIV; - case TOK_PERCENT: return OP_MOD; - case TOK_EQ_EQ: return OP_EQ; case TOK_BANG_EQ: return OP_NE; - case TOK_LT: return OP_LT; case TOK_GT: return OP_GT; - case TOK_LT_EQ: return OP_LE; case TOK_GT_EQ: return OP_GE; - case TOK_AND_AND: return OP_AND; case TOK_PIPE_PIPE: return OP_OR; - default: return OP_ADD; - } -} - -// 向前声明 -static AstNode* parse_expr(Parser* p, ErrorInfo* error); -static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error); -static AstNode* parse_block(Parser* p, ErrorInfo* error); -static AstNode* parse_statement(Parser* p, ErrorInfo* error); -static AstNode* parse_function(Parser* p, bool is_pub, ErrorInfo* error); - -// === 前缀解析 === -static AstNode* parse_unary(Parser* p, ErrorInfo* error) { - const Token* op = advance(p); - AstNode* operand = parse_expr_prec(p, PREC_UNARY, error); - if (!operand) return NULL; - BinaryOp uop = (op->kind == TOK_MINUS) ? 
OP_NEG : OP_NOT; - return ast_make_unary(p->arena, uop, operand, tok_loc(op)); -} - -static AstNode* parse_group(Parser* p, ErrorInfo* error) { - advance(p); // 跳过 ( - AstNode* expr = parse_expr(p, error); - if (!expr) return NULL; - if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; - return expr; -} - -static AstNode* parse_literal(Parser* p, ErrorInfo* error) { - const Token* t = advance(p); - switch (t->kind) { - case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), tok_loc(t)); - case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), tok_loc(t)); - case TOK_CHAR_LIT: { - int64_t val = 0; - if (t->length >= 2 && t->start[0] == '\\') { - switch (t->start[1]) { - case 'n': val = '\n'; break; - case 't': val = '\t'; break; - case '\\': val = '\\'; break; - case '\'': val = '\''; break; - default: val = t->start[1]; break; - } - } else { - val = (unsigned char)t->start[0]; - } - return ast_make_literal_char(p->arena, (int)val, tok_loc(t)); - } - case TOK_TRUE: return ast_make_literal_bool(p->arena, true, tok_loc(t)); - case TOK_FALSE: return ast_make_literal_bool(p->arena, false, tok_loc(t)); - case TOK_STR_LIT: { - char* str = arena_alloc_impl(p->arena, t->length + 1); - memcpy(str, t->start, t->length); - str[t->length] = '\0'; - // 字符串插值: "Hello, \(name)!" → "Hello, " + name + "!" 
- char* interp = strstr(str, "\\("); - if (interp) { - *interp = '\0'; // 截断前半部分 - char* pre = str; - char* expr_start = interp + 2; // 跳过 \( - char* close = strchr(expr_start, ')'); - if (!close) { - error->message = "字符串插值缺少 ')'"; error->filename = p->filename; - error->line = t->line; error->col = t->col; return NULL; - } - *close = '\0'; - char* post = close + 1; - // 生成: pre + expr + post - AstNode* result = ast_make_literal_str(p->arena, - arena_strdup_impl(p->arena, pre, strlen(pre)), tok_loc(t)); - // 将插值表达式按标识符解析 - AstNode* expr = ast_make_ident(p->arena, - arena_strdup_impl(p->arena, expr_start, strlen(expr_start)), tok_loc(t)); - result = ast_make_binary(p->arena, OP_ADD, result, expr, tok_loc(t)); - if (post[0] != '\0') { - AstNode* post_str = ast_make_literal_str(p->arena, - arena_strdup_impl(p->arena, post, strlen(post)), tok_loc(t)); - result = ast_make_binary(p->arena, OP_ADD, result, post_str, tok_loc(t)); - } - return result; - } - return ast_make_literal_str(p->arena, str, tok_loc(t)); - } - default: return NULL; - } -} - -// === 结构体初始化解析: Name { field: val, ... 
} === -static AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error) { - advance(p); // 跳过 '{' - const char* fnames[32]; - AstNode* fvals[32]; - int fcount = 0; - - while (peek(p)->kind != TOK_RBRACE && !error->message) { - if (fcount >= 32) { error->message = "结构体初始化字段过多 (最多32)"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } - const Token* fname = expect(p, TOK_IDENT, error, "字段名"); - if (!fname) return NULL; - if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL; - AstNode* val = parse_expr(p, error); - if (!val) return NULL; - - fnames[fcount] = arena_strdup_impl(p->arena, fname->start, fname->length); - fvals[fcount] = val; - fcount++; - - if (peek(p)->kind == TOK_COMMA) advance(p); - else break; - } - if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL; - - const char** n_arr = arena_alloc_impl(p->arena, fcount * sizeof(const char*)); - memcpy(n_arr, fnames, fcount * sizeof(const char*)); - AstNode** v_arr = arena_alloc_impl(p->arena, fcount * sizeof(AstNode*)); - memcpy(v_arr, fvals, fcount * sizeof(AstNode*)); - - return ast_make_struct_init(p->arena, - arena_strdup_impl(p->arena, name->start, name->length), - n_arr, v_arr, fcount, tok_loc(name)); -} - -// === 标识符 / 函数调用 / 结构体初始化 === -static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) { - const Token* name = advance(p); - - // 枚举变体或模块函数: Name::Variant 或 Name::fn - if (peek(p)->kind == TOK_COLON_COLON) { - advance(p); // 跳过 :: - const Token* variant = expect(p, TOK_IDENT, error, "枚举变体名"); - if (!variant) return NULL; - // Name::fn 或 Name::Variant 或 Name::Variant(payload) - if (peek(p)->kind == TOK_LPAREN) { - // 前进探测: 检查括号内是否有多参数或命名参数(→函数调用)还是单表达式(→枚举payload) - size_t probe = p->pos + 1; - int paren_depth = 1; - bool has_comma = false, has_named = false; - while (paren_depth > 0 && p->tokens[probe].kind != TOK_EOF) { - if (p->tokens[probe].kind == TOK_LPAREN) paren_depth++; - else if (p->tokens[probe].kind == 
TOK_RPAREN) { paren_depth--; if (paren_depth == 0) break; } - else if (paren_depth == 1 && p->tokens[probe].kind == TOK_COMMA) has_comma = true; - else if (paren_depth == 1 && p->tokens[probe].kind == TOK_COLON) has_named = true; - probe++; - } - if (has_comma || has_named) { - // 模块函数调用: Name::fn(a, b) 或 Name::fn(x: 1) - advance(p); // 跳过 '(' - AstNode* args[16]; const char* arg_names[16]; int arg_count = 0; - bool seen_named = false; - while (peek(p)->kind != TOK_RPAREN && !error->message) { - if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } - if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) { - const Token* aname = advance(p); advance(p); - arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length); - seen_named = true; - } else { - if (seen_named) { error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } - arg_names[arg_count] = NULL; - } - args[arg_count] = parse_expr(p, error); - if (!args[arg_count]) return NULL; - arg_count++; - if (peek(p)->kind == TOK_COMMA) advance(p); else break; - } - if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; - AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); - memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); - const char** name_arr = seen_named - ? 
memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*)) - : NULL; - char* full_name = arena_alloc_impl(p->arena, name->length + variant->length + 4); - sprintf(full_name, "%.*s::%.*s", name->length, name->start, variant->length, variant->start); - return ast_make_call(p->arena, full_name, arg_arr, name_arr, arg_count, tok_loc(name)); - } - } - // 枚举 payload: Name::Variant 或 Name::Variant(expr) - AstNode* payload = NULL; - if (match(p, TOK_LPAREN)) { - payload = parse_expr(p, error); - if (!payload) return NULL; - if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; - } - return ast_make_enum_variant(p->arena, - arena_strdup_impl(p->arena, name->start, name->length), - arena_strdup_impl(p->arena, variant->start, variant->length), - payload, tok_loc(name)); - } - - // 结构体初始化: Name { field: val, ... } - // 用提前看来区别 struct init 和 block: - // struct init → { IDENT COLON ... ;block → { 可能是 let/if/while/... - if (peek(p)->kind == TOK_LBRACE) { - const Token* after_brace = &p->tokens[p->pos + 1]; - if (after_brace->kind == TOK_IDENT) { - const Token* after_fname = &p->tokens[p->pos + 2]; - if (after_fname->kind == TOK_COLON) { - return parse_struct_init(p, name, error); - } - } - } - - // 函数调用: name(...) 
- if (match(p, TOK_LPAREN)) { - AstNode* args[16]; const char* arg_names[16]; int arg_count = 0; - bool seen_named = false; - while (peek(p)->kind != TOK_RPAREN && !error->message) { - if (arg_count >= 16) { - error->message = "函数参数过多"; error->filename = p->filename; - error->line = peek(p)->line; error->col = peek(p)->col; return NULL; - } - // 命名参数: name: expr - if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) { - const Token* aname = advance(p); advance(p); // 跳过标识符和 ':' - arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length); - seen_named = true; - } else { - if (seen_named) { - error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; - error->line = peek(p)->line; error->col = peek(p)->col; return NULL; - } - arg_names[arg_count] = NULL; - } - args[arg_count] = parse_expr(p, error); - if (!args[arg_count]) return NULL; - arg_count++; - if (peek(p)->kind == TOK_COMMA) advance(p); - else break; - } - if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; - AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); - memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); - const char** name_arr = seen_named - ? 
memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*)) - : NULL; - return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length), - arg_arr, name_arr, arg_count, tok_loc(name)); - } - return ast_make_ident(p->arena, - arena_strdup_impl(p->arena, name->start, name->length), - tok_loc(name)); -} - -// === Pratt 主循环 === -static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) { - const Token* tok = peek(p); - AstNode* left = NULL; - - // 前缀解析 - if (tok->kind == TOK_IF) { - const Token* if_tok = advance(p); - // if let: if let Pattern = expr { then } else { else } → 去糖为 let+if - if (peek(p)->kind == TOK_LET) { - advance(p); // 跳过 let - // 解析模式: Enum::Variant 或 Enum::Variant(var) - AstNode* pattern = parse_expr(p, error); // 解析枚举变体 - if (!pattern) return NULL; - if (!expect(p, TOK_ASSIGN, error, "if let 缺少 '='")) return NULL; - AstNode* match_expr = parse_expr(p, error); - if (!match_expr) return NULL; - AstNode* then_block = parse_block(p, error); - if (!then_block) return NULL; - AstNode* else_block = NULL; - if (match(p, TOK_ELSE)) { - if (peek(p)->kind == TOK_IF) - else_block = parse_expr_prec(p, min_prec, error); - else - else_block = parse_block(p, error); - if (!else_block) return NULL; - } - // 去糖: { let __match = expr; if __match == pattern { then } else { else } } - static int iflet_counter = 0; - char vname_buf[32]; - snprintf(vname_buf, sizeof(vname_buf), "__iflet_%d", iflet_counter++); - const char* vname = arena_strdup_impl(p->arena, vname_buf, strlen(vname_buf)); - AstNode* let_stmt = ast_make_let(p->arena, - vname, TYPE_UNKNOWN, - false, false, match_expr, NULL, 0, NULL, 0, tok_loc(if_tok)); - AstNode* cond = ast_make_binary(p->arena, OP_EQ, - ast_make_ident(p->arena, vname, tok_loc(if_tok)), - pattern, tok_loc(if_tok)); - AstNode* if_stmt = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok)); - AstNode* stmts[2] = { let_stmt, 
if_stmt }; - AstNode** arr = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); - memcpy(arr, stmts, 2 * sizeof(AstNode*)); - left = ast_make_block(p->arena, arr, 2, tok_loc(if_tok)); - } else { - // if-expr: if cond { then } else { else } - AstNode* cond = parse_expr(p, error); - if (!cond) return NULL; - AstNode* then_block = parse_block(p, error); - if (!then_block) return NULL; - AstNode* else_block = NULL; - if (match(p, TOK_ELSE)) { - if (peek(p)->kind == TOK_IF) - else_block = parse_expr_prec(p, min_prec, error); - else - else_block = parse_block(p, error); - if (!else_block) return NULL; - } - left = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok)); - } - } else if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) { - left = parse_unary(p, error); - } else if (tok->kind == TOK_LPAREN) { - left = parse_group(p, error); - } else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT || - tok->kind == TOK_CHAR_LIT || - tok->kind == TOK_TRUE || tok->kind == TOK_FALSE || - tok->kind == TOK_STR_LIT) { - left = parse_literal(p, error); - } else if (tok->kind == TOK_IDENT) { - left = parse_ident_or_call(p, error); - } else { - error->message = "无法识别的表达式"; error->filename = p->filename; - error->line = tok->line; error->col = tok->col; - return NULL; - } - if (!left) return NULL; - - // 中缀/后置解析循环 - while (!error->message) { - TokenKind kind = peek(p)->kind; - - // 管道: expr |> func(args...) 
→ func(args..., expr) - if (kind == TOK_PIPE) { - Precedence prec = PREC_PIPE; - if (prec <= min_prec) break; - const Token* op = advance(p); - // RHS 必须是函数调用(不带管道时解析) - AstNode* right = parse_expr_prec(p, prec, error); - if (!right) return NULL; - if (right->kind != AST_CALL_EXPR) { - error->message = "管道右侧必须是函数调用"; error->filename = p->filename; - error->line = op->line; error->col = op->col; - return NULL; - } - // 将 left 作为第一个参数插入(F#/Elixir 风格) - if (right->as.call.arg_count >= 16) { - error->message = "管道参数过多"; error->filename = p->filename; - error->line = op->line; error->col = op->col; return NULL; - } - AstNode** new_args = arena_alloc_impl(p->arena, (right->as.call.arg_count + 1) * sizeof(AstNode*)); - new_args[0] = left; - memcpy(new_args + 1, right->as.call.args, right->as.call.arg_count * sizeof(AstNode*)); - right->as.call.args = new_args; - right->as.call.arg_count++; - left = right; - continue; - } - - // 后置字段访问: expr.field 或 expr.method(args) - if (kind == TOK_DOT) { - advance(p); // 跳过 '.' 
- const Token* field = expect(p, TOK_IDENT, error, "缺少字段名"); - if (!field) return NULL; - const char* member_name = arena_strdup_impl(p->arena, field->start, field->length); - // 方法调用: expr.method(args) - if (peek(p)->kind == TOK_LPAREN) { - advance(p); // 跳过 '(' - AstNode* args[16]; const char* arg_names[16]; int arg_count = 0; - bool seen_named = false; - while (peek(p)->kind != TOK_RPAREN && !error->message) { - if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } - if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) { - const Token* aname = advance(p); advance(p); - arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length); - seen_named = true; - } else { - if (seen_named) { error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } - arg_names[arg_count] = NULL; - } - args[arg_count] = parse_expr(p, error); - if (!args[arg_count]) return NULL; - arg_count++; - if (peek(p)->kind == TOK_COMMA) advance(p); else break; - } - if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL; - AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*)); - memcpy(arg_arr, args, arg_count * sizeof(AstNode*)); - const char** name_arr = seen_named - ? 
memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*)) - : NULL; - left = ast_make_method_call(p->arena, left, member_name, arg_arr, name_arr, arg_count, tok_loc(field)); - } else { - left = ast_make_field_access(p->arena, left, member_name, tok_loc(field)); - } - continue; - } - - // 后置索引: expr[expr] - if (kind == TOK_LBRACKET) { - const Token* lbrack = advance(p); // 跳过 '[' - AstNode* index = parse_expr(p, error); - if (!index) return NULL; - if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL; - left = ast_make_index_expr(p->arena, left, index, tok_loc(lbrack)); - continue; - } - - // 中缀运算符 - Precedence prec = tok_to_prec(kind); - if (prec <= min_prec) break; - - const Token* op = advance(p); - AstNode* right = parse_expr_prec(p, prec, error); - if (!right) return NULL; - left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, tok_loc(op)); - } - - return left; -} - -static AstNode* parse_expr(Parser* p, ErrorInfo* error) { - return parse_expr_prec(p, PREC_NONE, error); -} - -// === 类型工具 === -static TypeKind token_to_type(TokenKind k) { - switch (k) { - case TOK_I32: return TYPE_I32; - case TOK_I64: return TYPE_I64; - case TOK_U64: return TYPE_U64; - case TOK_F64: return TYPE_F64; - case TOK_BOOL: return TYPE_BOOL; - case TOK_CHAR: return TYPE_CHAR; - case TOK_STR: return TYPE_STR; - default: return TYPE_VOID; - } -} - -// === 类型表达式解析(内置类型/结构体名/数组类型)=== -// 数组支持后置语法: T[N], T[N][M] 等 -static TypeInfo parse_type_expr(Parser* p, ErrorInfo* error) { - const Token* t = peek(p); - TypeInfo ti = {0}; - - // Self 类型(trait 中引用实现者自身类型) - if (t->kind == TOK_SELF) { - advance(p); - ti.kind = TYPE_STRUCT; - ti.struct_name = "Self"; - return ti; - } - - // 解析基础类型 - if (tok_is_type(t->kind)) { - advance(p); - ti.kind = token_to_type(t->kind); - } else if (t->kind == TOK_IDENT) { - advance(p); - ti.kind = TYPE_STRUCT; - ti.struct_name = arena_strdup_impl(p->arena, t->start, t->length); - } else { - 
error->message = "无效的类型"; error->filename = p->filename; - error->line = t->line; error->col = t->col; - ti.kind = TYPE_ERROR; - return ti; - } - - // 后置数组维度: Type[N] → TYPE_ARRAY - if (peek(p)->kind == TOK_LBRACKET) { - advance(p); // 跳过 '[' - const Token* size_tok = expect(p, TOK_INT_LIT, error, "数组大小必须是整数常量"); - if (!size_tok) { ti.kind = TYPE_ERROR; return ti; } - int64_t size = tok_int_value(size_tok); - if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) { - ti.kind = TYPE_ERROR; return ti; - } - TypeInfo arr_ti = {0}; - arr_ti.kind = TYPE_ARRAY; - arr_ti.element_type = ti.kind; - arr_ti.element_struct_name = ti.struct_name; - arr_ti.array_size = size; - return arr_ti; - } - - return ti; -} - // === 结构体声明解析 === -static AstNode* parse_struct_decl(Parser* p, ErrorInfo* error) { +AstNode* parse_struct_decl(Parser* p, ErrorInfo* error) { const Token* s_tok = advance(p); // 跳过 'struct' const Token* name = expect(p, TOK_IDENT, error, "struct 后应为结构体名"); if (!name) return NULL; @@ -664,9 +125,8 @@ static AstNode* parse_match_stmt(Parser* p, ErrorInfo* error) { return ast_make_block(p->arena, stmts, 2, tok_loc(match_tok)); } -// === 语句解析 === - -static AstNode* parse_block(Parser* p, ErrorInfo* error) { +// === 代码块解析 === +AstNode* parse_block(Parser* p, ErrorInfo* error) { if (++parse_depth > MAX_PARSE_DEPTH) { error->message = "嵌套过深"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; @@ -688,7 +148,8 @@ static AstNode* parse_block(Parser* p, ErrorInfo* error) { return ast_make_block(p->arena, arr, count, tok_loc(open)); } -static AstNode* parse_statement(Parser* p, ErrorInfo* error) { +// === 语句解析 === +AstNode* parse_statement(Parser* p, ErrorInfo* error) { const Token* t = peek(p); if (t->kind == TOK_LET || t->kind == TOK_VAR) { @@ -832,9 +293,7 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { } // 数组元素赋值: ident[expr] = expr ; - // 需要前瞻: 检查 ']' 后面是否是 '=' (而非 ';' 或其它) if (t->kind == TOK_IDENT && (t + 1)->kind == 
TOK_LBRACKET) { - // 向前扫描找到对应的 ']'(不支持嵌套 '[' 在索引中) int ahead_idx = 2; int bracket_depth = 1; while (bracket_depth > 0 && (t + ahead_idx)->kind != TOK_EOF) { @@ -842,7 +301,6 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { else if ((t + ahead_idx)->kind == TOK_RBRACKET) bracket_depth--; if (bracket_depth > 0) ahead_idx++; } - // 检查 ']' 后是否是 '=' if ((t + ahead_idx + 1)->kind == TOK_ASSIGN) { const Token* name = advance(p); // 消费标识符 advance(p); // 消费 '[' @@ -857,7 +315,6 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { arena_strdup_impl(p->arena, name->start, name->length), index, value, tok_loc(name)); } - // 否则: 不是数组赋值, 回退到下方表达式语句处理 } // 赋值语句: ident = expr ; @@ -911,7 +368,7 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { } // === 函数解析 === -static AstNode* parse_function(Parser* p, bool is_pub, ErrorInfo* error) { +AstNode* parse_function(Parser* p, bool is_pub, ErrorInfo* error) { const Token* fn_tok = advance(p); // fn const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名"); if (!name) return NULL; @@ -930,7 +387,7 @@ static AstNode* parse_function(Parser* p, bool is_pub, ErrorInfo* error) { } if (!expect(p, TOK_LPAREN, error, "缺少 '('")) return NULL; - // 参数列表(泛型参数可标注为类型参数名) + // 参数列表 AstNode* params[64]; int pcount = 0; while (peek(p)->kind != TOK_RPAREN && !error->message) { if (pcount >= 64) { error->message = "函数参数过多 (最多64)"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } @@ -978,11 +435,7 @@ static AstNode* parse_function(Parser* p, bool is_pub, ErrorInfo* error) { parr, pcount, ret, ret_struct_name, body, is_pub, tparr, tp_count, tok_loc(fn_tok)); } -// === 模块文件加载辅助 === -// parse 前向声明(定义在后面) -AstNode* parse(Arena* a, const Token* tokens, size_t count, - const char* filename, ErrorInfo* error); - +// === 模块文件加载 === static AstNode* load_module(Arena* a, const char* parent_file, const char* mod_name, ErrorInfo* error) { // 构造模块文件路径: 同目录下 mod_name.l 
@@ -1056,7 +509,6 @@ AstNode* parse(Arena* a, const Token* tokens, size_t count, if (!expect(&p, TOK_RBRACE, error, "缺少 '}'")) return NULL; AstNode** marr = arena_alloc_impl(p.arena, mcount * sizeof(AstNode*)); memcpy(marr, methods, mcount * sizeof(AstNode*)); - // 复用 impl_count 存储 trait(共用计数) if (impl_count >= 64) { error->message = "trait 过多(最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; } impls[impl_count++] = ast_make_trait_decl(p.arena, arena_strdup_impl(p.arena, tname->start, tname->length), @@ -1177,7 +629,6 @@ AstNode* parse(Arena* a, const Token* tokens, size_t count, if (enum_count >= 64) break; enums[enum_count++] = sub->as.program.enums[i]; } - /* mod 内容已内联合并到当前文件 */ ; } else if (peek(&p)->kind == TOK_USE) { /* TODO: use 语句待实现符号导入 */ ; advance(&p); diff --git a/test/test_parser.c b/test/test_parser.c index 31e0188..540b4b4 100644 --- a/test/test_parser.c +++ b/test/test_parser.c @@ -2,6 +2,7 @@ #include "parser.h" #include "lexer.h" #include "arena.h" +#include <string.h> static AstNode* parse_string(const char* src) { Arena* a = malloc(sizeof(Arena)); @@ -13,7 +14,6 @@ static AstNode* parse_string(const char* src) { ErrorInfo parse_err = {0}; AstNode* ast = parse(a, tokens, tcount, "test", &parse_err); if (!ast) { arena_destroy(a); free(a); return NULL; } - // NOTE: arena and tokens must stay alive for AST - leak intentionally in test return ast; } @@ -35,7 +35,6 @@ void test_arithmetic_expr() { AstNode* expr = ret->as.return_stmt.expr; ASSERT(expr->kind == AST_BINARY_EXPR); ASSERT(expr->as.binary.op == OP_ADD); - // 1 + (2 * 3): right should be *, left should be 1 ASSERT(expr->as.binary.right->kind == AST_BINARY_EXPR); ASSERT(expr->as.binary.right->as.binary.op == OP_MUL); } @@ -58,11 +57,168 @@ void test_function_with_params() { ASSERT(fn->as.function.return_type == TYPE_I64); } +// === 新增测试 === + +void test_struct_decl_and_init() { + AstNode* ast = parse_string( + "struct Point { x: i64, y: i64 }" +
"fn main() -> i64 { let p = Point { x: 1, y: 2 }; return p.x; }"); + ASSERT(ast != NULL); + ASSERT(ast->as.program.struct_count == 1); + AstNode* sd = ast->as.program.structs[0]; + ASSERT(sd->kind == AST_STRUCT_DECL); + ASSERT(strcmp(sd->as.struct_decl.name, "Point") == 0); + ASSERT(sd->as.struct_decl.field_count == 2); +} + +void test_literals() { + // i64 + AstNode* a1 = parse_string("fn main() -> i64 { return 42; }"); + ASSERT(a1 != NULL); + // f64 + AstNode* a2 = parse_string("fn main() -> f64 { return 3.14; }"); + ASSERT(a2 != NULL); + // bool true/false + AstNode* a3 = parse_string("fn main() -> bool { return true; }"); + ASSERT(a3 != NULL); + AstNode* a4 = parse_string("fn main() -> bool { return false; }"); + ASSERT(a4 != NULL); + // str + AstNode* a5 = parse_string("fn main() -> str { return \"hello\"; }"); + ASSERT(a5 != NULL); +} + +void test_comparison_chaining() { + AstNode* ast = parse_string("fn main() -> bool { return 1 + 2 == 3; }"); + ASSERT(ast != NULL); + // 1 + 2 == 3 应解析为 (1+2) == 3, 因 == 优先级(40) < +(50) + AstNode* body = ast->as.program.functions[0]->as.function.body; + AstNode* ret_expr = body->as.block.stmts[0]->as.return_stmt.expr; + ASSERT(ret_expr->kind == AST_BINARY_EXPR); + ASSERT(ret_expr->as.binary.op == OP_EQ); + ASSERT(ret_expr->as.binary.left->kind == AST_BINARY_EXPR); + ASSERT(ret_expr->as.binary.left->as.binary.op == OP_ADD); +} + +void test_guard_desugar() { + AstNode* ast = parse_string("fn main() { guard x >= 0 else { return -1; } return 0; }"); + ASSERT(ast != NULL); + // guard 去糖为 if !(x >= 0) { return -1; } + AstNode* body = ast->as.program.functions[0]->as.function.body; + AstNode* first = body->as.block.stmts[0]; + ASSERT(first->kind == AST_IF_STMT); + ASSERT(first->as.if_stmt.cond->kind == AST_UNARY_EXPR); + ASSERT(first->as.if_stmt.cond->as.unary.op == OP_NOT); +} + +void test_named_args() { + AstNode* ast = parse_string( + "fn draw(x: i64, y: i64) { }" + "fn main() { draw(x: 10, y: 20); return 0; }"); + ASSERT(ast != 
NULL); + AstNode* body = ast->as.program.functions[1]->as.function.body; + AstNode* call = body->as.block.stmts[0]->as.expr_stmt.expr; + ASSERT(call->kind == AST_CALL_EXPR); + ASSERT(call->as.call.arg_count == 2); + // 命名参数应有 name_arr +} + +void test_field_access() { + AstNode* ast = parse_string( + "struct Pt { x: i64 }" + "fn main() -> i64 { let p = Pt { x: 42 }; return p.x; }"); + ASSERT(ast != NULL); + AstNode* body = ast->as.program.functions[0]->as.function.body; + AstNode* ret_expr = body->as.block.stmts[1]->as.return_stmt.expr; + ASSERT(ret_expr->kind == AST_FIELD_ACCESS); +} + +void test_method_call_parse() { + AstNode* ast = parse_string( + "struct Pt { x: i64 }" + "extend Pt { fn get(self: Pt) -> i64 { return self.x; } }" + "fn main() -> i64 { let p = Pt { x: 10 }; return p.get(); }"); + ASSERT(ast != NULL); +} + +void test_match_parse() { + AstNode* ast = parse_string( + "fn main() -> i64 { match 1 { 1 => { return 10; }, _ => { return 0; } } }"); + ASSERT(ast != NULL); +} + +void test_enum_decl() { + AstNode* ast = parse_string("enum Color { Red, Green, Blue } fn main() -> i64 { return 0; }"); + ASSERT(ast != NULL); + ASSERT(ast->as.program.enum_count == 1); + AstNode* ed = ast->as.program.enums[0]; + ASSERT(ed->kind == AST_ENUM_DECL); + ASSERT(ed->as.enum_decl.variant_count == 3); +} + +void test_for_desugar() { + AstNode* ast = parse_string( + "fn main() { for i in 0 to 5 { print_i64(i); } return 0; }"); + ASSERT(ast != NULL); + // for 去糖为 { var i = 0; while i < 5 { ... 
i = i + 1; } } + AstNode* body = ast->as.program.functions[0]->as.function.body; + ASSERT(body->as.block.stmt_count == 2); // for-block + return 0 + // 第一个语句是 for 脱糖块 + AstNode* for_block = body->as.block.stmts[0]; + ASSERT(for_block->kind == AST_BLOCK); + ASSERT(for_block->as.block.stmt_count == 2); // let + while +} + +void test_pipe_operator() { + AstNode* ast = parse_string( + "fn main() -> i64 { return 10 |> double(5); }"); + ASSERT(ast != NULL); +} + +void test_type_alias_parse() { + AstNode* ast = parse_string("type Meters = i64; fn main() -> i64 { return 0; }"); + ASSERT(ast != NULL); +} + +void test_trait_decl() { + AstNode* ast = parse_string( + "trait Show { fn show(self: Self) -> void; }" + "fn main() -> i64 { return 0; }"); + ASSERT(ast != NULL); +} + +void test_array_type_parse() { + AstNode* ast = parse_string( + "fn main() -> i64 { var a: i64[10] = a; a[0] = 42; return a[0]; }"); + ASSERT(ast != NULL); +} + +void test_if_expr_parse() { + AstNode* ast = parse_string( + "fn main() -> i64 { let x = if true { 10; } else { 20; }; return x; }"); + ASSERT(ast != NULL); +} + int main(void) { TEST_RUN(test_simple_function); TEST_RUN(test_arithmetic_expr); TEST_RUN(test_if_statement); TEST_RUN(test_while_loop); TEST_RUN(test_function_with_params); + TEST_RUN(test_struct_decl_and_init); + TEST_RUN(test_literals); + TEST_RUN(test_comparison_chaining); + TEST_RUN(test_guard_desugar); + TEST_RUN(test_named_args); + TEST_RUN(test_field_access); + TEST_RUN(test_method_call_parse); + TEST_RUN(test_match_parse); + TEST_RUN(test_enum_decl); + TEST_RUN(test_for_desugar); + TEST_RUN(test_pipe_operator); + TEST_RUN(test_type_alias_parse); + TEST_RUN(test_trait_decl); + TEST_RUN(test_array_type_parse); + TEST_RUN(test_if_expr_parse); return test_summary(); }