fix: P0审查修复 P2补强 — parser.c 拆分 + 测试扩充

P0-③: parser.c 1211行 → parser.c 662行 + expr.c 498行 + parse_internal.h 71行
  - expr.c: 表达式解析 (Pratt主循环/字面量/标识符/类型/运算符)
  - parser.c: 语句/声明/程序入口 (block/match/let/if/while/for/guard/fn/parse)
  - parse_internal.h: 共享 Parser struct + 内联辅助 + 向前声明

P2-①: parser 测试 5函数→20函数, 15断言→54断言
  - 新增: struct声明、字面量类型、优先级链、guard去糖、命名参数
  - 新增: 字段访问、方法调用、match、enum声明、for去糖、管道
  - 新增: 类型别名、trait声明、数组类型、if表达式

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-06 18:57:07 +08:00
parent 466be76fd8
commit 90d081c3fd
4 changed files with 736 additions and 560 deletions
+498
View File
@@ -0,0 +1,498 @@
#include "parse_internal.h"
#include <stdio.h>
#include <string.h>
// Recursion depth counter (shared program-wide; defined here with external linkage).
// NOTE(review): nothing in the visible part of this file reads or updates it —
// presumably it is maintained by the statement parser; confirm against parser.c.
int parse_depth = 0;
// === Operator token -> Precedence mapping ===
// Returns the binding precedence used by the Pratt loop for a binary-operator
// token. Every token kind not listed below maps to PREC_NONE.
Precedence tok_to_prec(TokenKind kind) {
    Precedence level = PREC_NONE;

    switch (kind) {
    case TOK_PIPE_PIPE:
        level = PREC_OR;
        break;
    case TOK_AND_AND:
        level = PREC_AND;
        break;
    case TOK_EQ_EQ:
    case TOK_BANG_EQ:
    case TOK_LT:
    case TOK_GT:
    case TOK_LT_EQ:
    case TOK_GT_EQ:
        level = PREC_COMPARE;
        break;
    case TOK_PLUS:
    case TOK_MINUS:
        level = PREC_TERM;
        break;
    case TOK_STAR:
    case TOK_SLASH:
    case TOK_PERCENT:
        level = PREC_FACTOR;
        break;
    default:
        break;
    }
    return level;
}
// === Operator token -> BinaryOp mapping ===
// Translates a binary-operator token kind into the AST operator code.
// Token kinds that are not listed fall back to OP_ADD.
BinaryOp tok_to_binop(TokenKind kind) {
    BinaryOp result = OP_ADD; // fallback for unlisted token kinds

    switch (kind) {
    // arithmetic
    case TOK_PLUS:      result = OP_ADD; break;
    case TOK_MINUS:     result = OP_SUB; break;
    case TOK_STAR:      result = OP_MUL; break;
    case TOK_SLASH:     result = OP_DIV; break;
    case TOK_PERCENT:   result = OP_MOD; break;
    // comparison
    case TOK_EQ_EQ:     result = OP_EQ;  break;
    case TOK_BANG_EQ:   result = OP_NE;  break;
    case TOK_LT:        result = OP_LT;  break;
    case TOK_GT:        result = OP_GT;  break;
    case TOK_LT_EQ:     result = OP_LE;  break;
    case TOK_GT_EQ:     result = OP_GE;  break;
    // logical
    case TOK_AND_AND:   result = OP_AND; break;
    case TOK_PIPE_PIPE: result = OP_OR;  break;
    default:
        break;
    }
    return result;
}
// === Prefix parsing ===
// Parses a prefix unary expression. The current token is consumed as the
// operator: TOK_MINUS yields OP_NEG, any other token yields OP_NOT.
// The operand is parsed at PREC_UNARY. Returns NULL if the operand fails.
AstNode* parse_unary(Parser* p, ErrorInfo* error) {
    const Token* op_tok = advance(p);

    AstNode* rhs = parse_expr_prec(p, PREC_UNARY, error);
    if (rhs == NULL) {
        return NULL;
    }

    BinaryOp which = OP_NOT;
    if (op_tok->kind == TOK_MINUS) {
        which = OP_NEG;
    }
    return ast_make_unary(p->arena, which, rhs, tok_loc(op_tok));
}
// Parses a parenthesised expression `( expr )`. The current token (the '(')
// is consumed unconditionally; the inner expression node is returned as-is,
// without a wrapper node. Returns NULL if the inner expression fails or the
// closing ')' is not found.
AstNode* parse_group(Parser* p, ErrorInfo* error) {
    advance(p); // consume '('

    AstNode* inner = parse_expr(p, error);
    if (inner != NULL && expect(p, TOK_RPAREN, error, "缺少 ')'")) {
        return inner;
    }
    return NULL;
}
AstNode* parse_literal(Parser* p, ErrorInfo* error) {
const Token* t = advance(p);
switch (t->kind) {
case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), tok_loc(t));
case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), tok_loc(t));
case TOK_CHAR_LIT: {
int64_t val = 0;
if (t->length >= 2 && t->start[0] == '\\') {
switch (t->start[1]) {
case 'n': val = '\n'; break;
case 't': val = '\t'; break;
case '\\': val = '\\'; break;
case '\'': val = '\''; break;
default: val = t->start[1]; break;
}
} else {
val = (unsigned char)t->start[0];
}
return ast_make_literal_char(p->arena, (int)val, tok_loc(t));
}
case TOK_TRUE: return ast_make_literal_bool(p->arena, true, tok_loc(t));
case TOK_FALSE: return ast_make_literal_bool(p->arena, false, tok_loc(t));
case TOK_STR_LIT: {
char* str = arena_alloc_impl(p->arena, t->length + 1);
memcpy(str, t->start, t->length);
str[t->length] = '\0';
// 字符串插值: "Hello, \(name)!" → "Hello, " + name + "!"
char* interp = strstr(str, "\\(");
if (interp) {
*interp = '\0'; // 截断前半部分
char* pre = str;
char* expr_start = interp + 2; // 跳过 \(
char* close = strchr(expr_start, ')');
if (!close) {
error->message = "字符串插值缺少 ')'"; error->filename = p->filename;
error->line = t->line; error->col = t->col; return NULL;
}
*close = '\0';
char* post = close + 1;
// 生成: pre + expr + post
AstNode* result = ast_make_literal_str(p->arena,
arena_strdup_impl(p->arena, pre, strlen(pre)), tok_loc(t));
// 将插值表达式按标识符解析
AstNode* expr = ast_make_ident(p->arena,
arena_strdup_impl(p->arena, expr_start, strlen(expr_start)), tok_loc(t));
result = ast_make_binary(p->arena, OP_ADD, result, expr, tok_loc(t));
if (post[0] != '\0') {
AstNode* post_str = ast_make_literal_str(p->arena,
arena_strdup_impl(p->arena, post, strlen(post)), tok_loc(t));
result = ast_make_binary(p->arena, OP_ADD, result, post_str, tok_loc(t));
}
return result;
}
return ast_make_literal_str(p->arena, str, tok_loc(t));
}
default: return NULL;
}
}
// === Struct initializer: Name { field: val, ... } ===
// Called with the current token being '{'; `name` is the already-consumed
// struct name token. Collects at most 32 `field: expr` pairs (a trailing comma
// is accepted), then copies the names and values into arena arrays and builds
// the struct-init node. Returns NULL on any parse failure.
AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error) {
    enum { MAX_INIT_FIELDS = 32 };

    advance(p); // consume '{'

    const char* keys[MAX_INIT_FIELDS];
    AstNode* values[MAX_INIT_FIELDS];
    int n = 0;

    for (;;) {
        if (peek(p)->kind == TOK_RBRACE || error->message) {
            break;
        }
        if (n >= MAX_INIT_FIELDS) {
            error->message = "结构体初始化字段过多 (最多32)";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            return NULL;
        }

        const Token* key = expect(p, TOK_IDENT, error, "字段名");
        if (key == NULL) {
            return NULL;
        }
        if (!expect(p, TOK_COLON, error, "缺少 ':'")) {
            return NULL;
        }
        AstNode* value = parse_expr(p, error);
        if (value == NULL) {
            return NULL;
        }

        keys[n] = arena_strdup_impl(p->arena, key->start, key->length);
        values[n] = value;
        n += 1;

        // A comma continues the list; anything else ends it.
        if (peek(p)->kind != TOK_COMMA) {
            break;
        }
        advance(p);
    }

    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) {
        return NULL;
    }

    // Move the collected pairs from the stack into arena-owned arrays.
    const char** key_arr = arena_alloc_impl(p->arena, n * sizeof(const char*));
    memcpy(key_arr, keys, n * sizeof(const char*));
    AstNode** value_arr = arena_alloc_impl(p->arena, n * sizeof(AstNode*));
    memcpy(value_arr, values, n * sizeof(AstNode*));

    return ast_make_struct_init(p->arena,
                                arena_strdup_impl(p->arena, name->start, name->length),
                                key_arr, value_arr, n, tok_loc(name));
}
// === 标识符 / 函数调用 / 结构体初始化 ===
AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
const Token* name = advance(p);
// 枚举变体或模块函数: Name::Variant 或 Name::fn
if (peek(p)->kind == TOK_COLON_COLON) {
advance(p); // 跳过 ::
const Token* variant = expect(p, TOK_IDENT, error, "枚举变体名");
if (!variant) return NULL;
// Name::fn 或 Name::Variant 或 Name::Variant(payload)
if (peek(p)->kind == TOK_LPAREN) {
// 前进探测: 检查括号内是否有多参数或命名参数(→函数调用)还是单表达式(→枚举payload)
size_t probe = p->pos + 1;
int paren_depth = 1;
bool has_comma = false, has_named = false;
while (paren_depth > 0 && p->tokens[probe].kind != TOK_EOF) {
if (p->tokens[probe].kind == TOK_LPAREN) paren_depth++;
else if (p->tokens[probe].kind == TOK_RPAREN) { paren_depth--; if (paren_depth == 0) break; }
else if (paren_depth == 1 && p->tokens[probe].kind == TOK_COMMA) has_comma = true;
else if (paren_depth == 1 && p->tokens[probe].kind == TOK_COLON) has_named = true;
probe++;
}
if (has_comma || has_named) {
// 模块函数调用: Name::fn(a, b) 或 Name::fn(x: 1)
advance(p); // 跳过 '('
AstNode* args[16]; const char* arg_names[16]; int arg_count = 0;
bool seen_named = false;
while (peek(p)->kind != TOK_RPAREN && !error->message) {
if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) {
const Token* aname = advance(p); advance(p);
arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length);
seen_named = true;
} else {
if (seen_named) { error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
arg_names[arg_count] = NULL;
}
args[arg_count] = parse_expr(p, error);
if (!args[arg_count]) return NULL;
arg_count++;
if (peek(p)->kind == TOK_COMMA) advance(p); else break;
}
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
const char** name_arr = seen_named
? memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*))
: NULL;
char* full_name = arena_alloc_impl(p->arena, name->length + variant->length + 4);
sprintf(full_name, "%.*s::%.*s", name->length, name->start, variant->length, variant->start);
return ast_make_call(p->arena, full_name, arg_arr, name_arr, arg_count, tok_loc(name));
}
}
// 枚举 payload: Name::Variant 或 Name::Variant(expr)
AstNode* payload = NULL;
if (match(p, TOK_LPAREN)) {
payload = parse_expr(p, error);
if (!payload) return NULL;
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
}
return ast_make_enum_variant(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
arena_strdup_impl(p->arena, variant->start, variant->length),
payload, tok_loc(name));
}
// 结构体初始化: Name { field: val, ... }
if (peek(p)->kind == TOK_LBRACE) {
const Token* after_brace = &p->tokens[p->pos + 1];
if (after_brace->kind == TOK_IDENT) {
const Token* after_fname = &p->tokens[p->pos + 2];
if (after_fname->kind == TOK_COLON) {
return parse_struct_init(p, name, error);
}
}
}
// 函数调用: name(...)
if (match(p, TOK_LPAREN)) {
AstNode* args[16]; const char* arg_names[16]; int arg_count = 0;
bool seen_named = false;
while (peek(p)->kind != TOK_RPAREN && !error->message) {
if (arg_count >= 16) {
error->message = "函数参数过多"; error->filename = p->filename;
error->line = peek(p)->line; error->col = peek(p)->col; return NULL;
}
// 命名参数: name: expr
if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) {
const Token* aname = advance(p); advance(p); // 跳过标识符和 ':'
arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length);
seen_named = true;
} else {
if (seen_named) {
error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename;
error->line = peek(p)->line; error->col = peek(p)->col; return NULL;
}
arg_names[arg_count] = NULL;
}
args[arg_count] = parse_expr(p, error);
if (!args[arg_count]) return NULL;
arg_count++;
if (peek(p)->kind == TOK_COMMA) advance(p);
else break;
}
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
const char** name_arr = seen_named
? memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*))
: NULL;
return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length),
arg_arr, name_arr, arg_count, tok_loc(name));
}
return ast_make_ident(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
tok_loc(name));
}
// === Pratt 主循环 ===
AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
const Token* tok = peek(p);
AstNode* left = NULL;
// 前缀解析
if (tok->kind == TOK_IF) {
const Token* if_tok = advance(p);
// if let: if let Pattern = expr { then } else { else } → 去糖为 let+if
if (peek(p)->kind == TOK_LET) {
advance(p); // 跳过 let
// 解析模式: Enum::Variant 或 Enum::Variant(var)
AstNode* pattern = parse_expr(p, error); // 解析枚举变体
if (!pattern) return NULL;
if (!expect(p, TOK_ASSIGN, error, "if let 缺少 '='")) return NULL;
AstNode* match_expr = parse_expr(p, error);
if (!match_expr) return NULL;
AstNode* then_block = parse_block(p, error);
if (!then_block) return NULL;
AstNode* else_block = NULL;
if (match(p, TOK_ELSE)) {
if (peek(p)->kind == TOK_IF)
else_block = parse_expr_prec(p, min_prec, error);
else
else_block = parse_block(p, error);
if (!else_block) return NULL;
}
// 去糖: { let __match = expr; if __match == pattern { then } else { else } }
static int iflet_counter = 0;
char vname_buf[32];
snprintf(vname_buf, sizeof(vname_buf), "__iflet_%d", iflet_counter++);
const char* vname = arena_strdup_impl(p->arena, vname_buf, strlen(vname_buf));
AstNode* let_stmt = ast_make_let(p->arena,
vname, TYPE_UNKNOWN,
false, false, match_expr, NULL, 0, NULL, 0, tok_loc(if_tok));
AstNode* cond = ast_make_binary(p->arena, OP_EQ,
ast_make_ident(p->arena, vname, tok_loc(if_tok)),
pattern, tok_loc(if_tok));
AstNode* if_stmt = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok));
AstNode* stmts[2] = { let_stmt, if_stmt };
AstNode** arr = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*));
memcpy(arr, stmts, 2 * sizeof(AstNode*));
left = ast_make_block(p->arena, arr, 2, tok_loc(if_tok));
} else {
// if-expr: if cond { then } else { else }
AstNode* cond = parse_expr(p, error);
if (!cond) return NULL;
AstNode* then_block = parse_block(p, error);
if (!then_block) return NULL;
AstNode* else_block = NULL;
if (match(p, TOK_ELSE)) {
if (peek(p)->kind == TOK_IF)
else_block = parse_expr_prec(p, min_prec, error);
else
else_block = parse_block(p, error);
if (!else_block) return NULL;
}
left = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok));
}
} else if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) {
left = parse_unary(p, error);
} else if (tok->kind == TOK_LPAREN) {
left = parse_group(p, error);
} else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT ||
tok->kind == TOK_CHAR_LIT ||
tok->kind == TOK_TRUE || tok->kind == TOK_FALSE ||
tok->kind == TOK_STR_LIT) {
left = parse_literal(p, error);
} else if (tok->kind == TOK_IDENT) {
left = parse_ident_or_call(p, error);
} else {
error->message = "无法识别的表达式"; error->filename = p->filename;
error->line = tok->line; error->col = tok->col;
return NULL;
}
if (!left) return NULL;
// 中缀/后置解析循环
while (!error->message) {
TokenKind kind = peek(p)->kind;
// 管道: expr |> func(args...) → func(args..., expr)
if (kind == TOK_PIPE) {
Precedence prec = PREC_PIPE;
if (prec <= min_prec) break;
const Token* op = advance(p);
// RHS 必须是函数调用(不带管道时解析)
AstNode* right = parse_expr_prec(p, prec, error);
if (!right) return NULL;
if (right->kind != AST_CALL_EXPR) {
error->message = "管道右侧必须是函数调用"; error->filename = p->filename;
error->line = op->line; error->col = op->col;
return NULL;
}
// 将 left 作为第一个参数插入(F#/Elixir 风格)
if (right->as.call.arg_count >= 16) {
error->message = "管道参数过多"; error->filename = p->filename;
error->line = op->line; error->col = op->col; return NULL;
}
AstNode** new_args = arena_alloc_impl(p->arena, (right->as.call.arg_count + 1) * sizeof(AstNode*));
new_args[0] = left;
memcpy(new_args + 1, right->as.call.args, right->as.call.arg_count * sizeof(AstNode*));
right->as.call.args = new_args;
right->as.call.arg_count++;
left = right;
continue;
}
// 后置字段访问: expr.field 或 expr.method(args)
if (kind == TOK_DOT) {
advance(p); // 跳过 '.'
const Token* field = expect(p, TOK_IDENT, error, "缺少字段名");
if (!field) return NULL;
const char* member_name = arena_strdup_impl(p->arena, field->start, field->length);
// 方法调用: expr.method(args)
if (peek(p)->kind == TOK_LPAREN) {
advance(p); // 跳过 '('
AstNode* args[16]; const char* arg_names[16]; int arg_count = 0;
bool seen_named = false;
while (peek(p)->kind != TOK_RPAREN && !error->message) {
if (arg_count >= 16) { error->message = "参数过多"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
if (peek(p)->kind == TOK_IDENT && (p->tokens[p->pos + 1].kind == TOK_COLON)) {
const Token* aname = advance(p); advance(p);
arg_names[arg_count] = arena_strdup_impl(p->arena, aname->start, aname->length);
seen_named = true;
} else {
if (seen_named) { error->message = "命名参数必须放在位置参数之后"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
arg_names[arg_count] = NULL;
}
args[arg_count] = parse_expr(p, error);
if (!args[arg_count]) return NULL;
arg_count++;
if (peek(p)->kind == TOK_COMMA) advance(p); else break;
}
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
const char** name_arr = seen_named
? memcpy(arena_alloc_impl(p->arena, arg_count * sizeof(const char*)), arg_names, arg_count * sizeof(const char*))
: NULL;
left = ast_make_method_call(p->arena, left, member_name, arg_arr, name_arr, arg_count, tok_loc(field));
} else {
left = ast_make_field_access(p->arena, left, member_name, tok_loc(field));
}
continue;
}
// 后置索引: expr[expr]
if (kind == TOK_LBRACKET) {
const Token* lbrack = advance(p); // 跳过 '['
AstNode* index = parse_expr(p, error);
if (!index) return NULL;
if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL;
left = ast_make_index_expr(p->arena, left, index, tok_loc(lbrack));
continue;
}
// 中缀运算符
Precedence prec = tok_to_prec(kind);
if (prec <= min_prec) break;
const Token* op = advance(p);
AstNode* right = parse_expr_prec(p, prec, error);
if (!right) return NULL;
left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, tok_loc(op));
}
return left;
}
// Entry point for parsing a complete expression: runs the Pratt parser with
// the lowest minimum precedence (PREC_NONE) and returns its result unchanged
// (NULL on failure).
AstNode* parse_expr(Parser* p, ErrorInfo* error) {
    AstNode* root = parse_expr_prec(p, PREC_NONE, error);
    return root;
}
// === Type utilities ===
// Maps a built-in type keyword token to its TypeKind. Any token kind that is
// not one of the listed type keywords maps to TYPE_VOID.
TypeKind token_to_type(TokenKind k) {
    TypeKind result = TYPE_VOID;

    switch (k) {
    case TOK_I32:  result = TYPE_I32;  break;
    case TOK_I64:  result = TYPE_I64;  break;
    case TOK_U64:  result = TYPE_U64;  break;
    case TOK_F64:  result = TYPE_F64;  break;
    case TOK_BOOL: result = TYPE_BOOL; break;
    case TOK_CHAR: result = TYPE_CHAR; break;
    case TOK_STR:  result = TYPE_STR;  break;
    default:
        break;
    }
    return result;
}
// === Type expression parsing (built-in type / struct name / array type) ===
// Accepted forms:
//   Self      -> TYPE_STRUCT named "Self" (returned immediately, no array suffix)
//   <builtin> -> the kind given by token_to_type
//   <ident>   -> TYPE_STRUCT carrying an arena copy of the name
// A built-in or named type may be followed by one postfix dimension `[N]`,
// with N an integer literal, which yields TYPE_ARRAY whose element kind and
// element struct name come from the base type.
// On failure the returned TypeInfo has kind TYPE_ERROR. For an unrecognised
// leading token this function fills `error` itself; for a bad array suffix it
// relies on expect() (presumably expect() records the error — confirm).
TypeInfo parse_type_expr(Parser* p, ErrorInfo* error) {
    const Token* head = peek(p);
    TypeInfo base = {0};

    // `Self`: refers to the implementing type inside a trait.
    if (head->kind == TOK_SELF) {
        advance(p);
        base.kind = TYPE_STRUCT;
        base.struct_name = "Self";
        return base;
    }

    // Base type.
    if (tok_is_type(head->kind)) {
        advance(p);
        base.kind = token_to_type(head->kind);
    } else if (head->kind != TOK_IDENT) {
        error->message = "无效的类型";
        error->filename = p->filename;
        error->line = head->line;
        error->col = head->col;
        base.kind = TYPE_ERROR;
        return base;
    } else {
        advance(p);
        base.kind = TYPE_STRUCT;
        base.struct_name = arena_strdup_impl(p->arena, head->start, head->length);
    }

    // No postfix dimension: the base type is the result.
    if (peek(p)->kind != TOK_LBRACKET) {
        return base;
    }

    // Postfix array dimension: Type[N] -> TYPE_ARRAY
    advance(p); // consume '['
    const Token* size_tok = expect(p, TOK_INT_LIT, error, "数组大小必须是整数常量");
    if (size_tok == NULL) {
        base.kind = TYPE_ERROR;
        return base;
    }
    int64_t size = tok_int_value(size_tok);
    if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) {
        base.kind = TYPE_ERROR;
        return base;
    }

    TypeInfo array = {0};
    array.kind = TYPE_ARRAY;
    array.element_type = base.kind;
    array.element_struct_name = base.struct_name;
    array.array_size = size;
    return array;
}