#include "parse_internal.h"

#include <stdio.h>
#include <string.h>

// Recursion depth (shared program-wide).
int parse_depth = 0;

// Fixed capacities of the on-stack scratch arrays used while parsing.
enum {
    EXPR_MAX_CALL_ARGS = 16,
    EXPR_MAX_STRUCT_FIELDS = 32
};

// Arguments collected by expr_parse_call_args(); both arrays are arena-allocated.
typedef struct {
    AstNode** args;
    const char** names; // NULL when no argument was written as `name: expr`
    int count;
} ExprCallArgs;

// Records `message` in `error`, located at token `at` of the file being parsed.
static void expr_set_error(ErrorInfo* error, const Parser* p, const Token* at, const char* message) {
    error->message = message;
    error->filename = p->filename;
    error->line = at->line;
    error->col = at->col;
}

// === Operator token -> Precedence mapping ===
// Returns PREC_NONE for any token that is not a binary operator.
Precedence tok_to_prec(TokenKind kind) {
    switch (kind) {
    case TOK_PIPE_PIPE:
        return PREC_OR;
    case TOK_AND_AND:
        return PREC_AND;
    case TOK_EQ_EQ:
    case TOK_BANG_EQ:
    case TOK_LT:
    case TOK_GT:
    case TOK_LT_EQ:
    case TOK_GT_EQ:
        return PREC_COMPARE;
    case TOK_PLUS:
    case TOK_MINUS:
        return PREC_TERM;
    case TOK_STAR:
    case TOK_SLASH:
    case TOK_PERCENT:
        return PREC_FACTOR;
    default:
        return PREC_NONE;
    }
}

// === Operator token -> BinaryOp mapping ===
// Falls back to OP_ADD for tokens that are not binary operators; callers in
// this file only pass tokens for which tok_to_prec() returned a real precedence.
BinaryOp tok_to_binop(TokenKind kind) {
    switch (kind) {
    case TOK_PLUS:      return OP_ADD;
    case TOK_MINUS:     return OP_SUB;
    case TOK_STAR:      return OP_MUL;
    case TOK_SLASH:     return OP_DIV;
    case TOK_PERCENT:   return OP_MOD;
    case TOK_EQ_EQ:     return OP_EQ;
    case TOK_BANG_EQ:   return OP_NE;
    case TOK_LT:        return OP_LT;
    case TOK_GT:        return OP_GT;
    case TOK_LT_EQ:     return OP_LE;
    case TOK_GT_EQ:     return OP_GE;
    case TOK_AND_AND:   return OP_AND;
    case TOK_PIPE_PIPE: return OP_OR;
    default:            return OP_ADD;
    }
}

// === Prefix parsers ===

// Parses a unary expression. The current token is the operator: TOK_MINUS maps
// to OP_NEG, anything else to OP_NOT. Returns NULL if the operand fails to parse.
AstNode* parse_unary(Parser* p, ErrorInfo* error) {
    const Token* op = advance(p);
    AstNode* operand = parse_expr_prec(p, PREC_UNARY, error);
    if (!operand) return NULL;

    BinaryOp uop = (op->kind == TOK_MINUS) ? OP_NEG : OP_NOT;
    return ast_make_unary(p->arena, uop, operand, tok_loc(op));
}

// Parses a parenthesized expression and returns the inner expression node
// (no dedicated grouping node is created). Returns NULL on error.
AstNode* parse_group(Parser* p, ErrorInfo* error) {
    advance(p); // skip '('
    AstNode* expr = parse_expr(p, error);
    if (!expr) return NULL;
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
    return expr;
}

// Parses an integer, float, char, bool or string literal from the current token.
// Returns NULL for any other token kind, or when a string interpolation is
// missing its closing ')' (in which case `error` is filled in).
AstNode* parse_literal(Parser* p, ErrorInfo* error) {
    const Token* t = advance(p);

    switch (t->kind) {
    case TOK_INT_LIT:
        return ast_make_literal_i64(p->arena, tok_int_value(t), tok_loc(t));

    case TOK_FLOAT_LIT:
        return ast_make_literal_f64(p->arena, tok_float_value(t), tok_loc(t));

    case TOK_CHAR_LIT: {
        // NOTE(review): this reads the token text as the character itself, i.e.
        // it assumes the lexer excludes the surrounding quotes -- confirm.
        int64_t val = 0;
        if (t->length >= 2 && t->start[0] == '\\') {
            switch (t->start[1]) {
            case 'n':  val = '\n'; break;
            case 't':  val = '\t'; break;
            case 'r':  val = '\r'; break;
            case '0':  val = '\0'; break; // previously fell through to the digit '0'
            case '\\': val = '\\'; break;
            case '\'': val = '\''; break;
            // Unknown escape: use the escaped character itself, read as an
            // unsigned byte like the non-escaped path below.
            default:   val = (unsigned char)t->start[1]; break;
            }
        } else {
            val = (unsigned char)t->start[0];
        }
        return ast_make_literal_char(p->arena, (int)val, tok_loc(t));
    }

    case TOK_TRUE:
        return ast_make_literal_bool(p->arena, true, tok_loc(t));

    case TOK_FALSE:
        return ast_make_literal_bool(p->arena, false, tok_loc(t));

    case TOK_STR_LIT: {
        // Private NUL-terminated copy of the token text; it is split in place below.
        char* str = arena_alloc_impl(p->arena, t->length + 1);
        memcpy(str, t->start, t->length);
        str[t->length] = '\0';

        // String interpolation: "Hello, \(name)!" -> "Hello, " + name + "!"
        // Only the first "\(" is expanded, and the text up to the first ')' is
        // taken verbatim as an identifier name; later occurrences stay literal.
        char* interp = strstr(str, "\\(");
        if (!interp) {
            return ast_make_literal_str(p->arena, str, tok_loc(t));
        }

        *interp = '\0'; // terminate the leading text
        char* pre = str;
        char* expr_start = interp + 2; // skip the backslash and '('
        char* close = strchr(expr_start, ')');
        if (!close) {
            expr_set_error(error, p, t, "字符串插值缺少 ')'");
            return NULL;
        }
        *close = '\0';
        char* post = close + 1;

        // Build: pre + ident [+ post]
        AstNode* result = ast_make_literal_str(
            p->arena, arena_strdup_impl(p->arena, pre, strlen(pre)), tok_loc(t));
        AstNode* ident = ast_make_ident(
            p->arena, arena_strdup_impl(p->arena, expr_start, strlen(expr_start)), tok_loc(t));
        result = ast_make_binary(p->arena, OP_ADD, result, ident, tok_loc(t));

        if (post[0] != '\0') {
            AstNode* post_str = ast_make_literal_str(
                p->arena, arena_strdup_impl(p->arena, post, strlen(post)), tok_loc(t));
            result = ast_make_binary(p->arena, OP_ADD, result, post_str, tok_loc(t));
        }
        return result;
    }

    default:
        return NULL;
    }
}

// === Struct initializer: Name { field: val, ... } ===
// `name` is the already-consumed struct name token; the current token must be
// '{'. A trailing comma before '}' is accepted. At most
// EXPR_MAX_STRUCT_FIELDS fields are allowed. Returns NULL on error.
AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error) {
    advance(p); // skip '{'

    const char* fnames[EXPR_MAX_STRUCT_FIELDS];
    AstNode* fvals[EXPR_MAX_STRUCT_FIELDS];
    int fcount = 0;

    while (peek(p)->kind != TOK_RBRACE && !error->message) {
        if (fcount >= EXPR_MAX_STRUCT_FIELDS) {
            expr_set_error(error, p, peek(p), "结构体初始化字段过多 (最多32)");
            return NULL;
        }

        const Token* fname = expect(p, TOK_IDENT, error, "字段名");
        if (!fname) return NULL;
        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;

        AstNode* val = parse_expr(p, error);
        if (!val) return NULL;

        fnames[fcount] = arena_strdup_impl(p->arena, fname->start, fname->length);
        fvals[fcount] = val;
        fcount++;

        if (peek(p)->kind != TOK_COMMA) break;
        advance(p);
    }
    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;

    // Move the collected fields from the stack into arena storage.
    const char** n_arr = arena_alloc_impl(p->arena, fcount * sizeof(const char*));
    AstNode** v_arr = arena_alloc_impl(p->arena, fcount * sizeof(AstNode*));
    if (fcount > 0) {
        memcpy(n_arr, fnames, fcount * sizeof(const char*));
        memcpy(v_arr, fvals, fcount * sizeof(AstNode*));
    }

    return ast_make_struct_init(p->arena,
                                arena_strdup_impl(p->arena, name->start, name->length),
                                n_arr, v_arr, fcount, tok_loc(name));
}

// Parses a call argument list. On entry the opening '(' has already been
// consumed; on success the closing ')' has been consumed too and `out` holds
// arena-allocated copies of the arguments.
//
// Each argument is either positional (`expr`) or named (`ident: expr`); a
// positional argument after a named one is an error. `too_many_msg` is the
// message reported when more than EXPR_MAX_CALL_ARGS arguments are supplied.
// Returns false on error (with `error` filled in by this function or a callee).
static bool expr_parse_call_args(Parser* p, ErrorInfo* error, const char* too_many_msg,
                                 ExprCallArgs* out) {
    AstNode* args[EXPR_MAX_CALL_ARGS];
    const char* names[EXPR_MAX_CALL_ARGS];
    int count = 0;
    bool seen_named = false;

    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        if (count >= EXPR_MAX_CALL_ARGS) {
            expr_set_error(error, p, peek(p), too_many_msg);
            return false;
        }

        // Named argument: an identifier immediately followed by ':'.
        if (peek(p)->kind == TOK_IDENT && p->tokens[p->pos + 1].kind == TOK_COLON) {
            const Token* aname = advance(p);
            advance(p); // skip ':'
            names[count] = arena_strdup_impl(p->arena, aname->start, aname->length);
            seen_named = true;
        } else {
            if (seen_named) {
                expr_set_error(error, p, peek(p), "命名参数必须放在位置参数之后");
                return false;
            }
            names[count] = NULL;
        }

        args[count] = parse_expr(p, error);
        if (!args[count]) return false;
        count++;

        if (peek(p)->kind != TOK_COMMA) break;
        advance(p);
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return false;

    out->count = count;
    out->args = arena_alloc_impl(p->arena, count * sizeof(AstNode*));
    out->names = NULL;
    if (count > 0) {
        memcpy(out->args, args, count * sizeof(AstNode*));
    }
    // The name array is only materialized when at least one name was given.
    if (seen_named) {
        out->names = arena_alloc_impl(p->arena, count * sizeof(const char*));
        memcpy(out->names, names, count * sizeof(const char*));
    }
    return true;
}

// Look-ahead used for `Name::ident(`: with the current token being '(',
// reports whether the parenthesized text contains a top-level ',' or ':'.
// That is how a module function call (several or named arguments) is told
// apart from an enum variant carrying a single payload expression.
//
// Nesting is tracked across (), {} and [] so that commas and colons inside a
// nested struct initializer, block or index expression are not mistaken for
// argument separators. Scanning stops at the matching ')' or at TOK_EOF
// (the token array is assumed to be TOK_EOF-terminated).
static bool expr_parens_hold_arg_list(const Parser* p) {
    int depth = 1;

    for (size_t probe = p->pos + 1; p->tokens[probe].kind != TOK_EOF; probe++) {
        switch (p->tokens[probe].kind) {
        case TOK_LPAREN:
        case TOK_LBRACE:
        case TOK_LBRACKET:
            depth++;
            break;
        case TOK_RPAREN:
        case TOK_RBRACE:
        case TOK_RBRACKET:
            if (--depth == 0) return false;
            break;
        case TOK_COMMA:
        case TOK_COLON:
            if (depth == 1) return true;
            break;
        default:
            break;
        }
    }
    return false;
}

// === Identifier / function call / struct initializer ===
// Starting at an identifier token, parses one of:
//   Name::fn(a, b) / Name::fn(x: 1)   -> call node named "Name::fn"
//   Name::Variant / Name::Variant(e)  -> enum variant with optional payload
//   Name { field: val, ... }          -> struct initializer
//   name(args...)                     -> call node
//   name                              -> identifier node
// Returns NULL on error.
AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
    const Token* name = advance(p);

    // Enum variant or module function: Name::Variant or Name::fn
    if (peek(p)->kind == TOK_COLON_COLON) {
        advance(p); // skip '::'
        const Token* variant = expect(p, TOK_IDENT, error, "枚举变体名");
        if (!variant) return NULL;

        // Module function call: Name::fn(a, b) or Name::fn(x: 1)
        if (peek(p)->kind == TOK_LPAREN && expr_parens_hold_arg_list(p)) {
            advance(p); // skip '('

            ExprCallArgs call;
            if (!expr_parse_call_args(p, error, "参数过多", &call)) return NULL;

            // Build "Name::fn" by plain concatenation; this does not depend on
            // the integer type of Token.length the way a "%.*s" format would.
            size_t name_len = (size_t)name->length;
            size_t variant_len = (size_t)variant->length;
            char* full_name = arena_alloc_impl(p->arena, name_len + variant_len + 3);
            memcpy(full_name, name->start, name_len);
            memcpy(full_name + name_len, "::", 2);
            memcpy(full_name + name_len + 2, variant->start, variant_len);
            full_name[name_len + 2 + variant_len] = '\0';

            return ast_make_call(p->arena, full_name, call.args, call.names, call.count,
                                 tok_loc(name));
        }

        // Enum variant: Name::Variant or Name::Variant(expr)
        AstNode* payload = NULL;
        if (match(p, TOK_LPAREN)) {
            payload = parse_expr(p, error);
            if (!payload) return NULL;
            if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
        }
        return ast_make_enum_variant(p->arena,
                                     arena_strdup_impl(p->arena, name->start, name->length),
                                     arena_strdup_impl(p->arena, variant->start, variant->length),
                                     payload, tok_loc(name));
    }

    // Struct initializer: only when '{' is followed by `ident :`, so that a
    // block following an identifier is not swallowed as an initializer.
    if (peek(p)->kind == TOK_LBRACE
        && p->tokens[p->pos + 1].kind == TOK_IDENT
        && p->tokens[p->pos + 2].kind == TOK_COLON) {
        return parse_struct_init(p, name, error);
    }

    // Function call: name(...)
    if (match(p, TOK_LPAREN)) {
        ExprCallArgs call;
        if (!expr_parse_call_args(p, error, "函数参数过多", &call)) return NULL;

        return ast_make_call(p->arena,
                             arena_strdup_impl(p->arena, name->start, name->length),
                             call.args, call.names, call.count, tok_loc(name));
    }

    // Plain identifier.
    return ast_make_ident(p->arena,
                          arena_strdup_impl(p->arena, name->start, name->length),
                          tok_loc(name));
}

// Parses an optional `else` branch. Stores NULL in *out_else when there is no
// `else`; otherwise stores either a chained `if` expression (`else if ...`) or
// a block. Returns false when an `else` was present but its body failed to parse.
static bool expr_parse_else(Parser* p, Precedence min_prec, ErrorInfo* error,
                            AstNode** out_else) {
    *out_else = NULL;
    if (!match(p, TOK_ELSE)) return true;

    if (peek(p)->kind == TOK_IF) {
        *out_else = parse_expr_prec(p, min_prec, error);
    } else {
        *out_else = parse_block(p, error);
    }
    return *out_else != NULL;
}

// Parses an `if` expression; the current token is TOK_IF.
//   if cond { then } [else ...]               -> if node
//   if let Pattern = expr { then } [else ...] -> desugared, see below
// Returns NULL on error.
static AstNode* expr_parse_if(Parser* p, Precedence min_prec, ErrorInfo* error) {
    const Token* if_tok = advance(p);

    if (peek(p)->kind != TOK_LET) {
        // if-expr: if cond { then } else { else }
        AstNode* cond = parse_expr(p, error);
        if (!cond) return NULL;
        AstNode* then_block = parse_block(p, error);
        if (!then_block) return NULL;

        AstNode* else_block;
        if (!expr_parse_else(p, min_prec, error, &else_block)) return NULL;

        return ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok));
    }

    // if let: the pattern (Enum::Variant or Enum::Variant(var)) is parsed as
    // an ordinary expression.
    advance(p); // skip 'let'
    AstNode* pattern = parse_expr(p, error);
    if (!pattern) return NULL;
    if (!expect(p, TOK_ASSIGN, error, "if let 缺少 '='")) return NULL;

    AstNode* match_expr = parse_expr(p, error);
    if (!match_expr) return NULL;
    AstNode* then_block = parse_block(p, error);
    if (!then_block) return NULL;

    AstNode* else_block;
    if (!expr_parse_else(p, min_prec, error, &else_block)) return NULL;

    // Desugar to: { let __iflet_N = expr; if __iflet_N == pattern { then } else { else } }
    // The counter gives every `if let` in the process a distinct temporary name.
    static int iflet_counter = 0;
    char vname_buf[32];
    snprintf(vname_buf, sizeof(vname_buf), "__iflet_%d", iflet_counter++);
    const char* vname = arena_strdup_impl(p->arena, vname_buf, strlen(vname_buf));

    AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, false, match_expr,
                                     NULL, 0, NULL, 0, tok_loc(if_tok));
    AstNode* cond = ast_make_binary(p->arena, OP_EQ,
                                    ast_make_ident(p->arena, vname, tok_loc(if_tok)),
                                    pattern, tok_loc(if_tok));
    AstNode* if_stmt = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok));

    AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*));
    stmts[0] = let_stmt;
    stmts[1] = if_stmt;
    return ast_make_block(p->arena, stmts, 2, tok_loc(if_tok));
}

// === Pratt main loop ===
// Parses one prefix expression, then keeps extending it with postfix
// operators (`.field`, `.method(...)`, `[index]`), the pipe operator and
// binary operators whose precedence is strictly greater than `min_prec`.
// Returns NULL on error, with `error` describing the first problem found.
AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
    const Token* tok = peek(p);
    AstNode* left = NULL;

    // Prefix position.
    switch (tok->kind) {
    case TOK_IF:
        left = expr_parse_if(p, min_prec, error);
        break;
    case TOK_MINUS:
    case TOK_BANG:
        left = parse_unary(p, error);
        break;
    case TOK_LPAREN:
        left = parse_group(p, error);
        break;
    case TOK_INT_LIT:
    case TOK_FLOAT_LIT:
    case TOK_CHAR_LIT:
    case TOK_TRUE:
    case TOK_FALSE:
    case TOK_STR_LIT:
        left = parse_literal(p, error);
        break;
    case TOK_IDENT:
        left = parse_ident_or_call(p, error);
        break;
    default:
        expr_set_error(error, p, tok, "无法识别的表达式");
        return NULL;
    }
    if (!left) return NULL;

    // Infix / postfix loop.
    while (!error->message) {
        TokenKind kind = peek(p)->kind;

        // Pipe: expr |> func(args...) -> func(expr, args...)
        if (kind == TOK_PIPE) {
            Precedence prec = PREC_PIPE;
            if (prec <= min_prec) break;
            const Token* op = advance(p);

            // The right-hand side must be a function call.
            AstNode* right = parse_expr_prec(p, prec, error);
            if (!right) return NULL;
            if (right->kind != AST_CALL_EXPR) {
                expr_set_error(error, p, op, "管道右侧必须是函数调用");
                return NULL;
            }
            if (right->as.call.arg_count >= EXPR_MAX_CALL_ARGS) {
                expr_set_error(error, p, op, "管道参数过多");
                return NULL;
            }

            // Insert `left` as the first argument (F#/Elixir style).
            // NOTE(review): only args/arg_count are rewritten here. If the call
            // node also keeps the per-argument name array passed to
            // ast_make_call, that array is not shifted by one -- confirm.
            AstNode** new_args = arena_alloc_impl(
                p->arena, (right->as.call.arg_count + 1) * sizeof(AstNode*));
            new_args[0] = left;
            memcpy(new_args + 1, right->as.call.args,
                   right->as.call.arg_count * sizeof(AstNode*));
            right->as.call.args = new_args;
            right->as.call.arg_count++;
            left = right;
            continue;
        }

        // Postfix member access: expr.field or expr.method(args)
        if (kind == TOK_DOT) {
            advance(p); // skip '.'
            const Token* field = expect(p, TOK_IDENT, error, "缺少字段名");
            if (!field) return NULL;
            const char* member_name = arena_strdup_impl(p->arena, field->start, field->length);

            if (peek(p)->kind == TOK_LPAREN) {
                advance(p); // skip '('
                ExprCallArgs call;
                if (!expr_parse_call_args(p, error, "参数过多", &call)) return NULL;
                left = ast_make_method_call(p->arena, left, member_name, call.args, call.names,
                                            call.count, tok_loc(field));
            } else {
                left = ast_make_field_access(p->arena, left, member_name, tok_loc(field));
            }
            continue;
        }

        // Postfix index: expr[expr]
        if (kind == TOK_LBRACKET) {
            const Token* lbrack = advance(p); // skip '['
            AstNode* index = parse_expr(p, error);
            if (!index) return NULL;
            if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL;
            left = ast_make_index_expr(p->arena, left, index, tok_loc(lbrack));
            continue;
        }

        // Binary operator. Parsing the right side at the operator's own
        // precedence makes operators of equal precedence left-associative.
        Precedence prec = tok_to_prec(kind);
        if (prec <= min_prec) break;
        const Token* op = advance(p);
        AstNode* right = parse_expr_prec(p, prec, error);
        if (!right) return NULL;
        left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, tok_loc(op));
    }

    return left;
}

// Parses a full expression (lowest precedence). Returns NULL on error.
AstNode* parse_expr(Parser* p, ErrorInfo* error) {
    return parse_expr_prec(p, PREC_NONE, error);
}

// === Type utilities ===
// Maps a built-in type keyword token to its TypeKind; any other token yields TYPE_VOID.
TypeKind token_to_type(TokenKind k) {
    switch (k) {
    case TOK_I32:  return TYPE_I32;
    case TOK_I64:  return TYPE_I64;
    case TOK_U64:  return TYPE_U64;
    case TOK_F64:  return TYPE_F64;
    case TOK_BOOL: return TYPE_BOOL;
    case TOK_CHAR: return TYPE_CHAR;
    case TOK_STR:  return TYPE_STR;
    default:       return TYPE_VOID;
    }
}

// === Type expression parsing (built-in type / struct name / array type) ===
// Arrays use postfix syntax: Type[N], with N an integer literal.
// On error, `error` is filled in and the returned TypeInfo has kind TYPE_ERROR.
TypeInfo parse_type_expr(Parser* p, ErrorInfo* error) {
    const Token* t = peek(p);
    TypeInfo ti = {0};

    // `Self` type (refers to the implementing type inside a trait).
    if (t->kind == TOK_SELF) {
        advance(p);
        ti.kind = TYPE_STRUCT;
        ti.struct_name = "Self";
        return ti;
    }

    // Base type: a built-in keyword or a struct name.
    if (tok_is_type(t->kind)) {
        advance(p);
        ti.kind = token_to_type(t->kind);
    } else if (t->kind == TOK_IDENT) {
        advance(p);
        ti.kind = TYPE_STRUCT;
        ti.struct_name = arena_strdup_impl(p->arena, t->start, t->length);
    } else {
        expr_set_error(error, p, t, "无效的类型");
        ti.kind = TYPE_ERROR;
        return ti;
    }

    // No array suffix: the base type is the result.
    if (peek(p)->kind != TOK_LBRACKET) {
        return ti;
    }

    // Postfix array dimension: Type[N] -> TYPE_ARRAY
    advance(p); // skip '['
    const Token* size_tok = expect(p, TOK_INT_LIT, error, "数组大小必须是整数常量");
    if (!size_tok) {
        ti.kind = TYPE_ERROR;
        return ti;
    }
    int64_t size = tok_int_value(size_tok);
    if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) {
        ti.kind = TYPE_ERROR;
        return ti;
    }

    TypeInfo arr_ti = {0};
    arr_ti.kind = TYPE_ARRAY;
    arr_ti.element_type = ti.kind;
    arr_ti.element_struct_name = ti.struct_name;
    arr_ti.array_size = size;
    return arr_ti;
}