#include "parser.h"

// NOTE(review): the names of the two system headers were missing from the
// reviewed copy of this file. <stdbool.h> and <string.h> are restored from
// usage (bool/true/false, memcpy) -- confirm against the original.
#include <stdbool.h>
#include <string.h>

// Capacities of the fixed-size stack arrays used while collecting child nodes.
enum {
    MAX_CALL_ARGS = 16,
    MAX_PARAMS = 64,
    MAX_BLOCK_STMTS = 256,
    MAX_FUNCTIONS = 256,
};

// Parser state: a cursor (`pos`) over `tokens`, the file name used when filling
// in errors, and the arena handed to every ast_make_* / arena_* call.
// `count` is stored but not consulted anywhere in this file; the parser stops
// on the TOK_EOF token instead.
typedef struct {
    const Token* tokens;
    size_t count;
    size_t pos;
    const char* filename;
    Arena* arena;
} Parser;

// === Lookahead ===

// Returns the token under the cursor without consuming it.
static const Token* peek(const Parser* p) {
    return &p->tokens[p->pos];
}

// Returns the token under the cursor and moves the cursor to the next token.
// The cursor is never moved past a TOK_EOF token, so a later peek() cannot
// read beyond the end of a TOK_EOF-terminated token array.
static const Token* advance(Parser* p) {
    const Token* tok = peek(p);
    if (tok->kind != TOK_EOF) {
        p->pos++;
    }
    return tok;
}

// Consumes the current token and returns true if it has kind `k`; otherwise
// leaves the cursor untouched and returns false.
static bool match(Parser* p, TokenKind k) {
    if (peek(p)->kind != k) {
        return false;
    }
    advance(p);
    return true;
}

// Fills `e` with `msg`, the parser's file name and the position of token `at`.
static void fail_at(const Parser* p, ErrorInfo* e, const Token* at, const char* msg) {
    e->message = msg;
    e->filename = p->filename;
    e->line = at->line;
    e->col = at->col;
}

// Consumes and returns the current token if it has kind `k`. Otherwise records
// `msg` at the current token's position in `e` and returns NULL.
static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) {
    if (peek(p)->kind == k) {
        return advance(p);
    }
    fail_at(p, e, peek(p), msg);
    return NULL;
}

// Copies `count` node pointers from the scratch array `nodes` into a fresh
// arena allocation and returns it. The allocation is still requested when
// `count` is 0, but memcpy is skipped in that case because its pointer
// arguments must be valid even for a zero-length copy.
static AstNode** copy_nodes(Parser* p, AstNode* const* nodes, int count) {
    size_t bytes = (size_t)count * sizeof(AstNode*);
    AstNode** out = arena_alloc_impl(p->arena, bytes);
    if (count > 0) {
        memcpy(out, nodes, bytes);
    }
    return out;
}

// === Operator precedence ===

// Binding strength of binary operators; a larger value binds tighter.
typedef enum {
    PREC_NONE = 0,
    PREC_OR = 20,
    PREC_AND = 30,
    PREC_COMPARE = 40,
    PREC_TERM = 50,
    PREC_FACTOR = 60,
    PREC_UNARY = 70,
} Precedence;

// Maps a token kind to its binary-operator precedence, or PREC_NONE when the
// token is not a binary operator.
static Precedence tok_to_prec(TokenKind kind) {
    switch (kind) {
        case TOK_PIPE_PIPE:
            return PREC_OR;
        case TOK_AND_AND:
            return PREC_AND;
        case TOK_EQ_EQ:
        case TOK_BANG_EQ:
        case TOK_LT:
        case TOK_GT:
        case TOK_LT_EQ:
        case TOK_GT_EQ:
            return PREC_COMPARE;
        case TOK_PLUS:
        case TOK_MINUS:
            return PREC_TERM;
        case TOK_STAR:
        case TOK_SLASH:
        case TOK_PERCENT:
            return PREC_FACTOR;
        default:
            return PREC_NONE;
    }
}

// Maps a binary-operator token kind to its BinaryOp. Any other kind yields
// OP_ADD; the only caller in this file passes kinds for which tok_to_prec()
// returned something above PREC_NONE.
static BinaryOp tok_to_binop(TokenKind kind) {
    switch (kind) {
        case TOK_PLUS:      return OP_ADD;
        case TOK_MINUS:     return OP_SUB;
        case TOK_STAR:      return OP_MUL;
        case TOK_SLASH:     return OP_DIV;
        case TOK_PERCENT:   return OP_MOD;
        case TOK_EQ_EQ:     return OP_EQ;
        case TOK_BANG_EQ:   return OP_NE;
        case TOK_LT:        return OP_LT;
        case TOK_GT:        return OP_GT;
        case TOK_LT_EQ:     return OP_LE;
        case TOK_GT_EQ:     return OP_GE;
        case TOK_AND_AND:   return OP_AND;
        case TOK_PIPE_PIPE: return OP_OR;
        default:            return OP_ADD;
    }
}

// Forward declarations
static AstNode* parse_expr(Parser* p, ErrorInfo* error);
static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
static AstNode* parse_block(Parser* p, ErrorInfo* error);

// === Prefix parsing ===

// Parses a prefix operator followed by its operand. The caller has already
// checked that the current token is TOK_MINUS or TOK_BANG. The operand is
// parsed at PREC_UNARY, so no binary operator is absorbed into it.
static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
    const Token* op = advance(p);
    AstNode* operand = parse_expr_prec(p, PREC_UNARY, error);
    if (!operand) {
        return NULL;
    }
    BinaryOp uop = (op->kind == TOK_MINUS) ? OP_NEG : OP_NOT;
    return ast_make_unary(p->arena, uop, operand, op->line, op->col);
}

// Parses a parenthesised expression and returns the inner expression node.
static AstNode* parse_group(Parser* p, ErrorInfo* error) {
    advance(p);  // skip '('
    AstNode* expr = parse_expr(p, error);
    if (!expr) {
        return NULL;
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) {
        return NULL;
    }
    return expr;
}

// Consumes one literal token (integer, float, true, false) and builds the
// matching literal node. Returns NULL, without recording an error, for any
// other token kind.
static AstNode* parse_literal(Parser* p) {
    const Token* t = advance(p);
    switch (t->kind) {
        case TOK_INT_LIT:
            return ast_make_literal_i64(p->arena, tok_int_value(t), t->line, t->col);
        case TOK_FLOAT_LIT:
            return ast_make_literal_f64(p->arena, tok_float_value(t), t->line, t->col);
        case TOK_TRUE:
            return ast_make_literal_bool(p->arena, true, t->line, t->col);
        case TOK_FALSE:
            return ast_make_literal_bool(p->arena, false, t->line, t->col);
        default:
            return NULL;
    }
}

// Parses an identifier, or a call `name(arg, ...)` when the identifier is
// directly followed by '('. At most MAX_CALL_ARGS arguments are accepted.
static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
    const Token* name = advance(p);

    if (!match(p, TOK_LPAREN)) {
        return ast_make_ident(p->arena,
                              arena_strdup_impl(p->arena, name->start, name->length),
                              name->line, name->col);
    }

    // Function call
    AstNode* args[MAX_CALL_ARGS];
    int arg_count = 0;
    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        if (arg_count >= MAX_CALL_ARGS) {
            fail_at(p, error, peek(p), "函数参数过多");
            return NULL;
        }
        args[arg_count] = parse_expr(p, error);
        if (!args[arg_count]) {
            return NULL;
        }
        arg_count++;
        // A comma re-enters the loop; the loop test then also accepts ')'
        // right after the comma.
        if (peek(p)->kind != TOK_COMMA) {
            break;
        }
        advance(p);
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) {
        return NULL;
    }

    AstNode** arg_arr = copy_nodes(p, args, arg_count);
    return ast_make_call(p->arena,
                         arena_strdup_impl(p->arena, name->start, name->length),
                         arg_arr, arg_count, name->line, name->col);
}

// === Pratt main loop ===

// Parses an expression whose binary operators all bind tighter than
// `min_prec`. An operator with precedence <= min_prec ends the loop, so
// operators of equal precedence group to the left.
// Returns NULL on failure; the failure paths in this function record an error.
static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
    const Token* tok = peek(p);
    AstNode* left = NULL;

    // Prefix position
    switch (tok->kind) {
        case TOK_MINUS:
        case TOK_BANG:
            left = parse_unary(p, error);
            break;
        case TOK_LPAREN:
            left = parse_group(p, error);
            break;
        case TOK_INT_LIT:
        case TOK_FLOAT_LIT:
        case TOK_TRUE:
        case TOK_FALSE:
            left = parse_literal(p);
            break;
        case TOK_IDENT:
            left = parse_ident_or_call(p, error);
            break;
        default:
            fail_at(p, error, tok, "无法识别的表达式");
            return NULL;
    }
    if (!left) {
        return NULL;
    }

    // Infix loop
    while (!error->message) {
        TokenKind kind = peek(p)->kind;
        Precedence prec = tok_to_prec(kind);
        if (prec <= min_prec) {
            break;
        }
        const Token* op = advance(p);
        AstNode* right = parse_expr_prec(p, prec, error);
        if (!right) {
            return NULL;
        }
        left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, op->line, op->col);
    }
    return left;
}

// Parses a full expression (no minimum precedence).
static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
    return parse_expr_prec(p, PREC_NONE, error);
}

// === Type helpers ===

// True when `k` is one of the type keywords i64, f64, bool, void.
static bool is_type_token(TokenKind k) {
    return k == TOK_I64 || k == TOK_F64 || k == TOK_BOOL || k == TOK_VOID;
}

// Maps a type keyword to its TypeKind; every other kind maps to TYPE_VOID.
static TypeKind token_to_type(TokenKind k) {
    switch (k) {
        case TOK_I64:  return TYPE_I64;
        case TOK_F64:  return TYPE_F64;
        case TOK_BOOL: return TYPE_BOOL;
        default:       return TYPE_VOID;
    }
}

// === Statement parsing ===

static AstNode* parse_statement(Parser* p, ErrorInfo* error);

// Parses `{ statement* }` into a block node positioned at the opening brace.
// At most MAX_BLOCK_STMTS statements are accepted; exceeding that records an
// error instead of writing past the scratch array.
static AstNode* parse_block(Parser* p, ErrorInfo* error) {
    const Token* open = peek(p);
    if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) {
        return NULL;
    }

    AstNode* stmts[MAX_BLOCK_STMTS];
    int count = 0;
    while (peek(p)->kind != TOK_RBRACE && peek(p)->kind != TOK_EOF && !error->message) {
        if (count >= MAX_BLOCK_STMTS) {
            fail_at(p, error, peek(p), "语句块中的语句过多");
            return NULL;
        }
        AstNode* s = parse_statement(p, error);
        if (!s) {
            return NULL;
        }
        stmts[count++] = s;
    }
    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) {
        return NULL;
    }

    AstNode** arr = copy_nodes(p, stmts, count);
    return ast_make_block(p->arena, arr, count, open->line, open->col);
}

// Parses one statement: let, if / else, while, return, assignment, or an
// expression statement. Returns NULL on failure.
static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
    const Token* t = peek(p);

    if (t->kind == TOK_LET) {
        advance(p);
        bool is_mut = false;
        if (peek(p)->kind == TOK_MUT) {
            is_mut = true;
            advance(p);
        }
        const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名");
        if (!name) {
            return NULL;
        }

        // Optional type annotation
        TypeKind annot_type = TYPE_UNKNOWN;
        bool has_type_annot = false;
        if (match(p, TOK_COLON)) {
            const Token* type_tok = advance(p);
            if (!is_type_token(type_tok->kind)) {
                fail_at(p, error, type_tok, "无效的类型标注");
                return NULL;
            }
            annot_type = token_to_type(type_tok->kind);
            has_type_annot = true;
        }

        if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) {
            return NULL;
        }
        AstNode* init = parse_expr(p, error);
        if (!init) {
            return NULL;
        }
        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) {
            return NULL;
        }
        return ast_make_let(p->arena,
                            arena_strdup_impl(p->arena, name->start, name->length),
                            annot_type, has_type_annot, is_mut, init, t->line, t->col);
    }

    if (t->kind == TOK_IF) {
        advance(p);
        AstNode* cond = parse_expr(p, error);
        if (!cond) {
            return NULL;
        }
        AstNode* then_block = parse_block(p, error);
        if (!then_block) {
            return NULL;
        }
        AstNode* else_block = NULL;
        if (match(p, TOK_ELSE)) {
            // `else if` is parsed as a nested if statement in the else slot.
            if (peek(p)->kind == TOK_IF) {
                else_block = parse_statement(p, error);
            } else {
                else_block = parse_block(p, error);
            }
            if (!else_block) {
                return NULL;
            }
        }
        return ast_make_if(p->arena, cond, then_block, else_block, t->line, t->col);
    }

    if (t->kind == TOK_WHILE) {
        advance(p);
        AstNode* cond = parse_expr(p, error);
        if (!cond) {
            return NULL;
        }
        AstNode* body = parse_block(p, error);
        if (!body) {
            return NULL;
        }
        return ast_make_while(p->arena, cond, body, t->line, t->col);
    }

    if (t->kind == TOK_RETURN) {
        advance(p);
        if (match(p, TOK_SEMICOLON)) {
            return ast_make_return(p->arena, NULL, t->line, t->col);
        }
        AstNode* expr = parse_expr(p, error);
        if (!expr) {
            return NULL;
        }
        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) {
            return NULL;
        }
        return ast_make_return(p->arena, expr, t->line, t->col);
    }

    // Assignment statement: ident = expr ;
    // Reading t + 1 is in bounds for a TOK_EOF-terminated array because t is
    // an identifier here, not the final TOK_EOF.
    if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_ASSIGN) {
        const Token* name = advance(p);  // consume the identifier
        advance(p);                      // consume '='
        AstNode* value = parse_expr(p, error);
        if (!value) {
            return NULL;
        }
        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) {
            return NULL;
        }
        return ast_make_assign(p->arena,
                               arena_strdup_impl(p->arena, name->start, name->length),
                               value, name->line, name->col);
    }

    // Expression statement
    AstNode* expr = parse_expr(p, error);
    if (!expr) {
        return NULL;
    }
    if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) {
        return NULL;
    }
    return ast_make_expr_stmt(p->arena, expr, t->line, t->col);
}

// === Function parsing ===

// Parses `fn name(param: type, ...) [-> type] { ... }` into a function node.
// The return type is TYPE_VOID when no `->` clause is present. At most
// MAX_PARAMS parameters are accepted.
// NOTE(review): the first token is consumed without checking that it is the
// `fn` keyword; the only caller in this file invokes this for any token that
// is not TOK_EOF -- confirm whether a keyword check belongs here.
static AstNode* parse_function(Parser* p, ErrorInfo* error) {
    const Token* fn_tok = advance(p);  // fn
    const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名");
    if (!name) {
        return NULL;
    }
    if (!expect(p, TOK_LPAREN, error, "缺少 '('")) {
        return NULL;
    }

    // Parameter list
    AstNode* params[MAX_PARAMS];
    int pcount = 0;
    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        if (pcount >= MAX_PARAMS) {
            fail_at(p, error, peek(p), "函数参数过多");
            return NULL;
        }
        const Token* pname = expect(p, TOK_IDENT, error, "参数名");
        if (!pname) {
            return NULL;
        }
        if (!expect(p, TOK_COLON, error, "缺少 ':'")) {
            return NULL;
        }
        const Token* ptype = advance(p);
        if (!is_type_token(ptype->kind)) {
            fail_at(p, error, ptype, "无效的参数类型");
            return NULL;
        }
        params[pcount++] = ast_make_parameter(
            p->arena, arena_strdup_impl(p->arena, pname->start, pname->length),
            token_to_type(ptype->kind), pname->line, pname->col);
        if (!match(p, TOK_COMMA)) {
            break;
        }
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) {
        return NULL;
    }

    // Return type
    TypeKind ret = TYPE_VOID;
    if (match(p, TOK_ARROW)) {
        const Token* rt = advance(p);
        if (!is_type_token(rt->kind)) {
            fail_at(p, error, rt, "无效的返回类型");
            return NULL;
        }
        ret = token_to_type(rt->kind);
    }

    AstNode* body = parse_block(p, error);
    if (!body) {
        return NULL;
    }

    AstNode** parr = copy_nodes(p, params, pcount);
    return ast_make_function(p->arena,
                             arena_strdup_impl(p->arena, name->start, name->length),
                             parr, pcount, ret, body, fn_tok->line, fn_tok->col);
}

// === Program entry point ===

// Parses a whole token stream into a program node holding its functions.
//
//   a        - arena passed to every node / string allocation
//   tokens   - token array; parsing stops at the first TOK_EOF, so the array
//              must contain one
//   count    - stored in the parser state; not consulted by this file
//   filename - copied into `error` when a parse error is recorded
//   error    - receives message / filename / line / col on failure; parsing
//              only proceeds while error->message is NULL, so it must be NULL
//              on entry
//
// Returns the program node, or NULL on failure. At most MAX_FUNCTIONS
// functions are accepted.
AstNode* parse(Arena* a, const Token* tokens, size_t count, const char* filename,
               ErrorInfo* error) {
    Parser p = {.tokens = tokens, .count = count, .pos = 0, .filename = filename, .arena = a};

    AstNode* functions[MAX_FUNCTIONS];
    int fn_count = 0;
    // The error is tested before peeking so that no token is read once a
    // failure has been recorded.
    while (!error->message && peek(&p)->kind != TOK_EOF) {
        if (fn_count >= MAX_FUNCTIONS) {
            fail_at(&p, error, peek(&p), "函数定义过多");
            return NULL;
        }
        AstNode* fn = parse_function(&p, error);
        if (!fn) {
            return NULL;
        }
        functions[fn_count++] = fn;
    }
    if (error->message) {
        return NULL;
    }

    AstNode** arr = copy_nodes(&p, functions, fn_count);
    return ast_make_program(a, arr, fn_count, 0, 0);
}