bd02a4989e
- lexer: 新增 TOK_MUT 关键字 - ast: AST_ASSIGN_STMT 节点 + let_stmt.is_mut 标志 - parser: ‘let mut’ 前缀识别 + ‘ident = expr;’ 赋值语句 - sema: Symbol.is_mut 可变性检查(不可变变量赋值报错) - codegen: AST_ASSIGN_STMT → store 指令 - 新增集成测试 06_mut_while.l(while 循环 + 计数器) 基于 Codex 分析报告 P0 建议。
340 lines
13 KiB
C
340 lines
13 KiB
C
#include "parser.h"
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
typedef struct {
|
|
const Token* tokens;
|
|
size_t count;
|
|
size_t pos;
|
|
const char* filename;
|
|
Arena* arena;
|
|
} Parser;
|
|
|
|
// === 向前看 ===
|
|
// Returns the token under the cursor without consuming it.
// No bounds check against p->count is performed.
static const Token* peek(const Parser* p) {
    return p->tokens + p->pos;
}
|
|
// Consumes the token under the cursor and returns it.
//
// The cursor is not moved when that token is TOK_EOF: several callers consume
// a token unconditionally (type annotations, return types), and stepping past
// the terminator would make a later peek() read outside the token array.
static const Token* advance(Parser* p) {
    const Token* current = &p->tokens[p->pos];
    if (current->kind != TOK_EOF) {
        p->pos++;
    }
    return current;
}
|
|
// Consumes the current token and returns true when its kind is k;
// otherwise leaves the cursor where it is and returns false.
static bool match(Parser* p, TokenKind k) {
    if (peek(p)->kind != k) {
        return false;
    }
    p->pos++;
    return true;
}
|
|
// Requires the current token to be of kind k.
//
// On success the token is consumed and returned. On mismatch nothing is
// consumed, *e is filled with msg, the parser's filename and the position of
// the offending token, and NULL is returned.
static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) {
    const Token* current = peek(p);
    if (current->kind != k) {
        e->message = msg;
        e->filename = p->filename;
        e->line = current->line;
        e->col = current->col;
        return NULL;
    }
    return advance(p);
}
|
|
|
|
// === 运算符优先级定义 ===
|
|
// Binding strength used by the expression parser; a larger value binds tighter.
typedef enum {
    PREC_NONE    = 0,   // returned by tok_to_prec() for tokens that are not binary operators
    PREC_OR      = 20,  // ||
    PREC_AND     = 30,  // &&
    PREC_COMPARE = 40,  // == != < > <= >=
    PREC_TERM    = 50,  // + -
    PREC_FACTOR  = 60,  // * / %
    PREC_UNARY   = 70,  // minimum precedence used when parsing a unary operand
} Precedence;
|
|
|
|
// Maps a token kind to its binary-operator precedence.
// Any token that is not a binary operator yields PREC_NONE.
static Precedence tok_to_prec(TokenKind kind) {
    Precedence prec = PREC_NONE;

    switch (kind) {
        case TOK_PIPE_PIPE:
            prec = PREC_OR;
            break;
        case TOK_AND_AND:
            prec = PREC_AND;
            break;
        case TOK_EQ_EQ:
        case TOK_BANG_EQ:
        case TOK_LT:
        case TOK_GT:
        case TOK_LT_EQ:
        case TOK_GT_EQ:
            prec = PREC_COMPARE;
            break;
        case TOK_PLUS:
        case TOK_MINUS:
            prec = PREC_TERM;
            break;
        case TOK_STAR:
        case TOK_SLASH:
        case TOK_PERCENT:
            prec = PREC_FACTOR;
            break;
        default:
            break;
    }

    return prec;
}
|
|
|
|
// Maps an operator token to the BinaryOp stored in the AST.
// Token kinds that are not listed fall back to OP_ADD.
static BinaryOp tok_to_binop(TokenKind kind) {
    BinaryOp op = OP_ADD;

    switch (kind) {
        // arithmetic
        case TOK_PLUS:      op = OP_ADD; break;
        case TOK_MINUS:     op = OP_SUB; break;
        case TOK_STAR:      op = OP_MUL; break;
        case TOK_SLASH:     op = OP_DIV; break;
        case TOK_PERCENT:   op = OP_MOD; break;
        // comparison
        case TOK_EQ_EQ:     op = OP_EQ;  break;
        case TOK_BANG_EQ:   op = OP_NE;  break;
        case TOK_LT:        op = OP_LT;  break;
        case TOK_GT:        op = OP_GT;  break;
        case TOK_LT_EQ:     op = OP_LE;  break;
        case TOK_GT_EQ:     op = OP_GE;  break;
        // logical
        case TOK_AND_AND:   op = OP_AND; break;
        case TOK_PIPE_PIPE: op = OP_OR;  break;
        default:            break;
    }

    return op;
}
|
|
|
|
// 向前声明
|
|
static AstNode* parse_expr(Parser* p, ErrorInfo* error);
|
|
static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
|
|
static AstNode* parse_block(Parser* p, ErrorInfo* error);
|
|
|
|
// === 前缀解析 ===
|
|
// Parses a prefix operator followed by its operand.
//
// The caller has already checked that the current token is '-' or '!'. '-'
// produces OP_NEG; anything else produces OP_NOT. The operand is parsed with
// PREC_UNARY as the minimum precedence. Returns NULL when the operand fails
// to parse.
static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
    const Token* op_tok = advance(p);

    AstNode* rhs = parse_expr_prec(p, PREC_UNARY, error);
    if (rhs == NULL) {
        return NULL;
    }

    BinaryOp kind;
    if (op_tok->kind == TOK_MINUS) {
        kind = OP_NEG;
    } else {
        kind = OP_NOT;
    }
    return ast_make_unary(p->arena, kind, rhs, op_tok->line, op_tok->col);
}
|
|
|
|
// Parses a parenthesised expression: '(' expr ')'.
//
// The caller has already seen the '('. The inner expression node is returned
// directly; no separate grouping node is created. Returns NULL if the inner
// expression fails or the closing ')' is missing.
static AstNode* parse_group(Parser* p, ErrorInfo* error) {
    advance(p);  // skip '('

    AstNode* inner = parse_expr(p, error);
    if (inner == NULL) {
        return NULL;
    }

    const Token* close = expect(p, TOK_RPAREN, error, "缺少 ')'");
    return (close != NULL) ? inner : NULL;
}
|
|
|
|
// Consumes one literal token and builds the matching literal node.
//
// Handles integer, float, `true` and `false` tokens. Any other token kind is
// still consumed and NULL is returned without setting an error; the caller
// only invokes this after checking the kind.
static AstNode* parse_literal(Parser* p) {
    const Token* lit = advance(p);

    if (lit->kind == TOK_INT_LIT) {
        return ast_make_literal_i64(p->arena, tok_int_value(lit), lit->line, lit->col);
    }
    if (lit->kind == TOK_FLOAT_LIT) {
        return ast_make_literal_f64(p->arena, tok_float_value(lit), lit->line, lit->col);
    }
    if (lit->kind == TOK_TRUE) {
        return ast_make_literal_bool(p->arena, true, lit->line, lit->col);
    }
    if (lit->kind == TOK_FALSE) {
        return ast_make_literal_bool(p->arena, false, lit->line, lit->col);
    }
    return NULL;
}
|
|
|
|
// Parses an identifier, or a call expression when the identifier is
// immediately followed by '('.
//
// Call arguments are comma-separated expressions; a trailing comma before ')'
// is accepted. At most MAX_CALL_ARGS arguments are allowed; exceeding the
// limit reports "函数参数过多". Returns NULL after an error has been recorded.
static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
    enum { MAX_CALL_ARGS = 16 };

    const Token* name = advance(p);

    // Plain identifier reference.
    if (!match(p, TOK_LPAREN)) {
        return ast_make_ident(p->arena,
                              arena_strdup_impl(p->arena, name->start, name->length),
                              name->line, name->col);
    }

    // Function call: collect arguments on the stack first.
    AstNode* args[MAX_CALL_ARGS];
    int arg_count = 0;
    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        if (arg_count >= MAX_CALL_ARGS) {
            error->message = "函数参数过多";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            return NULL;
        }
        AstNode* arg = parse_expr(p, error);
        if (!arg) return NULL;
        args[arg_count++] = arg;

        if (!match(p, TOK_COMMA)) break;
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;

    size_t bytes = (size_t)arg_count * sizeof(AstNode*);
    AstNode** arg_arr = arena_alloc_impl(p->arena, bytes);
    // memcpy requires valid pointers even for a zero-length copy, and a
    // zero-byte arena request may not yield one, so skip the copy for f().
    if (arg_count > 0) {
        memcpy(arg_arr, args, bytes);
    }
    return ast_make_call(p->arena,
                         arena_strdup_impl(p->arena, name->start, name->length),
                         arg_arr, arg_count, name->line, name->col);
}
|
|
|
|
// === Pratt 主循环 ===
|
|
// Core of the expression parser (precedence climbing).
//
// First parses one prefix form (unary operator, parenthesised group, literal,
// identifier or call). Then, while the next token is a binary operator that
// binds tighter than min_prec, consumes it and parses the right-hand side with
// that operator's own precedence as the new minimum. Because an operator of
// equal precedence stops the inner call, chains such as a - b - c group to
// the left.
//
// Returns NULL once an error has been recorded in *error.
static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
    const Token* first = peek(p);
    AstNode* lhs = NULL;

    // Prefix position.
    switch (first->kind) {
        case TOK_MINUS:
        case TOK_BANG:
            lhs = parse_unary(p, error);
            break;
        case TOK_LPAREN:
            lhs = parse_group(p, error);
            break;
        case TOK_INT_LIT:
        case TOK_FLOAT_LIT:
        case TOK_TRUE:
        case TOK_FALSE:
            lhs = parse_literal(p);
            break;
        case TOK_IDENT:
            lhs = parse_ident_or_call(p, error);
            break;
        default:
            error->message = "无法识别的表达式";
            error->filename = p->filename;
            error->line = first->line;
            error->col = first->col;
            return NULL;
    }
    if (lhs == NULL) {
        return NULL;
    }

    // Infix position: fold binary operators into lhs.
    while (!error->message) {
        TokenKind op_kind = peek(p)->kind;
        Precedence op_prec = tok_to_prec(op_kind);
        if (op_prec <= min_prec) {
            break;
        }

        const Token* op_tok = advance(p);
        AstNode* rhs = parse_expr_prec(p, op_prec, error);
        if (rhs == NULL) {
            return NULL;
        }
        lhs = ast_make_binary(p->arena, tok_to_binop(op_kind), lhs, rhs,
                              op_tok->line, op_tok->col);
    }

    return lhs;
}
|
|
|
|
// Parses a full expression, i.e. one with no minimum operator precedence.
static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
    AstNode* expr = parse_expr_prec(p, PREC_NONE, error);
    return expr;
}
|
|
|
|
// === 类型工具 ===
|
|
// True when k is one of the type-name tokens: i64, f64, bool or void.
static bool is_type_token(TokenKind k) {
    switch (k) {
        case TOK_I64:
        case TOK_F64:
        case TOK_BOOL:
        case TOK_VOID:
            return true;
        default:
            return false;
    }
}
|
|
|
|
// Converts a type-name token to its TypeKind.
// TOK_VOID and every unlisted token kind map to TYPE_VOID.
static TypeKind token_to_type(TokenKind k) {
    if (k == TOK_I64) {
        return TYPE_I64;
    }
    if (k == TOK_F64) {
        return TYPE_F64;
    }
    if (k == TOK_BOOL) {
        return TYPE_BOOL;
    }
    return TYPE_VOID;
}
|
|
|
|
// === 语句解析 ===
|
|
static AstNode* parse_statement(Parser* p, ErrorInfo* error);
|
|
|
|
// Parses a brace-delimited block: '{' statement* '}'.
//
// Statements are collected in a fixed-size stack buffer and then copied into
// the arena. A block with more than MAX_BLOCK_STMTS statements is rejected
// with an error instead of writing past the end of the buffer. The block node
// carries the position of the opening brace.
//
// Returns NULL once an error has been recorded in *error.
static AstNode* parse_block(Parser* p, ErrorInfo* error) {
    enum { MAX_BLOCK_STMTS = 256 };

    const Token* open = peek(p);
    if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) return NULL;

    AstNode* stmts[MAX_BLOCK_STMTS];
    int count = 0;
    while (peek(p)->kind != TOK_RBRACE && peek(p)->kind != TOK_EOF && !error->message) {
        if (count >= MAX_BLOCK_STMTS) {
            error->message = "代码块语句过多";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            return NULL;
        }
        AstNode* s = parse_statement(p, error);
        if (!s) return NULL;
        stmts[count++] = s;
    }
    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;

    size_t bytes = (size_t)count * sizeof(AstNode*);
    AstNode** arr = arena_alloc_impl(p->arena, bytes);
    // An empty block requests zero bytes; memcpy needs valid pointers even for
    // a zero-length copy, so only copy when there is something to copy.
    if (count > 0) {
        memcpy(arr, stmts, bytes);
    }
    return ast_make_block(p->arena, arr, count, open->line, open->col);
}
|
|
|
|
static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
|
|
const Token* t = peek(p);
|
|
|
|
if (t->kind == TOK_LET) {
|
|
advance(p);
|
|
bool is_mut = false;
|
|
if (peek(p)->kind == TOK_MUT) { is_mut = true; advance(p); }
|
|
const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名");
|
|
if (!name) return NULL;
|
|
// 可选的类型标注
|
|
TypeKind annot_type = TYPE_UNKNOWN;
|
|
bool has_type_annot = false;
|
|
if (match(p, TOK_COLON)) {
|
|
const Token* type_tok = advance(p);
|
|
if (!is_type_token(type_tok->kind)) {
|
|
error->message = "无效的类型标注"; error->filename = p->filename;
|
|
error->line = type_tok->line; error->col = type_tok->col; return NULL;
|
|
}
|
|
annot_type = token_to_type(type_tok->kind);
|
|
has_type_annot = true;
|
|
}
|
|
if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
|
|
AstNode* init = parse_expr(p, error);
|
|
if (!init) return NULL;
|
|
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
|
return ast_make_let(p->arena,
|
|
arena_strdup_impl(p->arena, name->start, name->length),
|
|
annot_type, has_type_annot, is_mut, init, t->line, t->col);
|
|
}
|
|
|
|
if (t->kind == TOK_IF) {
|
|
advance(p);
|
|
AstNode* cond = parse_expr(p, error);
|
|
if (!cond) return NULL;
|
|
AstNode* then_block = parse_block(p, error);
|
|
if (!then_block) return NULL;
|
|
AstNode* else_block = NULL;
|
|
if (match(p, TOK_ELSE)) {
|
|
if (peek(p)->kind == TOK_IF) {
|
|
else_block = parse_statement(p, error);
|
|
} else {
|
|
else_block = parse_block(p, error);
|
|
}
|
|
if (!else_block) return NULL;
|
|
}
|
|
return ast_make_if(p->arena, cond, then_block, else_block, t->line, t->col);
|
|
}
|
|
|
|
if (t->kind == TOK_WHILE) {
|
|
advance(p);
|
|
AstNode* cond = parse_expr(p, error);
|
|
if (!cond) return NULL;
|
|
AstNode* body = parse_block(p, error);
|
|
if (!body) return NULL;
|
|
return ast_make_while(p->arena, cond, body, t->line, t->col);
|
|
}
|
|
|
|
if (t->kind == TOK_RETURN) {
|
|
advance(p);
|
|
if (match(p, TOK_SEMICOLON)) {
|
|
return ast_make_return(p->arena, NULL, t->line, t->col);
|
|
}
|
|
AstNode* expr = parse_expr(p, error);
|
|
if (!expr) return NULL;
|
|
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
|
return ast_make_return(p->arena, expr, t->line, t->col);
|
|
}
|
|
|
|
// 赋值语句: ident = expr ;
|
|
if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_ASSIGN) {
|
|
const Token* name = advance(p); // 消费标识符
|
|
advance(p); // 消费 '='
|
|
AstNode* value = parse_expr(p, error);
|
|
if (!value) return NULL;
|
|
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
|
return ast_make_assign(p->arena,
|
|
arena_strdup_impl(p->arena, name->start, name->length),
|
|
value, name->line, name->col);
|
|
}
|
|
|
|
// 表达式语句
|
|
AstNode* expr = parse_expr(p, error);
|
|
if (!expr) return NULL;
|
|
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
|
return ast_make_expr_stmt(p->arena, expr, t->line, t->col);
|
|
}
|
|
|
|
// === 函数解析 ===
|
|
// Parses one function definition:
//   <fn-token> name '(' [name ':' type {',' name ':' type} [',']] ')' ['->' type] block
//
// Parameters are collected in a fixed-size stack buffer; more than MAX_PARAMS
// parameters is reported as an error instead of overflowing the buffer. The
// return type defaults to TYPE_VOID when no '->' clause is present.
//
// NOTE(review): the first token is consumed without checking its kind, and
// parse() calls this for any non-EOF token, so a definition introduced by a
// token other than the function keyword is still accepted — confirm whether
// that is intended.
//
// Returns NULL once an error has been recorded in *error.
static AstNode* parse_function(Parser* p, ErrorInfo* error) {
    enum { MAX_PARAMS = 64 };

    const Token* fn_tok = advance(p);  // fn
    const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名");
    if (!name) return NULL;
    if (!expect(p, TOK_LPAREN, error, "缺少 '('")) return NULL;

    // Parameter list.
    AstNode* params[MAX_PARAMS];
    int pcount = 0;
    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        if (pcount >= MAX_PARAMS) {
            error->message = "函数参数过多";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            return NULL;
        }

        const Token* pname = expect(p, TOK_IDENT, error, "参数名");
        if (!pname) return NULL;
        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;

        const Token* ptype = advance(p);
        if (!is_type_token(ptype->kind)) {
            error->message = "无效的参数类型";
            error->filename = p->filename;
            error->line = ptype->line;
            error->col = ptype->col;
            return NULL;
        }

        params[pcount++] = ast_make_parameter(p->arena,
            arena_strdup_impl(p->arena, pname->start, pname->length),
            token_to_type(ptype->kind), pname->line, pname->col);

        if (!match(p, TOK_COMMA)) break;
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;

    // Optional return type.
    TypeKind ret = TYPE_VOID;
    if (match(p, TOK_ARROW)) {
        const Token* rt = advance(p);
        if (!is_type_token(rt->kind)) {
            error->message = "无效的返回类型";
            error->filename = p->filename;
            error->line = rt->line;
            error->col = rt->col;
            return NULL;
        }
        ret = token_to_type(rt->kind);
    }

    AstNode* body = parse_block(p, error);
    if (!body) return NULL;

    size_t bytes = (size_t)pcount * sizeof(AstNode*);
    AstNode** parr = arena_alloc_impl(p->arena, bytes);
    // A parameterless function requests zero bytes; memcpy needs valid
    // pointers even for a zero-length copy, so only copy when pcount > 0.
    if (pcount > 0) {
        memcpy(parr, params, bytes);
    }
    return ast_make_function(p->arena,
                             arena_strdup_impl(p->arena, name->start, name->length),
                             parr, pcount, ret, body, fn_tok->line, fn_tok->col);
}
|
|
|
|
// === 程序入口 ===
|
|
// Entry point: parses a whole token stream into a program node.
//
// a        - arena used for every AST allocation
// tokens   - token array; the loop stops at the first TOK_EOF token
// count    - number of elements in tokens
// filename - stored in *error when a problem is reported
// error    - receives the first error; if error->message is already set on
//            entry, nothing is parsed and NULL is returned
//
// Returns the program node, or NULL on failure. Parsing stops at the first
// function that fails, so the function list never contains a NULL entry.
// More than MAX_FUNCTIONS top-level functions is reported as an error
// instead of overflowing the stack buffer. A NULL or empty token array
// yields an empty program.
AstNode* parse(Arena* a, const Token* tokens, size_t count,
               const char* filename, ErrorInfo* error) {
    enum { MAX_FUNCTIONS = 256 };

    Parser p = {.tokens = tokens, .count = count, .pos = 0,
                .filename = filename, .arena = a};
    AstNode* functions[MAX_FUNCTIONS];
    int fn_count = 0;

    // Without any token there is nothing peek() could legally read.
    bool has_tokens = (tokens != NULL && count > 0);

    while (has_tokens && peek(&p)->kind != TOK_EOF && !error->message) {
        if (fn_count >= MAX_FUNCTIONS) {
            error->message = "函数定义过多";
            error->filename = filename;
            error->line = peek(&p)->line;
            error->col = peek(&p)->col;
            return NULL;
        }
        AstNode* fn = parse_function(&p, error);
        if (!fn) return NULL;
        functions[fn_count++] = fn;
    }
    if (error->message) return NULL;

    size_t bytes = (size_t)fn_count * sizeof(AstNode*);
    AstNode** arr = arena_alloc_impl(a, bytes);
    // An empty program requests zero bytes; memcpy needs valid pointers even
    // for a zero-length copy, so only copy when there are functions.
    if (fn_count > 0) {
        memcpy(arr, functions, bytes);
    }
    return ast_make_program(a, arr, fn_count, 0, 0);
}
|