Files
l-language/src/parser/parser.c
T
Serendipity 2923e7574d feat: 数组+索引 [T;N], arr[i] (P1 #6)
- lexer: TOK_LBRACKET, TOK_RBRACKET
- type: TYPE_ARRAY + TypeInfo扩展(element_type/array_size)
- ast: AST_INDEX_EXPR, AST_ARRAY_ASSIGN_STMT
- parser: parse_type_expr()支持[T;N], Pratt加[索引], 数组元素赋值
- sema: 数组类型检查, 索引必须i64, 元素赋值类型匹配
- codegen: type_info_to_llvm(TYPE_ARRAY), GEP+load/store
- 新增集成测试: 18_array.l

测试: 136 通过 (41+15+59+21)
2026-06-05 14:19:01 +08:00

695 lines
29 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "parser.h"
#include <string.h>
#include <stdlib.h>
// Parser state: a cursor over a token array plus the context needed to
// report errors and allocate AST nodes.
typedef struct {
const Token* tokens; // token array being parsed
size_t count; // number of tokens, as handed to parse()
size_t pos; // index of the current (not yet consumed) token
const char* filename; // copied into ErrorInfo whenever an error is reported
Arena* arena; // arena passed to every AST constructor and string copy
} Parser;
// === Recursion depth limit ===
// Current nesting depth. parse_block() increments it on entry and decrements
// it again on every exit path.
static int parse_depth = 0;
// parse_block() reports an error once parse_depth exceeds this value.
#define MAX_PARSE_DEPTH 1000
// === Lookahead ===
// Returns the token under the cursor without consuming it.
static const Token* peek(const Parser* p) {
    return p->tokens + p->pos;
}
// Returns the token under the cursor and moves the cursor one token forward.
static const Token* advance(Parser* p) {
    const Token* current = p->tokens + p->pos;
    p->pos += 1;
    return current;
}
// Consumes the current token and returns true when its kind is `k`;
// otherwise leaves the cursor where it is and returns false.
static bool match(Parser* p, TokenKind k) {
    const bool hit = (p->tokens[p->pos].kind == k);
    if (hit) {
        p->pos += 1;
    }
    return hit;
}
// Consumes and returns the current token when its kind is `k`.
// Otherwise fills `e` with `msg`, the parser's filename and the current
// token's line/column, leaves the cursor untouched and returns NULL.
static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) {
    const Token* current = peek(p);
    if (current->kind != k) {
        e->message = msg;
        e->filename = p->filename;
        e->line = current->line;
        e->col = current->col;
        return NULL;
    }
    return advance(p);
}
// === Operator precedence levels ===
// Larger values bind tighter. tok_to_prec() maps binary operator tokens to
// these levels; the gaps between values leave room for new levels.
typedef enum {
PREC_NONE = 0, // not a binary operator; also the floor used by parse_expr()
PREC_OR = 20, // ||
PREC_AND = 30, // &&
PREC_COMPARE = 40, // == != < > <= >=
PREC_TERM = 50, // + -
PREC_FACTOR = 60, // * / %
PREC_UNARY = 70, // level at which the operand of a prefix operator is parsed
PREC_POSTFIX = 80, // .field, call()
} Precedence;
// Maps a token kind to its binary-operator precedence.
// Any kind that is not a binary operator yields PREC_NONE.
static Precedence tok_to_prec(TokenKind kind) {
    Precedence level = PREC_NONE;

    switch (kind) {
    case TOK_PIPE_PIPE:
        level = PREC_OR;
        break;
    case TOK_AND_AND:
        level = PREC_AND;
        break;
    case TOK_EQ_EQ:
    case TOK_BANG_EQ:
    case TOK_LT:
    case TOK_GT:
    case TOK_LT_EQ:
    case TOK_GT_EQ:
        level = PREC_COMPARE;
        break;
    case TOK_PLUS:
    case TOK_MINUS:
        level = PREC_TERM;
        break;
    case TOK_STAR:
    case TOK_SLASH:
    case TOK_PERCENT:
        level = PREC_FACTOR;
        break;
    default:
        break;
    }
    return level;
}
// Maps a binary-operator token kind to its BinaryOp.
// Any other token kind falls back to OP_ADD.
static BinaryOp tok_to_binop(TokenKind kind) {
    BinaryOp op = OP_ADD;

    switch (kind) {
    case TOK_PLUS:      op = OP_ADD; break;
    case TOK_MINUS:     op = OP_SUB; break;
    case TOK_STAR:      op = OP_MUL; break;
    case TOK_SLASH:     op = OP_DIV; break;
    case TOK_PERCENT:   op = OP_MOD; break;
    case TOK_EQ_EQ:     op = OP_EQ;  break;
    case TOK_BANG_EQ:   op = OP_NE;  break;
    case TOK_LT:        op = OP_LT;  break;
    case TOK_GT:        op = OP_GT;  break;
    case TOK_LT_EQ:     op = OP_LE;  break;
    case TOK_GT_EQ:     op = OP_GE;  break;
    case TOK_AND_AND:   op = OP_AND; break;
    case TOK_PIPE_PIPE: op = OP_OR;  break;
    default:
        break;
    }
    return op;
}
// Forward declarations of parsing functions that are defined later in this file.
static AstNode* parse_expr(Parser* p, ErrorInfo* error);
static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
static AstNode* parse_block(Parser* p, ErrorInfo* error);
static AstNode* parse_statement(Parser* p, ErrorInfo* error);
static AstNode* parse_function(Parser* p, ErrorInfo* error);
// === Prefix parsing ===
// Parses a prefix operator followed by its operand. The cursor must be on the
// operator token: TOK_MINUS produces OP_NEG, any other kind produces OP_NOT.
// Returns NULL when the operand fails to parse.
static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
    const Token* op_tok = advance(p);

    AstNode* operand = parse_expr_prec(p, PREC_UNARY, error);
    if (operand == NULL) {
        return NULL;
    }

    BinaryOp uop;
    if (op_tok->kind == TOK_MINUS) {
        uop = OP_NEG;
    } else {
        uop = OP_NOT;
    }
    return ast_make_unary(p->arena, uop, operand, tok_loc(op_tok));
}
// Parses a parenthesised expression. The cursor must be on '('.
// Returns the inner expression node itself; no wrapper node is created.
static AstNode* parse_group(Parser* p, ErrorInfo* error) {
    advance(p); // skip '('

    AstNode* inner = parse_expr(p, error);
    if (inner == NULL) {
        return NULL;
    }

    const Token* close = expect(p, TOK_RPAREN, error, "缺少 ')'");
    return (close != NULL) ? inner : NULL;
}
// Consumes one literal token and builds the matching literal node.
// Returns NULL (without touching any error state) for a non-literal token.
static AstNode* parse_literal(Parser* p) {
    const Token* lit = advance(p);
    const TokenKind kind = lit->kind;

    if (kind == TOK_INT_LIT) {
        return ast_make_literal_i64(p->arena, tok_int_value(lit), tok_loc(lit));
    }
    if (kind == TOK_FLOAT_LIT) {
        return ast_make_literal_f64(p->arena, tok_float_value(lit), tok_loc(lit));
    }
    if (kind == TOK_TRUE) {
        return ast_make_literal_bool(p->arena, true, tok_loc(lit));
    }
    if (kind == TOK_FALSE) {
        return ast_make_literal_bool(p->arena, false, tok_loc(lit));
    }
    if (kind == TOK_STR_LIT) {
        // Copy the token text into the arena and NUL-terminate it.
        char* text = arena_alloc_impl(p->arena, lit->length + 1);
        memcpy(text, lit->start, lit->length);
        text[lit->length] = '\0';
        return ast_make_literal_str(p->arena, text, tok_loc(lit));
    }
    return NULL;
}
// === Struct initializer: Name { field: val, ... } ===
// The cursor must be on '{'; `name` is the already-consumed struct name token.
// Accepts at most 32 fields and allows a trailing comma.
// Returns NULL with `error` filled in on failure.
static AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error) {
    const char* field_names[32];
    AstNode* field_values[32];
    int nfields = 0;

    advance(p); // skip '{'

    for (;;) {
        if (peek(p)->kind == TOK_RBRACE || error->message) {
            break;
        }
        if (nfields >= 32) {
            error->message = "结构体初始化字段过多 (最多32)";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            return NULL;
        }

        const Token* field_tok = expect(p, TOK_IDENT, error, "字段名");
        if (field_tok == NULL) {
            return NULL;
        }
        if (expect(p, TOK_COLON, error, "缺少 ':'") == NULL) {
            return NULL;
        }
        AstNode* value = parse_expr(p, error);
        if (value == NULL) {
            return NULL;
        }

        field_names[nfields] = arena_strdup_impl(p->arena, field_tok->start, field_tok->length);
        field_values[nfields] = value;
        nfields += 1;

        // A comma continues the list; anything else ends it.
        if (!match(p, TOK_COMMA)) {
            break;
        }
    }

    if (expect(p, TOK_RBRACE, error, "缺少 '}'") == NULL) {
        return NULL;
    }

    // Move the collected fields from the stack into arena-owned arrays.
    const size_t name_bytes = nfields * sizeof(const char*);
    const char** names_out = arena_alloc_impl(p->arena, name_bytes);
    memcpy(names_out, field_names, name_bytes);

    const size_t value_bytes = nfields * sizeof(AstNode*);
    AstNode** values_out = arena_alloc_impl(p->arena, value_bytes);
    memcpy(values_out, field_values, value_bytes);

    const char* struct_name = arena_strdup_impl(p->arena, name->start, name->length);
    return ast_make_struct_init(p->arena, struct_name, names_out, values_out,
                                nfields, tok_loc(name));
}
// === Identifier / enum variant / struct initializer / function call ===
// The cursor must be on an identifier. Depending on what follows, builds:
//   Name::Variant       -> enum variant reference
//   Name { field: ...   -> struct initializer
//   name(args...)       -> call
//   name                -> plain identifier
// Returns NULL with `error` filled in on failure.
static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
    // Same limit as the parameter list in parse_function(), so every function
    // that can be declared can also be called.
    enum { MAX_CALL_ARGS = 64 };

    const Token* name = advance(p);

    // Enum variant reference: Name::Variant
    if (peek(p)->kind == TOK_COLON_COLON) {
        advance(p); // skip '::'
        const Token* variant = expect(p, TOK_IDENT, error, "枚举变体名");
        if (!variant) return NULL;
        return ast_make_enum_variant(p->arena,
            arena_strdup_impl(p->arena, name->start, name->length),
            arena_strdup_impl(p->arena, variant->start, variant->length),
            tok_loc(name));
    }

    // Struct initializer: Name { field: val, ... }
    // A '{' may also open a block (e.g. `if x { ... }`), so look two tokens
    // ahead: only `{ IDENT :` is treated as a struct initializer. Each lookahead
    // token is read only after the previous one was checked, so an end-of-input
    // token is never stepped over.
    if (peek(p)->kind == TOK_LBRACE) {
        const Token* after_brace = &p->tokens[p->pos + 1];
        if (after_brace->kind == TOK_IDENT) {
            const Token* after_fname = &p->tokens[p->pos + 2];
            if (after_fname->kind == TOK_COLON) {
                return parse_struct_init(p, name, error);
            }
        }
    }

    // Function call: name(...)
    if (match(p, TOK_LPAREN)) {
        AstNode* args[MAX_CALL_ARGS];
        int arg_count = 0;
        while (peek(p)->kind != TOK_RPAREN && !error->message) {
            if (arg_count >= MAX_CALL_ARGS) {
                error->message = "函数参数过多"; error->filename = p->filename;
                error->line = peek(p)->line; error->col = peek(p)->col;
                return NULL;
            }
            args[arg_count] = parse_expr(p, error);
            if (!args[arg_count]) return NULL;
            arg_count++;
            // A comma continues the list (a trailing comma is accepted).
            if (peek(p)->kind == TOK_COMMA) advance(p);
            else break;
        }
        if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
        AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
        memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
        return ast_make_call(p->arena,
            arena_strdup_impl(p->arena, name->start, name->length),
            arg_arr, arg_count, tok_loc(name));
    }

    return ast_make_ident(p->arena,
        arena_strdup_impl(p->arena, name->start, name->length),
        tok_loc(name));
}
// === Pratt 主循环 ===
static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
const Token* tok = peek(p);
AstNode* left = NULL;
// 前缀解析
if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) {
left = parse_unary(p, error);
} else if (tok->kind == TOK_LPAREN) {
left = parse_group(p, error);
} else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT ||
tok->kind == TOK_TRUE || tok->kind == TOK_FALSE ||
tok->kind == TOK_STR_LIT) {
left = parse_literal(p);
} else if (tok->kind == TOK_IDENT) {
left = parse_ident_or_call(p, error);
} else {
error->message = "无法识别的表达式"; error->filename = p->filename;
error->line = tok->line; error->col = tok->col;
return NULL;
}
if (!left) return NULL;
// 中缀/后置解析循环
while (!error->message) {
TokenKind kind = peek(p)->kind;
// 后置字段访问: expr.field
if (kind == TOK_DOT) {
advance(p); // 跳过 '.'
const Token* field = expect(p, TOK_IDENT, error, "缺少字段名");
if (!field) return NULL;
left = ast_make_field_access(p->arena, left,
arena_strdup_impl(p->arena, field->start, field->length),
tok_loc(field));
continue;
}
// 后置索引: expr[expr]
if (kind == TOK_LBRACKET) {
const Token* lbrack = advance(p); // 跳过 '['
AstNode* index = parse_expr(p, error);
if (!index) return NULL;
if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL;
left = ast_make_index_expr(p->arena, left, index, tok_loc(lbrack));
continue;
}
// 中缀运算符
Precedence prec = tok_to_prec(kind);
if (prec <= min_prec) break;
const Token* op = advance(p);
AstNode* right = parse_expr_prec(p, prec, error);
if (!right) return NULL;
left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, tok_loc(op));
}
return left;
}
// Parses a full expression: every binary operator is accepted because the
// minimum precedence is the lowest level, PREC_NONE.
static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
    AstNode* expr = parse_expr_prec(p, PREC_NONE, error);
    return expr;
}
// === Type helpers ===
// Maps a built-in type keyword token to its TypeKind.
// Any other token kind yields TYPE_VOID.
static TypeKind token_to_type(TokenKind k) {
    if (k == TOK_I64) {
        return TYPE_I64;
    }
    if (k == TOK_F64) {
        return TYPE_F64;
    }
    if (k == TOK_BOOL) {
        return TYPE_BOOL;
    }
    if (k == TOK_STR) {
        return TYPE_STR;
    }
    return TYPE_VOID;
}
// === 类型表达式解析(内置类型/结构体名/数组类型)===
static TypeInfo parse_type_expr(Parser* p, ErrorInfo* error) {
const Token* t = peek(p);
// 数组类型: [element_type; size]
if (t->kind == TOK_LBRACKET) {
advance(p); // 跳过 '['
TypeInfo elem = parse_type_expr(p, error);
if (elem.kind == TYPE_ERROR) return elem;
if (!expect(p, TOK_SEMICOLON, error, "数组类型中缺少 ';'")) {
TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti;
}
const Token* size_tok = expect(p, TOK_INT_LIT, error, "数组大小必须是整数常量");
if (!size_tok) { TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti; }
int64_t size = tok_int_value(size_tok);
if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) {
TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti;
}
TypeInfo ti = {0};
ti.kind = TYPE_ARRAY;
ti.element_type = elem.kind;
ti.element_struct_name = elem.struct_name;
ti.array_size = size;
return ti;
}
TypeInfo ti = {0};
if (tok_is_type(t->kind)) {
advance(p);
ti.kind = token_to_type(t->kind);
} else if (t->kind == TOK_IDENT) {
advance(p);
ti.kind = TYPE_STRUCT;
ti.struct_name = arena_strdup_impl(p->arena, t->start, t->length);
} else {
error->message = "无效的类型"; error->filename = p->filename;
error->line = t->line; error->col = t->col;
ti.kind = TYPE_ERROR;
}
return ti;
}
// === Struct declaration: struct Name { field: Type, ... } ===
// The cursor must be on `struct`. Accepts at most 32 fields and allows a
// trailing comma. Each field is stored as a parameter node.
// NOTE(review): only the field type's kind and struct_name are forwarded to
// ast_make_parameter(); for array-typed fields the element type and size are
// not passed on — confirm whether that is intended.
// Returns NULL with `error` filled in on failure.
static AstNode* parse_struct_decl(Parser* p, ErrorInfo* error) {
    AstNode* fields[32];
    int nfields = 0;

    const Token* struct_tok = advance(p); // skip 'struct'

    const Token* name = expect(p, TOK_IDENT, error, "struct 后应为结构体名");
    if (name == NULL) {
        return NULL;
    }
    if (expect(p, TOK_LBRACE, error, "缺少 '{'") == NULL) {
        return NULL;
    }

    for (;;) {
        if (peek(p)->kind == TOK_RBRACE || error->message) {
            break;
        }
        if (nfields >= 32) {
            error->message = "结构体字段过多 (最多32)";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            return NULL;
        }

        const Token* field_tok = expect(p, TOK_IDENT, error, "字段名");
        if (field_tok == NULL) {
            return NULL;
        }
        if (expect(p, TOK_COLON, error, "缺少 ':'") == NULL) {
            return NULL;
        }

        TypeInfo field_type = parse_type_expr(p, error);
        if (field_type.kind == TYPE_ERROR) {
            error->filename = p->filename;
            return NULL;
        }

        const char* field_name = arena_strdup_impl(p->arena, field_tok->start, field_tok->length);
        fields[nfields] = ast_make_parameter(p->arena, field_name,
                                             field_type.kind, field_type.struct_name,
                                             tok_loc(field_tok));
        nfields += 1;

        // A comma continues the list; anything else ends it.
        if (!match(p, TOK_COMMA)) {
            break;
        }
    }

    if (expect(p, TOK_RBRACE, error, "缺少 '}'") == NULL) {
        return NULL;
    }

    // Move the field nodes from the stack into an arena-owned array.
    const size_t bytes = nfields * sizeof(AstNode*);
    AstNode** fields_out = arena_alloc_impl(p->arena, bytes);
    memcpy(fields_out, fields, bytes);

    const char* struct_name = arena_strdup_impl(p->arena, name->start, name->length);
    return ast_make_struct_decl(p->arena, struct_name, fields_out, nfields,
                                tok_loc(struct_tok));
}
// === Statement parsing ===
// Parses `{ statement* }` with at most 256 statements. Block nesting is
// tracked in parse_depth and rejected beyond MAX_PARSE_DEPTH; the counter is
// restored on every exit path.
// Returns NULL with `error` filled in on failure.
static AstNode* parse_block(Parser* p, ErrorInfo* error) {
    AstNode* stmts[256];
    int count = 0;
    AstNode* block = NULL;
    const Token* open = NULL;

    parse_depth += 1;
    if (parse_depth > MAX_PARSE_DEPTH) {
        error->message = "嵌套过深";
        error->filename = p->filename;
        error->line = peek(p)->line;
        error->col = peek(p)->col;
        goto leave;
    }

    open = peek(p);
    if (expect(p, TOK_LBRACE, error, "缺少 '{'") == NULL) {
        goto leave;
    }

    for (;;) {
        const TokenKind next = peek(p)->kind;
        if (next == TOK_RBRACE || next == TOK_EOF || error->message) {
            break;
        }
        if (count >= 256) {
            error->message = "代码块语句过多 (最多256)";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            goto leave;
        }
        AstNode* stmt = parse_statement(p, error);
        if (stmt == NULL) {
            goto leave;
        }
        stmts[count] = stmt;
        count += 1;
    }

    if (expect(p, TOK_RBRACE, error, "缺少 '}'") == NULL) {
        goto leave;
    }

    {
        // Move the statements from the stack into an arena-owned array.
        const size_t bytes = count * sizeof(AstNode*);
        AstNode** stmts_out = arena_alloc_impl(p->arena, bytes);
        memcpy(stmts_out, stmts, bytes);
        block = ast_make_block(p->arena, stmts_out, count, tok_loc(open));
    }

leave:
    parse_depth -= 1;
    return block;
}
static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
const Token* t = peek(p);
if (t->kind == TOK_LET) {
advance(p);
bool is_mut = false;
if (peek(p)->kind == TOK_MUT) { is_mut = true; advance(p); }
const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名");
if (!name) return NULL;
// 可选的类型标注
TypeKind annot_type = TYPE_UNKNOWN;
bool has_type_annot = false;
const char* struct_type_name = NULL;
TypeKind annot_elem_type = 0;
const char* annot_elem_struct = NULL;
int64_t annot_arr_size = 0;
if (match(p, TOK_COLON)) {
TypeInfo ti = parse_type_expr(p, error);
if (ti.kind == TYPE_ERROR) return NULL;
annot_type = ti.kind;
struct_type_name = ti.struct_name;
annot_elem_type = ti.element_type;
annot_elem_struct = ti.element_struct_name;
annot_arr_size = ti.array_size;
has_type_annot = true;
}
if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
AstNode* init = parse_expr(p, error);
if (!init) return NULL;
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
return ast_make_let(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
annot_type, has_type_annot, is_mut, init, struct_type_name,
annot_elem_type, annot_elem_struct, annot_arr_size, tok_loc(t));
}
if (t->kind == TOK_IF) {
advance(p);
AstNode* cond = parse_expr(p, error);
if (!cond) return NULL;
AstNode* then_block = parse_block(p, error);
if (!then_block) return NULL;
AstNode* else_block = NULL;
if (match(p, TOK_ELSE)) {
if (peek(p)->kind == TOK_IF) {
else_block = parse_statement(p, error);
} else {
else_block = parse_block(p, error);
}
if (!else_block) return NULL;
}
return ast_make_if(p->arena, cond, then_block, else_block, tok_loc(t));
}
if (t->kind == TOK_WHILE) {
advance(p);
AstNode* cond = parse_expr(p, error);
if (!cond) return NULL;
AstNode* body = parse_block(p, error);
if (!body) return NULL;
return ast_make_while(p->arena, cond, body, tok_loc(t));
}
if (t->kind == TOK_FOR) {
advance(p); // 跳过 'for'
// 解析循环变量名
const Token* var_name = expect(p, TOK_IDENT, error, "for 后应为变量名");
if (!var_name) return NULL;
// 解析 'in'
if (!expect(p, TOK_IN, error, "缺少 'in'")) return NULL;
// 解析起始表达式
AstNode* start_expr = parse_expr(p, error);
if (!start_expr) return NULL;
// 解析 '..'
if (!expect(p, TOK_DOT_DOT, error, "缺少 '..'")) return NULL;
// 解析结束表达式
AstNode* end_expr = parse_expr(p, error);
if (!end_expr) return NULL;
// 解析循环体
AstNode* body = parse_block(p, error);
if (!body) return NULL;
// 脱糖: for i in start..end { body; }
// → { let mut i = start; while i < end { body; i = i + 1; } }
const char* vname = arena_strdup_impl(p->arena, var_name->start, var_name->length);
// 构建: let mut i = start;
AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, true, start_expr, NULL, 0, NULL, 0, tok_loc(var_name));
// 构建: i < end (while 条件)
AstNode* cond = ast_make_binary(p->arena, OP_LT,
ast_make_ident(p->arena, vname, tok_loc(var_name)),
end_expr, tok_loc(var_name));
// 构建: i = i + 1 (循环增量)
AstNode* incr = ast_make_assign(p->arena, vname,
ast_make_binary(p->arena, OP_ADD,
ast_make_ident(p->arena, vname, tok_loc(var_name)),
ast_make_literal_i64(p->arena, 1, tok_loc(var_name)),
tok_loc(var_name)),
tok_loc(var_name));
// 将增量追加到循环体末尾
AstNode** new_stmts = arena_alloc_impl(p->arena,
(body->as.block.stmt_count + 1) * sizeof(AstNode*));
memcpy(new_stmts, body->as.block.stmts, body->as.block.stmt_count * sizeof(AstNode*));
new_stmts[body->as.block.stmt_count] = incr;
AstNode* new_body = ast_make_block(p->arena, new_stmts,
body->as.block.stmt_count + 1, body->loc);
// 构建: while i < end { ... body ... ; i = i + 1; }
AstNode* while_loop = ast_make_while(p->arena, cond, new_body, tok_loc(t));
// 包装: { let mut i = start; while i < end { ... } }
AstNode* stmts_arr[2] = { let_stmt, while_loop };
AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*));
memcpy(stmts, stmts_arr, 2 * sizeof(AstNode*));
return ast_make_block(p->arena, stmts, 2, tok_loc(t));
}
if (t->kind == TOK_RETURN) {
advance(p);
if (match(p, TOK_SEMICOLON)) {
return ast_make_return(p->arena, NULL, tok_loc(t));
}
AstNode* expr = parse_expr(p, error);
if (!expr) return NULL;
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
return ast_make_return(p->arena, expr, tok_loc(t));
}
// 数组元素赋值: ident[expr] = expr ;
// 需要前瞻: 检查 ']' 后面是否是 '=' (而非 ';' 或其它)
if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_LBRACKET) {
// 向前扫描找到对应的 ']'(不支持嵌套 '[' 在索引中)
int ahead_idx = 2;
int bracket_depth = 1;
while (bracket_depth > 0 && (t + ahead_idx)->kind != TOK_EOF) {
if ((t + ahead_idx)->kind == TOK_LBRACKET) bracket_depth++;
else if ((t + ahead_idx)->kind == TOK_RBRACKET) bracket_depth--;
if (bracket_depth > 0) ahead_idx++;
}
// 检查 ']' 后是否是 '='
if ((t + ahead_idx + 1)->kind == TOK_ASSIGN) {
const Token* name = advance(p); // 消费标识符
advance(p); // 消费 '['
AstNode* index = parse_expr(p, error);
if (!index) return NULL;
if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL;
if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
AstNode* value = parse_expr(p, error);
if (!value) return NULL;
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
return ast_make_array_assign(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
index, value, tok_loc(name));
}
// 否则: 不是数组赋值, 回退到下方表达式语句处理
}
// 赋值语句: ident = expr ;
if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_ASSIGN) {
const Token* name = advance(p); // 消费标识符
advance(p); // 消费 '='
AstNode* value = parse_expr(p, error);
if (!value) return NULL;
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
return ast_make_assign(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
value, tok_loc(name));
}
// 复合赋值: ident += expr → ident = ident + expr
if (t->kind == TOK_IDENT) {
TokenKind next_kind = (t + 1)->kind;
if (next_kind >= TOK_PLUS_EQ && next_kind <= TOK_SLASH_EQ) {
const Token* name = advance(p); // 消费标识符
TokenKind comp_op = advance(p)->kind;
BinaryOp binop;
switch (comp_op) {
case TOK_PLUS_EQ: binop = OP_ADD; break;
case TOK_MINUS_EQ: binop = OP_SUB; break;
case TOK_STAR_EQ: binop = OP_MUL; break;
case TOK_SLASH_EQ: binop = OP_DIV; break;
default: break;
}
AstNode* rhs = parse_expr(p, error);
if (!rhs) return NULL;
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
AstNode* lhs_ident = ast_make_ident(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
tok_loc(name));
AstNode* bin_expr = ast_make_binary(p->arena, binop, lhs_ident, rhs,
tok_loc(name));
return ast_make_assign(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
bin_expr, tok_loc(name));
}
}
// 表达式语句
AstNode* expr = parse_expr(p, error);
if (!expr) return NULL;
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
return ast_make_expr_stmt(p->arena, expr, tok_loc(t));
}
// === Function declaration: fn name(p: T, ...) [-> T] { ... } ===
// The cursor must be on `fn`. Accepts at most 64 parameters and allows a
// trailing comma. Without `->` the return type is TYPE_VOID.
// NOTE(review): only the kind and struct_name of parameter and return types
// are forwarded; for array types the element type and size are not passed
// on — confirm whether that is intended.
// Returns NULL with `error` filled in on failure.
static AstNode* parse_function(Parser* p, ErrorInfo* error) {
    AstNode* params[64];
    int nparams = 0;

    const Token* fn_tok = advance(p); // skip 'fn'

    const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名");
    if (name == NULL) {
        return NULL;
    }
    if (expect(p, TOK_LPAREN, error, "缺少 '('") == NULL) {
        return NULL;
    }

    // Parameter list
    for (;;) {
        if (peek(p)->kind == TOK_RPAREN || error->message) {
            break;
        }
        if (nparams >= 64) {
            error->message = "函数参数过多 (最多64)";
            error->filename = p->filename;
            error->line = peek(p)->line;
            error->col = peek(p)->col;
            return NULL;
        }

        const Token* param_tok = expect(p, TOK_IDENT, error, "参数名");
        if (param_tok == NULL) {
            return NULL;
        }
        if (expect(p, TOK_COLON, error, "缺少 ':'") == NULL) {
            return NULL;
        }
        TypeInfo param_type = parse_type_expr(p, error);
        if (param_type.kind == TYPE_ERROR) {
            return NULL;
        }

        const char* param_name = arena_strdup_impl(p->arena, param_tok->start, param_tok->length);
        params[nparams] = ast_make_parameter(p->arena, param_name,
                                             param_type.kind, param_type.struct_name,
                                             tok_loc(param_tok));
        nparams += 1;

        // A comma continues the list; anything else ends it.
        if (!match(p, TOK_COMMA)) {
            break;
        }
    }
    if (expect(p, TOK_RPAREN, error, "缺少 ')'") == NULL) {
        return NULL;
    }

    // Optional return type
    TypeKind ret_kind = TYPE_VOID;
    const char* ret_struct_name = NULL;
    if (match(p, TOK_ARROW)) {
        TypeInfo ret_type = parse_type_expr(p, error);
        if (ret_type.kind == TYPE_ERROR) {
            return NULL;
        }
        ret_kind = ret_type.kind;
        ret_struct_name = ret_type.struct_name;
    }

    AstNode* body = parse_block(p, error);
    if (body == NULL) {
        return NULL;
    }

    // Move the parameter nodes from the stack into an arena-owned array.
    const size_t bytes = nparams * sizeof(AstNode*);
    AstNode** params_out = arena_alloc_impl(p->arena, bytes);
    memcpy(params_out, params, bytes);

    const char* fn_name = arena_strdup_impl(p->arena, name->start, name->length);
    return ast_make_function(p->arena, fn_name, params_out, nparams,
                             ret_kind, ret_struct_name, body, tok_loc(fn_tok));
}
// === 程序入口 ===
AstNode* parse(Arena* a, const Token* tokens, size_t count,
const char* filename, ErrorInfo* error) {
Parser p = {.tokens = tokens, .count = count, .pos = 0,
.filename = filename, .arena = a};
AstNode* functions[256]; int fn_count = 0;
AstNode* structs[64]; int struct_count = 0;
AstNode* aliases[64]; int alias_count = 0;
AstNode* enums[64]; int enum_count = 0;
while (peek(&p)->kind != TOK_EOF && !error->message) {
if (peek(&p)->kind == TOK_STRUCT) {
if (struct_count >= 64) { error->message = "结构体过多 (最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
structs[struct_count++] = parse_struct_decl(&p, error);
} else if (peek(&p)->kind == TOK_TYPE) {
if (alias_count >= 64) { error->message = "类型别名过多 (最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
const Token* type_tok = advance(&p); // 跳过 'type'
const Token* alias_name = expect(&p, TOK_IDENT, error, "type 后应为别名");
if (!alias_name) return NULL;
if (!expect(&p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
TypeInfo rti = parse_type_expr(&p, error);
if (rti.kind == TYPE_ERROR) return NULL;
if (!expect(&p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
aliases[alias_count++] = ast_make_type_alias(a,
arena_strdup_impl(a, alias_name->start, alias_name->length),
rti.kind, rti.struct_name, tok_loc(type_tok));
} else if (peek(&p)->kind == TOK_ENUM) {
advance(&p);
const Token* name = expect(&p, TOK_IDENT, error, "enum 后应为枚举名");
if (!name) return NULL;
if (!expect(&p, TOK_LBRACE, error, "缺少 '{'")) return NULL;
const char* variants[64]; int vcount = 0;
while (peek(&p)->kind != TOK_RBRACE && !error->message) {
if (vcount >= 64) { error->message = "枚举变体过多(最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
const Token* vname = expect(&p, TOK_IDENT, error, "变体名");
if (!vname) return NULL;
variants[vcount++] = arena_strdup_impl(p.arena, vname->start, vname->length);
if (peek(&p)->kind == TOK_COMMA) advance(&p); else break;
}
if (!expect(&p, TOK_RBRACE, error, "缺少 '}'")) return NULL;
const char** v_arr = arena_alloc_impl(p.arena, vcount * sizeof(const char*));
memcpy(v_arr, variants, vcount * sizeof(const char*));
AstNode* enum_decl = ast_make_enum_decl(p.arena, arena_strdup_impl(p.arena, name->start, name->length), v_arr, vcount, tok_loc(name));
if (enum_count >= 64) { error->message = "枚举过多 (最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
enums[enum_count++] = enum_decl;
} else if (peek(&p)->kind == TOK_FN) {
if (fn_count >= 256) { error->message = "函数过多 (最多256)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
functions[fn_count++] = parse_function(&p, error);
} else {
error->message = "顶层只允许 fn、struct、type 或 enum";
error->filename = p.filename;
error->line = peek(&p)->line;
error->col = peek(&p)->col;
return NULL;
}
}
if (error->message) return NULL;
AstNode** fn_arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*));
memcpy(fn_arr, functions, fn_count * sizeof(AstNode*));
AstNode** st_arr = arena_alloc_impl(a, struct_count * sizeof(AstNode*));
memcpy(st_arr, structs, struct_count * sizeof(AstNode*));
AstNode** al_arr = arena_alloc_impl(a, alias_count * sizeof(AstNode*));
memcpy(al_arr, aliases, alias_count * sizeof(AstNode*));
AstNode** en_arr = arena_alloc_impl(a, enum_count * sizeof(AstNode*));
memcpy(en_arr, enums, enum_count * sizeof(AstNode*));
return ast_make_program(a, fn_arr, fn_count, st_arr, struct_count,
al_arr, alias_count, en_arr, enum_count, loc_at(0, 0));
}