2923e7574d
- lexer: TOK_LBRACKET, TOK_RBRACKET - type: TYPE_ARRAY + TypeInfo扩展(element_type/array_size) - ast: AST_INDEX_EXPR, AST_ARRAY_ASSIGN_STMT - parser: parse_type_expr()支持[T;N], Pratt加[索引], 数组元素赋值 - sema: 数组类型检查, 索引必须i64, 元素赋值类型匹配 - codegen: type_info_to_llvm(TYPE_ARRAY), GEP+load/store - 新增集成测试: 18_array.l 测试: 136 通过 (41+15+59+21)
695 lines
29 KiB
C
695 lines
29 KiB
C
#include "parser.h"
|
||
#include <string.h>
|
||
#include <stdlib.h>
|
||
|
||
typedef struct {
|
||
const Token* tokens;
|
||
size_t count;
|
||
size_t pos;
|
||
const char* filename;
|
||
Arena* arena;
|
||
} Parser;
|
||
|
||
// === Recursion depth limit ===
// Maximum nesting depth the parser accepts before reporting an error.
#define MAX_PARSE_DEPTH 1000
// Current nesting depth of the recursive parser; compared against
// MAX_PARSE_DEPTH and restored on every exit path of the code that bumps it.
static int parse_depth = 0;
|
||
|
||
// === 向前看 ===
|
||
static const Token* peek(const Parser* p) { return &p->tokens[p->pos]; }
|
||
// Returns the current token and moves the cursor one token forward.
// No bounds check is done.
static const Token *advance(Parser *p) {
    const Token *current = p->tokens + p->pos;
    p->pos += 1;
    return current;
}
|
||
// Consumes the current token and returns true if it has kind `k`;
// otherwise leaves the cursor untouched and returns false.
static bool match(Parser *p, TokenKind k) {
    if (peek(p)->kind != k) {
        return false;
    }
    p->pos++;
    return true;
}
|
||
// Consumes and returns the current token if it has kind `k`.
// Otherwise records `msg`, the parser's filename and the current token's
// line/column in `e`, leaves the cursor untouched and returns NULL.
static const Token *expect(Parser *p, TokenKind k, ErrorInfo *e, const char *msg) {
    const Token *cur = peek(p);
    if (cur->kind != k) {
        e->message = msg;
        e->filename = p->filename;
        e->line = cur->line;
        e->col = cur->col;
        return NULL;
    }
    return advance(p);
}
|
||
|
||
// === Operator precedence levels ===
// A larger value binds tighter. The gaps between values leave room for
// inserting new levels.
typedef enum {
    PREC_NONE    = 0,   // not an infix operator
    PREC_OR      = 20,  // ||
    PREC_AND     = 30,  // &&
    PREC_COMPARE = 40,  // == != < > <= >=
    PREC_TERM    = 50,  // + -
    PREC_FACTOR  = 60,  // * / %
    PREC_UNARY   = 70,  // operand of prefix - and !
    PREC_POSTFIX = 80,  // .field, call()
} Precedence;
|
||
|
||
// Maps a token kind to its infix precedence level.
// Returns PREC_NONE for any token that is not a binary operator.
static Precedence tok_to_prec(TokenKind kind) {
    Precedence level = PREC_NONE;

    switch (kind) {
    case TOK_PIPE_PIPE:
        level = PREC_OR;
        break;
    case TOK_AND_AND:
        level = PREC_AND;
        break;
    case TOK_EQ_EQ:
    case TOK_BANG_EQ:
    case TOK_LT:
    case TOK_GT:
    case TOK_LT_EQ:
    case TOK_GT_EQ:
        level = PREC_COMPARE;
        break;
    case TOK_PLUS:
    case TOK_MINUS:
        level = PREC_TERM;
        break;
    case TOK_STAR:
    case TOK_SLASH:
    case TOK_PERCENT:
        level = PREC_FACTOR;
        break;
    default:
        break;
    }
    return level;
}
|
||
|
||
// Maps an operator token kind to the matching BinaryOp.
// Any token that is not a binary operator falls back to OP_ADD.
static BinaryOp tok_to_binop(TokenKind kind) {
    BinaryOp op = OP_ADD;

    switch (kind) {
    case TOK_PLUS:      op = OP_ADD; break;
    case TOK_MINUS:     op = OP_SUB; break;
    case TOK_STAR:      op = OP_MUL; break;
    case TOK_SLASH:     op = OP_DIV; break;
    case TOK_PERCENT:   op = OP_MOD; break;
    case TOK_EQ_EQ:     op = OP_EQ;  break;
    case TOK_BANG_EQ:   op = OP_NE;  break;
    case TOK_LT:        op = OP_LT;  break;
    case TOK_GT:        op = OP_GT;  break;
    case TOK_LT_EQ:     op = OP_LE;  break;
    case TOK_GT_EQ:     op = OP_GE;  break;
    case TOK_AND_AND:   op = OP_AND; break;
    case TOK_PIPE_PIPE: op = OP_OR;  break;
    default:
        break;
    }
    return op;
}
|
||
|
||
// 向前声明
|
||
static AstNode* parse_expr(Parser* p, ErrorInfo* error);
|
||
static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
|
||
static AstNode* parse_block(Parser* p, ErrorInfo* error);
|
||
static AstNode* parse_statement(Parser* p, ErrorInfo* error);
|
||
static AstNode* parse_function(Parser* p, ErrorInfo* error);
|
||
|
||
// === 前缀解析 ===
|
||
static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
|
||
const Token* op = advance(p);
|
||
AstNode* operand = parse_expr_prec(p, PREC_UNARY, error);
|
||
if (!operand) return NULL;
|
||
BinaryOp uop = (op->kind == TOK_MINUS) ? OP_NEG : OP_NOT;
|
||
return ast_make_unary(p->arena, uop, operand, tok_loc(op));
|
||
}
|
||
|
||
static AstNode* parse_group(Parser* p, ErrorInfo* error) {
|
||
advance(p); // 跳过 (
|
||
AstNode* expr = parse_expr(p, error);
|
||
if (!expr) return NULL;
|
||
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
|
||
return expr;
|
||
}
|
||
|
||
// Consumes the current token and builds the matching literal node
// (integer, float, bool or string). String literals are copied into the
// arena as a NUL-terminated buffer of the token's `length` bytes.
// Returns NULL, without setting an error, for any other token kind.
static AstNode *parse_literal(Parser *p) {
    const Token *lit = advance(p);

    if (lit->kind == TOK_INT_LIT) {
        return ast_make_literal_i64(p->arena, tok_int_value(lit), tok_loc(lit));
    }
    if (lit->kind == TOK_FLOAT_LIT) {
        return ast_make_literal_f64(p->arena, tok_float_value(lit), tok_loc(lit));
    }
    if (lit->kind == TOK_TRUE) {
        return ast_make_literal_bool(p->arena, true, tok_loc(lit));
    }
    if (lit->kind == TOK_FALSE) {
        return ast_make_literal_bool(p->arena, false, tok_loc(lit));
    }
    if (lit->kind == TOK_STR_LIT) {
        char *text = arena_alloc_impl(p->arena, lit->length + 1);
        memcpy(text, lit->start, lit->length);
        text[lit->length] = '\0';
        return ast_make_literal_str(p->arena, text, tok_loc(lit));
    }
    return NULL;
}
|
||
|
||
// === 结构体初始化解析: Name { field: val, ... } ===
|
||
static AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error) {
|
||
advance(p); // 跳过 '{'
|
||
const char* fnames[32];
|
||
AstNode* fvals[32];
|
||
int fcount = 0;
|
||
|
||
while (peek(p)->kind != TOK_RBRACE && !error->message) {
|
||
if (fcount >= 32) { error->message = "结构体初始化字段过多 (最多32)"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
|
||
const Token* fname = expect(p, TOK_IDENT, error, "字段名");
|
||
if (!fname) return NULL;
|
||
if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
|
||
AstNode* val = parse_expr(p, error);
|
||
if (!val) return NULL;
|
||
|
||
fnames[fcount] = arena_strdup_impl(p->arena, fname->start, fname->length);
|
||
fvals[fcount] = val;
|
||
fcount++;
|
||
|
||
if (peek(p)->kind == TOK_COMMA) advance(p);
|
||
else break;
|
||
}
|
||
if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;
|
||
|
||
const char** n_arr = arena_alloc_impl(p->arena, fcount * sizeof(const char*));
|
||
memcpy(n_arr, fnames, fcount * sizeof(const char*));
|
||
AstNode** v_arr = arena_alloc_impl(p->arena, fcount * sizeof(AstNode*));
|
||
memcpy(v_arr, fvals, fcount * sizeof(AstNode*));
|
||
|
||
return ast_make_struct_init(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
n_arr, v_arr, fcount, tok_loc(name));
|
||
}
|
||
|
||
// === 标识符 / 函数调用 / 结构体初始化 ===
|
||
static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
|
||
const Token* name = advance(p);
|
||
|
||
// 枚举变体引用: Name::Variant
|
||
if (peek(p)->kind == TOK_COLON_COLON) {
|
||
advance(p); // 跳过 ::
|
||
const Token* variant = expect(p, TOK_IDENT, error, "枚举变体名");
|
||
if (!variant) return NULL;
|
||
return ast_make_enum_variant(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
arena_strdup_impl(p->arena, variant->start, variant->length),
|
||
tok_loc(name));
|
||
}
|
||
|
||
// 结构体初始化: Name { field: val, ... }
|
||
// 用提前看来区别 struct init 和 block:
|
||
// struct init → { IDENT COLON ... ;block → { 可能是 let/if/while/...
|
||
if (peek(p)->kind == TOK_LBRACE) {
|
||
const Token* after_brace = &p->tokens[p->pos + 1];
|
||
if (after_brace->kind == TOK_IDENT) {
|
||
const Token* after_fname = &p->tokens[p->pos + 2];
|
||
if (after_fname->kind == TOK_COLON) {
|
||
return parse_struct_init(p, name, error);
|
||
}
|
||
}
|
||
}
|
||
|
||
// 函数调用: name(...)
|
||
if (match(p, TOK_LPAREN)) {
|
||
AstNode* args[16]; int arg_count = 0;
|
||
while (peek(p)->kind != TOK_RPAREN && !error->message) {
|
||
if (arg_count >= 16) {
|
||
error->message = "函数参数过多"; error->filename = p->filename;
|
||
error->line = peek(p)->line; error->col = peek(p)->col; return NULL;
|
||
}
|
||
args[arg_count] = parse_expr(p, error);
|
||
if (!args[arg_count]) return NULL;
|
||
arg_count++;
|
||
if (peek(p)->kind == TOK_COMMA) advance(p);
|
||
else break;
|
||
}
|
||
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
|
||
AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
|
||
memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
|
||
return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length),
|
||
arg_arr, arg_count, tok_loc(name));
|
||
}
|
||
return ast_make_ident(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
tok_loc(name));
|
||
}
|
||
|
||
// === Pratt 主循环 ===
|
||
static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
|
||
const Token* tok = peek(p);
|
||
AstNode* left = NULL;
|
||
|
||
// 前缀解析
|
||
if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) {
|
||
left = parse_unary(p, error);
|
||
} else if (tok->kind == TOK_LPAREN) {
|
||
left = parse_group(p, error);
|
||
} else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT ||
|
||
tok->kind == TOK_TRUE || tok->kind == TOK_FALSE ||
|
||
tok->kind == TOK_STR_LIT) {
|
||
left = parse_literal(p);
|
||
} else if (tok->kind == TOK_IDENT) {
|
||
left = parse_ident_or_call(p, error);
|
||
} else {
|
||
error->message = "无法识别的表达式"; error->filename = p->filename;
|
||
error->line = tok->line; error->col = tok->col;
|
||
return NULL;
|
||
}
|
||
if (!left) return NULL;
|
||
|
||
// 中缀/后置解析循环
|
||
while (!error->message) {
|
||
TokenKind kind = peek(p)->kind;
|
||
|
||
// 后置字段访问: expr.field
|
||
if (kind == TOK_DOT) {
|
||
advance(p); // 跳过 '.'
|
||
const Token* field = expect(p, TOK_IDENT, error, "缺少字段名");
|
||
if (!field) return NULL;
|
||
left = ast_make_field_access(p->arena, left,
|
||
arena_strdup_impl(p->arena, field->start, field->length),
|
||
tok_loc(field));
|
||
continue;
|
||
}
|
||
|
||
// 后置索引: expr[expr]
|
||
if (kind == TOK_LBRACKET) {
|
||
const Token* lbrack = advance(p); // 跳过 '['
|
||
AstNode* index = parse_expr(p, error);
|
||
if (!index) return NULL;
|
||
if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL;
|
||
left = ast_make_index_expr(p->arena, left, index, tok_loc(lbrack));
|
||
continue;
|
||
}
|
||
|
||
// 中缀运算符
|
||
Precedence prec = tok_to_prec(kind);
|
||
if (prec <= min_prec) break;
|
||
|
||
const Token* op = advance(p);
|
||
AstNode* right = parse_expr_prec(p, prec, error);
|
||
if (!right) return NULL;
|
||
left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, tok_loc(op));
|
||
}
|
||
|
||
return left;
|
||
}
|
||
|
||
static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
|
||
return parse_expr_prec(p, PREC_NONE, error);
|
||
}
|
||
|
||
// === 类型工具 ===
|
||
static TypeKind token_to_type(TokenKind k) {
|
||
switch (k) { case TOK_I64: return TYPE_I64; case TOK_F64: return TYPE_F64;
|
||
case TOK_BOOL: return TYPE_BOOL; case TOK_STR: return TYPE_STR;
|
||
default: return TYPE_VOID; }
|
||
}
|
||
|
||
// === 类型表达式解析(内置类型/结构体名/数组类型)===
|
||
static TypeInfo parse_type_expr(Parser* p, ErrorInfo* error) {
|
||
const Token* t = peek(p);
|
||
|
||
// 数组类型: [element_type; size]
|
||
if (t->kind == TOK_LBRACKET) {
|
||
advance(p); // 跳过 '['
|
||
TypeInfo elem = parse_type_expr(p, error);
|
||
if (elem.kind == TYPE_ERROR) return elem;
|
||
if (!expect(p, TOK_SEMICOLON, error, "数组类型中缺少 ';'")) {
|
||
TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti;
|
||
}
|
||
const Token* size_tok = expect(p, TOK_INT_LIT, error, "数组大小必须是整数常量");
|
||
if (!size_tok) { TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti; }
|
||
int64_t size = tok_int_value(size_tok);
|
||
if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) {
|
||
TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti;
|
||
}
|
||
TypeInfo ti = {0};
|
||
ti.kind = TYPE_ARRAY;
|
||
ti.element_type = elem.kind;
|
||
ti.element_struct_name = elem.struct_name;
|
||
ti.array_size = size;
|
||
return ti;
|
||
}
|
||
|
||
TypeInfo ti = {0};
|
||
if (tok_is_type(t->kind)) {
|
||
advance(p);
|
||
ti.kind = token_to_type(t->kind);
|
||
} else if (t->kind == TOK_IDENT) {
|
||
advance(p);
|
||
ti.kind = TYPE_STRUCT;
|
||
ti.struct_name = arena_strdup_impl(p->arena, t->start, t->length);
|
||
} else {
|
||
error->message = "无效的类型"; error->filename = p->filename;
|
||
error->line = t->line; error->col = t->col;
|
||
ti.kind = TYPE_ERROR;
|
||
}
|
||
return ti;
|
||
}
|
||
|
||
// === 结构体声明解析 ===
|
||
static AstNode* parse_struct_decl(Parser* p, ErrorInfo* error) {
|
||
const Token* s_tok = advance(p); // 跳过 'struct'
|
||
const Token* name = expect(p, TOK_IDENT, error, "struct 后应为结构体名");
|
||
if (!name) return NULL;
|
||
if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) return NULL;
|
||
|
||
AstNode* fields[32]; int fcount = 0;
|
||
while (peek(p)->kind != TOK_RBRACE && !error->message) {
|
||
if (fcount >= 32) { error->message = "结构体字段过多 (最多32)"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
|
||
const Token* fname = expect(p, TOK_IDENT, error, "字段名");
|
||
if (!fname) return NULL;
|
||
if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
|
||
TypeInfo fti = parse_type_expr(p, error);
|
||
if (fti.kind == TYPE_ERROR) {
|
||
error->filename = p->filename;
|
||
return NULL;
|
||
}
|
||
fields[fcount++] = ast_make_parameter(p->arena,
|
||
arena_strdup_impl(p->arena, fname->start, fname->length),
|
||
fti.kind, fti.struct_name, tok_loc(fname));
|
||
if (peek(p)->kind == TOK_COMMA) advance(p);
|
||
else break;
|
||
}
|
||
if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;
|
||
|
||
AstNode** farr = arena_alloc_impl(p->arena, fcount * sizeof(AstNode*));
|
||
memcpy(farr, fields, fcount * sizeof(AstNode*));
|
||
return ast_make_struct_decl(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
farr, fcount, tok_loc(s_tok));
|
||
}
|
||
|
||
// === 语句解析 ===
|
||
|
||
static AstNode* parse_block(Parser* p, ErrorInfo* error) {
|
||
if (++parse_depth > MAX_PARSE_DEPTH) {
|
||
error->message = "嵌套过深"; error->filename = p->filename;
|
||
error->line = peek(p)->line; error->col = peek(p)->col;
|
||
parse_depth--; return NULL;
|
||
}
|
||
const Token* open = peek(p);
|
||
if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) { parse_depth--; return NULL; }
|
||
AstNode* stmts[256]; int count = 0;
|
||
while (peek(p)->kind != TOK_RBRACE && peek(p)->kind != TOK_EOF && !error->message) {
|
||
if (count >= 256) { error->message = "代码块语句过多 (最多256)"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; parse_depth--; return NULL; }
|
||
AstNode* s = parse_statement(p, error);
|
||
if (!s) { parse_depth--; return NULL; }
|
||
stmts[count++] = s;
|
||
}
|
||
if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) { parse_depth--; return NULL; }
|
||
AstNode** arr = arena_alloc_impl(p->arena, count * sizeof(AstNode*));
|
||
memcpy(arr, stmts, count * sizeof(AstNode*));
|
||
parse_depth--;
|
||
return ast_make_block(p->arena, arr, count, tok_loc(open));
|
||
}
|
||
|
||
static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
|
||
const Token* t = peek(p);
|
||
|
||
if (t->kind == TOK_LET) {
|
||
advance(p);
|
||
bool is_mut = false;
|
||
if (peek(p)->kind == TOK_MUT) { is_mut = true; advance(p); }
|
||
const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名");
|
||
if (!name) return NULL;
|
||
// 可选的类型标注
|
||
TypeKind annot_type = TYPE_UNKNOWN;
|
||
bool has_type_annot = false;
|
||
const char* struct_type_name = NULL;
|
||
TypeKind annot_elem_type = 0;
|
||
const char* annot_elem_struct = NULL;
|
||
int64_t annot_arr_size = 0;
|
||
if (match(p, TOK_COLON)) {
|
||
TypeInfo ti = parse_type_expr(p, error);
|
||
if (ti.kind == TYPE_ERROR) return NULL;
|
||
annot_type = ti.kind;
|
||
struct_type_name = ti.struct_name;
|
||
annot_elem_type = ti.element_type;
|
||
annot_elem_struct = ti.element_struct_name;
|
||
annot_arr_size = ti.array_size;
|
||
has_type_annot = true;
|
||
}
|
||
if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
|
||
AstNode* init = parse_expr(p, error);
|
||
if (!init) return NULL;
|
||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||
return ast_make_let(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
annot_type, has_type_annot, is_mut, init, struct_type_name,
|
||
annot_elem_type, annot_elem_struct, annot_arr_size, tok_loc(t));
|
||
}
|
||
|
||
if (t->kind == TOK_IF) {
|
||
advance(p);
|
||
AstNode* cond = parse_expr(p, error);
|
||
if (!cond) return NULL;
|
||
AstNode* then_block = parse_block(p, error);
|
||
if (!then_block) return NULL;
|
||
AstNode* else_block = NULL;
|
||
if (match(p, TOK_ELSE)) {
|
||
if (peek(p)->kind == TOK_IF) {
|
||
else_block = parse_statement(p, error);
|
||
} else {
|
||
else_block = parse_block(p, error);
|
||
}
|
||
if (!else_block) return NULL;
|
||
}
|
||
return ast_make_if(p->arena, cond, then_block, else_block, tok_loc(t));
|
||
}
|
||
|
||
if (t->kind == TOK_WHILE) {
|
||
advance(p);
|
||
AstNode* cond = parse_expr(p, error);
|
||
if (!cond) return NULL;
|
||
AstNode* body = parse_block(p, error);
|
||
if (!body) return NULL;
|
||
return ast_make_while(p->arena, cond, body, tok_loc(t));
|
||
}
|
||
|
||
if (t->kind == TOK_FOR) {
|
||
advance(p); // 跳过 'for'
|
||
|
||
// 解析循环变量名
|
||
const Token* var_name = expect(p, TOK_IDENT, error, "for 后应为变量名");
|
||
if (!var_name) return NULL;
|
||
|
||
// 解析 'in'
|
||
if (!expect(p, TOK_IN, error, "缺少 'in'")) return NULL;
|
||
|
||
// 解析起始表达式
|
||
AstNode* start_expr = parse_expr(p, error);
|
||
if (!start_expr) return NULL;
|
||
|
||
// 解析 '..'
|
||
if (!expect(p, TOK_DOT_DOT, error, "缺少 '..'")) return NULL;
|
||
|
||
// 解析结束表达式
|
||
AstNode* end_expr = parse_expr(p, error);
|
||
if (!end_expr) return NULL;
|
||
|
||
// 解析循环体
|
||
AstNode* body = parse_block(p, error);
|
||
if (!body) return NULL;
|
||
|
||
// 脱糖: for i in start..end { body; }
|
||
// → { let mut i = start; while i < end { body; i = i + 1; } }
|
||
|
||
const char* vname = arena_strdup_impl(p->arena, var_name->start, var_name->length);
|
||
|
||
// 构建: let mut i = start;
|
||
AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, true, start_expr, NULL, 0, NULL, 0, tok_loc(var_name));
|
||
|
||
// 构建: i < end (while 条件)
|
||
AstNode* cond = ast_make_binary(p->arena, OP_LT,
|
||
ast_make_ident(p->arena, vname, tok_loc(var_name)),
|
||
end_expr, tok_loc(var_name));
|
||
|
||
// 构建: i = i + 1 (循环增量)
|
||
AstNode* incr = ast_make_assign(p->arena, vname,
|
||
ast_make_binary(p->arena, OP_ADD,
|
||
ast_make_ident(p->arena, vname, tok_loc(var_name)),
|
||
ast_make_literal_i64(p->arena, 1, tok_loc(var_name)),
|
||
tok_loc(var_name)),
|
||
tok_loc(var_name));
|
||
|
||
// 将增量追加到循环体末尾
|
||
AstNode** new_stmts = arena_alloc_impl(p->arena,
|
||
(body->as.block.stmt_count + 1) * sizeof(AstNode*));
|
||
memcpy(new_stmts, body->as.block.stmts, body->as.block.stmt_count * sizeof(AstNode*));
|
||
new_stmts[body->as.block.stmt_count] = incr;
|
||
AstNode* new_body = ast_make_block(p->arena, new_stmts,
|
||
body->as.block.stmt_count + 1, body->loc);
|
||
|
||
// 构建: while i < end { ... body ... ; i = i + 1; }
|
||
AstNode* while_loop = ast_make_while(p->arena, cond, new_body, tok_loc(t));
|
||
|
||
// 包装: { let mut i = start; while i < end { ... } }
|
||
AstNode* stmts_arr[2] = { let_stmt, while_loop };
|
||
AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*));
|
||
memcpy(stmts, stmts_arr, 2 * sizeof(AstNode*));
|
||
return ast_make_block(p->arena, stmts, 2, tok_loc(t));
|
||
}
|
||
|
||
if (t->kind == TOK_RETURN) {
|
||
advance(p);
|
||
if (match(p, TOK_SEMICOLON)) {
|
||
return ast_make_return(p->arena, NULL, tok_loc(t));
|
||
}
|
||
AstNode* expr = parse_expr(p, error);
|
||
if (!expr) return NULL;
|
||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||
return ast_make_return(p->arena, expr, tok_loc(t));
|
||
}
|
||
|
||
// 数组元素赋值: ident[expr] = expr ;
|
||
// 需要前瞻: 检查 ']' 后面是否是 '=' (而非 ';' 或其它)
|
||
if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_LBRACKET) {
|
||
// 向前扫描找到对应的 ']'(不支持嵌套 '[' 在索引中)
|
||
int ahead_idx = 2;
|
||
int bracket_depth = 1;
|
||
while (bracket_depth > 0 && (t + ahead_idx)->kind != TOK_EOF) {
|
||
if ((t + ahead_idx)->kind == TOK_LBRACKET) bracket_depth++;
|
||
else if ((t + ahead_idx)->kind == TOK_RBRACKET) bracket_depth--;
|
||
if (bracket_depth > 0) ahead_idx++;
|
||
}
|
||
// 检查 ']' 后是否是 '='
|
||
if ((t + ahead_idx + 1)->kind == TOK_ASSIGN) {
|
||
const Token* name = advance(p); // 消费标识符
|
||
advance(p); // 消费 '['
|
||
AstNode* index = parse_expr(p, error);
|
||
if (!index) return NULL;
|
||
if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL;
|
||
if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
|
||
AstNode* value = parse_expr(p, error);
|
||
if (!value) return NULL;
|
||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||
return ast_make_array_assign(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
index, value, tok_loc(name));
|
||
}
|
||
// 否则: 不是数组赋值, 回退到下方表达式语句处理
|
||
}
|
||
|
||
// 赋值语句: ident = expr ;
|
||
if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_ASSIGN) {
|
||
const Token* name = advance(p); // 消费标识符
|
||
advance(p); // 消费 '='
|
||
AstNode* value = parse_expr(p, error);
|
||
if (!value) return NULL;
|
||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||
return ast_make_assign(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
value, tok_loc(name));
|
||
}
|
||
|
||
// 复合赋值: ident += expr → ident = ident + expr
|
||
if (t->kind == TOK_IDENT) {
|
||
TokenKind next_kind = (t + 1)->kind;
|
||
if (next_kind >= TOK_PLUS_EQ && next_kind <= TOK_SLASH_EQ) {
|
||
const Token* name = advance(p); // 消费标识符
|
||
TokenKind comp_op = advance(p)->kind;
|
||
|
||
BinaryOp binop;
|
||
switch (comp_op) {
|
||
case TOK_PLUS_EQ: binop = OP_ADD; break;
|
||
case TOK_MINUS_EQ: binop = OP_SUB; break;
|
||
case TOK_STAR_EQ: binop = OP_MUL; break;
|
||
case TOK_SLASH_EQ: binop = OP_DIV; break;
|
||
default: break;
|
||
}
|
||
|
||
AstNode* rhs = parse_expr(p, error);
|
||
if (!rhs) return NULL;
|
||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||
|
||
AstNode* lhs_ident = ast_make_ident(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
tok_loc(name));
|
||
AstNode* bin_expr = ast_make_binary(p->arena, binop, lhs_ident, rhs,
|
||
tok_loc(name));
|
||
return ast_make_assign(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
bin_expr, tok_loc(name));
|
||
}
|
||
}
|
||
|
||
// 表达式语句
|
||
AstNode* expr = parse_expr(p, error);
|
||
if (!expr) return NULL;
|
||
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||
return ast_make_expr_stmt(p->arena, expr, tok_loc(t));
|
||
}
|
||
|
||
// === 函数解析 ===
|
||
static AstNode* parse_function(Parser* p, ErrorInfo* error) {
|
||
const Token* fn_tok = advance(p); // fn
|
||
const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名");
|
||
if (!name) return NULL;
|
||
if (!expect(p, TOK_LPAREN, error, "缺少 '('")) return NULL;
|
||
|
||
// 参数列表
|
||
AstNode* params[64]; int pcount = 0;
|
||
while (peek(p)->kind != TOK_RPAREN && !error->message) {
|
||
if (pcount >= 64) { error->message = "函数参数过多 (最多64)"; error->filename = p->filename; error->line = peek(p)->line; error->col = peek(p)->col; return NULL; }
|
||
const Token* pname = expect(p, TOK_IDENT, error, "参数名");
|
||
if (!pname) return NULL;
|
||
if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
|
||
TypeInfo pti = parse_type_expr(p, error);
|
||
if (pti.kind == TYPE_ERROR) return NULL;
|
||
params[pcount++] = ast_make_parameter(p->arena,
|
||
arena_strdup_impl(p->arena, pname->start, pname->length),
|
||
pti.kind, pti.struct_name, tok_loc(pname));
|
||
if (match(p, TOK_COMMA)) continue;
|
||
else break;
|
||
}
|
||
if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
|
||
|
||
// 返回类型
|
||
TypeKind ret = TYPE_VOID;
|
||
const char* ret_struct_name = NULL;
|
||
if (match(p, TOK_ARROW)) {
|
||
TypeInfo rti = parse_type_expr(p, error);
|
||
if (rti.kind == TYPE_ERROR) return NULL;
|
||
ret = rti.kind;
|
||
ret_struct_name = rti.struct_name;
|
||
}
|
||
|
||
AstNode* body = parse_block(p, error);
|
||
if (!body) return NULL;
|
||
|
||
AstNode** parr = arena_alloc_impl(p->arena, pcount * sizeof(AstNode*));
|
||
memcpy(parr, params, pcount * sizeof(AstNode*));
|
||
return ast_make_function(p->arena,
|
||
arena_strdup_impl(p->arena, name->start, name->length),
|
||
parr, pcount, ret, ret_struct_name, body, tok_loc(fn_tok));
|
||
}
|
||
|
||
// === 程序入口 ===
|
||
AstNode* parse(Arena* a, const Token* tokens, size_t count,
|
||
const char* filename, ErrorInfo* error) {
|
||
Parser p = {.tokens = tokens, .count = count, .pos = 0,
|
||
.filename = filename, .arena = a};
|
||
AstNode* functions[256]; int fn_count = 0;
|
||
AstNode* structs[64]; int struct_count = 0;
|
||
AstNode* aliases[64]; int alias_count = 0;
|
||
AstNode* enums[64]; int enum_count = 0;
|
||
while (peek(&p)->kind != TOK_EOF && !error->message) {
|
||
if (peek(&p)->kind == TOK_STRUCT) {
|
||
if (struct_count >= 64) { error->message = "结构体过多 (最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
|
||
structs[struct_count++] = parse_struct_decl(&p, error);
|
||
} else if (peek(&p)->kind == TOK_TYPE) {
|
||
if (alias_count >= 64) { error->message = "类型别名过多 (最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
|
||
const Token* type_tok = advance(&p); // 跳过 'type'
|
||
const Token* alias_name = expect(&p, TOK_IDENT, error, "type 后应为别名");
|
||
if (!alias_name) return NULL;
|
||
if (!expect(&p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
|
||
TypeInfo rti = parse_type_expr(&p, error);
|
||
if (rti.kind == TYPE_ERROR) return NULL;
|
||
if (!expect(&p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
|
||
aliases[alias_count++] = ast_make_type_alias(a,
|
||
arena_strdup_impl(a, alias_name->start, alias_name->length),
|
||
rti.kind, rti.struct_name, tok_loc(type_tok));
|
||
} else if (peek(&p)->kind == TOK_ENUM) {
|
||
advance(&p);
|
||
const Token* name = expect(&p, TOK_IDENT, error, "enum 后应为枚举名");
|
||
if (!name) return NULL;
|
||
if (!expect(&p, TOK_LBRACE, error, "缺少 '{'")) return NULL;
|
||
const char* variants[64]; int vcount = 0;
|
||
while (peek(&p)->kind != TOK_RBRACE && !error->message) {
|
||
if (vcount >= 64) { error->message = "枚举变体过多(最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
|
||
const Token* vname = expect(&p, TOK_IDENT, error, "变体名");
|
||
if (!vname) return NULL;
|
||
variants[vcount++] = arena_strdup_impl(p.arena, vname->start, vname->length);
|
||
if (peek(&p)->kind == TOK_COMMA) advance(&p); else break;
|
||
}
|
||
if (!expect(&p, TOK_RBRACE, error, "缺少 '}'")) return NULL;
|
||
const char** v_arr = arena_alloc_impl(p.arena, vcount * sizeof(const char*));
|
||
memcpy(v_arr, variants, vcount * sizeof(const char*));
|
||
AstNode* enum_decl = ast_make_enum_decl(p.arena, arena_strdup_impl(p.arena, name->start, name->length), v_arr, vcount, tok_loc(name));
|
||
if (enum_count >= 64) { error->message = "枚举过多 (最多64)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
|
||
enums[enum_count++] = enum_decl;
|
||
} else if (peek(&p)->kind == TOK_FN) {
|
||
if (fn_count >= 256) { error->message = "函数过多 (最多256)"; error->filename = p.filename; error->line = peek(&p)->line; error->col = peek(&p)->col; return NULL; }
|
||
functions[fn_count++] = parse_function(&p, error);
|
||
} else {
|
||
error->message = "顶层只允许 fn、struct、type 或 enum";
|
||
error->filename = p.filename;
|
||
error->line = peek(&p)->line;
|
||
error->col = peek(&p)->col;
|
||
return NULL;
|
||
}
|
||
}
|
||
if (error->message) return NULL;
|
||
AstNode** fn_arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*));
|
||
memcpy(fn_arr, functions, fn_count * sizeof(AstNode*));
|
||
AstNode** st_arr = arena_alloc_impl(a, struct_count * sizeof(AstNode*));
|
||
memcpy(st_arr, structs, struct_count * sizeof(AstNode*));
|
||
AstNode** al_arr = arena_alloc_impl(a, alias_count * sizeof(AstNode*));
|
||
memcpy(al_arr, aliases, alias_count * sizeof(AstNode*));
|
||
AstNode** en_arr = arena_alloc_impl(a, enum_count * sizeof(AstNode*));
|
||
memcpy(en_arr, enums, enum_count * sizeof(AstNode*));
|
||
return ast_make_program(a, fn_arr, fn_count, st_arr, struct_count,
|
||
al_arr, alias_count, en_arr, enum_count, loc_at(0, 0));
|
||
}
|