feat: 结构体 struct — 最后一项 P0 功能

- lexer: TOK_STRUCT, TOK_DOT 关键字和运算符
- ast: AST_STRUCT_DECL/STRUCT_INIT/FIELD_ACCESS 3 种新节点
- parser: struct 声明 + .field 访问 + Name{field:val} 初始化
- sema: struct 类型符号表,字段类型解析,初始化字段检查
- codegen: LLVMStructType + extractvalue 字段读取,alloca/GEP/store 结构体初始化
- 新增集成测试: 12_struct.l, 13_struct_nested.l
- 基于 Codex 分析报告 P0 #4

所有 P0 功能已全部完成。
This commit is contained in:
2026-06-05 12:21:22 +08:00
parent 620cec4d57
commit b390d390f3
17 changed files with 1521 additions and 47 deletions
+45 -6
View File
@@ -4,13 +4,16 @@
// Node-creation helper macro. Expands to statements that declare a local
// `n`, allocate an AstNode from `alloc` via arena_alloc_impl, set its kind to
// `k`, reset its type info, and copy the identifiers `line` and `col`, which
// must be in scope at the expansion site. The allocation result is
// dereferenced without a NULL check.
#define NEW(alloc, k) \
AstNode* n = (AstNode*)arena_alloc_impl(alloc, sizeof(AstNode)); \
n->kind = (k); n->type.kind = TYPE_UNKNOWN; \
n->kind = (k); n->type.kind = TYPE_UNKNOWN; n->type.struct_name = NULL; \
n->line = line; n->col = col
// Creates the AST_PROGRAM root node. The function array and the struct
// declaration array are stored by pointer (not copied) together with their
// element counts. Returns the node produced by NEW.
AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col) {
AstNode* ast_make_program(void* alloc, AstNode** fns, size_t fn_count,
AstNode** structs, size_t struct_count, int line, int col) {
NEW(alloc, AST_PROGRAM);
n->as.program.functions = fns;
n->as.program.fn_count = count;
n->as.program.fn_count = fn_count;
n->as.program.structs = structs;
n->as.program.struct_count = struct_count;
return n;
}
@@ -23,9 +26,11 @@ AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size
return n;
}
// Creates an AST_PARAMETER node holding a name and a TypeKind.
// `struct_type_name` is stored as given; it carries the concrete struct name
// for parameters/fields whose type is a struct.
AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col) {
AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type,
const char* struct_type_name, int line, int col) {
NEW(alloc, AST_PARAMETER);
n->as.parameter.name = name; n->as.parameter.type = type;
n->as.parameter.struct_type_name = struct_type_name;
return n;
}
@@ -35,10 +40,13 @@ AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, in
return n;
}
// Creates an AST_LET_STMT node. Records the variable name, the optional type
// annotation (`annot_type` is meaningful when `has_type_annot` is true), the
// mutability flag, the initializer expression, and the struct type name of
// the annotation (stored as given).
AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, bool is_mut, AstNode* init, int line, int col) {
AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot,
bool is_mut, AstNode* init, const char* struct_type_name, int line, int col) {
NEW(alloc, AST_LET_STMT);
n->as.let_stmt.name = name; n->as.let_stmt.annot_type = annot_type;
n->as.let_stmt.has_type_annot = has_type_annot; n->as.let_stmt.is_mut = is_mut; n->as.let_stmt.init = init;
n->as.let_stmt.has_type_annot = has_type_annot; n->as.let_stmt.is_mut = is_mut;
n->as.let_stmt.init = init;
n->as.let_stmt.struct_type_name = struct_type_name;
return n;
}
@@ -124,3 +132,34 @@ AstNode* ast_make_ident(void* alloc, const char* name, int line, int col) {
n->as.ident.name = name;
return n;
}
// === Struct-related factory functions ===

// Creates an AST_STRUCT_DECL node for `struct name { ... }`.
// `fields` (an array of `count` field nodes) is stored by pointer, not copied.
AstNode* ast_make_struct_decl(void* alloc, const char* name, AstNode** fields,
                              size_t count, int line, int col) {
    NEW(alloc, AST_STRUCT_DECL);
    n->as.struct_decl.field_count = count;
    n->as.struct_decl.fields      = fields;
    n->as.struct_decl.name        = name;
    return n;
}
// Creates an AST_STRUCT_INIT node for `TypeName { field: value, ... }`.
// `fnames[i]` and `fvals[i]` describe the i-th initializer; both arrays hold
// `count` entries and are stored by pointer, not copied.
AstNode* ast_make_struct_init(void* alloc, const char* type_name,
                              const char** fnames, AstNode** fvals,
                              size_t count, int line, int col) {
    NEW(alloc, AST_STRUCT_INIT);
    n->as.struct_init.field_count  = count;
    n->as.struct_init.field_values = fvals;
    n->as.struct_init.field_names  = fnames;
    n->as.struct_init.type_name    = type_name;
    return n;
}
// Creates an AST_FIELD_ACCESS node for `object.field`.
// `field_index` starts at -1, meaning "not resolved yet".
AstNode* ast_make_field_access(void* alloc, AstNode* object, const char* field,
                               int line, int col) {
    NEW(alloc, AST_FIELD_ACCESS);
    n->as.field_access.field_index = -1;
    n->as.field_access.field       = field;
    n->as.field_access.object      = object;
    return n;
}
+25 -7
View File
@@ -20,6 +20,9 @@ typedef enum {
AST_CALL_EXPR,
AST_LITERAL_EXPR,
AST_IDENT_EXPR,
AST_STRUCT_DECL, // struct Point { x: i64, y: i64 }
AST_STRUCT_INIT, // Point { x: 10, y: 20 }
AST_FIELD_ACCESS, // p.x
} AstKind;
typedef enum {
@@ -32,6 +35,7 @@ typedef enum {
// Type information attached to a node (filled in during semantic analysis).
typedef struct {
TypeKind kind;
const char* struct_name; // struct type name when kind is TYPE_STRUCT
} TypeInfo;
// AST 节点
@@ -44,16 +48,18 @@ struct AstNode {
// 节点特有数据(按 kind 解释)
union {
// AST_PROGRAM
struct { struct AstNode** functions; size_t fn_count; } program;
struct { struct AstNode** functions; size_t fn_count;
struct AstNode** structs; size_t struct_count; } program;
// AST_FUNCTION
struct { const char* name; struct AstNode** params; size_t param_count;
TypeKind return_type; struct AstNode* body; } function;
// AST_PARAMETER
struct { const char* name; TypeKind type; } parameter;
// AST_PARAMETER (也用作结构体字段: name + type)
struct { const char* name; TypeKind type; const char* struct_type_name; } parameter;
// AST_BLOCK
struct { struct AstNode** stmts; size_t stmt_count; } block;
// AST_LET_STMT
struct { const char* name; TypeKind annot_type; bool has_type_annot; bool is_mut; struct AstNode* init; } let_stmt;
struct { const char* name; TypeKind annot_type; bool has_type_annot; bool is_mut; struct AstNode* init;
const char* struct_type_name; } let_stmt;
// AST_ASSIGN_STMT
struct { const char* name; struct AstNode* value; } assign_stmt;
// AST_IF_STMT
@@ -74,16 +80,25 @@ struct AstNode {
struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; const char* str_val; }; } literal;
// AST_IDENT_EXPR
struct { const char* name; } ident;
// AST_STRUCT_DECL
struct { const char* name; struct AstNode** fields; size_t field_count; } struct_decl;
// AST_STRUCT_INIT
struct { const char* type_name; const char** field_names;
struct AstNode** field_values; size_t field_count; } struct_init;
// AST_FIELD_ACCESS
struct { struct AstNode* object; const char* field; int field_index; } field_access;
} as;
};
// 创建节点的辅助函数(内存来自 arena,通过 void* 传递避免循环依赖)
AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col);
AstNode* ast_make_program(void* alloc, AstNode** fns, size_t fn_count,
AstNode** structs, size_t struct_count, int line, int col);
AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount,
TypeKind ret, AstNode* body, int line, int col);
AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col);
AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, const char* struct_type_name, int line, int col);
AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col);
AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, bool is_mut, AstNode* init, int line, int col);
AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot,
bool is_mut, AstNode* init, const char* struct_type_name, int line, int col);
AstNode* ast_make_assign(void* alloc, const char* name, AstNode* value, int line, int col);
AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col);
AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col);
@@ -97,5 +112,8 @@ AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col);
AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col);
AstNode* ast_make_literal_str(void* alloc, const char* val, int line, int col);
AstNode* ast_make_ident(void* alloc, const char* name, int line, int col);
AstNode* ast_make_struct_decl(void* alloc, const char* name, AstNode** fields, size_t count, int line, int col);
AstNode* ast_make_struct_init(void* alloc, const char* type_name, const char** fnames, AstNode** fvals, size_t count, int line, int col);
AstNode* ast_make_field_access(void* alloc, AstNode* object, const char* field, int line, int col);
#endif
+114 -3
View File
@@ -20,6 +20,14 @@ typedef struct FnEntry {
struct FnEntry* next;
} FnEntry;
// Struct type map: one singly linked list entry per declared struct,
// associating the struct's name with its LLVM type.
typedef struct StructTypeEntry {
const char* name;              // struct name used as the lookup key
LLVMTypeRef llvm_type;         // LLVM type registered for that name
size_t field_count;            // number of declared fields (field names are not kept here)
struct StructTypeEntry* next;  // next entry in the list
} StructTypeEntry;
typedef struct {
Arena* arena; // 代码生成阶段分配器
LLVMContextRef context; // LLVM 19+ 需要显式 Context
@@ -28,6 +36,7 @@ typedef struct {
VarEntry* var_table;
const char* error;
FnEntry* fn_table;
StructTypeEntry* struct_table;
// printf 运行时支持(内置 print 函数委托给 printf)
LLVMValueRef printf_fn;
LLVMTypeRef printf_ty;
@@ -89,6 +98,21 @@ static void add_fn(CgCtx* ctx, const char* name, LLVMValueRef fn) {
ctx->fn_table = e;
}
// === Struct type table ===

// Prepends a (name -> LLVM type) entry to ctx->struct_table, recording the
// field count alongside it. Silently does nothing if the arena allocation
// fails.
static void add_struct_type(CgCtx* ctx, const char* name, LLVMTypeRef ty, size_t fc) {
    StructTypeEntry* entry = arena_alloc(ctx->arena, sizeof(*entry));
    if (entry == NULL) {
        return;
    }
    entry->field_count = fc;
    entry->llvm_type   = ty;
    entry->name        = name;
    entry->next        = ctx->struct_table;
    ctx->struct_table  = entry;
}
// Looks up the LLVM type registered under `name` in ctx->struct_table.
// Returns NULL when no entry matches. `name` must not be NULL (it is passed
// straight to strcmp).
static LLVMTypeRef find_struct_type(CgCtx* ctx, const char* name) {
    StructTypeEntry* cur = ctx->struct_table;
    while (cur != NULL) {
        if (strcmp(cur->name, name) == 0) {
            return cur->llvm_type;
        }
        cur = cur->next;
    }
    return NULL;
}
// === 向前声明 ===
static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node);
static void codegen_stmt(CgCtx* ctx, AstNode* node);
@@ -107,7 +131,14 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
case AST_IDENT_EXPR: {
LLVMValueRef ptr = find_var(ctx, node->as.ident.name);
if (!ptr) return NULL;
return LLVMBuildLoad2(ctx->builder, to_llvm_type(ctx, node->type.kind), ptr, "load");
LLVMTypeRef load_ty;
if (node->type.kind == TYPE_STRUCT && node->type.struct_name) {
load_ty = find_struct_type(ctx, node->type.struct_name);
if (!load_ty) load_ty = to_llvm_type(ctx, node->type.kind);
} else {
load_ty = to_llvm_type(ctx, node->type.kind);
}
return LLVMBuildLoad2(ctx->builder, load_ty, ptr, "load");
}
case AST_UNARY_EXPR: {
@@ -260,6 +291,50 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
ret_ty == LLVMVoidTypeInContext(ctx->context) ? "" : "call");
}
// === 结构体字段访问: p.x ===
case AST_FIELD_ACCESS: {
// 对对象求值(返回的是 struct 值)
LLVMValueRef struct_val = codegen_expr(ctx, node->as.field_access.object);
if (!struct_val) return NULL;
int field_idx = node->as.field_access.field_index;
if (field_idx < 0) return NULL; // sema 应当已经设置
// 用 extractvalue 从结构体值中提取字段
return LLVMBuildExtractValue(ctx->builder, struct_val,
(unsigned)field_idx, node->as.field_access.field);
}
// === Struct initializer: Point { x: 10, y: 20 } ===
// Builds the struct in a temporary stack slot, stores each initializer value
// into it, then loads and returns the whole struct value. Returns NULL if the
// struct type is not registered or any field expression fails to generate.
case AST_STRUCT_INIT: {
const char* st_name = node->as.struct_init.type_name;
LLVMTypeRef struct_ty = find_struct_type(ctx, st_name);
if (!struct_ty) return NULL;
// Reserve stack space for the struct (emitted at the current insertion point).
LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder, struct_ty, "struct_init");
// NOTE(review): the store below uses the initializer's position `i` as the
// struct field index; no lookup by field name happens in this block. This is
// only correct when the initializer lists fields in declaration order --
// confirm that an earlier pass guarantees (or normalizes to) that order,
// otherwise `Point { y: 1, x: 2 }` stores values into the wrong fields.
for (size_t i = 0; i < node->as.struct_init.field_count; i++) {
AstNode* fval = node->as.struct_init.field_values[i];
LLVMValueRef val = codegen_expr(ctx, fval);
if (!val) return NULL;
// Field pointer: GEP struct_ty, alloca, 0, i
LLVMValueRef indices[] = {
LLVMConstInt(LLVMInt32TypeInContext(ctx->context), 0, false),
LLVMConstInt(LLVMInt32TypeInContext(ctx->context), (unsigned long long)i, false)
};
LLVMValueRef field_ptr = LLVMBuildGEP2(ctx->builder, struct_ty, alloca,
indices, 2, "field_ptr");
LLVMBuildStore(ctx->builder, val, field_ptr);
}
// Load the complete struct value.
return LLVMBuildLoad2(ctx->builder, struct_ty, alloca, "struct_val");
}
default:
return NULL;
}
@@ -273,8 +348,18 @@ static void codegen_stmt(CgCtx* ctx, AstNode* node) {
case AST_LET_STMT: {
LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init);
if (!init_val) return;
LLVMTypeRef var_type;
if (node->as.let_stmt.init->type.kind == TYPE_STRUCT &&
node->as.let_stmt.init->type.struct_name) {
var_type = find_struct_type(ctx, node->as.let_stmt.init->type.struct_name);
if (!var_type) var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind);
} else {
var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind);
}
LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder,
to_llvm_type(ctx, node->as.let_stmt.init->type.kind), node->as.let_stmt.name);
var_type, node->as.let_stmt.name);
LLVMBuildStore(ctx->builder, init_val, alloca);
add_var(ctx, node->as.let_stmt.name, alloca);
break;
@@ -410,6 +495,33 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena,
memcpy_args, 3, false);
ctx.memcpy_fn = LLVMAddFunction(ctx.module, "memcpy", memcpy_ty);
// Pass 0: create every named struct type first (opaque placeholder, body not
// set yet) and register it in the struct type table.
for (size_t i = 0; i < ast->as.program.struct_count; i++) {
AstNode* sd = ast->as.program.structs[i];
LLVMTypeRef llvm_st = LLVMStructCreateNamed(ctx.context, sd->as.struct_decl.name);
add_struct_type(&ctx, sd->as.struct_decl.name, llvm_st,
sd->as.struct_decl.field_count);
}
// Then set each struct's body. All struct names are registered at this point,
// so a field may refer to a struct declared later in the file.
// NOTE(review): find_struct_type returns NULL for an unregistered name, and
// neither `llvm_st` nor `elem_types[j]` is checked before being handed to
// LLVMStructSetBody -- confirm that undeclared field struct types are rejected
// before code generation runs.
for (size_t i = 0; i < ast->as.program.struct_count; i++) {
AstNode* sd = ast->as.program.structs[i];
LLVMTypeRef llvm_st = find_struct_type(&ctx, sd->as.struct_decl.name);
LLVMTypeRef* elem_types = arena_alloc(ctx.arena,
sd->as.struct_decl.field_count * sizeof(LLVMTypeRef));
for (size_t j = 0; j < sd->as.struct_decl.field_count; j++) {
AstNode* field = sd->as.struct_decl.fields[j];
// Struct-typed fields resolve through the struct table; everything else
// goes through the scalar type mapping.
if (field->as.parameter.type == TYPE_STRUCT &&
field->as.parameter.struct_type_name) {
elem_types[j] = find_struct_type(&ctx,
field->as.parameter.struct_type_name);
} else {
elem_types[j] = to_llvm_type(&ctx, field->as.parameter.type);
}
}
// Last argument `false`: the struct is not packed.
LLVMStructSetBody(llvm_st, elem_types,
(unsigned)sd->as.struct_decl.field_count, false);
}
// 第一遍:声明所有 L 函数
for (size_t i = 0; i < ast->as.program.fn_count; i++) {
AstNode* fn = ast->as.program.functions[i];
@@ -457,7 +569,6 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena,
}
// 验证模块(使用 ReturnStatus 以获取完整错误消息)
// 注: LLVM 22 C API 不再导出 mem2reg pass, alloca 优化需用 opt 工具
char* verify_err = NULL;
if (LLVMVerifyModule(ctx.module, LLVMReturnStatusAction, &verify_err)) {
*error_msg = verify_err ? verify_err : "模块验证失败(错误消息为 NULL";
+2
View File
@@ -62,6 +62,7 @@ static TokenKind check_keyword(const Token* tok) {
KW("i64", TOK_I64); KW("f64", TOK_F64);
KW("bool", TOK_BOOL); KW("str", TOK_STR);
KW("void", TOK_VOID);
KW("struct", TOK_STRUCT);
KW("true", TOK_TRUE); KW("false", TOK_FALSE);
#undef KW
return TOK_IDENT;
@@ -126,6 +127,7 @@ Token* lex(Arena* a, const char* source, const char* filename,
else if (c == '&' && peek_next(&l) == '&') { tokens[idx++] = make_token(&l, TOK_AND_AND, l.pos, 2); advance(&l); advance(&l); }
else if (c == '|' && peek_next(&l) == '|') { tokens[idx++] = make_token(&l, TOK_PIPE_PIPE, l.pos, 2); advance(&l); advance(&l); }
else if (c == '.' && peek_next(&l) == '.') { tokens[idx++] = make_token(&l, TOK_DOT_DOT, l.pos, 2); advance(&l); advance(&l); }
else if (c == '.') { tokens[idx++] = make_token(&l, TOK_DOT, l.pos, 1); advance(&l); }
else if (c == '(') { tokens[idx++] = make_token(&l, TOK_LPAREN, l.pos, 1); advance(&l); }
else if (c == ')') { tokens[idx++] = make_token(&l, TOK_RPAREN, l.pos, 1); advance(&l); }
else if (c == '{') { tokens[idx++] = make_token(&l, TOK_LBRACE, l.pos, 1); advance(&l); }
+2
View File
@@ -7,6 +7,7 @@
static const char* NAMES[] = {
[TOK_FN] = "fn", [TOK_LET] = "let", [TOK_MUT] = "mut", [TOK_IF] = "if",
[TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_FOR] = "for", [TOK_IN] = "in", [TOK_RETURN] = "return",
[TOK_STRUCT] = "struct",
[TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_STR] = "str", [TOK_VOID] = "void",
[TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", [TOK_STR_LIT] = "字符串",
[TOK_TRUE] = "true", [TOK_FALSE] = "false",
@@ -22,6 +23,7 @@ static const char* NAMES[] = {
[TOK_LBRACE] = "{", [TOK_RBRACE] = "}",
[TOK_COMMA] = ",", [TOK_COLON] = ":", [TOK_SEMICOLON] = ";",
[TOK_ASSIGN] = "=",
[TOK_DOT] = ".",
[TOK_EOF] = "EOF", [TOK_ERROR] = "错误",
};
+2
View File
@@ -7,6 +7,7 @@
typedef enum {
// 关键字
TOK_FN, TOK_LET, TOK_MUT, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_FOR, TOK_IN, TOK_RETURN,
TOK_STRUCT,
// 类型关键字
TOK_I64, TOK_F64, TOK_BOOL, TOK_STR, TOK_VOID,
// 字面量
@@ -23,6 +24,7 @@ typedef enum {
TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE,
TOK_COMMA, TOK_COLON, TOK_SEMICOLON, TOK_ASSIGN,
// 特殊
TOK_DOT,
TOK_EOF, TOK_ERROR,
} TokenKind;
+134 -12
View File
@@ -33,6 +33,7 @@ typedef enum {
PREC_TERM = 50,
PREC_FACTOR = 60,
PREC_UNARY = 70,
PREC_POSTFIX = 80, // .field, call()
} Precedence;
static Precedence tok_to_prec(TokenKind kind) {
@@ -64,6 +65,8 @@ static BinaryOp tok_to_binop(TokenKind kind) {
static AstNode* parse_expr(Parser* p, ErrorInfo* error);
static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
static AstNode* parse_block(Parser* p, ErrorInfo* error);
static AstNode* parse_statement(Parser* p, ErrorInfo* error);
static AstNode* parse_function(Parser* p, ErrorInfo* error);
// === 前缀解析 ===
static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
@@ -99,10 +102,58 @@ static AstNode* parse_literal(Parser* p) {
}
}
// === Struct initializer parsing: Name { field: val, ... } ===
//
// Called with the parser positioned on the '{' that follows the struct name.
// Parses a comma-separated list of `field: expr` pairs (a trailing comma is
// accepted) up to the closing '}' and returns an AST_STRUCT_INIT node whose
// name/value arrays are copied into the arena.
//
// Returns NULL with `error` filled in on any syntax error, or when the
// initializer has more fields than the fixed-size staging buffers can hold.
static AstNode* parse_struct_init(Parser* p, const Token* name, ErrorInfo* error) {
    enum { MAX_INIT_FIELDS = 32 };
    const char* fnames[MAX_INIT_FIELDS];
    AstNode* fvals[MAX_INIT_FIELDS];
    int fcount = 0;

    advance(p); // consume '{'

    while (peek(p)->kind != TOK_RBRACE && !error->message) {
        const Token* fname = expect(p, TOK_IDENT, error, "字段名");
        if (!fname) return NULL;

        // Refuse the field before writing past the end of the stack buffers.
        if (fcount >= MAX_INIT_FIELDS) {
            error->message = "结构体初始化字段过多(最多 32 个)";
            error->filename = p->filename;
            error->line = fname->line;
            error->col = fname->col;
            return NULL;
        }

        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
        AstNode* val = parse_expr(p, error);
        if (!val) return NULL;

        fnames[fcount] = arena_strdup_impl(p->arena, fname->start, fname->length);
        fvals[fcount] = val;
        fcount++;

        if (peek(p)->kind == TOK_COMMA) advance(p);
        else break;
    }
    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;

    // Move the staged entries into arena storage owned by the AST.
    size_t n_fields = (size_t)fcount;
    const char** n_arr = arena_alloc_impl(p->arena, n_fields * sizeof(const char*));
    AstNode** v_arr = arena_alloc_impl(p->arena, n_fields * sizeof(AstNode*));
    if (n_fields > 0) {
        // Skip memcpy for an empty initializer: the zero-size allocations
        // above may legitimately be NULL.
        memcpy(n_arr, fnames, n_fields * sizeof(const char*));
        memcpy(v_arr, fvals, n_fields * sizeof(AstNode*));
    }

    return ast_make_struct_init(p->arena,
        arena_strdup_impl(p->arena, name->start, name->length),
        n_arr, v_arr, n_fields, name->line, name->col);
}
// === 标识符 / 函数调用 / 结构体初始化 ===
static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
const Token* name = advance(p);
// 结构体初始化: Name { field: val, ... }
// 用提前看来区别 struct init 和 block
// struct init → { IDENT COLON ... block → { 可能是 let/if/while/...
if (peek(p)->kind == TOK_LBRACE) {
const Token* after_brace = &p->tokens[p->pos + 1];
if (after_brace->kind == TOK_IDENT) {
const Token* after_fname = &p->tokens[p->pos + 2];
if (after_fname->kind == TOK_COLON) {
return parse_struct_init(p, name, error);
}
}
}
// 函数调用: name(...)
if (match(p, TOK_LPAREN)) {
// 函数调用
AstNode* args[16]; int arg_count = 0;
while (peek(p)->kind != TOK_RPAREN && !error->message) {
if (arg_count >= 16) {
@@ -149,9 +200,22 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error
}
if (!left) return NULL;
// 中缀解析循环
// 中缀/后置解析循环
while (!error->message) {
TokenKind kind = peek(p)->kind;
// 后置字段访问: expr.field
if (kind == TOK_DOT) {
advance(p); // 跳过 '.'
const Token* field = expect(p, TOK_IDENT, error, "缺少字段名");
if (!field) return NULL;
left = ast_make_field_access(p->arena, left,
arena_strdup_impl(p->arena, field->start, field->length),
field->line, field->col);
continue;
}
// 中缀运算符
Precedence prec = tok_to_prec(kind);
if (prec <= min_prec) break;
@@ -179,8 +243,46 @@ static TypeKind token_to_type(TokenKind k) {
default: return TYPE_VOID; }
}
// === Struct declaration parsing ===
//
// Parses `struct Name { field: Type, ... }` starting at the `struct` keyword
// (a trailing comma is accepted). Each field becomes an AST_PARAMETER node:
// built-in type keywords map through token_to_type, while an identifier in
// type position is recorded as TYPE_STRUCT together with its name.
//
// Returns NULL with `error` filled in on any syntax error, or when the
// declaration has more fields than the fixed-size staging buffer can hold.
static AstNode* parse_struct_decl(Parser* p, ErrorInfo* error) {
    enum { MAX_STRUCT_FIELDS = 32 };

    const Token* s_tok = advance(p); // consume 'struct'
    const Token* name = expect(p, TOK_IDENT, error, "struct 后应为结构体名");
    if (!name) return NULL;
    if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) return NULL;

    AstNode* fields[MAX_STRUCT_FIELDS];
    int fcount = 0;

    while (peek(p)->kind != TOK_RBRACE && !error->message) {
        const Token* fname = expect(p, TOK_IDENT, error, "字段名");
        if (!fname) return NULL;

        // Refuse the field before writing past the end of the stack buffer.
        if (fcount >= MAX_STRUCT_FIELDS) {
            error->message = "结构体字段过多(最多 32 个)";
            error->filename = p->filename;
            error->line = fname->line;
            error->col = fname->col;
            return NULL;
        }

        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;

        const Token* ftype = advance(p);
        TypeKind field_kind;
        const char* field_struct_name = NULL;
        if (is_type_token(ftype->kind)) {
            field_kind = token_to_type(ftype->kind);
        } else if (ftype->kind == TOK_IDENT) {
            // A bare identifier names another struct type.
            field_kind = TYPE_STRUCT;
            field_struct_name = arena_strdup_impl(p->arena, ftype->start, ftype->length);
        } else {
            error->message = "无效的字段类型";
            error->filename = p->filename;
            error->line = ftype->line;
            error->col = ftype->col;
            return NULL;
        }

        fields[fcount++] = ast_make_parameter(p->arena,
            arena_strdup_impl(p->arena, fname->start, fname->length),
            field_kind, field_struct_name, fname->line, fname->col);

        if (peek(p)->kind == TOK_COMMA) advance(p);
        else break;
    }
    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;

    // Move the staged field nodes into arena storage owned by the AST.
    size_t n_fields = (size_t)fcount;
    AstNode** farr = arena_alloc_impl(p->arena, n_fields * sizeof(AstNode*));
    if (n_fields > 0) {
        // Skip memcpy for an empty struct: the zero-size allocation above may
        // legitimately be NULL.
        memcpy(farr, fields, n_fields * sizeof(AstNode*));
    }

    return ast_make_struct_decl(p->arena,
        arena_strdup_impl(p->arena, name->start, name->length),
        farr, n_fields, s_tok->line, s_tok->col);
}
// === 语句解析 ===
static AstNode* parse_statement(Parser* p, ErrorInfo* error);
static AstNode* parse_block(Parser* p, ErrorInfo* error) {
const Token* open = peek(p);
@@ -209,13 +311,20 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
// 可选的类型标注
TypeKind annot_type = TYPE_UNKNOWN;
bool has_type_annot = false;
const char* struct_type_name = NULL;
if (match(p, TOK_COLON)) {
const Token* type_tok = advance(p);
if (!is_type_token(type_tok->kind)) {
if (!is_type_token(type_tok->kind) && type_tok->kind != TOK_IDENT) {
error->message = "无效的类型标注"; error->filename = p->filename;
error->line = type_tok->line; error->col = type_tok->col; return NULL;
}
annot_type = token_to_type(type_tok->kind);
if (is_type_token(type_tok->kind)) {
annot_type = token_to_type(type_tok->kind);
} else {
// struct 类型名
annot_type = TYPE_STRUCT;
struct_type_name = arena_strdup_impl(p->arena, type_tok->start, type_tok->length);
}
has_type_annot = true;
}
if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
@@ -224,7 +333,7 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
return ast_make_let(p->arena,
arena_strdup_impl(p->arena, name->start, name->length),
annot_type, has_type_annot, is_mut, init, t->line, t->col);
annot_type, has_type_annot, is_mut, init, struct_type_name, t->line, t->col);
}
if (t->kind == TOK_IF) {
@@ -285,7 +394,7 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
const char* vname = arena_strdup_impl(p->arena, var_name->start, var_name->length);
// 构建: let mut i = start;
AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, true, start_expr, var_name->line, var_name->col);
AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, true, start_expr, NULL, var_name->line, var_name->col);
// 构建: i < end (while 条件)
AstNode* cond = ast_make_binary(p->arena, OP_LT,
@@ -399,7 +508,7 @@ static AstNode* parse_function(Parser* p, ErrorInfo* error) {
}
params[pcount++] = ast_make_parameter(p->arena,
arena_strdup_impl(p->arena, pname->start, pname->length),
token_to_type(ptype->kind), pname->line, pname->col);
token_to_type(ptype->kind), NULL, pname->line, pname->col);
if (match(p, TOK_COMMA)) continue;
else break;
}
@@ -432,11 +541,24 @@ AstNode* parse(Arena* a, const Token* tokens, size_t count,
Parser p = {.tokens = tokens, .count = count, .pos = 0,
.filename = filename, .arena = a};
AstNode* functions[256]; int fn_count = 0;
AstNode* structs[64]; int struct_count = 0;
while (peek(&p)->kind != TOK_EOF && !error->message) {
functions[fn_count++] = parse_function(&p, error);
if (peek(&p)->kind == TOK_STRUCT) {
structs[struct_count++] = parse_struct_decl(&p, error);
} else if (peek(&p)->kind == TOK_FN) {
functions[fn_count++] = parse_function(&p, error);
} else {
error->message = "顶层只允许 fn 或 struct";
error->filename = p.filename;
error->line = peek(&p)->line;
error->col = peek(&p)->col;
return NULL;
}
}
if (error->message) return NULL;
AstNode** arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*));
memcpy(arr, functions, fn_count * sizeof(AstNode*));
return ast_make_program(a, arr, fn_count, 0, 0);
AstNode** fn_arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*));
memcpy(fn_arr, functions, fn_count * sizeof(AstNode*));
AstNode** st_arr = arena_alloc_impl(a, struct_count * sizeof(AstNode*));
memcpy(st_arr, structs, struct_count * sizeof(AstNode*));
return ast_make_program(a, fn_arr, fn_count, st_arr, struct_count, 0, 0);
}
+148 -11
View File
@@ -33,6 +33,9 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena*
node->type.kind = TYPE_ERROR;
} else {
node->type.kind = sym->type;
if (sym->type == TYPE_STRUCT && sym->struct_type_name) {
node->type.struct_name = sym->struct_type_name;
}
}
break;
}
@@ -156,6 +159,99 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena*
break;
}
case AST_FIELD_ACCESS: {
analyze_expr(node->as.field_access.object, scope, errors, a);
AstNode* obj = node->as.field_access.object;
if (obj->type.kind == TYPE_ERROR) {
node->type.kind = TYPE_ERROR;
break;
}
if (obj->type.kind != TYPE_STRUCT) {
error_add(errors, "<sema>", node->line, node->col,
"类型 '%s' 不是结构体,不能访问字段 '%s'",
type_name(obj->type.kind), node->as.field_access.field);
node->type.kind = TYPE_ERROR;
break;
}
// 查找结构体定义
const char* struct_name = obj->type.struct_name;
if (!struct_name) {
error_add(errors, "<sema>", node->line, node->col,
"无法确定结构体类型");
node->type.kind = TYPE_ERROR;
break;
}
Symbol* struct_sym = scope_lookup_struct(scope, struct_name);
if (!struct_sym) {
error_add(errors, "<sema>", node->line, node->col,
"未定义的结构体 '%s'", struct_name);
node->type.kind = TYPE_ERROR;
break;
}
int fi = scope_struct_field_index(struct_sym, node->as.field_access.field);
if (fi < 0) {
error_add(errors, "<sema>", node->line, node->col,
"结构体 '%s' 没有字段 '%s'", struct_name, node->as.field_access.field);
node->type.kind = TYPE_ERROR;
break;
}
node->type.kind = struct_sym->struct_field_types[fi];
node->as.field_access.field_index = fi;
// 如果字段也是结构体类型,传播类型名
if (node->type.kind == TYPE_STRUCT &&
struct_sym->struct_field_struct_names &&
struct_sym->struct_field_struct_names[fi]) {
node->type.struct_name = struct_sym->struct_field_struct_names[fi];
}
break;
}
case AST_STRUCT_INIT: {
    // Checks `Name { field: value, ... }` against the declared struct:
    //   - the struct must exist and the number of initializers must match;
    //   - every field name must exist and may be initialized only once;
    //   - each value's TypeKind must equal the declared field TypeKind
    //     (for TYPE_STRUCT fields only the kind is compared here, not the
    //     struct name).
    // On success the initializer arrays are replaced by copies sorted into
    // declaration order, so entry i always targets declared field i. Field
    // values are therefore visited by later passes in declaration order.
    const char* st_name = node->as.struct_init.type_name;
    size_t given = node->as.struct_init.field_count;

    Symbol* struct_sym = scope_lookup_struct(scope, st_name);
    if (!struct_sym) {
        error_add(errors, "<sema>", node->line, node->col,
                  "未定义的结构体类型 '%s'", st_name);
        node->type.kind = TYPE_ERROR;
        break;
    }
    if (given != struct_sym->struct_field_count) {
        error_add(errors, "<sema>", node->line, node->col,
                  "结构体 '%s' 有 %zu 个字段,但提供了 %zu 个",
                  st_name, struct_sym->struct_field_count, given);
        node->type.kind = TYPE_ERROR;
        break;
    }

    // One slot per declared field; a non-NULL value marks the slot as taken.
    const char** slot_names = NULL;
    AstNode** slot_values = NULL;
    if (given > 0) {
        slot_names = (const char**)arena_alloc_impl(a, given * sizeof(const char*));
        slot_values = (AstNode**)arena_alloc_impl(a, given * sizeof(AstNode*));
        if (!slot_names || !slot_values) {
            error_add(errors, "<sema>", node->line, node->col,
                      "内存不足: 无法分析结构体 '%s' 的初始化", st_name);
            node->type.kind = TYPE_ERROR;
            break;
        }
        for (size_t k = 0; k < given; k++) {
            slot_names[k] = NULL;
            slot_values[k] = NULL;
        }
    }

    for (size_t i = 0; i < given; i++) {
        const char* fname = node->as.struct_init.field_names[i];
        AstNode* fval = node->as.struct_init.field_values[i];
        analyze_expr(fval, scope, errors, a);

        int fi = scope_struct_field_index(struct_sym, fname);
        if (fi < 0) {
            error_add(errors, "<sema>", node->line, node->col,
                      "结构体 '%s' 没有字段 '%s'", st_name, fname);
            node->type.kind = TYPE_ERROR;
            continue;
        }
        if (slot_values[fi] != NULL) {
            // Same field named twice: with matching counts this also means
            // some other field would be left uninitialized.
            error_add(errors, "<sema>", node->line, node->col,
                      "结构体 '%s' 的字段 '%s' 被重复初始化", st_name, fname);
            node->type.kind = TYPE_ERROR;
            continue;
        }
        slot_names[fi] = fname;
        slot_values[fi] = fval;

        TypeKind expected = struct_sym->struct_field_types[fi];
        TypeKind actual = fval->type.kind;
        if (actual != TYPE_ERROR && actual != expected) {
            error_add(errors, "<sema>", node->line, node->col,
                      "字段 '%s' 类型不匹配: 期望 '%s',得到 '%s'",
                      fname, type_name(expected), type_name(actual));
        }
    }

    if (node->type.kind != TYPE_ERROR) {
        // No unknown or duplicate names and the counts match, so every slot
        // is filled: publish the declaration-ordered arrays.
        if (given > 0) {
            node->as.struct_init.field_names = slot_names;
            node->as.struct_init.field_values = slot_values;
        }
        node->type.kind = TYPE_STRUCT;
        node->type.struct_name = st_name;
    }
    break;
}
default: break;
}
}
@@ -165,7 +261,25 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
switch (node->kind) {
case AST_PROGRAM:
// 第一遍:收集所有函数签名
// Pass 1: collect all struct definitions into the scope.
for (size_t i = 0; i < node->as.program.struct_count; i++) {
    AstNode* sd = node->as.program.structs[i];
    size_t fc = sd->as.struct_decl.field_count;
    const char** fnames = (const char**)arena_alloc_impl(a, fc * sizeof(const char*));
    TypeKind* ftypes = (TypeKind*)arena_alloc_impl(a, fc * sizeof(TypeKind));
    const char** fstruct_names = (const char**)arena_alloc_impl(a, fc * sizeof(const char*));
    for (size_t j = 0; j < fc; j++) {
        AstNode* field = sd->as.struct_decl.fields[j];
        fnames[j] = field->as.parameter.name;
        ftypes[j] = field->as.parameter.type;
        fstruct_names[j] = field->as.parameter.struct_type_name;
    }
    // scope_insert_struct returns NULL when the name is already taken in
    // this scope; report it instead of silently dropping the declaration.
    if (!scope_insert_struct(scope, a, sd->as.struct_decl.name,
                             fnames, ftypes, fstruct_names, fc)) {
        error_add(errors, "<sema>", sd->line, sd->col,
                  "结构体 '%s' 重复定义", sd->as.struct_decl.name);
    }
}
// Pass 1b: with every struct name registered, check struct-typed fields.
// A field must name a declared struct and must not be the enclosing struct
// itself (a by-value self-reference has no finite size). Cycles that go
// through more than one struct are not detected here.
for (size_t i = 0; i < node->as.program.struct_count; i++) {
    AstNode* sd = node->as.program.structs[i];
    Symbol* owner = scope_lookup_struct(scope, sd->as.struct_decl.name);
    for (size_t j = 0; j < sd->as.struct_decl.field_count; j++) {
        AstNode* field = sd->as.struct_decl.fields[j];
        const char* ref = field->as.parameter.struct_type_name;
        if (field->as.parameter.type != TYPE_STRUCT || !ref) continue;
        Symbol* target = scope_lookup_struct(scope, ref);
        if (!target) {
            error_add(errors, "<sema>", field->line, field->col,
                      "结构体 '%s' 的字段 '%s' 使用了未定义的结构体类型 '%s'",
                      sd->as.struct_decl.name, field->as.parameter.name, ref);
        } else if (target == owner) {
            error_add(errors, "<sema>", field->line, field->col,
                      "结构体 '%s' 的字段 '%s' 不能按值包含自身",
                      sd->as.struct_decl.name, field->as.parameter.name);
        }
    }
}
// 第二遍:收集所有函数签名
for (size_t i = 0; i < node->as.program.fn_count; i++) {
AstNode* fn = node->as.program.functions[i];
TypeKind* pts = (TypeKind*)arena_alloc_impl(a, fn->as.function.param_count * sizeof(TypeKind));
@@ -176,7 +290,7 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
fn->as.function.return_type, pts,
fn->as.function.param_count);
}
// 第二遍:分析每个函数体
// 第三遍:分析每个函数体
for (size_t i = 0; i < node->as.program.fn_count; i++) {
analyze_node(node->as.program.functions[i], scope, errors, a);
}
@@ -203,14 +317,29 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
analyze_expr(node->as.let_stmt.init, scope, errors, a);
TypeKind inferred = node->as.let_stmt.init->type.kind;
TypeKind var_type;
const char* var_struct_name = NULL;
if (node->as.let_stmt.has_type_annot) {
// 使用显式类型标注
var_type = node->as.let_stmt.annot_type;
const char* annot_struct = node->as.let_stmt.struct_type_name;
if (annot_struct) {
// struct 类型标注
Symbol* st_sym = scope_lookup_struct(scope, annot_struct);
if (!st_sym) {
error_add(errors, "<sema>", node->line, node->col,
"未定义的结构体类型 '%s'", annot_struct);
break;
}
var_type = TYPE_STRUCT;
var_struct_name = annot_struct;
} else {
var_type = node->as.let_stmt.annot_type;
}
if (inferred != TYPE_ERROR && inferred != var_type) {
error_add(errors, "<sema>", node->line, node->col,
"变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'",
node->as.let_stmt.name, type_name(var_type), type_name(inferred));
node->as.let_stmt.name,
annot_struct ? annot_struct : type_name(var_type),
type_name(inferred));
}
} else {
// 类型推断
@@ -220,15 +349,23 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
break;
}
var_type = inferred;
if (inferred == TYPE_STRUCT) {
var_struct_name = node->as.let_stmt.init->type.struct_name;
}
}
node->type.kind = var_type;
node->type.struct_name = var_struct_name;
Symbol* sym = scope_insert(scope, a, node->as.let_stmt.name, SYM_VARIABLE, var_type);
if (!sym) {
error_add(errors, "<sema>", node->line, node->col,
"变量 '%s' 重复定义", node->as.let_stmt.name);
} else {
sym->is_mut = node->as.let_stmt.is_mut;
if (var_struct_name) {
sym->type = TYPE_STRUCT;
sym->struct_type_name = var_struct_name;
}
}
break;
}
@@ -304,17 +441,17 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena*
}
// Entry point of semantic analysis. Creates the global scope, registers the
// built-in print functions (print_i64 / print_f64 / print_bool / print_str,
// each taking one argument and returning void), then analyzes the whole AST
// in that scope. Diagnostics are appended to `errors`.
void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) {
Scope* global = scope_new(arena, NULL);
Scope* global_scope = scope_new(arena, NULL);
// Register built-in functions.
TypeKind params_i64[] = {TYPE_I64};
scope_insert_function(global, arena, "print_i64", TYPE_VOID, params_i64, 1);
scope_insert_function(global_scope, arena, "print_i64", TYPE_VOID, params_i64, 1);
TypeKind params_f64[] = {TYPE_F64};
scope_insert_function(global, arena, "print_f64", TYPE_VOID, params_f64, 1);
scope_insert_function(global_scope, arena, "print_f64", TYPE_VOID, params_f64, 1);
TypeKind params_bool[] = {TYPE_BOOL};
scope_insert_function(global, arena, "print_bool", TYPE_VOID, params_bool, 1);
scope_insert_function(global_scope, arena, "print_bool", TYPE_VOID, params_bool, 1);
TypeKind params_str[] = {TYPE_STR};
scope_insert_function(global, arena, "print_str", TYPE_VOID, params_str, 1);
scope_insert_function(global_scope, arena, "print_str", TYPE_VOID, params_str, 1);
analyze_node(ast, global, errors, arena);
analyze_node(ast, global_scope, errors, arena);
}
+49
View File
@@ -29,6 +29,10 @@ Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
sym->name = name; sym->kind = kind; sym->type = type;
sym->is_mut = false; sym->return_type = TYPE_VOID;
sym->param_types = NULL; sym->param_count = 0;
sym->struct_field_names = NULL;
sym->struct_field_types = NULL;
sym->struct_field_count = 0;
sym->struct_type_name = NULL;
sym->next = scope->head;
scope->head = sym;
return sym;
@@ -44,7 +48,52 @@ Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol));
sym->name = name; sym->kind = SYM_FUNCTION; sym->type = TYPE_VOID;
sym->return_type = ret; sym->param_types = pt; sym->param_count = pc;
sym->struct_field_names = NULL;
sym->struct_field_types = NULL;
sym->struct_field_count = 0;
sym->struct_type_name = NULL;
sym->next = scope->head;
scope->head = sym;
return sym;
}
// Registers a struct type named `name` in `scope`.
//
// `fnames`, `ftypes` and `fstruct_names` are parallel arrays of `fc` entries
// (field name, field TypeKind, and the field's struct type name); they are
// stored by pointer, not copied. Returns the new symbol, or NULL when a symbol
// of any kind with the same name already exists in this scope (parent scopes
// are not consulted).
Symbol* scope_insert_struct(Scope* scope, void* alloc, const char* name,
                            const char** fnames, TypeKind* ftypes,
                            const char** fstruct_names, size_t fc) {
    // Reject a name that is already taken in this scope.
    Symbol* existing = scope->head;
    while (existing != NULL) {
        if (strcmp(existing->name, name) == 0) {
            return NULL;
        }
        existing = existing->next;
    }

    Symbol* entry = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol));

    // Identity.
    entry->name = name;
    entry->kind = SYM_STRUCT;
    entry->type = TYPE_STRUCT;

    // Variable/function attributes do not apply to a struct symbol.
    entry->is_mut = false;
    entry->return_type = TYPE_VOID;
    entry->param_types = NULL;
    entry->param_count = 0;
    entry->struct_type_name = NULL;

    // Field description.
    entry->struct_field_names = fnames;
    entry->struct_field_types = ftypes;
    entry->struct_field_struct_names = fstruct_names;
    entry->struct_field_count = fc;

    // Push onto the front of the scope's symbol list.
    entry->next = scope->head;
    scope->head = entry;
    return entry;
}
// Finds the struct symbol called `name`, searching `scope` first and then
// each enclosing scope through the `parent` links. Symbols of other kinds
// with the same name are ignored. Returns NULL when no struct matches.
Symbol* scope_lookup_struct(const Scope* scope, const char* name) {
    const Scope* level = scope;
    while (level != NULL) {
        Symbol* candidate = level->head;
        while (candidate != NULL) {
            if (candidate->kind == SYM_STRUCT && strcmp(candidate->name, name) == 0) {
                return candidate;
            }
            candidate = candidate->next;
        }
        level = level->parent;
    }
    return NULL;
}
// Returns the zero-based position of `field_name` among the fields of the
// struct symbol `sym`, or -1 when `sym` is not a struct symbol or has no
// field with that name.
int scope_struct_field_index(const Symbol* sym, const char* field_name) {
    if (sym->kind != SYM_STRUCT) {
        return -1;
    }
    size_t idx = 0;
    while (idx < sym->struct_field_count) {
        if (strcmp(sym->struct_field_names[idx], field_name) == 0) {
            return (int)idx;
        }
        idx++;
    }
    return -1;
}
+19 -1
View File
@@ -4,7 +4,7 @@
#include "l_lang.h"
#include "ast.h"
typedef enum { SYM_VARIABLE, SYM_PARAMETER, SYM_FUNCTION } SymbolKind;
typedef enum { SYM_VARIABLE, SYM_PARAMETER, SYM_FUNCTION, SYM_STRUCT } SymbolKind;
typedef struct Symbol {
const char* name;
@@ -15,6 +15,13 @@ typedef struct Symbol {
TypeKind return_type;
TypeKind* param_types;
size_t param_count;
// 结构体特有(SYM_STRUCT)
const char** struct_field_names;
TypeKind* struct_field_types;
const char** struct_field_struct_names; // 字段为 struct 类型时的具体类型名
size_t struct_field_count;
// 变量引用结构体类型时,记录具体类型名
const char* struct_type_name;
// 链表(同一作用域内的下一个符号)
struct Symbol* next;
} Symbol;
@@ -38,4 +45,15 @@ Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
TypeKind ret, TypeKind* pt, size_t pc);
// 插入结构体符号
Symbol* scope_insert_struct(Scope* scope, void* alloc, const char* name,
const char** fnames, TypeKind* ftypes,
const char** fstruct_names, size_t fc);
// 查找结构体符号(在所有作用域中)
Symbol* scope_lookup_struct(const Scope* scope, const char* name);
// 在结构体符号中查找字段索引(返回 -1 表示未找到)
int scope_struct_field_index(const Symbol* sym, const char* field_name);
#endif