feat: 结构体 struct — 最后一项 P0 功能

- lexer: TOK_STRUCT, TOK_DOT 关键字和运算符
- ast: AST_STRUCT_DECL/STRUCT_INIT/FIELD_ACCESS 3 种新节点
- parser: struct 声明 + .field 访问 + Name{field:val} 初始化
- sema: struct 类型符号表,字段类型解析,初始化字段检查
- codegen: LLVMStructType + extractvalue/insertvalue 字段操作
- 新增集成测试: 12_struct.l, 13_struct_nested.l
- 基于 Codex 分析报告 P0 #4

所有 P0 功能已全部完成。
This commit is contained in:
2026-06-05 12:21:22 +08:00
parent 620cec4d57
commit b390d390f3
17 changed files with 1521 additions and 47 deletions
+114 -3
View File
@@ -20,6 +20,14 @@ typedef struct FnEntry {
struct FnEntry* next;
} FnEntry;
// 结构体类型映射
typedef struct StructTypeEntry {
const char* name;
LLVMTypeRef llvm_type;
size_t field_count;
struct StructTypeEntry* next;
} StructTypeEntry;
typedef struct {
Arena* arena; // 代码生成阶段分配器
LLVMContextRef context; // LLVM 19+ 需要显式 Context
@@ -28,6 +36,7 @@ typedef struct {
VarEntry* var_table;
const char* error;
FnEntry* fn_table;
StructTypeEntry* struct_table;
// printf 运行时支持(内置 print 函数委托给 printf
LLVMValueRef printf_fn;
LLVMTypeRef printf_ty;
@@ -89,6 +98,21 @@ static void add_fn(CgCtx* ctx, const char* name, LLVMValueRef fn) {
ctx->fn_table = e;
}
// === 结构体类型表 ===
static void add_struct_type(CgCtx* ctx, const char* name, LLVMTypeRef ty, size_t fc) {
StructTypeEntry* e = arena_alloc(ctx->arena, sizeof(*e));
if (!e) return;
e->name = name; e->llvm_type = ty; e->field_count = fc;
e->next = ctx->struct_table;
ctx->struct_table = e;
}
static LLVMTypeRef find_struct_type(CgCtx* ctx, const char* name) {
for (StructTypeEntry* e = ctx->struct_table; e; e = e->next)
if (strcmp(e->name, name) == 0) return e->llvm_type;
return NULL;
}
// === 向前声明 ===
static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node);
static void codegen_stmt(CgCtx* ctx, AstNode* node);
@@ -107,7 +131,14 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
case AST_IDENT_EXPR: {
LLVMValueRef ptr = find_var(ctx, node->as.ident.name);
if (!ptr) return NULL;
return LLVMBuildLoad2(ctx->builder, to_llvm_type(ctx, node->type.kind), ptr, "load");
LLVMTypeRef load_ty;
if (node->type.kind == TYPE_STRUCT && node->type.struct_name) {
load_ty = find_struct_type(ctx, node->type.struct_name);
if (!load_ty) load_ty = to_llvm_type(ctx, node->type.kind);
} else {
load_ty = to_llvm_type(ctx, node->type.kind);
}
return LLVMBuildLoad2(ctx->builder, load_ty, ptr, "load");
}
case AST_UNARY_EXPR: {
@@ -260,6 +291,50 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
ret_ty == LLVMVoidTypeInContext(ctx->context) ? "" : "call");
}
// === 结构体字段访问: p.x ===
case AST_FIELD_ACCESS: {
// 对对象求值(返回的是 struct 值)
LLVMValueRef struct_val = codegen_expr(ctx, node->as.field_access.object);
if (!struct_val) return NULL;
int field_idx = node->as.field_access.field_index;
if (field_idx < 0) return NULL; // sema 应当已经设置
// 用 extractvalue 从结构体值中提取字段
return LLVMBuildExtractValue(ctx->builder, struct_val,
(unsigned)field_idx, node->as.field_access.field);
}
// === 结构体初始化: Point { x: 10, y: 20 } ===
case AST_STRUCT_INIT: {
const char* st_name = node->as.struct_init.type_name;
LLVMTypeRef struct_ty = find_struct_type(ctx, st_name);
if (!struct_ty) return NULL;
// alloca 分配结构体空间
LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder, struct_ty, "struct_init");
// 获取结构体字段名列表(从 struct_table 或从 AST 中)
// 对每个 init 字段,找到它在结构体中的索引并 store
for (size_t i = 0; i < node->as.struct_init.field_count; i++) {
AstNode* fval = node->as.struct_init.field_values[i];
LLVMValueRef val = codegen_expr(ctx, fval);
if (!val) return NULL;
// 获取字段指针: GEP struct_ty, alloca, 0, i
LLVMValueRef indices[] = {
LLVMConstInt(LLVMInt32TypeInContext(ctx->context), 0, false),
LLVMConstInt(LLVMInt32TypeInContext(ctx->context), (unsigned long long)i, false)
};
LLVMValueRef field_ptr = LLVMBuildGEP2(ctx->builder, struct_ty, alloca,
indices, 2, "field_ptr");
LLVMBuildStore(ctx->builder, val, field_ptr);
}
// 加载整个结构体值
return LLVMBuildLoad2(ctx->builder, struct_ty, alloca, "struct_val");
}
default:
return NULL;
}
@@ -273,8 +348,18 @@ static void codegen_stmt(CgCtx* ctx, AstNode* node) {
case AST_LET_STMT: {
LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init);
if (!init_val) return;
LLVMTypeRef var_type;
if (node->as.let_stmt.init->type.kind == TYPE_STRUCT &&
node->as.let_stmt.init->type.struct_name) {
var_type = find_struct_type(ctx, node->as.let_stmt.init->type.struct_name);
if (!var_type) var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind);
} else {
var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind);
}
LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder,
to_llvm_type(ctx, node->as.let_stmt.init->type.kind), node->as.let_stmt.name);
var_type, node->as.let_stmt.name);
LLVMBuildStore(ctx->builder, init_val, alloca);
add_var(ctx, node->as.let_stmt.name, alloca);
break;
@@ -410,6 +495,33 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena,
memcpy_args, 3, false);
ctx.memcpy_fn = LLVMAddFunction(ctx.module, "memcpy", memcpy_ty);
// 第零遍:先创建所有命名结构体(占位符,未设置 body)
for (size_t i = 0; i < ast->as.program.struct_count; i++) {
AstNode* sd = ast->as.program.structs[i];
LLVMTypeRef llvm_st = LLVMStructCreateNamed(ctx.context, sd->as.struct_decl.name);
add_struct_type(&ctx, sd->as.struct_decl.name, llvm_st,
sd->as.struct_decl.field_count);
}
// 然后设置所有结构体的 body(此时所有结构体类型已注册,可互相引用)
for (size_t i = 0; i < ast->as.program.struct_count; i++) {
AstNode* sd = ast->as.program.structs[i];
LLVMTypeRef llvm_st = find_struct_type(&ctx, sd->as.struct_decl.name);
LLVMTypeRef* elem_types = arena_alloc(ctx.arena,
sd->as.struct_decl.field_count * sizeof(LLVMTypeRef));
for (size_t j = 0; j < sd->as.struct_decl.field_count; j++) {
AstNode* field = sd->as.struct_decl.fields[j];
if (field->as.parameter.type == TYPE_STRUCT &&
field->as.parameter.struct_type_name) {
elem_types[j] = find_struct_type(&ctx,
field->as.parameter.struct_type_name);
} else {
elem_types[j] = to_llvm_type(&ctx, field->as.parameter.type);
}
}
LLVMStructSetBody(llvm_st, elem_types,
(unsigned)sd->as.struct_decl.field_count, false);
}
// 第一遍:声明所有 L 函数
for (size_t i = 0; i < ast->as.program.fn_count; i++) {
AstNode* fn = ast->as.program.functions[i];
@@ -457,7 +569,6 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena,
}
// 验证模块(使用 ReturnStatus 以获取完整错误消息)
// 注: LLVM 22 C API 不再导出 mem2reg pass, alloca 优化需用 opt 工具
char* verify_err = NULL;
if (LLVMVerifyModule(ctx.module, LLVMReturnStatusAction, &verify_err)) {
*error_msg = verify_err ? verify_err : "模块验证失败(错误消息为 NULL";