feat: 结构体 struct — 最后一项 P0 功能
- lexer: TOK_STRUCT, TOK_DOT 关键字和运算符
- ast: AST_STRUCT_DECL/STRUCT_INIT/FIELD_ACCESS 3 种新节点
- parser: struct 声明 + .field 访问 + Name{field:val} 初始化
- sema: struct 类型符号表,字段类型解析,初始化字段检查
- codegen: LLVMStructType + extractvalue/insertvalue 字段操作
- 新增集成测试: 12_struct.l, 13_struct_nested.l
- 基于 Codex 分析报告 P0 #4
所有 P0 功能已全部完成。
This commit is contained in:
+114
-3
@@ -20,6 +20,14 @@ typedef struct FnEntry {
|
||||
struct FnEntry* next;
|
||||
} FnEntry;
|
||||
|
||||
// 结构体类型映射
|
||||
typedef struct StructTypeEntry {
|
||||
const char* name;
|
||||
LLVMTypeRef llvm_type;
|
||||
size_t field_count;
|
||||
struct StructTypeEntry* next;
|
||||
} StructTypeEntry;
|
||||
|
||||
typedef struct {
|
||||
Arena* arena; // 代码生成阶段分配器
|
||||
LLVMContextRef context; // LLVM 19+ 需要显式 Context
|
||||
@@ -28,6 +36,7 @@ typedef struct {
|
||||
VarEntry* var_table;
|
||||
const char* error;
|
||||
FnEntry* fn_table;
|
||||
StructTypeEntry* struct_table;
|
||||
// printf 运行时支持(内置 print 函数委托给 printf)
|
||||
LLVMValueRef printf_fn;
|
||||
LLVMTypeRef printf_ty;
|
||||
@@ -89,6 +98,21 @@ static void add_fn(CgCtx* ctx, const char* name, LLVMValueRef fn) {
|
||||
ctx->fn_table = e;
|
||||
}
|
||||
|
||||
// === 结构体类型表 ===
|
||||
/*
 * Register a struct type by pushing a new entry onto the head of
 * ctx->struct_table. Because entries are inserted at the head, a later
 * registration with the same name is found first by a front-to-back search.
 *
 *   ctx  - code generation context; the entry is allocated from ctx->arena
 *   name - struct name used as the lookup key; the pointer is stored as-is,
 *          the string is not copied
 *   ty   - LLVM type to associate with the name
 *   fc   - number of fields of the struct
 *
 * On failure (NULL name, or the arena allocation returns NULL) nothing is
 * registered. Instead of dropping the registration silently, the failure is
 * recorded in ctx->error, unless an earlier error is already stored there.
 */
static void add_struct_type(CgCtx* ctx, const char* name, LLVMTypeRef ty, size_t fc) {
    /* A NULL key could never be matched and would be handed to strcmp later. */
    if (!name) {
        if (!ctx->error) {
            ctx->error = "结构体类型注册失败: 名称为空";
        }
        return;
    }

    StructTypeEntry* e = arena_alloc(ctx->arena, sizeof(*e));
    if (!e) {
        if (!ctx->error) {
            ctx->error = "结构体类型注册失败: 内存分配失败";
        }
        return;
    }

    e->name = name;
    e->llvm_type = ty;
    e->field_count = fc;

    /* Link in at the head of the list. */
    e->next = ctx->struct_table;
    ctx->struct_table = e;
}
|
||||
|
||||
/*
 * Look up the LLVM type registered for a struct name.
 *
 *   ctx  - code generation context whose struct_table list is searched
 *   name - struct name to look for; may be NULL
 *
 * Walks the list from the head and returns the llvm_type of the first entry
 * whose name compares equal with strcmp. Returns NULL when no entry matches
 * or when `name` is NULL.
 */
static LLVMTypeRef find_struct_type(CgCtx* ctx, const char* name) {
    /* strcmp on a NULL pointer is undefined behaviour; treat it as "not found". */
    if (!name) {
        return NULL;
    }

    for (StructTypeEntry* e = ctx->struct_table; e; e = e->next) {
        if (e->name && strcmp(e->name, name) == 0) {
            return e->llvm_type;
        }
    }
    return NULL;
}
|
||||
|
||||
// === 向前声明 ===
|
||||
static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node);
|
||||
static void codegen_stmt(CgCtx* ctx, AstNode* node);
|
||||
@@ -107,7 +131,14 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
|
||||
case AST_IDENT_EXPR: {
|
||||
LLVMValueRef ptr = find_var(ctx, node->as.ident.name);
|
||||
if (!ptr) return NULL;
|
||||
return LLVMBuildLoad2(ctx->builder, to_llvm_type(ctx, node->type.kind), ptr, "load");
|
||||
LLVMTypeRef load_ty;
|
||||
if (node->type.kind == TYPE_STRUCT && node->type.struct_name) {
|
||||
load_ty = find_struct_type(ctx, node->type.struct_name);
|
||||
if (!load_ty) load_ty = to_llvm_type(ctx, node->type.kind);
|
||||
} else {
|
||||
load_ty = to_llvm_type(ctx, node->type.kind);
|
||||
}
|
||||
return LLVMBuildLoad2(ctx->builder, load_ty, ptr, "load");
|
||||
}
|
||||
|
||||
case AST_UNARY_EXPR: {
|
||||
@@ -260,6 +291,50 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
|
||||
ret_ty == LLVMVoidTypeInContext(ctx->context) ? "" : "call");
|
||||
}
|
||||
|
||||
// === 结构体字段访问: p.x ===
|
||||
case AST_FIELD_ACCESS: {
|
||||
// 对对象求值(返回的是 struct 值)
|
||||
LLVMValueRef struct_val = codegen_expr(ctx, node->as.field_access.object);
|
||||
if (!struct_val) return NULL;
|
||||
|
||||
int field_idx = node->as.field_access.field_index;
|
||||
if (field_idx < 0) return NULL; // sema 应当已经设置
|
||||
|
||||
// 用 extractvalue 从结构体值中提取字段
|
||||
return LLVMBuildExtractValue(ctx->builder, struct_val,
|
||||
(unsigned)field_idx, node->as.field_access.field);
|
||||
}
|
||||
|
||||
// === 结构体初始化: Point { x: 10, y: 20 } ===
|
||||
case AST_STRUCT_INIT: {
|
||||
const char* st_name = node->as.struct_init.type_name;
|
||||
LLVMTypeRef struct_ty = find_struct_type(ctx, st_name);
|
||||
if (!struct_ty) return NULL;
|
||||
|
||||
// alloca 分配结构体空间
|
||||
LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder, struct_ty, "struct_init");
|
||||
|
||||
// 获取结构体字段名列表(从 struct_table 或从 AST 中)
|
||||
// 对每个 init 字段,找到它在结构体中的索引并 store
|
||||
for (size_t i = 0; i < node->as.struct_init.field_count; i++) {
|
||||
AstNode* fval = node->as.struct_init.field_values[i];
|
||||
LLVMValueRef val = codegen_expr(ctx, fval);
|
||||
if (!val) return NULL;
|
||||
|
||||
// 获取字段指针: GEP struct_ty, alloca, 0, i
|
||||
LLVMValueRef indices[] = {
|
||||
LLVMConstInt(LLVMInt32TypeInContext(ctx->context), 0, false),
|
||||
LLVMConstInt(LLVMInt32TypeInContext(ctx->context), (unsigned long long)i, false)
|
||||
};
|
||||
LLVMValueRef field_ptr = LLVMBuildGEP2(ctx->builder, struct_ty, alloca,
|
||||
indices, 2, "field_ptr");
|
||||
LLVMBuildStore(ctx->builder, val, field_ptr);
|
||||
}
|
||||
|
||||
// 加载整个结构体值
|
||||
return LLVMBuildLoad2(ctx->builder, struct_ty, alloca, "struct_val");
|
||||
}
|
||||
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@@ -273,8 +348,18 @@ static void codegen_stmt(CgCtx* ctx, AstNode* node) {
|
||||
case AST_LET_STMT: {
|
||||
LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init);
|
||||
if (!init_val) return;
|
||||
|
||||
LLVMTypeRef var_type;
|
||||
if (node->as.let_stmt.init->type.kind == TYPE_STRUCT &&
|
||||
node->as.let_stmt.init->type.struct_name) {
|
||||
var_type = find_struct_type(ctx, node->as.let_stmt.init->type.struct_name);
|
||||
if (!var_type) var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind);
|
||||
} else {
|
||||
var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind);
|
||||
}
|
||||
|
||||
LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder,
|
||||
to_llvm_type(ctx, node->as.let_stmt.init->type.kind), node->as.let_stmt.name);
|
||||
var_type, node->as.let_stmt.name);
|
||||
LLVMBuildStore(ctx->builder, init_val, alloca);
|
||||
add_var(ctx, node->as.let_stmt.name, alloca);
|
||||
break;
|
||||
@@ -410,6 +495,33 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena,
|
||||
memcpy_args, 3, false);
|
||||
ctx.memcpy_fn = LLVMAddFunction(ctx.module, "memcpy", memcpy_ty);
|
||||
|
||||
// 第零遍:先创建所有命名结构体(占位符,未设置 body)
|
||||
for (size_t i = 0; i < ast->as.program.struct_count; i++) {
|
||||
AstNode* sd = ast->as.program.structs[i];
|
||||
LLVMTypeRef llvm_st = LLVMStructCreateNamed(ctx.context, sd->as.struct_decl.name);
|
||||
add_struct_type(&ctx, sd->as.struct_decl.name, llvm_st,
|
||||
sd->as.struct_decl.field_count);
|
||||
}
|
||||
// 然后设置所有结构体的 body(此时所有结构体类型已注册,可互相引用)
|
||||
for (size_t i = 0; i < ast->as.program.struct_count; i++) {
|
||||
AstNode* sd = ast->as.program.structs[i];
|
||||
LLVMTypeRef llvm_st = find_struct_type(&ctx, sd->as.struct_decl.name);
|
||||
LLVMTypeRef* elem_types = arena_alloc(ctx.arena,
|
||||
sd->as.struct_decl.field_count * sizeof(LLVMTypeRef));
|
||||
for (size_t j = 0; j < sd->as.struct_decl.field_count; j++) {
|
||||
AstNode* field = sd->as.struct_decl.fields[j];
|
||||
if (field->as.parameter.type == TYPE_STRUCT &&
|
||||
field->as.parameter.struct_type_name) {
|
||||
elem_types[j] = find_struct_type(&ctx,
|
||||
field->as.parameter.struct_type_name);
|
||||
} else {
|
||||
elem_types[j] = to_llvm_type(&ctx, field->as.parameter.type);
|
||||
}
|
||||
}
|
||||
LLVMStructSetBody(llvm_st, elem_types,
|
||||
(unsigned)sd->as.struct_decl.field_count, false);
|
||||
}
|
||||
|
||||
// 第一遍:声明所有 L 函数
|
||||
for (size_t i = 0; i < ast->as.program.fn_count; i++) {
|
||||
AstNode* fn = ast->as.program.functions[i];
|
||||
@@ -457,7 +569,6 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena,
|
||||
}
|
||||
|
||||
// 验证模块(使用 ReturnStatus 以获取完整错误消息)
|
||||
// 注: LLVM 22 C API 不再导出 mem2reg pass, alloca 优化需用 opt 工具
|
||||
char* verify_err = NULL;
|
||||
if (LLVMVerifyModule(ctx.module, LLVMReturnStatusAction, &verify_err)) {
|
||||
*error_msg = verify_err ? verify_err : "模块验证失败(错误消息为 NULL)";
|
||||
|
||||
Reference in New Issue
Block a user