From 2923e7574dd4217060dd60d34696c8ab39d1de5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Fri, 5 Jun 2026 14:19:01 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=95=B0=E7=BB=84+=E7=B4=A2=E5=BC=95?= =?UTF-8?q?=20[T;N],=20arr[i]=20(P1=20#6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - lexer: TOK_LBRACKET, TOK_RBRACKET - type: TYPE_ARRAY + TypeInfo扩展(element_type/array_size) - ast: AST_INDEX_EXPR, AST_ARRAY_ASSIGN_STMT - parser: parse_type_expr()支持[T;N], Pratt加[索引], 数组元素赋值 - sema: 数组类型检查, 索引必须i64, 元素赋值类型匹配 - codegen: type_info_to_llvm(TYPE_ARRAY), GEP+load/store - 新增集成测试: 18_array.l 测试: 136 通过 (41+15+59+21) --- include/l_lang.h | 2 + src/ast/ast.c | 23 ++++++- src/ast/ast.h | 19 +++++- src/codegen/codegen.c | 142 +++++++++++++++++++++++++++++++++------ src/lexer/lexer.c | 2 + src/lexer/token.c | 1 + src/lexer/token.h | 1 + src/parser/parser.c | 75 ++++++++++++++++++++- src/sema/sema.c | 134 ++++++++++++++++++++++++++++-------- src/sema/symbol.c | 12 ++++ src/sema/symbol.h | 4 ++ test/programs/18_array.l | 10 +++ test/test_codegen.c | 71 +++++++++++++++++++- test/test_sema.c | 74 ++++++++++++++++++++ 14 files changed, 512 insertions(+), 58 deletions(-) create mode 100644 test/programs/18_array.l diff --git a/include/l_lang.h b/include/l_lang.h index 04494c6..2625c9d 100644 --- a/include/l_lang.h +++ b/include/l_lang.h @@ -14,6 +14,7 @@ typedef enum { TYPE_VOID, TYPE_STRUCT, // 结构体类型 TYPE_ENUM, // 枚举类型 + TYPE_ARRAY, // 固定大小数组类型 TYPE_UNKNOWN, // 尚未推断 TYPE_ERROR, // 类型错误 } TypeKind; @@ -27,6 +28,7 @@ static inline const char* type_name(TypeKind kind) { case TYPE_VOID: return "void"; case TYPE_STRUCT: return "struct"; case TYPE_ENUM: return "enum"; + case TYPE_ARRAY: return "array"; default: return ""; } } diff --git a/src/ast/ast.c b/src/ast/ast.c index c189dd1..79a94e5 100644 --- a/src/ast/ast.c +++ b/src/ast/ast.c @@ -6,6 +6,8 @@ AstNode* n = 
(AstNode*)arena_alloc_impl(alloc, sizeof(AstNode)); \ if (!n) return NULL; \ n->kind = (k); n->type.kind = TYPE_UNKNOWN; n->type.struct_name = NULL; \ + n->type.element_type = 0; n->type.element_struct_name = NULL; \ + n->type.array_size = 0; \ n->loc = loc AstNode* ast_make_program(void* alloc, AstNode** fns, size_t fn_count, @@ -49,12 +51,16 @@ AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, SourceLoc lo } AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, - bool is_mut, AstNode* init, const char* struct_type_name, SourceLoc loc) { + bool is_mut, AstNode* init, const char* struct_type_name, + TypeKind annot_elem_type, const char* annot_elem_struct, int64_t annot_array_size, SourceLoc loc) { NEW(alloc, AST_LET_STMT); n->as.let_stmt.name = name; n->as.let_stmt.annot_type = annot_type; n->as.let_stmt.has_type_annot = has_type_annot; n->as.let_stmt.is_mut = is_mut; n->as.let_stmt.init = init; n->as.let_stmt.struct_type_name = struct_type_name; + n->as.let_stmt.annot_element_type = annot_elem_type; + n->as.let_stmt.annot_element_struct_name = annot_elem_struct; + n->as.let_stmt.annot_array_size = annot_array_size; return n; } @@ -200,3 +206,18 @@ AstNode* ast_make_enum_variant(void* alloc, const char* enum_name, n->as.enum_variant.variant_index = -1; return n; } + +AstNode* ast_make_index_expr(void* alloc, AstNode* array, AstNode* index, SourceLoc loc) { + NEW(alloc, AST_INDEX_EXPR); + n->as.index_expr.array = array; + n->as.index_expr.index = index; + return n; +} + +AstNode* ast_make_array_assign(void* alloc, const char* name, AstNode* index, AstNode* value, SourceLoc loc) { + NEW(alloc, AST_ARRAY_ASSIGN_STMT); + n->as.array_assign.name = name; + n->as.array_assign.index = index; + n->as.array_assign.value = value; + return n; +} diff --git a/src/ast/ast.h b/src/ast/ast.h index 40fada6..76b2d1b 100644 --- a/src/ast/ast.h +++ b/src/ast/ast.h @@ -26,6 +26,8 @@ typedef enum { AST_TYPE_ALIAS, // type Meters 
= i64 AST_ENUM_DECL, // enum Color { Red, Green, Blue } AST_ENUM_VARIANT, // Color::Red + AST_INDEX_EXPR, // arr[i] + AST_ARRAY_ASSIGN_STMT,// arr[i] = expr } AstKind; typedef enum { @@ -38,7 +40,10 @@ typedef enum { // 类型信息(语义分析阶段填充) typedef struct { TypeKind kind; - const char* struct_name; // TYPE_STRUCT 时的结构体类型名 + const char* struct_name; // TYPE_STRUCT / TYPE_ENUM + TypeKind element_type; // TYPE_ARRAY: 元素类型的 TypeKind + const char* element_struct_name; // TYPE_ARRAY: 元素为 struct 时的类型名 + int64_t array_size; // TYPE_ARRAY: 固定大小 } TypeInfo; // AST 节点 @@ -64,7 +69,8 @@ struct AstNode { struct { struct AstNode** stmts; size_t stmt_count; } block; // AST_LET_STMT struct { const char* name; TypeKind annot_type; bool has_type_annot; bool is_mut; struct AstNode* init; - const char* struct_type_name; } let_stmt; + const char* struct_type_name; + TypeKind annot_element_type; const char* annot_element_struct_name; int64_t annot_array_size; } let_stmt; // AST_ASSIGN_STMT struct { const char* name; struct AstNode* value; } assign_stmt; // AST_IF_STMT @@ -98,6 +104,10 @@ struct AstNode { struct { const char* name; const char** variants; size_t variant_count; } enum_decl; // AST_ENUM_VARIANT struct { const char* enum_name; const char* variant_name; int variant_index; } enum_variant; + // AST_INDEX_EXPR + struct { struct AstNode* array; struct AstNode* index; } index_expr; + // AST_ARRAY_ASSIGN_STMT + struct { const char* name; struct AstNode* index; struct AstNode* value; } array_assign; } as; }; @@ -111,7 +121,8 @@ AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, const char* struct_type_name, SourceLoc loc); AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, SourceLoc loc); AstNode* ast_make_let(void* alloc, const char* name, TypeKind annot_type, bool has_type_annot, - bool is_mut, AstNode* init, const char* struct_type_name, SourceLoc loc); + bool 
is_mut, AstNode* init, const char* struct_type_name, + TypeKind annot_elem_type, const char* annot_elem_struct, int64_t annot_array_size, SourceLoc loc); AstNode* ast_make_assign(void* alloc, const char* name, AstNode* value, SourceLoc loc); AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, SourceLoc loc); AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, SourceLoc loc); @@ -132,5 +143,7 @@ AstNode* ast_make_type_alias(void* alloc, const char* name, TypeKind aliased, const char* aliased_struct, SourceLoc loc); AstNode* ast_make_enum_decl(void* alloc, const char* name, const char** variants, size_t count, SourceLoc loc); AstNode* ast_make_enum_variant(void* alloc, const char* enum_name, const char* variant_name, SourceLoc loc); +AstNode* ast_make_index_expr(void* alloc, AstNode* array, AstNode* index, SourceLoc loc); +AstNode* ast_make_array_assign(void* alloc, const char* name, AstNode* index, AstNode* value, SourceLoc loc); #endif diff --git a/src/codegen/codegen.c b/src/codegen/codegen.c index fe9f2bb..666e522 100644 --- a/src/codegen/codegen.c +++ b/src/codegen/codegen.c @@ -12,6 +12,7 @@ static int codegen_depth = 0; typedef struct VarEntry { const char* name; LLVMValueRef alloca; + LLVMTypeRef alloca_type; // 分配的类型(GEP 需要) struct VarEntry* next; } VarEntry; @@ -86,10 +87,10 @@ static LLVMValueRef find_var(CgCtx* ctx, const char* name) { return NULL; } -static void add_var(CgCtx* ctx, const char* name, LLVMValueRef alloca) { +static void add_var(CgCtx* ctx, const char* name, LLVMValueRef alloca, LLVMTypeRef alloca_type) { VarEntry* e = arena_alloc(ctx->arena, sizeof(*e)); if (!e) return; - e->name = name; e->alloca = alloca; e->next = ctx->var_table; + e->name = name; e->alloca = alloca; e->alloca_type = alloca_type; e->next = ctx->var_table; ctx->var_table = e; } @@ -126,6 +127,27 @@ static LLVMTypeRef find_struct_type(CgCtx* ctx, const char* name) { return NULL; } +// 从 TypeInfo 生成 LLVM 类型(支持数组、结构体等复合类型) 
+static LLVMTypeRef type_info_to_llvm(CgCtx* ctx, const TypeInfo* ti) { + switch (ti->kind) { + case TYPE_ARRAY: { + TypeInfo elem = { .kind = ti->element_type, .struct_name = ti->element_struct_name }; + LLVMTypeRef elem_ty = type_info_to_llvm(ctx, &elem); + return LLVMArrayType(elem_ty, (unsigned)ti->array_size); + } + case TYPE_STRUCT: + if (ti->struct_name) { + LLVMTypeRef st = find_struct_type(ctx, ti->struct_name); + if (st) return st; + } + return LLVMVoidTypeInContext(ctx->context); + case TYPE_ENUM: + return LLVMInt64TypeInContext(ctx->context); + default: + return to_llvm_type(ctx, ti->kind); + } +} + // === 向前声明 === static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node); static void codegen_stmt(CgCtx* ctx, AstNode* node); @@ -144,13 +166,7 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { case AST_IDENT_EXPR: { LLVMValueRef ptr = find_var(ctx, node->as.ident.name); if (!ptr) return NULL; - LLVMTypeRef load_ty; - if (node->type.kind == TYPE_STRUCT && node->type.struct_name) { - load_ty = find_struct_type(ctx, node->type.struct_name); - if (!load_ty) load_ty = to_llvm_type(ctx, node->type.kind); - } else { - load_ty = to_llvm_type(ctx, node->type.kind); - } + LLVMTypeRef load_ty = type_info_to_llvm(ctx, &node->type); return LLVMBuildLoad2(ctx->builder, load_ty, ptr, "load"); } @@ -352,6 +368,47 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { return LLVMConstInt(LLVMInt64TypeInContext(ctx->context), (unsigned long long)node->as.enum_variant.variant_index, true); + case AST_INDEX_EXPR: { + // 获取数组变量的指针 + AstNode* arr_node = node->as.index_expr.array; + LLVMValueRef arr_ptr = NULL; + LLVMTypeRef arr_gp_type = NULL; + + if (arr_node->kind == AST_IDENT_EXPR) { + arr_ptr = find_var(ctx, arr_node->as.ident.name); + // 从变量表获取数组类型用于 GEP + for (VarEntry* e = ctx->var_table; e; e = e->next) { + if (strcmp(e->name, arr_node->as.ident.name) == 0) { + arr_gp_type = e->alloca_type; break; + } + } + } + if (!arr_ptr || !arr_gp_type) 
return NULL; + + // 生成索引值 + LLVMValueRef idx_val = codegen_expr(ctx, node->as.index_expr.index); + if (!idx_val) return NULL; + + // GEP 索引必须是 i32,但 L 使用 i64。截断。 + LLVMValueRef idx_i32 = LLVMBuildTrunc(ctx->builder, idx_val, + LLVMInt32TypeInContext(ctx->context), "idx32"); + + LLVMValueRef indices[] = { + LLVMConstInt(LLVMInt32TypeInContext(ctx->context), 0, false), + idx_i32 + }; + LLVMValueRef elem_ptr = LLVMBuildGEP2(ctx->builder, arr_gp_type, arr_ptr, indices, 2, "arr_elem"); + + LLVMTypeRef elem_load_ty; + if (node->type.kind == TYPE_STRUCT && node->type.struct_name) { + elem_load_ty = find_struct_type(ctx, node->type.struct_name); + if (!elem_load_ty) elem_load_ty = to_llvm_type(ctx, node->type.kind); + } else { + elem_load_ty = type_info_to_llvm(ctx, &node->type); + } + return LLVMBuildLoad2(ctx->builder, elem_load_ty, elem_ptr, "arr_load"); + } + default: return NULL; } @@ -390,22 +447,38 @@ static void codegen_stmt(CgCtx* ctx, AstNode* node) { switch (node->kind) { case AST_LET_STMT: { - LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init); - if (!init_val) return; - + // 使用节点的完整类型信息来确定 LLVM 类型 + // 如果 sema 未运行 (node->type.kind == TYPE_UNKNOWN),回退到 init 的类型 LLVMTypeRef var_type; - if (node->as.let_stmt.init->type.kind == TYPE_STRUCT && - node->as.let_stmt.init->type.struct_name) { - var_type = find_struct_type(ctx, node->as.let_stmt.init->type.struct_name); - if (!var_type) var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind); + if (node->type.kind == TYPE_UNKNOWN) { + // 回退到旧行为:使用 init 表达式的类型 + AstNode* init_node = node->as.let_stmt.init; + if (init_node->type.kind == TYPE_STRUCT && init_node->type.struct_name) { + var_type = find_struct_type(ctx, init_node->type.struct_name); + if (!var_type) var_type = to_llvm_type(ctx, init_node->type.kind); + } else { + var_type = to_llvm_type(ctx, init_node->type.kind); + } } else { - var_type = to_llvm_type(ctx, node->as.let_stmt.init->type.kind); + var_type = type_info_to_llvm(ctx, 
&node->type); } + if (!var_type) return; LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder, var_type, node->as.let_stmt.name); - LLVMBuildStore(ctx->builder, init_val, alloca); - add_var(ctx, node->as.let_stmt.name, alloca); + + // 尝试生成 init 值;数组类型可能 init 失败 (自引用占位符) + LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init); + if (init_val) { + LLVMBuildStore(ctx->builder, init_val, alloca); + } else if (node->type.kind == TYPE_ARRAY) { + // 数组声明: init 失败是预期的 (自引用), 存储零初始化 + LLVMValueRef zero_init = LLVMConstNull(var_type); + LLVMBuildStore(ctx->builder, zero_init, alloca); + } else { + return; + } + add_var(ctx, node->as.let_stmt.name, alloca, var_type); // 自动内存管理: 只追踪 str 堆分配 (拼接/malloc) // struct 是栈上值类型,不能 free();含 str 字段时 v0.5 扩展 @@ -503,6 +576,35 @@ static void codegen_stmt(CgCtx* ctx, AstNode* node) { break; } + case AST_ARRAY_ASSIGN_STMT: { + LLVMValueRef arr_ptr = find_var(ctx, node->as.array_assign.name); + if (!arr_ptr) return; + + // Fetch the variable-table entry: its alloca_type is the pointee type passed to the GEP below + VarEntry* ve = NULL; + for (VarEntry* e = ctx->var_table; e; e = e->next) + if (strcmp(e->name, node->as.array_assign.name) == 0) { ve = e; break; } + if (!ve) return; // no entry means no alloca_type for the GEP; bail out instead of dereferencing NULL + LLVMValueRef idx_val = codegen_expr(ctx, node->as.array_assign.index); + if (!idx_val) return; + + LLVMValueRef val_val = codegen_expr(ctx, node->as.array_assign.value); + if (!val_val) return; + + // truncate the i64 index value to i32 + LLVMValueRef idx_i32 = LLVMBuildTrunc(ctx->builder, idx_val, + LLVMInt32TypeInContext(ctx->context), "idx32"); + + LLVMValueRef indices[] = { + LLVMConstInt(LLVMInt32TypeInContext(ctx->context), 0, false), + idx_i32 + }; + LLVMValueRef elem_ptr = LLVMBuildGEP2(ctx->builder, ve->alloca_type, arr_ptr, indices, 2, "arr_assign_elem"); + + LLVMBuildStore(ctx->builder, val_val, elem_ptr); + break; + } + case AST_WHILE_STMT: { LLVMBasicBlockRef cur_bb = LLVMGetInsertBlock(ctx->builder); LLVMValueRef cur_fn = LLVMGetBasicBlockParent(cur_bb); @@ -659,7 +761,7 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena, LLVMValueRef 
alloca = LLVMBuildAlloca(ctx.builder, param_ty, pnode->as.parameter.name); LLVMBuildStore(ctx.builder, param, alloca); - add_var(&ctx, pnode->as.parameter.name, alloca); + add_var(&ctx, pnode->as.parameter.name, alloca, param_ty); } codegen_stmt(&ctx, fn->as.function.body); diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 4d493df..7de0302 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -137,6 +137,8 @@ Token* lex(Arena* a, const char* source, const char* filename, else if (c == '|' && peek_next(&l) == '|') { tokens[idx++] = make_token(&l, TOK_PIPE_PIPE, l.pos, 2); advance(&l); advance(&l); } else if (c == '.' && peek_next(&l) == '.') { tokens[idx++] = make_token(&l, TOK_DOT_DOT, l.pos, 2); advance(&l); advance(&l); } else if (c == '.') { tokens[idx++] = make_token(&l, TOK_DOT, l.pos, 1); advance(&l); } + else if (c == '[') { tokens[idx++] = make_token(&l, TOK_LBRACKET, l.pos, 1); advance(&l); } + else if (c == ']') { tokens[idx++] = make_token(&l, TOK_RBRACKET, l.pos, 1); advance(&l); } else if (c == '(') { tokens[idx++] = make_token(&l, TOK_LPAREN, l.pos, 1); advance(&l); } else if (c == ')') { tokens[idx++] = make_token(&l, TOK_RPAREN, l.pos, 1); advance(&l); } else if (c == '{') { tokens[idx++] = make_token(&l, TOK_LBRACE, l.pos, 1); advance(&l); } diff --git a/src/lexer/token.c b/src/lexer/token.c index aac1bb7..93c1571 100644 --- a/src/lexer/token.c +++ b/src/lexer/token.c @@ -21,6 +21,7 @@ static const char* NAMES[] = { [TOK_PLUS_EQ] = "+=", [TOK_MINUS_EQ] = "-=", [TOK_STAR_EQ] = "*=", [TOK_SLASH_EQ] = "/=", [TOK_LPAREN] = "(", [TOK_RPAREN] = ")", [TOK_LBRACE] = "{", [TOK_RBRACE] = "}", + [TOK_LBRACKET] = "[", [TOK_RBRACKET] = "]", [TOK_COMMA] = ",", [TOK_COLON] = ":", [TOK_SEMICOLON] = ";", [TOK_ASSIGN] = "=", [TOK_DOT] = ".", [TOK_COLON_COLON] = "::", diff --git a/src/lexer/token.h b/src/lexer/token.h index 85dd797..6b990a4 100644 --- a/src/lexer/token.h +++ b/src/lexer/token.h @@ -22,6 +22,7 @@ typedef enum { TOK_PLUS_EQ, TOK_MINUS_EQ, 
TOK_STAR_EQ, TOK_SLASH_EQ, // 分隔符 TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE, + TOK_LBRACKET, TOK_RBRACKET, TOK_COMMA, TOK_COLON, TOK_SEMICOLON, TOK_ASSIGN, // 特殊 TOK_DOT, TOK_COLON_COLON, diff --git a/src/parser/parser.c b/src/parser/parser.c index 7e9e6c1..b4ac6f8 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -231,6 +231,16 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error continue; } + // 后置索引: expr[expr] + if (kind == TOK_LBRACKET) { + const Token* lbrack = advance(p); // 跳过 '[' + AstNode* index = parse_expr(p, error); + if (!index) return NULL; + if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL; + left = ast_make_index_expr(p->arena, left, index, tok_loc(lbrack)); + continue; + } + // 中缀运算符 Precedence prec = tok_to_prec(kind); if (prec <= min_prec) break; @@ -255,9 +265,32 @@ static TypeKind token_to_type(TokenKind k) { default: return TYPE_VOID; } } -// === 类型表达式解析(内置类型/结构体名)=== +// === 类型表达式解析(内置类型/结构体名/数组类型)=== static TypeInfo parse_type_expr(Parser* p, ErrorInfo* error) { const Token* t = peek(p); + + // 数组类型: [element_type; size] + if (t->kind == TOK_LBRACKET) { + advance(p); // 跳过 '[' + TypeInfo elem = parse_type_expr(p, error); + if (elem.kind == TYPE_ERROR) return elem; + if (!expect(p, TOK_SEMICOLON, error, "数组类型中缺少 ';'")) { + TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti; + } + const Token* size_tok = expect(p, TOK_INT_LIT, error, "数组大小必须是整数常量"); + if (!size_tok) { TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti; } + int64_t size = tok_int_value(size_tok); + if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) { + TypeInfo ti = {0}; ti.kind = TYPE_ERROR; return ti; + } + TypeInfo ti = {0}; + ti.kind = TYPE_ARRAY; + ti.element_type = elem.kind; + ti.element_struct_name = elem.struct_name; + ti.array_size = size; + return ti; + } + TypeInfo ti = {0}; if (tok_is_type(t->kind)) { advance(p); @@ -344,11 +377,17 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { TypeKind 
annot_type = TYPE_UNKNOWN; bool has_type_annot = false; const char* struct_type_name = NULL; + TypeKind annot_elem_type = 0; + const char* annot_elem_struct = NULL; + int64_t annot_arr_size = 0; if (match(p, TOK_COLON)) { TypeInfo ti = parse_type_expr(p, error); if (ti.kind == TYPE_ERROR) return NULL; annot_type = ti.kind; struct_type_name = ti.struct_name; + annot_elem_type = ti.element_type; + annot_elem_struct = ti.element_struct_name; + annot_arr_size = ti.array_size; has_type_annot = true; } if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL; @@ -357,7 +396,8 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL; return ast_make_let(p->arena, arena_strdup_impl(p->arena, name->start, name->length), - annot_type, has_type_annot, is_mut, init, struct_type_name, tok_loc(t)); + annot_type, has_type_annot, is_mut, init, struct_type_name, + annot_elem_type, annot_elem_struct, annot_arr_size, tok_loc(t)); } if (t->kind == TOK_IF) { @@ -418,7 +458,7 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { const char* vname = arena_strdup_impl(p->arena, var_name->start, var_name->length); // 构建: let mut i = start; - AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, true, start_expr, NULL, tok_loc(var_name)); + AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, true, start_expr, NULL, 0, NULL, 0, tok_loc(var_name)); // 构建: i < end (while 条件) AstNode* cond = ast_make_binary(p->arena, OP_LT, @@ -462,6 +502,35 @@ static AstNode* parse_statement(Parser* p, ErrorInfo* error) { return ast_make_return(p->arena, expr, tok_loc(t)); } + // 数组元素赋值: ident[expr] = expr ; + // 需要前瞻: 检查 ']' 后面是否是 '=' (而非 ';' 或其它) + if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_LBRACKET) { + // 向前扫描找到对应的 ']'(不支持嵌套 '[' 在索引中) + int ahead_idx = 2; + int bracket_depth = 1; + while (bracket_depth > 0 && (t + ahead_idx)->kind != TOK_EOF) { + if ((t + ahead_idx)->kind == 
TOK_LBRACKET) bracket_depth++; + else if ((t + ahead_idx)->kind == TOK_RBRACKET) bracket_depth--; + if (bracket_depth > 0) ahead_idx++; + } + // Peek past ']' for '=' only when the scan stopped on the matching ']' (depth 0); if it stopped on EOF there is no token after it to read + if (bracket_depth == 0 && (t + ahead_idx + 1)->kind == TOK_ASSIGN) { + const Token* name = advance(p); // consume the identifier + advance(p); // consume '[' + AstNode* index = parse_expr(p, error); + if (!index) return NULL; + if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL; + if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL; + AstNode* value = parse_expr(p, error); + if (!value) return NULL; + if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL; + return ast_make_array_assign(p->arena, + arena_strdup_impl(p->arena, name->start, name->length), + index, value, tok_loc(name)); + } + // otherwise: not an array-element assignment; fall through to the expression-statement handling below + } + // 赋值语句: ident = expr ; if (t->kind == TOK_IDENT && (t + 1)->kind == TOK_ASSIGN) { const Token* name = advance(p); // 消费标识符 diff --git a/src/sema/sema.c b/src/sema/sema.c index 3f8d631..d976b90 100644 --- a/src/sema/sema.c +++ b/src/sema/sema.c @@ -51,6 +51,11 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* if (sym->type == TYPE_STRUCT && sym->struct_type_name) { node->type.struct_name = sym->struct_type_name; } + if (sym->type == TYPE_ARRAY) { + node->type.element_type = sym->array_element_type; + node->type.element_struct_name = sym->array_element_struct_name; + node->type.array_size = sym->array_size; + } } break; } @@ -316,6 +321,30 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* break; } + case AST_INDEX_EXPR: { + analyze_expr(node->as.index_expr.array, scope, errors, a); + analyze_expr(node->as.index_expr.index, scope, errors, a); + AstNode* arr = node->as.index_expr.array; + AstNode* idx = node->as.index_expr.index; + + if (arr->type.kind == TYPE_ERROR) { node->type.kind = TYPE_ERROR; break; } + if (arr->type.kind != TYPE_ARRAY) { + error_add(errors, "", node->loc.line, node->loc.col, + "类型 '%s' 不支持索引操作", type_name(arr->type.kind)); + 
node->type.kind = TYPE_ERROR; break; + } + if (idx->type.kind == TYPE_ERROR) { node->type.kind = TYPE_ERROR; break; } + if (idx->type.kind != TYPE_I64) { + error_add(errors, "", node->loc.line, node->loc.col, + "数组索引必须是 i64 类型, 得到 '%s'", type_name(idx->type.kind)); + node->type.kind = TYPE_ERROR; break; + } + // 结果类型 = 元素类型 + node->type.kind = arr->type.element_type; + node->type.struct_name = arr->type.element_struct_name; + break; + } + default: break; } } @@ -450,41 +479,50 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* break; case AST_LET_STMT: { - analyze_expr(node->as.let_stmt.init, scope, errors, a); - TypeKind inferred = node->as.let_stmt.init->type.kind; TypeKind var_type; const char* var_struct_name = NULL; + bool is_array_type = false; if (node->as.let_stmt.has_type_annot) { - const char* annot_struct = node->as.let_stmt.struct_type_name; - if (annot_struct) { - // 先检查是否是类型别名 - Symbol* alias_sym = scope_lookup(scope, annot_struct); - if (alias_sym && alias_sym->is_type_alias) { - var_type = alias_sym->type; - var_struct_name = alias_sym->struct_type_name; - } else { - // struct 类型标注 - Symbol* st_sym = scope_lookup_struct(scope, annot_struct); - if (!st_sym) { - error_add(errors, "", node->loc.line, node->loc.col, - "未定义的类型 '%s'", annot_struct); - break; - } - var_type = TYPE_STRUCT; - var_struct_name = annot_struct; - } + if (node->as.let_stmt.annot_type == TYPE_ARRAY) { + // 数组类型标注: 跳过 init 分析 (init 是自引用的占位符) + is_array_type = true; + var_type = TYPE_ARRAY; } else { - var_type = node->as.let_stmt.annot_type; - } - if (inferred != TYPE_ERROR && inferred != var_type) { - error_add(errors, "", node->loc.line, node->loc.col, - "变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'", - node->as.let_stmt.name, - annot_struct ? 
annot_struct : type_name(var_type), - type_name(inferred)); + analyze_expr(node->as.let_stmt.init, scope, errors, a); + TypeKind inferred = node->as.let_stmt.init->type.kind; + const char* annot_struct = node->as.let_stmt.struct_type_name; + if (annot_struct) { + // 先检查是否是类型别名 + Symbol* alias_sym = scope_lookup(scope, annot_struct); + if (alias_sym && alias_sym->is_type_alias) { + var_type = alias_sym->type; + var_struct_name = alias_sym->struct_type_name; + } else { + // struct 类型标注 + Symbol* st_sym = scope_lookup_struct(scope, annot_struct); + if (!st_sym) { + error_add(errors, "", node->loc.line, node->loc.col, + "未定义的类型 '%s'", annot_struct); + break; + } + var_type = TYPE_STRUCT; + var_struct_name = annot_struct; + } + } else { + var_type = node->as.let_stmt.annot_type; + } + if (inferred != TYPE_ERROR && inferred != var_type) { + error_add(errors, "", node->loc.line, node->loc.col, + "变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'", + node->as.let_stmt.name, + annot_struct ? annot_struct : type_name(var_type), + type_name(inferred)); + } } } else { + analyze_expr(node->as.let_stmt.init, scope, errors, a); + TypeKind inferred = node->as.let_stmt.init->type.kind; // 类型推断 if (inferred == TYPE_ERROR || inferred == TYPE_VOID) { error_add(errors, "", node->loc.line, node->loc.col, @@ -499,6 +537,11 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* node->type.kind = var_type; node->type.struct_name = var_struct_name; + if (is_array_type) { + node->type.element_type = node->as.let_stmt.annot_element_type; + node->type.element_struct_name = node->as.let_stmt.annot_element_struct_name; + node->type.array_size = node->as.let_stmt.annot_array_size; + } Symbol* sym = scope_insert(scope, a, node->as.let_stmt.name, SYM_VARIABLE, var_type); if (!sym) { error_add(errors, "", node->loc.line, node->loc.col, @@ -509,6 +552,11 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* sym->type = TYPE_STRUCT; sym->struct_type_name = 
var_struct_name; } + if (is_array_type) { + sym->array_element_type = node->type.element_type; + sym->array_element_struct_name = node->type.element_struct_name; + sym->array_size = node->type.array_size; + } } break; } @@ -545,6 +593,36 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* break; } + case AST_ARRAY_ASSIGN_STMT: { + Symbol* sym = scope_lookup(scope, node->as.array_assign.name); + if (!sym) { + error_add(errors, "", node->loc.line, node->loc.col, + "未定义的变量 '%s'", node->as.array_assign.name); + node->type.kind = TYPE_ERROR; break; + } + if (sym->type != TYPE_ARRAY) { + error_add(errors, "", node->loc.line, node->loc.col, + "'%s' 不是数组类型,不能使用索引赋值", node->as.array_assign.name); + node->type.kind = TYPE_ERROR; break; + } + analyze_expr(node->as.array_assign.index, scope, errors, a); + analyze_expr(node->as.array_assign.value, scope, errors, a); + AstNode* idx = node->as.array_assign.index; + AstNode* val = node->as.array_assign.value; + if (idx->type.kind != TYPE_ERROR && idx->type.kind != TYPE_I64) { + error_add(errors, "", node->loc.line, node->loc.col, + "数组索引必须是 i64 类型, 得到 '%s'", type_name(idx->type.kind)); + } + TypeKind elem_kind = sym->type == TYPE_ARRAY ? 
sym->array_element_type : sym->type; // expected kind is the element type recorded on the symbol by the let statement, not a fixed i64 + if (val->type.kind != TYPE_ERROR && val->type.kind != elem_kind) { + error_add(errors, "", node->loc.line, node->loc.col, + "数组元素类型不匹配: 期望 '%s',得到 '%s'", + type_name(elem_kind), type_name(val->type.kind)); + } + node->type.kind = TYPE_VOID; + break; + } + case AST_IF_STMT: analyze_expr(node->as.if_stmt.cond, scope, errors, a); if (node->as.if_stmt.cond->type.kind != TYPE_BOOL && diff --git a/src/sema/symbol.c b/src/sema/symbol.c index 90469c3..50b4759 100644 --- a/src/sema/symbol.c +++ b/src/sema/symbol.c @@ -35,6 +35,9 @@ Symbol* scope_insert(Scope* scope, void* alloc, const char* name, sym->struct_field_types = NULL; sym->struct_field_count = 0; sym->struct_type_name = NULL; + sym->array_element_type = 0; + sym->array_element_struct_name = NULL; + sym->array_size = 0; sym->is_type_alias = false; sym->next = scope->head; scope->head = sym; @@ -61,6 +64,9 @@ Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name, sym->struct_field_types = NULL; sym->struct_field_count = 0; sym->struct_type_name = NULL; + sym->array_element_type = 0; + sym->array_element_struct_name = NULL; + sym->array_size = 0; sym->is_type_alias = false; sym->next = scope->head; scope->head = sym; @@ -85,6 +91,9 @@ Symbol* scope_insert_struct(Scope* scope, void* alloc, const char* name, sym->struct_field_struct_names = fstruct_names; sym->struct_field_count = fc; sym->struct_type_name = NULL; + sym->array_element_type = 0; + sym->array_element_struct_name = NULL; + sym->array_size = 0; sym->is_type_alias = false; sym->next = scope->head; scope->head = sym; @@ -127,6 +136,9 @@ Symbol* scope_insert_enum(Scope* scope, void* alloc, const char* name, sym->struct_field_struct_names = NULL; sym->struct_field_count = vc; sym->struct_type_name = NULL; + sym->array_element_type = 0; + sym->array_element_struct_name = NULL; + sym->array_size = 0; sym->is_type_alias = false; sym->next = scope->head; scope->head = sym; diff --git a/src/sema/symbol.h 
b/src/sema/symbol.h index 11ae22c..4777274 100644 --- a/src/sema/symbol.h +++ b/src/sema/symbol.h @@ -24,6 +24,10 @@ typedef struct Symbol { size_t struct_field_count; // 变量引用结构体类型时,记录具体类型名 const char* struct_type_name; + // 数组变量特有(TYPE_ARRAY) + TypeKind array_element_type; + const char* array_element_struct_name; + int64_t array_size; // 类型别名标记 bool is_type_alias; // 链表(同一作用域内的下一个符号) diff --git a/test/programs/18_array.l b/test/programs/18_array.l new file mode 100644 index 0000000..4a9d2f3 --- /dev/null +++ b/test/programs/18_array.l @@ -0,0 +1,10 @@ +fn main() -> i64 { + let arr: [i64; 3] = arr; + arr[0] = 10; + arr[1] = 20; + arr[2] = 30; + print_i64(arr[0]); + print_i64(arr[1]); + print_i64(arr[2]); + return 0; +} diff --git a/test/test_codegen.c b/test/test_codegen.c index 354af46..3b16466 100644 --- a/test/test_codegen.c +++ b/test/test_codegen.c @@ -145,7 +145,7 @@ void test_codegen_struct_decl() { init->type.struct_name = "Point"; AstNode* let_stmt = ast_make_let(&a, "p", TYPE_UNKNOWN, false, false, - init, NULL, loc_at(1, 1)); + init, NULL, 0, NULL, 0, loc_at(1, 1)); /* return p.x; */ AstNode* p_ident = ast_make_ident(&a, "p", loc_at(1, 1)); @@ -200,7 +200,7 @@ void test_codegen_struct_field_access() { init->type.struct_name = "Point"; AstNode* let_stmt = ast_make_let(&a, "p", TYPE_UNKNOWN, false, false, - init, NULL, loc_at(1, 1)); + init, NULL, 0, NULL, 0, loc_at(1, 1)); /* return p.y; */ AstNode* p_ident = ast_make_ident(&a, "p", loc_at(1, 1)); @@ -251,7 +251,7 @@ void test_codegen_enum() { cv->type.kind = TYPE_ENUM; AstNode* let_stmt = ast_make_let(&a, "c", TYPE_UNKNOWN, false, false, - cv, NULL, loc_at(1, 1)); + cv, NULL, 0, NULL, 0, loc_at(1, 1)); /* print_i64(c); */ AstNode* c_ident = ast_make_ident(&a, "c", loc_at(1, 1)); @@ -284,6 +284,70 @@ void test_codegen_enum() { arena_destroy(&a); } +/* === 数组代码生成测试 === */ + +void test_codegen_array() { + Arena a = arena_create(1); + + /* 构造 AST: + fn main() -> i64 { + let arr: [i64; 3] = arr; + arr[0] = 
10; + print_i64(arr[0]); + return 0; + } + */ + // let arr: [i64; 3] = arr; + AstNode* arr_init = ast_make_ident(&a, "arr", loc_at(1, 1)); + AstNode* let_stmt = ast_make_let(&a, "arr", TYPE_ARRAY, true, false, + arr_init, NULL, TYPE_I64, NULL, 3, loc_at(1, 1)); + // 手动设置 LET_STMT 的类型(绕过 sema) + let_stmt->type.kind = TYPE_ARRAY; + let_stmt->type.element_type = TYPE_I64; + let_stmt->type.array_size = 3; + + // arr[0] = 10; + AstNode* arr_assign = ast_make_array_assign(&a, "arr", + ast_make_literal_i64(&a, 0, loc_at(1, 1)), + ast_make_literal_i64(&a, 10, loc_at(1, 1)), loc_at(1, 1)); + + // arr[0] 表达式(print_i64 的参数) + AstNode* arr_ident = ast_make_ident(&a, "arr", loc_at(1, 1)); + arr_ident->type.kind = TYPE_ARRAY; + arr_ident->type.element_type = TYPE_I64; + arr_ident->type.array_size = 3; + AstNode* idx_expr = ast_make_index_expr(&a, arr_ident, + ast_make_literal_i64(&a, 0, loc_at(1, 1)), loc_at(1, 1)); + idx_expr->type.kind = TYPE_I64; // 元素类型 + + // print_i64(arr[0]); + AstNode* args[] = { idx_expr }; + AstNode* print_call = ast_make_call(&a, "print_i64", args, 1, loc_at(1, 1)); + + // return 0; + AstNode* ret = ast_make_return(&a, ast_make_literal_i64(&a, 0, loc_at(1, 1)), loc_at(1, 1)); + + AstNode* stmts[] = { let_stmt, arr_assign, print_call, ret }; + AstNode* body = ast_make_block(&a, stmts, 4, loc_at(1, 1)); + AstNode* fn = ast_make_function(&a, "main", NULL, 0, TYPE_I64, NULL, body, loc_at(1, 1)); + AstNode* fns[] = { fn }; + AstNode* prog = ast_make_program(&a, fns, 1, NULL, 0, NULL, 0, NULL, 0, loc_at(1, 1)); + + const char* err = NULL; + LLVMContextRef ctx = NULL; + LLVMModuleRef mod = codegen_module(prog, &a, "test_array", &err, &ctx); + ASSERT(mod != NULL); + ASSERT(err == NULL); + + char* verify_err = NULL; + int failed = LLVMVerifyModule(mod, LLVMReturnStatusAction, &verify_err); + ASSERT(!failed); + + LLVMDisposeModule(mod); + LLVMContextDispose(ctx); + arena_destroy(&a); +} + int main(void) { TEST_RUN(test_codegen_simple_function); 
TEST_RUN(test_codegen_if_else); @@ -292,5 +356,6 @@ int main(void) { TEST_RUN(test_codegen_struct_decl); TEST_RUN(test_codegen_struct_field_access); TEST_RUN(test_codegen_enum); + TEST_RUN(test_codegen_array); return test_summary(); } diff --git a/test/test_sema.c b/test/test_sema.c index 6b90a52..7d1f78b 100644 --- a/test/test_sema.c +++ b/test/test_sema.c @@ -259,6 +259,76 @@ void test_enum_bad_variant() { arena_destroy(&a); } +/* === 数组语义分析测试 === */ + +void test_array_ok() { + Arena a = arena_create(1); + size_t tc; ErrorInfo lex_err = {0}; + Token* toks = lex(&a, + "fn main() { let arr: [i64; 3] = arr; arr[0]; return; }", + "test", &tc, &lex_err); + ASSERT(toks != NULL); + ErrorInfo parse_err = {0}; + AstNode* ast = parse(&a, toks, tc, "test", &parse_err); + ASSERT(ast != NULL); + + ErrorList errors; error_init(&errors); + sema_analyze(ast, &errors, &a); + ASSERT(errors.count == 0); + arena_destroy(&a); +} + +void test_array_index_type_error() { + Arena a = arena_create(1); + size_t tc; ErrorInfo lex_err = {0}; + Token* toks = lex(&a, + "fn main() { let arr: [i64; 3] = arr; arr[true]; return; }", + "test", &tc, &lex_err); + ASSERT(toks != NULL); + ErrorInfo parse_err = {0}; + AstNode* ast = parse(&a, toks, tc, "test", &parse_err); + ASSERT(ast != NULL); + + ErrorList errors; error_init(&errors); + sema_analyze(ast, &errors, &a); + ASSERT(errors.count > 0); // true 不是 i64 + arena_destroy(&a); +} + +void test_array_not_indexable() { + Arena a = arena_create(1); + size_t tc; ErrorInfo lex_err = {0}; + Token* toks = lex(&a, + "fn main() { let x: i64 = 0; x[0]; return; }", + "test", &tc, &lex_err); + ASSERT(toks != NULL); + ErrorInfo parse_err = {0}; + AstNode* ast = parse(&a, toks, tc, "test", &parse_err); + ASSERT(ast != NULL); + + ErrorList errors; error_init(&errors); + sema_analyze(ast, &errors, &a); + ASSERT(errors.count > 0); // i64 不是数组 + arena_destroy(&a); +} + +void test_array_assign_ok() { + Arena a = arena_create(1); + size_t tc; ErrorInfo lex_err = {0}; 
+ Token* toks = lex(&a, + "fn main() { let arr: [i64; 3] = arr; arr[0] = 42; return; }", + "test", &tc, &lex_err); + ASSERT(toks != NULL); + ErrorInfo parse_err = {0}; + AstNode* ast = parse(&a, toks, tc, "test", &parse_err); + ASSERT(ast != NULL); + + ErrorList errors; error_init(&errors); + sema_analyze(ast, &errors, &a); + ASSERT(errors.count == 0); + arena_destroy(&a); +} + int main(void) { TEST_RUN(test_type_error); TEST_RUN(test_undefined_var); @@ -275,5 +345,9 @@ int main(void) { TEST_RUN(test_type_alias_struct); TEST_RUN(test_enum_ok); TEST_RUN(test_enum_bad_variant); + TEST_RUN(test_array_ok); + TEST_RUN(test_array_index_type_error); + TEST_RUN(test_array_not_indexable); + TEST_RUN(test_array_assign_ok); return test_summary(); }