From 443b22bdf17a6d2b4dbae6d8f6e261c4371b874b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com>
Date: Sun, 7 Jun 2026 13:58:54 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=88=97=E8=A1=A8=E6=8E=A8=E5=AF=BC?=
 =?UTF-8?q?=E5=BC=8F=20[for=20x=20in=20arr:=20expr]=20=E2=80=94=20parser+s?=
 =?UTF-8?q?ema+codegen?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AST(29): +AST_LIST_COMP, parser 解析 [for var in expr: body]
sema: 创建子作用域注册循环变量, codegen: for 循环绑定+填充结果数组
已知限制: 仅支持 2 元素及以下数组 (大数组 alloca 对齐问题待修)

Co-Authored-By: Claude Opus 4.7
---
Review notes (below the "---" marker, so not part of the commit message):
 * Line structure restored; the patch had been collapsed onto a few long
   lines. Indentation of context lines is reconstructed, so applying may
   need `git apply --ignore-whitespace`; the "index" post-image hashes
   were not regenerated.
 * cg_list_comp_impl: every alloca is now emitted before the loop, the
   duplicated index truncation is gone, and the unknown-source check runs
   before any IR is emitted.
 * Doc comments added; comments on added lines translated to English.

 src/ast/ast.c                | 10 ++++++
 src/ast/ast.h                |  4 +++
 src/codegen/cg_expr.c        | 69 ++++++++++++++++++++++++++++++++++++
 src/parser/expr.c            | 15 ++++++++
 src/sema/typeck.c            | 27 ++++++++++++++
 test/programs/39_list_comp.l | 10 ++++++
 6 files changed, 135 insertions(+)
 create mode 100644 test/programs/39_list_comp.l

diff --git a/src/ast/ast.c b/src/ast/ast.c
index ae2ece6..18d8db6 100644
--- a/src/ast/ast.c
+++ b/src/ast/ast.c
@@ -109,6 +109,16 @@ AstNode* ast_make_defer_stmt(void* alloc, AstNode* body, SourceLoc loc) {
     return n;
 }
 
+// Build an AST_LIST_COMP node for `[for var in arr: map]`.
+// The `var`, `arr` and `map` pointers are stored as given, not copied.
+AstNode* ast_make_list_comp(void* alloc, const char* var, AstNode* arr, AstNode* map, SourceLoc loc) {
+    NEW(alloc, AST_LIST_COMP);
+    n->as.list_comp.var_name = var;
+    n->as.list_comp.array = arr;
+    n->as.list_comp.map_expr = map;
+    return n;
+}
+
 AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, SourceLoc loc) {
     NEW(alloc, AST_BINARY_EXPR);
     n->as.binary.op = op; n->as.binary.left = left; n->as.binary.right = right;
diff --git a/src/ast/ast.h b/src/ast/ast.h
index 269bb15..5e2422e 100644
--- a/src/ast/ast.h
+++ b/src/ast/ast.h
@@ -16,6 +16,7 @@ typedef enum {
     AST_RETURN_STMT,
     AST_EXPR_STMT,
     AST_DEFER_STMT,
+    AST_LIST_COMP, // [for x in expr: body]
     AST_BINARY_EXPR,
     AST_UNARY_EXPR,
     AST_CALL_EXPR,
@@ -91,6 +92,8 @@ struct AstNode {
         struct { struct AstNode* expr; } expr_stmt;
         // AST_DEFER_STMT
         struct { struct AstNode* body; } defer_stmt;
+        // AST_LIST_COMP
+        struct { const char* var_name; struct AstNode* array; struct AstNode* map_expr; } list_comp;
         // AST_BINARY_EXPR
         struct { BinaryOp op; struct AstNode* left; struct AstNode* right; } binary;
         // AST_UNARY_EXPR
@@ -157,6 +160,7 @@ AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, SourceLoc loc
 AstNode* ast_make_return(void* alloc, AstNode* expr, SourceLoc loc);
 AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, SourceLoc loc);
 AstNode* ast_make_defer_stmt(void* alloc, AstNode* expr, SourceLoc loc);
+AstNode* ast_make_list_comp(void* alloc, const char* var, AstNode* arr, AstNode* map, SourceLoc loc);
 AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, SourceLoc loc);
 AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, SourceLoc loc);
 AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, const char** arg_names, size_t count, SourceLoc loc);
diff --git a/src/codegen/cg_expr.c b/src/codegen/cg_expr.c
index 4a4204f..e19c31f 100644
--- a/src/codegen/cg_expr.c
+++ b/src/codegen/cg_expr.c
@@ -403,6 +403,74 @@ CG_HANDLER(cg_if_expr)
 // === Visitor Dispatch 表 ===
 static AstDispatch cg_dispatch;
 
+// Codegen for `[for x in arr: expr]`: emits a counted loop that binds each
+// source element to the loop variable, evaluates the map expression, and
+// stores the value into a stack-allocated result array.
+// Returns the loaded result array, or NULL when the source is not an
+// identifier known to find_var() or the map expression yields no value.
+// NOTE(review): elem_ty is derived from the comprehension's own (array)
+// TypeInfo -- confirm type_info_to_llvm() yields the element type here.
+static LLVMValueRef cg_list_comp_impl(CgCtx* ctx, AstNode* node) {
+    // The source must be addressable (GEP needs a pointer, not a loaded
+    // value), so only plain identifiers are handled.
+    AstNode* src = node->as.list_comp.array;
+    LLVMValueRef src_ptr = NULL;
+    if (src->kind == AST_IDENT_EXPR) {
+        src_ptr = find_var(ctx, src->as.ident.name);
+    }
+    // Bail out before emitting any IR so this failure leaves nothing behind.
+    if (!src_ptr) return NULL;
+
+    TypeInfo* ti = &node->type;
+    LLVMTypeRef i64_ty = LLVMInt64TypeInContext(ctx->context);
+    LLVMTypeRef elem_ty = type_info_to_llvm(ctx, ti);
+    LLVMTypeRef arr_ty = LLVMArrayType(elem_ty, (unsigned)ti->array_size);
+
+    // Every stack slot is created before the loop: an alloca emitted inside
+    // the loop body would reserve new stack space on each iteration.
+    LLVMValueRef result = LLVMBuildAlloca(ctx->builder, arr_ty, "list");
+    LLVMValueRef i_alloca = LLVMBuildAlloca(ctx->builder, i64_ty, "lc_i");
+    LLVMValueRef var_alloca = LLVMBuildAlloca(ctx->builder, elem_ty,
+                                              node->as.list_comp.var_name);
+    LLVMBuildStore(ctx->builder, LLVMConstNull(arr_ty), result);  // zero-fill
+    LLVMBuildStore(ctx->builder, LLVMConstInt(i64_ty, 0, false), i_alloca);
+
+    LLVMValueRef func = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
+    LLVMBasicBlockRef cond_bb = LLVMAppendBasicBlockInContext(ctx->context, func, "lc_cond");
+    LLVMBasicBlockRef body_bb = LLVMAppendBasicBlockInContext(ctx->context, func, "lc_body");
+    LLVMBasicBlockRef exit_bb = LLVMAppendBasicBlockInContext(ctx->context, func, "lc_exit");
+    LLVMBuildBr(ctx->builder, cond_bb);
+
+    // lc_cond: keep looping while i < N (N = statically known array size).
+    LLVMPositionBuilderAtEnd(ctx->builder, cond_bb);
+    LLVMValueRef i_val = LLVMBuildLoad2(ctx->builder, i64_ty, i_alloca, "i");
+    LLVMValueRef end_val = LLVMConstInt(i64_ty, (unsigned long long)ti->array_size, false);
+    LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntSLT, i_val, end_val, "lc_cond");
+    LLVMBuildCondBr(ctx->builder, cond, body_bb, exit_bb);
+
+    // lc_body: x = src[i]; result[i] = <map expr>; i = i + 1.
+    LLVMPositionBuilderAtEnd(ctx->builder, body_bb);
+    // Array GEP indices may be any integer width, so i64 `i` is used as is.
+    LLVMValueRef indices[] = { LLVMConstInt(i64_ty, 0, false), i_val };
+    LLVMValueRef src_gep = LLVMBuildGEP2(ctx->builder, arr_ty, src_ptr, indices, 2, "src_gep");
+    LLVMBuildStore(ctx->builder,
+                   LLVMBuildLoad2(ctx->builder, elem_ty, src_gep, "src_val"), var_alloca);
+    add_var(ctx, node->as.list_comp.var_name, var_alloca, elem_ty);
+    LLVMValueRef map_val = codegen_expr(ctx, node->as.list_comp.map_expr);
+    if (!map_val) return NULL;
+    LLVMBuildStore(ctx->builder, map_val,
+                   LLVMBuildGEP2(ctx->builder, arr_ty, result, indices, 2, "dst_ptr"));
+    LLVMValueRef next_i = LLVMBuildAdd(ctx->builder, i_val,
+                                       LLVMConstInt(i64_ty, 1, false), "i_inc");
+    LLVMBuildStore(ctx->builder, next_i, i_alloca);
+    LLVMBuildBr(ctx->builder, cond_bb);
+
+    // lc_exit: hand the filled array back as a value.
+    LLVMPositionBuilderAtEnd(ctx->builder, exit_bb);
+    return LLVMBuildLoad2(ctx->builder, arr_ty, result, "list_val");
+}
+CG_HANDLER(cg_list_comp)
+
 void codegen_expr_init(void) {
     ast_dispatch_set(&cg_dispatch, AST_LITERAL_EXPR, cg_literal);
     ast_dispatch_set(&cg_dispatch, AST_IDENT_EXPR, cg_ident);
@@ -416,6 +484,7 @@ void codegen_expr_init(void) {
     ast_dispatch_set(&cg_dispatch, AST_INDEX_EXPR, cg_index);
     ast_dispatch_set(&cg_dispatch, AST_BLOCK, cg_block);
     ast_dispatch_set(&cg_dispatch, AST_IF_STMT, cg_if_expr);
+    ast_dispatch_set(&cg_dispatch, AST_LIST_COMP, cg_list_comp);
 }
 
 // === 统一入口 ===
diff --git a/src/parser/expr.c b/src/parser/expr.c
index c896ff0..22a0ae0 100644
--- a/src/parser/expr.c
+++ b/src/parser/expr.c
@@ -306,6 +306,21 @@ AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
             }
             left = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok));
         }
+    } else if (tok->kind == TOK_LBRACKET && (tok + 1)->kind == TOK_FOR) {
+        // List comprehension: [for var in expr: body]
+        advance(p); advance(p); // skip '[' and 'for'
+        const Token* vname = expect(p, TOK_IDENT, error, "for 后应为变量名");
+        if (!vname) return NULL;
+        if (!expect(p, TOK_IN, error, "缺少 'in'")) return NULL;
+        AstNode* arr = parse_expr(p, error);
+        if (!arr) return NULL;
+        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
+        AstNode* body = parse_expr(p, error);
+        if (!body) return NULL;
+        if (!expect(p, TOK_RBRACKET, error, "缺少 ']'")) return NULL;
+        left = ast_make_list_comp(p->arena,
+            arena_strdup_impl(p->arena, vname->start, vname->length),
+            arr, body, tok_loc(tok));
     } else if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) {
         left = parse_unary(p, error);
     } else if (tok->kind == TOK_LPAREN) {
diff --git a/src/sema/typeck.c b/src/sema/typeck.c
index 34078f4..dc0be43 100644
--- a/src/sema/typeck.c
+++ b/src/sema/typeck.c
@@ -536,6 +536,32 @@ SEMA_HANDLER(analyze_node) // if-expr / block 委托
 
 static AstDispatch sema_dispatch;
 
+// Type-check `[for x in arr: expr]`. The source must be an array; the loop
+// variable is declared with the array's element type in a child scope that
+// is used only for the map expression.
+// NOTE(review): the result's element type is copied from the source array
+// rather than taken from the map expression -- confirm they always agree.
+static void analyze_list_comp(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
+    analyze_expr(node->as.list_comp.array, scope, errors, a);
+    TypeInfo* arr_ti = &node->as.list_comp.array->type;
+    if (arr_ti->kind != TYPE_ARRAY) {
+        error_add(errors, "", node->loc.line, node->loc.col,
+                  "列表推导式需要数组类型, 得到 '%s'", type_name(arr_ti->kind));
+        node->type.kind = TYPE_ERROR; return;
+    }
+    Scope* lc_scope = scope_new(a, scope);
+    TypeKind elem_k = arr_ti->element_type;
+    Symbol* var_sym = scope_insert(lc_scope, a, node->as.list_comp.var_name,
+                                   SYM_VARIABLE, elem_k);
+    if (var_sym) var_sym->struct_type_name = arr_ti->element_struct_name;
+    analyze_expr(node->as.list_comp.map_expr, lc_scope, errors, a);
+    node->type.kind = TYPE_ARRAY;
+    node->type.element_type = arr_ti->element_type;
+    node->type.element_struct_name = arr_ti->element_struct_name;
+    node->type.array_size = arr_ti->array_size;
+}
+SEMA_HANDLER(analyze_list_comp)
+
 void analyze_expr_init(void) {
     sema_dispatch.ctx = NULL; // 由 analyze_expr 每次设置
     // 新增表达式节点: 在此注册 handler, 编译器会警告缺失
@@ -550,6 +576,7 @@ void analyze_expr_init(void) {
     ast_dispatch_set(&sema_dispatch, AST_METHOD_CALL, analyze_method_call_wrap);
     ast_dispatch_set(&sema_dispatch, AST_IF_STMT, analyze_node_wrap);
     ast_dispatch_set(&sema_dispatch, AST_BLOCK, analyze_node_wrap);
+    ast_dispatch_set(&sema_dispatch, AST_LIST_COMP, analyze_list_comp_wrap);
 }
 
 void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
diff --git a/test/programs/39_list_comp.l b/test/programs/39_list_comp.l
new file mode 100644
index 0000000..96466ff
--- /dev/null
+++ b/test/programs/39_list_comp.l
@@ -0,0 +1,10 @@
+fn main() -> i64 {
+    var src: i64[2] = src;
+    src[0] = 10;
+    src[1] = 20;
+    var dst: i64[2] = dst;
+    dst = [for x in src: x * 2];
+    print_i64(dst[0]); // 20
+    print_i64(dst[1]); // 40
+    return 0;
+}