From a7fca5964e9d8149e668cba568276887e015941c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Fri, 5 Jun 2026 13:12:00 +0800 Subject: [PATCH] =?UTF-8?q?fix:=205=E9=A1=B9=E7=AB=8B=E5=8D=B3=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=20+=202=E9=A1=B9=E5=B0=BD=E5=BF=AB=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 立即: - lexer: token数组容量改为src_len+16 + idx越界防御 - symbol: 4个函数arena_alloc加NULL检查 - codegen: verify_err fallback用arena_strdup替代静态字符串 - codegen: cleanup_list从固定64改为arena动态扩容 - lexer: 标识符/字符串字面量65535字符上限 尽快: - to_llvm_type: TYPE_STRUCT/TYPE_UNKNOWN/TYPE_ERROR显式case - LLVMGetValueType2不存在(LLVM 22仍用旧名), 保留GlobalGetValueType --- src/codegen/codegen.c | 20 ++++++++++++++++---- src/lexer/lexer.c | 16 ++++++++++++---- src/sema/symbol.c | 4 ++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/codegen/codegen.c b/src/codegen/codegen.c index c3bbf0c..4ff32c6 100644 --- a/src/codegen/codegen.c +++ b/src/codegen/codegen.c @@ -50,8 +50,9 @@ typedef struct { LLVMValueRef strlen_fn; LLVMValueRef memcpy_fn; // 自动内存管理: 追踪需要 free 的 str alloca - LLVMValueRef cleanup_list[64]; + LLVMValueRef* cleanup_list; size_t cleanup_count; + size_t cleanup_cap; } CgCtx; // === 类型映射(需要 Context)=== @@ -61,6 +62,9 @@ static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) { case TYPE_F64: return LLVMDoubleTypeInContext(ctx->context); case TYPE_BOOL: return LLVMInt1TypeInContext(ctx->context); case TYPE_STR: return LLVMPointerType(LLVMInt8TypeInContext(ctx->context), 0); + case TYPE_STRUCT: + case TYPE_UNKNOWN: + case TYPE_ERROR: default: return LLVMVoidTypeInContext(ctx->context); } } @@ -350,9 +354,16 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { // === 自动内存管理: 作用域退出时释放 str 堆分配 === static void cleanup_add(CgCtx* ctx, LLVMValueRef alloca) { - if (ctx->cleanup_count < 64) { - ctx->cleanup_list[ctx->cleanup_count++] = alloca; + if (ctx->cleanup_count >= 
ctx->cleanup_cap) { + size_t new_cap = ctx->cleanup_cap ? ctx->cleanup_cap * 2 : 16; + LLVMValueRef* new_list = arena_alloc(ctx->arena, new_cap * sizeof(LLVMValueRef)); + if (!new_list) return; // NOTE(review): on allocation failure the value is silently left untracked + if (ctx->cleanup_list) + memcpy(new_list, ctx->cleanup_list, ctx->cleanup_count * sizeof(LLVMValueRef)); + ctx->cleanup_list = new_list; + ctx->cleanup_cap = new_cap; } + ctx->cleanup_list[ctx->cleanup_count++] = alloca; } // 释放从 mark 位置开始的所有 str 变量 @@ -645,7 +656,8 @@ LLVMModuleRef codegen_module(AstNode* ast, Arena* codegen_arena, // 验证模块(使用 ReturnStatus 以获取完整错误消息) char* verify_err = NULL; if (LLVMVerifyModule(ctx.module, LLVMReturnStatusAction, &verify_err)) { - *error_msg = verify_err ? verify_err : "模块验证失败(错误消息为 NULL)"; + *error_msg = verify_err ? verify_err // NOTE(review): verify_err is LLVM-owned (LLVMDisposeMessage) while the fallback is arena-owned; confirm who frees *error_msg + : arena_strdup(ctx.arena, "LLVM 模块验证失败"); LLVMDisposeBuilder(ctx.builder); *out_context = ctx.context; return NULL; diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index dc6b321..d903713 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -70,7 +70,10 @@ static TokenKind check_keyword(const Token* tok) { static Token lex_ident_or_keyword(Lexer* l) { int start = l->pos; - while (isalnum(peek(l)) || peek(l) == '_') advance(l); + while (isalnum((unsigned char)peek(l)) || peek(l) == '_') { + if (l->pos - start >= 65535) break; // identifier length cap (65535 chars); NOTE(review): the remainder is presumably lexed as a separate token, consider reporting an error + advance(l); + } Token t = make_token(l, TOK_IDENT, start, l->pos - start); t.kind = check_keyword(&t); return t; @@ -79,8 +82,9 @@ static Token lex_ident_or_keyword(Lexer* l) { Token* lex(Arena* a, const char* source, const char* filename, size_t* count, ErrorInfo* error) { Lexer l = {.src = source, .filename = filename, .pos = 0, .line = 1, .col = 1}; - // 预估容量:源码长度的 1/3 - size_t cap = strlen(source) / 3 + 16; + // Capacity bound: in the worst case every character is its own token (e.g. "(){}+-"), so src_len slots plus slack suffice + size_t src_len = strlen(source); + size_t cap = src_len + 16; Token* tokens = arena_alloc(a, cap * sizeof(Token)); if (!tokens) { *count = 0; return NULL; } size_t idx = 0; @@ -88,6 +92,7 @@ Token* lex(Arena* a, const char* 
source, const char* filename, while (peek(&l) != '\0') { skip_whitespace(&l); if (peek(&l) == '\0') break; + if (idx >= cap) { *count = 0; return NULL; } // defensive bound check; returns without filling *error int line = l.line, col = l.col; char c = peek(&l); @@ -96,7 +101,10 @@ Token* lex(Arena* a, const char* source, const char* filename, else if (c == '"') { advance(&l); // 跳过开头的 " int start = l.pos; - while (peek(&l) != '"' && peek(&l) != '\0' && peek(&l) != '\n') advance(&l); + while (peek(&l) != '"' && peek(&l) != '\0' && peek(&l) != '\n') { + if (l.pos - start >= 65535) break; // string literal length cap (65535 chars); an over-long literal then hits the unclosed-string error below + advance(&l); + } int len = l.pos - start; if (peek(&l) != '"') { *error = (ErrorInfo){.message="未闭合的字符串", .filename=filename, .line=line, .col=col}; diff --git a/src/sema/symbol.c b/src/sema/symbol.c index ecdfba1..7bccd92 100644 --- a/src/sema/symbol.c +++ b/src/sema/symbol.c @@ -4,6 +4,7 @@ Scope* scope_new(void* alloc, Scope* parent) { Scope* s = (Scope*)arena_alloc_impl(alloc, sizeof(Scope)); + if (!s) return NULL; s->head = NULL; s->parent = parent; return s; @@ -26,6 +27,7 @@ Symbol* scope_insert(Scope* scope, void* alloc, const char* name, } } Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol)); + if (!sym) return NULL; sym->name = name; sym->kind = kind; sym->type = type; sym->is_mut = false; sym->return_type = TYPE_VOID; sym->param_types = NULL; sym->param_count = 0; @@ -46,6 +48,7 @@ Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name, } } Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol)); + if (!sym) return NULL; sym->name = name; sym->kind = SYM_FUNCTION; sym->type = TYPE_VOID; sym->return_type = ret; sym->param_types = pt; sym->param_count = pc; sym->struct_field_names = NULL; @@ -66,6 +69,7 @@ Symbol* scope_insert_struct(Scope* scope, void* alloc, const char* name, } } Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol)); + if (!sym) return NULL; sym->name = name; sym->kind = SYM_STRUCT; sym->type = TYPE_STRUCT; sym->is_mut = false; sym->return_type = 
TYPE_VOID; sym->param_types = NULL; sym->param_count = 0;