From 9a53d97274a9a7a3d5b5857ac919992ace7d6c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Fri, 5 Jun 2026 00:47:53 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AD=97=E7=AC=A6=E4=B8=B2=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=20+=20=E5=AD=97=E9=9D=A2=E9=87=8F=20+=20print=5Fstr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 TYPE_STR 类型 (i8* 指针) - lexer: 双引号字符串字面量(单行,暂不支持转义序列)+ str 关键字 - parser: TOK_STR_LIT → AST_LITERAL_EXPR(str_val) - sema: print_str 内置函数注册 + 字符串拼接类型检查 - codegen: GlobalStringPtr 生成字符串常量,print_str → printf("%s\n");字符串拼接暂不支持运行时拼接,目前直接返回左操作数 - 新增集成测试 07_hello_str.l 基于 Codex 分析报告 P0 建议。 --- include/l_lang.h | 2 ++ src/ast/ast.c | 7 +++++++ src/ast/ast.h | 3 ++- src/codegen/codegen.c | 16 ++++++++++++++++ src/lexer/lexer.c | 15 ++++++++++++++- src/lexer/token.c | 6 +++--- src/lexer/token.h | 4 ++-- src/parser/parser.c | 14 +++++++++++--- src/sema/sema.c | 23 ++++++++++++++++++++++- test/programs/07_hello_str.l | 5 +++++ 10 files changed, 84 insertions(+), 11 deletions(-) create mode 100644 test/programs/07_hello_str.l diff --git a/include/l_lang.h b/include/l_lang.h index 6cbc52b..988eeb9 100644 --- a/include/l_lang.h +++ b/include/l_lang.h @@ -10,6 +10,7 @@ typedef enum { TYPE_I64, TYPE_F64, TYPE_BOOL, + TYPE_STR, TYPE_VOID, TYPE_UNKNOWN, // 尚未推断 TYPE_ERROR, // 类型错误 @@ -20,6 +21,7 @@ static inline const char* type_name(TypeKind kind) { case TYPE_I64: return "i64"; case TYPE_F64: return "f64"; case TYPE_BOOL: return "bool"; + case TYPE_STR: return "str"; case TYPE_VOID: return "void"; default: return ""; } diff --git a/src/ast/ast.c b/src/ast/ast.c index 5eb6812..ec43f2b 100644 --- a/src/ast/ast.c +++ b/src/ast/ast.c @@ -112,6 +112,13 @@ AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col) { return n; } +AstNode* ast_make_literal_str(void* alloc, const char* val, int line, int col) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_STR; n->as.literal.str_val = val; + 
n->type.kind = TYPE_STR; + return n; +} + AstNode* ast_make_ident(void* alloc, const char* name, int line, int col) { NEW(alloc, AST_IDENT_EXPR); n->as.ident.name = name; diff --git a/src/ast/ast.h b/src/ast/ast.h index 06d463d..9a2ae3d 100644 --- a/src/ast/ast.h +++ b/src/ast/ast.h @@ -71,7 +71,7 @@ struct AstNode { // AST_CALL_EXPR struct { const char* name; struct AstNode** args; size_t arg_count; } call; // AST_LITERAL_EXPR - struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; }; } literal; + struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; const char* str_val; }; } literal; // AST_IDENT_EXPR struct { const char* name; } ident; } as; @@ -95,6 +95,7 @@ AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t cou AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col); AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col); AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col); +AstNode* ast_make_literal_str(void* alloc, const char* val, int line, int col); AstNode* ast_make_ident(void* alloc, const char* name, int line, int col); #endif diff --git a/src/codegen/codegen.c b/src/codegen/codegen.c index 664d417..f5364a9 100644 --- a/src/codegen/codegen.c +++ b/src/codegen/codegen.c @@ -39,6 +39,7 @@ static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) { case TYPE_I64: return LLVMInt64TypeInContext(ctx->context); case TYPE_F64: return LLVMDoubleTypeInContext(ctx->context); case TYPE_BOOL: return LLVMInt1TypeInContext(ctx->context); + case TYPE_STR: return LLVMPointerType(LLVMInt8TypeInContext(ctx->context), 0); default: return LLVMVoidTypeInContext(ctx->context); } } @@ -94,6 +95,9 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { switch (node->kind) { case AST_LITERAL_EXPR: + if (node->type.kind == TYPE_STR) { + return LLVMBuildGlobalStringPtr(ctx->builder, node->as.literal.str_val, "str"); + } return 
to_llvm_const(to_llvm_type(ctx, node->type.kind), node); case AST_IDENT_EXPR: { @@ -119,6 +123,10 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { LLVMValueRef l = codegen_expr(ctx, node->as.binary.left); LLVMValueRef r = codegen_expr(ctx, node->as.binary.right); if (!l || !r) return NULL; + + // 字符串拼接:暂不支持运行时拼接,直接返回左操作数 + if (node->type.kind == TYPE_STR) return l; + bool is_float = (node->type.kind == TYPE_F64); switch (node->as.binary.op) { @@ -193,6 +201,14 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn, (LLVMValueRef[]){selected}, 1, ""); } + if (strcmp(node->as.call.name, "print_str") == 0) { + LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]); + if (!arg) return NULL; + LLVMValueRef fmt = LLVMBuildGlobalStringPtr(ctx->builder, "%s\n", "fmt_str"); + LLVMValueRef printf_args[] = { fmt, arg }; + return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn, + printf_args, 2, ""); + } // === 常规函数调用 === LLVMValueRef fn = find_fn(ctx, node->as.call.name); diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index d5ac78b..d39677c 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -59,7 +59,8 @@ static TokenKind check_keyword(const Token* tok) { KW("if", TOK_IF); KW("else", TOK_ELSE); KW("while", TOK_WHILE); KW("return", TOK_RETURN); KW("i64", TOK_I64); KW("f64", TOK_F64); - KW("bool", TOK_BOOL); KW("void", TOK_VOID); + KW("bool", TOK_BOOL); KW("str", TOK_STR); + KW("void", TOK_VOID); KW("true", TOK_TRUE); KW("false", TOK_FALSE); #undef KW return TOK_IDENT; @@ -90,6 +91,18 @@ Token* lex(Arena* a, const char* source, const char* filename, char c = peek(&l); if (isdigit(c)) { tokens[idx++] = lex_number(&l); } + else if (c == '"') { + advance(&l); // 跳过开头的 " + int start = l.pos; + while (peek(&l) != '"' && peek(&l) != '\0' && peek(&l) != '\n') advance(&l); + int len = l.pos - start; + if (peek(&l) != '"') { + *error = (ErrorInfo){.message="未闭合的字符串", 
.filename=filename, .line=line, .col=col}; + return NULL; + } + advance(&l); // 跳过结尾的 " + tokens[idx++] = make_token(&l, TOK_STR_LIT, start, len); + } else if (isalpha(c) || c == '_') { tokens[idx++] = lex_ident_or_keyword(&l); } else if (c == '+' && peek_next(&l) != '=') { tokens[idx++] = make_token(&l, TOK_PLUS, l.pos, 1); advance(&l); } else if (c == '-' && peek_next(&l) != '>') { tokens[idx++] = make_token(&l, TOK_MINUS, l.pos, 1); advance(&l); } diff --git a/src/lexer/token.c b/src/lexer/token.c index dc59e8f..7e6171f 100644 --- a/src/lexer/token.c +++ b/src/lexer/token.c @@ -7,8 +7,8 @@ static const char* NAMES[] = { [TOK_FN] = "fn", [TOK_LET] = "let", [TOK_MUT] = "mut", [TOK_IF] = "if", [TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_RETURN] = "return", - [TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_VOID] = "void", - [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", + [TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_STR] = "str", [TOK_VOID] = "void", + [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", [TOK_STR_LIT] = "字符串", [TOK_TRUE] = "true", [TOK_FALSE] = "false", [TOK_IDENT] = "标识符", [TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*", @@ -29,7 +29,7 @@ const char* tok_name(TokenKind kind) { } bool tok_is_type(TokenKind kind) { - return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_VOID; + return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_STR || kind == TOK_VOID; } int64_t tok_int_value(const Token* tok) { diff --git a/src/lexer/token.h b/src/lexer/token.h index a74d471..9e63706 100644 --- a/src/lexer/token.h +++ b/src/lexer/token.h @@ -8,9 +8,9 @@ typedef enum { // 关键字 TOK_FN, TOK_LET, TOK_MUT, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_RETURN, // 类型关键字 - TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID, + TOK_I64, TOK_F64, TOK_BOOL, TOK_STR, TOK_VOID, // 字面量 - TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE, + TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE, TOK_STR_LIT, // 标识符 TOK_IDENT, // 
运算符 diff --git a/src/parser/parser.c b/src/parser/parser.c index bab032a..49e8d59 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -89,6 +89,12 @@ static AstNode* parse_literal(Parser* p) { case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), t->line, t->col); case TOK_TRUE: return ast_make_literal_bool(p->arena, true, t->line, t->col); case TOK_FALSE: return ast_make_literal_bool(p->arena, false, t->line, t->col); + case TOK_STR_LIT: { + char* str = arena_alloc_impl(p->arena, t->length + 1); + memcpy(str, t->start, t->length); + str[t->length] = '\0'; + return ast_make_literal_str(p->arena, str, t->line, t->col); + } default: return NULL; } } @@ -131,7 +137,8 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error } else if (tok->kind == TOK_LPAREN) { left = parse_group(p, error); } else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT || - tok->kind == TOK_TRUE || tok->kind == TOK_FALSE) { + tok->kind == TOK_TRUE || tok->kind == TOK_FALSE || + tok->kind == TOK_STR_LIT) { left = parse_literal(p); } else if (tok->kind == TOK_IDENT) { left = parse_ident_or_call(p, error); @@ -163,12 +170,13 @@ static AstNode* parse_expr(Parser* p, ErrorInfo* error) { // === 类型工具 === static bool is_type_token(TokenKind k) { - return k == TOK_I64 || k == TOK_F64 || k == TOK_BOOL || k == TOK_VOID; + return k == TOK_I64 || k == TOK_F64 || k == TOK_BOOL || k == TOK_STR || k == TOK_VOID; } static TypeKind token_to_type(TokenKind k) { switch (k) { case TOK_I64: return TYPE_I64; case TOK_F64: return TYPE_F64; - case TOK_BOOL: return TYPE_BOOL; default: return TYPE_VOID; } + case TOK_BOOL: return TYPE_BOOL; case TOK_STR: return TYPE_STR; + default: return TYPE_VOID; } } // === 语句解析 === diff --git a/src/sema/sema.c b/src/sema/sema.c index 2cbd3d8..764436b 100644 --- a/src/sema/sema.c +++ b/src/sema/sema.c @@ -68,7 +68,26 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* if (l == 
TYPE_ERROR || r == TYPE_ERROR) { node->type.kind = TYPE_ERROR; break; } switch (node->as.binary.op) { - case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD: + case OP_ADD: + if (l == TYPE_STR || r == TYPE_STR) { + // 字符串拼接:两边都必须是 str 类型 + if (l != TYPE_STR || r != TYPE_STR) { + error_add(errors, "", node->line, node->col, + "字符串拼接需要两边都是 str 类型,得到 '%s' + '%s'", + type_name(l), type_name(r)); + node->type.kind = TYPE_ERROR; + } else { + node->type.kind = TYPE_STR; + } + } else if (!is_numeric(l) || !is_numeric(r)) { + error_add(errors, "", node->line, node->col, + "算术运算需要数值类型"); + node->type.kind = TYPE_ERROR; + } else { + node->type.kind = promote(l, r); + } + break; + case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD: if (!is_numeric(l) || !is_numeric(r)) { error_add(errors, "", node->line, node->col, "算术运算需要数值类型"); @@ -294,6 +313,8 @@ void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) { scope_insert_function(global, arena, "print_f64", TYPE_VOID, params_f64, 1); TypeKind params_bool[] = {TYPE_BOOL}; scope_insert_function(global, arena, "print_bool", TYPE_VOID, params_bool, 1); + TypeKind params_str[] = {TYPE_STR}; + scope_insert_function(global, arena, "print_str", TYPE_VOID, params_str, 1); analyze_node(ast, global, errors, arena); } diff --git a/test/programs/07_hello_str.l b/test/programs/07_hello_str.l new file mode 100644 index 0000000..498bca4 --- /dev/null +++ b/test/programs/07_hello_str.l @@ -0,0 +1,5 @@ +fn main() -> i64 { + let msg: str = "Hello, L Language!"; + print_str(msg); + return 0; +}