feat: 字符串类型 + 字面量 + print_str

- 新增 TYPE_STR 类型 (i8* 指针)
- lexer: 双引号字符串字面量 + str 关键字
- parser: TOK_STR_LIT → AST_LITERAL_EXPR(str_val)
- sema: print_str 内置函数注册 + 字符串拼接类型检查
- codegen: GlobalStringPtr 生成字符串常量,print_str → printf("%s")
- 新增集成测试 07_hello_str.l

基于 Codex 分析报告 P0 建议。
This commit is contained in:
2026-06-05 00:47:53 +08:00
parent bd02a4989e
commit 9a53d97274
10 changed files with 84 additions and 11 deletions
+7
View File
@@ -112,6 +112,13 @@ AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col) {
return n;
}
// Create a string-literal expression node (AST_LITERAL_EXPR).
//
// Both the literal tag and the node's expression type are set to TYPE_STR.
// `val` is stored by pointer, not copied, so it must stay valid for as long
// as the node is in use.
// `line` and `col` are not referenced directly in this body; presumably the
// NEW macro consumes them and declares `n` -- confirm against its definition.
AstNode* ast_make_literal_str(void* alloc, const char* val, int line, int col) {
    NEW(alloc, AST_LITERAL_EXPR);
    n->type.kind = TYPE_STR;
    n->as.literal.str_val = val;
    n->as.literal.lit_type = TYPE_STR;
    return n;
}
AstNode* ast_make_ident(void* alloc, const char* name, int line, int col) {
NEW(alloc, AST_IDENT_EXPR);
n->as.ident.name = name;
+2 -1
View File
@@ -71,7 +71,7 @@ struct AstNode {
// AST_CALL_EXPR
struct { const char* name; struct AstNode** args; size_t arg_count; } call;
// AST_LITERAL_EXPR
struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; }; } literal;
struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; const char* str_val; }; } literal;
// AST_IDENT_EXPR
struct { const char* name; } ident;
} as;
@@ -95,6 +95,7 @@ AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t cou
AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col);
AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col);
AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col);
AstNode* ast_make_literal_str(void* alloc, const char* val, int line, int col);
AstNode* ast_make_ident(void* alloc, const char* name, int line, int col);
#endif
+16
View File
@@ -39,6 +39,7 @@ static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) {
case TYPE_I64: return LLVMInt64TypeInContext(ctx->context);
case TYPE_F64: return LLVMDoubleTypeInContext(ctx->context);
case TYPE_BOOL: return LLVMInt1TypeInContext(ctx->context);
case TYPE_STR: return LLVMPointerType(LLVMInt8TypeInContext(ctx->context), 0);
default: return LLVMVoidTypeInContext(ctx->context);
}
}
@@ -94,6 +95,9 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
switch (node->kind) {
case AST_LITERAL_EXPR:
if (node->type.kind == TYPE_STR) {
return LLVMBuildGlobalStringPtr(ctx->builder, node->as.literal.str_val, "str");
}
return to_llvm_const(to_llvm_type(ctx, node->type.kind), node);
case AST_IDENT_EXPR: {
@@ -119,6 +123,10 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
LLVMValueRef l = codegen_expr(ctx, node->as.binary.left);
LLVMValueRef r = codegen_expr(ctx, node->as.binary.right);
if (!l || !r) return NULL;
// String concatenation: runtime concatenation is not supported yet, so the
// left operand is returned unchanged.
// NOTE(review): the right operand has already been generated above and is
// dropped here, so `a + b` on strings evaluates to just `a` -- confirm this
// placeholder behavior is acceptable.
if (node->type.kind == TYPE_STR) return l;
bool is_float = (node->type.kind == TYPE_F64);
switch (node->as.binary.op) {
@@ -193,6 +201,14 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn,
(LLVMValueRef[]){selected}, 1, "");
}
// Built-in print_str: lowered to a printf call with the format "%s\n".
if (strcmp(node->as.call.name, "print_str") == 0) {
// Only args[0] is read; arg_count is not checked in this branch.
LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]);
if (!arg) return NULL;
// A "%s\n" format-string global is requested each time this branch runs.
LLVMValueRef fmt = LLVMBuildGlobalStringPtr(ctx->builder, "%s\n", "fmt_str");
LLVMValueRef printf_args[] = { fmt, arg };
return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn,
printf_args, 2, "");
}
// === 常规函数调用 ===
LLVMValueRef fn = find_fn(ctx, node->as.call.name);
+14 -1
View File
@@ -59,7 +59,8 @@ static TokenKind check_keyword(const Token* tok) {
KW("if", TOK_IF); KW("else", TOK_ELSE);
KW("while", TOK_WHILE); KW("return", TOK_RETURN);
KW("i64", TOK_I64); KW("f64", TOK_F64);
KW("bool", TOK_BOOL); KW("void", TOK_VOID);
KW("bool", TOK_BOOL); KW("str", TOK_STR);
KW("void", TOK_VOID);
KW("true", TOK_TRUE); KW("false", TOK_FALSE);
#undef KW
return TOK_IDENT;
@@ -90,6 +91,18 @@ Token* lex(Arena* a, const char* source, const char* filename,
char c = peek(&l);
if (isdigit(c)) { tokens[idx++] = lex_number(&l); }
// String literal: the text between a pair of double quotes on a single line.
// No escape sequences are recognized; the first '"' always ends the literal.
else if (c == '"') {
advance(&l); // skip the opening "
int start = l.pos;
// Scan until the closing quote, a NUL byte, or a newline.
while (peek(&l) != '"' && peek(&l) != '\0' && peek(&l) != '\n') advance(&l);
int len = l.pos - start;
if (peek(&l) != '"') {
// Unterminated string: record the error and abort lexing.
*error = (ErrorInfo){.message="未闭合的字符串", .filename=filename, .line=line, .col=col};
return NULL;
}
advance(&l); // skip the closing "
// start/len cover the literal's contents without the surrounding quotes.
// NOTE(review): make_token is called after the lexer has moved past the
// closing quote, whereas the single-character operator branches call it
// before advance(). If make_token takes line/col from the current lexer
// state, this token's position would point past the literal -- confirm.
tokens[idx++] = make_token(&l, TOK_STR_LIT, start, len);
}
else if (isalpha(c) || c == '_') { tokens[idx++] = lex_ident_or_keyword(&l); }
else if (c == '+' && peek_next(&l) != '=') { tokens[idx++] = make_token(&l, TOK_PLUS, l.pos, 1); advance(&l); }
else if (c == '-' && peek_next(&l) != '>') { tokens[idx++] = make_token(&l, TOK_MINUS, l.pos, 1); advance(&l); }
+3 -3
View File
@@ -7,8 +7,8 @@
static const char* NAMES[] = {
[TOK_FN] = "fn", [TOK_LET] = "let", [TOK_MUT] = "mut", [TOK_IF] = "if",
[TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_RETURN] = "return",
[TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_VOID] = "void",
[TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数",
[TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_STR] = "str", [TOK_VOID] = "void",
[TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", [TOK_STR_LIT] = "字符串",
[TOK_TRUE] = "true", [TOK_FALSE] = "false",
[TOK_IDENT] = "标识符",
[TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*",
@@ -29,7 +29,7 @@ const char* tok_name(TokenKind kind) {
}
// Returns true when `kind` is one of the type keywords:
// i64, f64, bool, str or void.
bool tok_is_type(TokenKind kind) {
    // Single return: the older check without TOK_STR has been dropped so the
    // str keyword is actually recognized as a type.
    return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL ||
           kind == TOK_STR || kind == TOK_VOID;
}
int64_t tok_int_value(const Token* tok) {
+2 -2
View File
@@ -8,9 +8,9 @@ typedef enum {
// 关键字
TOK_FN, TOK_LET, TOK_MUT, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_RETURN,
// 类型关键字
TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID,
TOK_I64, TOK_F64, TOK_BOOL, TOK_STR, TOK_VOID,
// 字面量
TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE,
TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE, TOK_STR_LIT,
// 标识符
TOK_IDENT,
// 运算符
+11 -3
View File
@@ -89,6 +89,12 @@ static AstNode* parse_literal(Parser* p) {
case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), t->line, t->col);
case TOK_TRUE: return ast_make_literal_bool(p->arena, true, t->line, t->col);
case TOK_FALSE: return ast_make_literal_bool(p->arena, false, t->line, t->col);
case TOK_STR_LIT: {
    // Copy t->length bytes of token text into arena memory and
    // NUL-terminate the copy, so the AST owns a proper C string.
    char* str = arena_alloc_impl(p->arena, t->length + 1);
    // Never hand a NULL destination to memcpy; report failure the same way
    // the default branch of this switch does.
    if (!str) return NULL;
    memcpy(str, t->start, t->length);
    str[t->length] = '\0';
    return ast_make_literal_str(p->arena, str, t->line, t->col);
}
default: return NULL;
}
}
@@ -131,7 +137,8 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error
} else if (tok->kind == TOK_LPAREN) {
left = parse_group(p, error);
} else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT ||
tok->kind == TOK_TRUE || tok->kind == TOK_FALSE) {
tok->kind == TOK_TRUE || tok->kind == TOK_FALSE ||
tok->kind == TOK_STR_LIT) {
left = parse_literal(p);
} else if (tok->kind == TOK_IDENT) {
left = parse_ident_or_call(p, error);
@@ -163,12 +170,13 @@ static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
// === 类型工具 ===
// Returns true when `k` is a type keyword: i64, f64, bool, str or void.
// NOTE(review): this performs the same check as tok_is_type; consider
// calling that helper here if it is visible from this file.
static bool is_type_token(TokenKind k) {
    // Single return: the older check without TOK_STR has been dropped so the
    // str keyword is actually accepted as a type.
    return k == TOK_I64 || k == TOK_F64 || k == TOK_BOOL ||
           k == TOK_STR || k == TOK_VOID;
}
// Maps a type-keyword token to its TypeKind.
// Any token other than i64/f64/bool/str (including `void` itself) yields
// TYPE_VOID.
static TypeKind token_to_type(TokenKind k) {
    switch (k) {
    case TOK_I64:  return TYPE_I64;
    case TOK_F64:  return TYPE_F64;
    case TOK_BOOL: return TYPE_BOOL;
    case TOK_STR:  return TYPE_STR;
    default:       return TYPE_VOID;
    }
}
// === 语句解析 ===
+22 -1
View File
@@ -68,7 +68,26 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena*
if (l == TYPE_ERROR || r == TYPE_ERROR) { node->type.kind = TYPE_ERROR; break; }
switch (node->as.binary.op) {
case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
// OP_ADD is checked separately from the other arithmetic operators because
// it also serves as string concatenation.
case OP_ADD:
if (l == TYPE_STR || r == TYPE_STR) {
// String concatenation: both operands must be of type str.
if (l != TYPE_STR || r != TYPE_STR) {
error_add(errors, "<sema>", node->line, node->col,
"字符串拼接需要两边都是 str 类型,得到 '%s' + '%s'",
type_name(l), type_name(r));
node->type.kind = TYPE_ERROR;
} else {
node->type.kind = TYPE_STR;
}
} else if (!is_numeric(l) || !is_numeric(r)) {
// Neither operand is str: apply the usual numeric-operand rule.
error_add(errors, "<sema>", node->line, node->col,
"算术运算需要数值类型");
node->type.kind = TYPE_ERROR;
} else {
// Both operands are numeric: the result type comes from promote().
node->type.kind = promote(l, r);
}
break;
case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
if (!is_numeric(l) || !is_numeric(r)) {
error_add(errors, "<sema>", node->line, node->col,
"算术运算需要数值类型");
@@ -294,6 +313,8 @@ void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) {
scope_insert_function(global, arena, "print_f64", TYPE_VOID, params_f64, 1);
TypeKind params_bool[] = {TYPE_BOOL};
scope_insert_function(global, arena, "print_bool", TYPE_VOID, params_bool, 1);
TypeKind params_str[] = {TYPE_STR};
scope_insert_function(global, arena, "print_str", TYPE_VOID, params_str, 1);
analyze_node(ast, global, errors, arena);
}