From 18172ca72467b638fdd43762879cc8f9b605ffc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Fri, 5 Jun 2026 20:47:44 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=20i32=20/=20u64=20/?= =?UTF-8?q?=20char=20=E7=B1=BB=E5=9E=8B=20+=20=E5=AD=97=E7=AC=A6=E5=AD=97?= =?UTF-8?q?=E9=9D=A2=E9=87=8F=20"'a'"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +++ README.md | 5 ++++- include/l_lang.h | 6 ++++++ src/ast/ast.c | 7 ++++++ src/ast/ast.h | 1 + src/codegen/codegen.c | 26 ++++++++++++++++++++++ src/lexer/lexer.c | 20 ++++++++++++++--- src/lexer/token.c | 10 ++++++--- src/lexer/token.h | 4 ++-- src/parser/parser.c | 29 ++++++++++++++++++++++--- src/sema/sema.c | 42 ++++++++++++++++++++++++++++++++---- test/programs/25_new_types.l | 9 ++++++++ 12 files changed, 146 insertions(+), 16 deletions(-) create mode 100644 test/programs/25_new_types.l diff --git a/.gitignore b/.gitignore index cefb27b..57bdca7 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ Thumbs.db *.zip *.tar.gz *.7z + +# 环境变量文件 +.env diff --git a/README.md b/README.md index d673d8c..1a3a1df 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,12 @@ graph TB | 类型 | 语法 | 示例 | |------|------|------| +| 32 位有符号整数 | `i32` | `100` | | 64 位有符号整数 | `i64` | `42`, `-7` | +| 64 位无符号整数 | `u64` | `999` | | 64 位浮点数 | `f64` | `3.14` | | 布尔值 | `bool` | `true`, `false` | +| 字符 | `char` | `'A'` | | 字符串 | `str` | `"hello"` | | 结构体 | `struct` | `Point { x: i64, y: i64 }` | | 枚举 | `enum` | `Color { Red, Green, Blue }` | @@ -92,7 +95,7 @@ graph TB | 类型别名 | `type` | `type Meters = i64;` | - `let` 不可变 + `var` 可变,类型推断 -- `i64` → `f64` 自动提升 +- `i32` → `i64` → `f64` 自动加宽,`char` 可隐式转为整数 ### 控制流 diff --git a/include/l_lang.h b/include/l_lang.h index 2625c9d..0e5b6ea 100644 --- a/include/l_lang.h +++ b/include/l_lang.h @@ -7,9 +7,12 @@ // === 类型系统 === typedef enum { + TYPE_I32, TYPE_I64, + TYPE_U64, TYPE_F64, TYPE_BOOL, + TYPE_CHAR, TYPE_STR, TYPE_VOID, TYPE_STRUCT, // 结构体类型 @@ -21,9 +24,12 @@ typedef enum { static inline const char* type_name(TypeKind kind) { switch (kind) { + case TYPE_I32: return "i32"; case TYPE_I64: return "i64"; + case TYPE_U64: return "u64"; case TYPE_F64: return "f64"; case TYPE_BOOL: return "bool"; + case TYPE_CHAR: return "char"; case TYPE_STR: return "str"; case TYPE_VOID: return "void"; case TYPE_STRUCT: return "struct"; diff --git a/src/ast/ast.c b/src/ast/ast.c index b63c1a0..5ca1d32 100644 --- a/src/ast/ast.c +++ b/src/ast/ast.c @@ -137,6 +137,13 @@ AstNode* ast_make_literal_bool(void* alloc, bool val, SourceLoc loc) { return n; } +AstNode* ast_make_literal_char(void* alloc, int val, SourceLoc loc) { + NEW(alloc, AST_LITERAL_EXPR); + n->as.literal.lit_type = TYPE_CHAR; n->as.literal.i64_val = val; + n->type.kind = TYPE_CHAR; + return n; +} + AstNode* ast_make_literal_str(void* alloc, const char* val, SourceLoc loc) { NEW(alloc, AST_LITERAL_EXPR); n->as.literal.lit_type = TYPE_STR; n->as.literal.str_val = val; diff --git a/src/ast/ast.h b/src/ast/ast.h index 863d451..cba368e 100644 --- a/src/ast/ast.h +++ b/src/ast/ast.h @@ -142,6 +142,7 @@ AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t cou AstNode* ast_make_literal_i64(void* alloc, int64_t val, SourceLoc loc); AstNode* ast_make_literal_f64(void* alloc, double val, SourceLoc loc); AstNode* ast_make_literal_bool(void* alloc, bool val, SourceLoc loc); +AstNode* ast_make_literal_char(void* alloc, int val, SourceLoc loc); AstNode* ast_make_literal_str(void* alloc, const char* val, SourceLoc loc); AstNode* ast_make_ident(void* alloc, const char* name, SourceLoc loc); AstNode* ast_make_struct_decl(void* alloc, const char* name, AstNode** fields, size_t count, SourceLoc loc); diff --git a/src/codegen/codegen.c b/src/codegen/codegen.c index b4919be..0b5295f 100644 --- a/src/codegen/codegen.c +++ b/src/codegen/codegen.c @@ -59,9 +59,12 @@ typedef struct { // === 类型映射(需要 Context)=== static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) { switch (kind) { + case TYPE_I32: return LLVMInt32TypeInContext(ctx->context); case TYPE_I64: return LLVMInt64TypeInContext(ctx->context); + case TYPE_U64: return LLVMInt64TypeInContext(ctx->context); case TYPE_F64: return LLVMDoubleTypeInContext(ctx->context); case TYPE_BOOL: return LLVMInt1TypeInContext(ctx->context); + case TYPE_CHAR: return LLVMInt8TypeInContext(ctx->context); case TYPE_STR: return LLVMPointerType(LLVMInt8TypeInContext(ctx->context), 0); case TYPE_STRUCT: case TYPE_ENUM: return LLVMInt64TypeInContext(ctx->context); @@ -73,7 +76,10 @@ static LLVMTypeRef to_llvm_type(CgCtx* ctx, TypeKind kind) { static LLVMValueRef to_llvm_const(LLVMTypeRef ty, AstNode* lit) { switch (lit->as.literal.lit_type) { + case TYPE_I32: case TYPE_I64: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, true); + case TYPE_U64: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, false); + case TYPE_CHAR: return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, false); case TYPE_F64: return LLVMConstReal(ty, lit->as.literal.f64_val); case TYPE_BOOL: return LLVMConstInt(ty, lit->as.literal.bool_val ? 1 : 0, false); default: return NULL; @@ -127,6 +133,18 @@ static LLVMTypeRef find_struct_type(CgCtx* ctx, const char* name) { return NULL; } +// 将整数值强制转换到目标 LLVM 类型(sext/zext/trunc) +static LLVMValueRef coerce_int(CgCtx* ctx, LLVMValueRef val, + LLVMTypeRef from_ty, LLVMTypeRef to_ty) { + if (from_ty == to_ty) return val; + int from_w = LLVMGetIntTypeWidth(from_ty); + int to_w = LLVMGetIntTypeWidth(to_ty); + if (from_w < to_w) + return LLVMBuildSExt(ctx->builder, val, to_ty, "sext"); + else + return LLVMBuildTrunc(ctx->builder, val, to_ty, "trunc"); +} + // 从 TypeInfo 生成 LLVM 类型(支持数组、结构体等复合类型) static LLVMTypeRef type_info_to_llvm(CgCtx* ctx, const TypeInfo* ti) { switch (ti->kind) { @@ -271,6 +289,8 @@ static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) { if (strcmp(node->as.call.name, "print_i64") == 0) { LLVMValueRef arg = codegen_expr(ctx, node->as.call.args[0]); if (!arg) return NULL; + LLVMTypeRef i64_ty = LLVMInt64TypeInContext(ctx->context); + arg = coerce_int(ctx, arg, LLVMTypeOf(arg), i64_ty); LLVMValueRef fmt = LLVMBuildGlobalStringPtr(ctx->builder, "%lld\n", "fmt_i64"); LLVMValueRef printf_args[] = { fmt, arg }; return LLVMBuildCall2(ctx->builder, ctx->printf_ty, ctx->printf_fn, @@ -492,6 +512,12 @@ static void codegen_stmt(CgCtx* ctx, AstNode* node) { // 尝试生成 init 值;数组类型可能 init 失败 (自引用占位符) LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init); if (init_val) { + // 若 init LLVM 类型与 alloca 类型不同,强制转换(如 i64→i32) + LLVMTypeRef init_ty = LLVMTypeOf(init_val); + if (init_ty != var_type && LLVMGetTypeKind(init_ty) == LLVMIntegerTypeKind + && LLVMGetTypeKind(var_type) == LLVMIntegerTypeKind) { + init_val = coerce_int(ctx, init_val, init_ty, var_type); + } LLVMBuildStore(ctx->builder, init_val, alloca); } else if (node->type.kind == TYPE_ARRAY) { // 数组声明: init 失败是预期的 (自引用), 存储零初始化 diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 080e4d5..08e9cb3 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -60,9 +60,10 @@ static TokenKind check_keyword(const Token* tok) { KW("while", TOK_WHILE); KW("for", TOK_FOR); KW("in", TOK_IN); KW("to", TOK_TO); KW("return", TOK_RETURN); - KW("i64", TOK_I64); KW("f64", TOK_F64); - KW("bool", TOK_BOOL); KW("str", TOK_STR); - KW("void", TOK_VOID); + KW("i32", TOK_I32); KW("i64", TOK_I64); + KW("u64", TOK_U64); KW("f64", TOK_F64); + KW("bool", TOK_BOOL); KW("char", TOK_CHAR); + KW("str", TOK_STR); KW("void", TOK_VOID); KW("struct", TOK_STRUCT); KW("type", TOK_TYPE); KW("enum", TOK_ENUM); KW("extend", TOK_EXTEND); KW("match", TOK_MATCH); KW("_", TOK_UNDERSCORE); @@ -101,6 +102,19 @@ Token* lex(Arena* a, const char* source, const char* filename, char c = peek(&l); if (isdigit(c)) { tokens[idx++] = lex_number(&l); } + else if (c == '\'') { + advance(&l); // 跳过开头的 ' + int char_start = l.pos; + if (peek(&l) == '\\') advance(&l); // 转义字符: \n \t \\ \' + advance(&l); // 跳过字符内容 + if (peek(&l) != '\'') { + *error = (ErrorInfo){.message="未闭合的字符字面量", .filename=filename, .line=line, .col=col}; + return NULL; + } + int char_len = l.pos - char_start; + advance(&l); // 跳过结尾的 ' + tokens[idx++] = make_token(&l, TOK_CHAR_LIT, char_start, char_len); + } else if (c == '"') { advance(&l); // 跳过开头的 " int start = l.pos; diff --git a/src/lexer/token.c b/src/lexer/token.c index 2176c0d..391ad43 100644 --- a/src/lexer/token.c +++ b/src/lexer/token.c @@ -9,8 +9,10 @@ static const char* NAMES[] = { [TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_FOR] = "for", [TOK_IN] = "in", [TOK_RETURN] = "return", [TOK_STRUCT] = "struct", [TOK_TYPE] = "type", [TOK_ENUM] = "enum", [TOK_EXTEND] = "extend", [TOK_MATCH] = "match", - [TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_STR] = "str", [TOK_VOID] = "void", - [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", [TOK_STR_LIT] = "字符串", + [TOK_I32] = "i32", [TOK_I64] = "i64", [TOK_U64] = "u64", [TOK_F64] = "f64", + [TOK_BOOL] = "bool", [TOK_CHAR] = "char", [TOK_STR] = "str", [TOK_VOID] = "void", + [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", + [TOK_CHAR_LIT] = "字符", [TOK_STR_LIT] = "字符串", [TOK_TRUE] = "true", [TOK_FALSE] = "false", [TOK_IDENT] = "标识符", [TOK_UNDERSCORE] = "_", [TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*", @@ -34,7 +36,9 @@ const char* tok_name(TokenKind kind) { } bool tok_is_type(TokenKind kind) { - return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_STR || kind == TOK_VOID; + return kind == TOK_I32 || kind == TOK_I64 || kind == TOK_U64 + || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_CHAR + || kind == TOK_STR || kind == TOK_VOID; } int64_t tok_int_value(const Token* tok) { diff --git a/src/lexer/token.h b/src/lexer/token.h index 55cf139..39cb41a 100644 --- a/src/lexer/token.h +++ b/src/lexer/token.h @@ -9,9 +9,9 @@ typedef enum { TOK_FN, TOK_LET, TOK_VAR, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_FOR, TOK_IN, TOK_RETURN, TOK_STRUCT, TOK_TYPE, TOK_ENUM, TOK_EXTEND, TOK_MATCH, // 类型关键字 - TOK_I64, TOK_F64, TOK_BOOL, TOK_STR, TOK_VOID, + TOK_I32, TOK_I64, TOK_U64, TOK_F64, TOK_BOOL, TOK_CHAR, TOK_STR, TOK_VOID, // 字面量 - TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE, TOK_STR_LIT, + TOK_INT_LIT, TOK_FLOAT_LIT, TOK_CHAR_LIT, TOK_TRUE, TOK_FALSE, TOK_STR_LIT, // 标识符 TOK_IDENT, TOK_UNDERSCORE, // 运算符 diff --git a/src/parser/parser.c b/src/parser/parser.c index 9acec1f..065ee21 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -94,6 +94,21 @@ static AstNode* parse_literal(Parser* p) { switch (t->kind) { case TOK_INT_LIT: return ast_make_literal_i64(p->arena, tok_int_value(t), tok_loc(t)); case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), tok_loc(t)); + case TOK_CHAR_LIT: { + int64_t val = 0; + if (t->length >= 2 && t->start[0] == '\\') { + switch (t->start[1]) { + case 'n': val = '\n'; break; + case 't': val = '\t'; break; + case '\\': val = '\\'; break; + case '\'': val = '\''; break; + default: val = t->start[1]; break; + } + } else { + val = (unsigned char)t->start[0]; + } + return ast_make_literal_char(p->arena, (int)val, tok_loc(t)); + } case TOK_TRUE: return ast_make_literal_bool(p->arena, true, tok_loc(t)); case TOK_FALSE: return ast_make_literal_bool(p->arena, false, tok_loc(t)); case TOK_STR_LIT: { @@ -204,6 +219,7 @@ static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error } else if (tok->kind == TOK_LPAREN) { left = parse_group(p, error); } else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT || + tok->kind == TOK_CHAR_LIT || tok->kind == TOK_TRUE || tok->kind == TOK_FALSE || tok->kind == TOK_STR_LIT) { left = parse_literal(p); @@ -276,9 +292,16 @@ static AstNode* parse_expr(Parser* p, ErrorInfo* error) { // === 类型工具 === static TypeKind token_to_type(TokenKind k) { - switch (k) { case TOK_I64: return TYPE_I64; case TOK_F64: return TYPE_F64; - case TOK_BOOL: return TYPE_BOOL; case TOK_STR: return TYPE_STR; - default: return TYPE_VOID; } + switch (k) { + case TOK_I32: return TYPE_I32; + case TOK_I64: return TYPE_I64; + case TOK_U64: return TYPE_U64; + case TOK_F64: return TYPE_F64; + case TOK_BOOL: return TYPE_BOOL; + case TOK_CHAR: return TYPE_CHAR; + case TOK_STR: return TYPE_STR; + default: return TYPE_VOID; + } } // === 类型表达式解析(内置类型/结构体名/数组类型)=== diff --git a/src/sema/sema.c b/src/sema/sema.c index 071c20a..6a73296 100644 --- a/src/sema/sema.c +++ b/src/sema/sema.c @@ -10,13 +10,38 @@ static TypeKind promote(TypeKind a, TypeKind b) { // 枚举在算术运算中视为 i64 if (a == TYPE_ENUM) a = TYPE_I64; if (b == TYPE_ENUM) b = TYPE_I64; + // char 在算术中提升为 i32 + if (a == TYPE_CHAR) a = TYPE_I32; + if (b == TYPE_CHAR) b = TYPE_I32; if (a == TYPE_F64 || b == TYPE_F64) return TYPE_F64; if (a == TYPE_I64 || b == TYPE_I64) return TYPE_I64; + if (a == TYPE_U64 || b == TYPE_U64) return TYPE_U64; + if (a == TYPE_I32 || b == TYPE_I32) return TYPE_I32; if (a == TYPE_BOOL || b == TYPE_BOOL) return TYPE_BOOL; return TYPE_ERROR; } -static bool is_numeric(TypeKind t) { return t == TYPE_I64 || t == TYPE_F64 || t == TYPE_ENUM; } +static bool is_numeric(TypeKind t) { + return t == TYPE_I32 || t == TYPE_I64 || t == TYPE_U64 + || t == TYPE_F64 || t == TYPE_CHAR || t == TYPE_ENUM; +} +// 隐式类型转换规则: 无损加宽允许,有符号→无符号不允许 +static bool can_implicit_convert(TypeKind from, TypeKind to) { + if (from == to) return true; + // 枚举视为 i64 + if (from == TYPE_ENUM) from = TYPE_I64; + if (to == TYPE_ENUM) to = TYPE_I64; + // char 可转为任意整数 + if (from == TYPE_CHAR) return to == TYPE_I32 || to == TYPE_I64 || to == TYPE_U64 || to == TYPE_F64; + // i32 可加宽 + if (from == TYPE_I32) return to == TYPE_I64 || to == TYPE_F64; + // i64 可转 f64 + if (from == TYPE_I64) return to == TYPE_F64; + // u64 ↔ i64 双向允许(同一位宽,LLVM 同类型) + if (from == TYPE_U64) return to == TYPE_F64 || to == TYPE_I64; + if (from == TYPE_I64) return to == TYPE_F64 || to == TYPE_U64; + return false; +} static bool is_comparable(TypeKind a, TypeKind b) { if (a == b) return true; // 枚举可以参与整数比较 @@ -184,7 +209,10 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* actual_name ? actual_name : type_name(actual)); } } else if (actual != expected && - !(expected == TYPE_I64 && actual == TYPE_ENUM)) { + !(expected == TYPE_I64 && actual == TYPE_ENUM) && + !can_implicit_convert(actual, expected) && + !(actual == TYPE_I64 && node->as.call.args[i]->kind == AST_LITERAL_EXPR + && (expected == TYPE_I32 || expected == TYPE_U64 || expected == TYPE_CHAR))) { error_add(errors, "", node->loc.line, node->loc.col, "参数 %zu 类型不匹配: 期望 '%s',得到 '%s'", i + 1, type_name(expected), type_name(actual)); @@ -380,7 +408,8 @@ static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* TypeKind actual = node->as.method_call.args[i]->type.kind; TypeKind expected = sym->param_types[i + 1]; if (actual != TYPE_ERROR && actual != expected && - !(expected == TYPE_I64 && actual == TYPE_ENUM)) { + !(expected == TYPE_I64 && actual == TYPE_ENUM) && + !can_implicit_convert(actual, expected)) { if (expected == TYPE_STRUCT) { // 结构体类型参数:比较具体类型名 const char* actual_name = node->as.method_call.args[i]->type.struct_name; @@ -618,7 +647,12 @@ static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* } else { var_type = node->as.let_stmt.annot_type; } - if (inferred != TYPE_ERROR && inferred != var_type) { + bool literal_to_int = (inferred == TYPE_I64 + && node->as.let_stmt.init->kind == AST_LITERAL_EXPR + && (var_type == TYPE_I32 || var_type == TYPE_U64 || var_type == TYPE_CHAR)); + if (inferred != TYPE_ERROR && inferred != var_type + && !can_implicit_convert(inferred, var_type) + && !literal_to_int) { error_add(errors, "", node->loc.line, node->loc.col, "变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'", node->as.let_stmt.name, diff --git a/test/programs/25_new_types.l b/test/programs/25_new_types.l new file mode 100644 index 0000000..81055dd --- /dev/null +++ b/test/programs/25_new_types.l @@ -0,0 +1,9 @@ +fn main() -> i64 { + let a: i32 = 100; // i64 字面量 → i32 变量(隐式转换) + let b: u64 = 999; // i64 字面量 → u64 变量(隐式转换) + let c: char = 'A'; // 字符字面量 + print_i64(a); // 100 (i32 自动加宽为 i64) + print_i64(c); // 65 (char 自动加宽为 i64) + print_i64(b); // 999 (u64 传 i64 参数 — 允许) + return 0; +}