From a15cd9d56ee1d3a810da95f1d007709b428e00de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Fri, 5 Jun 2026 14:41:52 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20match=20=E8=A1=A8=E8=BE=BE=E5=BC=8F=20(?= =?UTF-8?q?P1=20#8=20=E6=94=B6=E5=AE=98)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - lexer: TOK_MATCH, TOK_MATCH_ARROW, TOK_UNDERSCORE - parser: parse_match_stmt() desugar → let+if-else链 - 零 sema/codegen 改动 - 4个集成测试: enum/int literal/wildcard match P1 全部完成: type alias + enum + array + impl + match --- src/lexer/lexer.c | 4 +- src/lexer/token.c | 5 +- src/lexer/token.h | 6 +- src/parser/parser.c | 93 +++++++++++++++++++++++++++++++ test/programs/20_match.l | 11 ++++ test/programs/21_match_red.l | 11 ++++ test/programs/22_match_int.l | 10 ++++ test/programs/23_match_wildcard.l | 9 +++ 8 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 test/programs/20_match.l create mode 100644 test/programs/21_match_red.l create mode 100644 test/programs/22_match_int.l create mode 100644 test/programs/23_match_wildcard.l diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 407e4b8..df00503 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -63,7 +63,8 @@ static TokenKind check_keyword(const Token* tok) { KW("bool", TOK_BOOL); KW("str", TOK_STR); KW("void", TOK_VOID); KW("struct", TOK_STRUCT); KW("type", TOK_TYPE); - KW("enum", TOK_ENUM); KW("impl", TOK_IMPL); + KW("enum", TOK_ENUM); KW("impl", TOK_IMPL); KW("match", TOK_MATCH); + KW("_", TOK_UNDERSCORE); KW("true", TOK_TRUE); KW("false", TOK_FALSE); #undef KW return TOK_IDENT; @@ -126,6 +127,7 @@ Token* lex(Arena* a, const char* source, const char* filename, else if (c == '/') { tokens[idx++] = make_token(&l, TOK_SLASH, l.pos, 1); advance(&l); } else if (c == '%') { tokens[idx++] = make_token(&l, TOK_PERCENT, l.pos, 1); advance(&l); } else if (c == '=' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, 
TOK_EQ_EQ, l.pos, 2); advance(&l); advance(&l); } + else if (c == '=' && peek_next(&l) == '>') { tokens[idx++] = make_token(&l, TOK_MATCH_ARROW, l.pos, 2); advance(&l); advance(&l); } else if (c == '=') { tokens[idx++] = make_token(&l, TOK_ASSIGN, l.pos, 1); advance(&l); } else if (c == '!' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_BANG_EQ, l.pos, 2); advance(&l); advance(&l); } else if (c == '!') { tokens[idx++] = make_token(&l, TOK_BANG, l.pos, 1); advance(&l); } diff --git a/src/lexer/token.c b/src/lexer/token.c index 2963ac1..89f035c 100644 --- a/src/lexer/token.c +++ b/src/lexer/token.c @@ -8,16 +8,17 @@ static const char* NAMES[] = { [TOK_FN] = "fn", [TOK_LET] = "let", [TOK_MUT] = "mut", [TOK_IF] = "if", [TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_FOR] = "for", [TOK_IN] = "in", [TOK_RETURN] = "return", [TOK_STRUCT] = "struct", [TOK_TYPE] = "type", [TOK_ENUM] = "enum", [TOK_IMPL] = "impl", + [TOK_MATCH] = "match", [TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_STR] = "str", [TOK_VOID] = "void", [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数", [TOK_STR_LIT] = "字符串", [TOK_TRUE] = "true", [TOK_FALSE] = "false", - [TOK_IDENT] = "标识符", + [TOK_IDENT] = "标识符", [TOK_UNDERSCORE] = "_", [TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*", [TOK_SLASH] = "/", [TOK_PERCENT] = "%", [TOK_EQ_EQ] = "==", [TOK_BANG_EQ] = "!=", [TOK_LT] = "<", [TOK_GT] = ">", [TOK_LT_EQ] = "<=", [TOK_GT_EQ] = ">=", [TOK_AND_AND] = "&&", [TOK_PIPE_PIPE] = "||", [TOK_BANG] = "!", - [TOK_ARROW] = "->", [TOK_DOT_DOT] = "..", + [TOK_ARROW] = "->", [TOK_DOT_DOT] = "..", [TOK_MATCH_ARROW] = "=>", [TOK_PLUS_EQ] = "+=", [TOK_MINUS_EQ] = "-=", [TOK_STAR_EQ] = "*=", [TOK_SLASH_EQ] = "/=", [TOK_LPAREN] = "(", [TOK_RPAREN] = ")", [TOK_LBRACE] = "{", [TOK_RBRACE] = "}", diff --git a/src/lexer/token.h b/src/lexer/token.h index 79cbef0..afafad7 100644 --- a/src/lexer/token.h +++ b/src/lexer/token.h @@ -7,18 +7,18 @@ typedef enum { // 关键字 TOK_FN, TOK_LET, TOK_MUT, 
TOK_IF, TOK_ELSE, TOK_WHILE, TOK_FOR, TOK_IN, TOK_RETURN,
-    TOK_STRUCT, TOK_TYPE, TOK_ENUM, TOK_IMPL,
+    TOK_STRUCT, TOK_TYPE, TOK_ENUM, TOK_IMPL, TOK_MATCH,
     // 类型关键字
     TOK_I64, TOK_F64, TOK_BOOL, TOK_STR, TOK_VOID,
     // 字面量
     TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE, TOK_STR_LIT,
     // 标识符
-    TOK_IDENT,
+    TOK_IDENT, TOK_UNDERSCORE,
     // 运算符
     TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
     TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
     TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG,
-    TOK_ARROW, TOK_DOT_DOT,
+    TOK_ARROW, TOK_DOT_DOT, TOK_MATCH_ARROW,
     TOK_PLUS_EQ, TOK_MINUS_EQ, TOK_STAR_EQ, TOK_SLASH_EQ,
     // 分隔符
     TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE,
diff --git a/src/parser/parser.c b/src/parser/parser.c
index 13cc482..c951322 100644
--- a/src/parser/parser.c
+++ b/src/parser/parser.c
@@ -356,6 +356,119 @@ static AstNode* parse_struct_decl(Parser* p, ErrorInfo* error) {
         farr, fcount, tok_loc(s_tok));
 }
 
+// === match statement parsing (desugared into a let + if-else chain) ===
+//   match EXPR { pat1 => { body1 }, pat2 => { body2 }, _ => { body_default } }
+// is rewritten by the parser into
+//   { let __match_val = EXPR;
+//     if __match_val == pat1 { body1 }
+//     else if __match_val == pat2 { body2 }
+//     else if true { body_default } }
+// Commas between arms are optional, every arm body must be a block, and a
+// `_` arm is only accepted as the last arm (anything after it could never run).
+// Returns the synthesized block node, or NULL on a parse error; errors
+// detected here are written to *error.
+static AstNode* parse_match_stmt(Parser* p, ErrorInfo* error) {
+    // Name of the hidden temporary that holds the matched value, so the
+    // scrutinee expression appears only once in the desugared code.
+    static const char MATCH_TMP_NAME[] = "__match_val";
+    enum { MAX_ARMS = 64 };
+
+    const Token* match_tok = advance(p); // consume 'match'
+
+    // Scrutinee expression.
+    AstNode* matched = parse_expr(p, error);
+    if (!matched) return NULL;
+
+    if (!expect(p, TOK_LBRACE, error, "match 后缺少 '{'")) return NULL;
+
+    // sizeof counts the trailing NUL, i.e. the same 12 bytes as before.
+    const char* varname = arena_strdup_impl(p->arena, MATCH_TMP_NAME, sizeof MATCH_TMP_NAME);
+
+    // Collected arms. arm_pattern[i] == NULL marks the `_` wildcard arm;
+    // arm_tok[i] is the arm's first token, used for source locations.
+    const Token* arm_tok[MAX_ARMS];
+    AstNode* arm_pattern[MAX_ARMS];
+    AstNode* arm_body[MAX_ARMS];
+    int arm_count = 0;
+    bool seen_wildcard = false;
+
+    while (peek(p)->kind != TOK_RBRACE && !error->message) {
+        const Token* first = peek(p);
+
+        if (arm_count >= MAX_ARMS) {
+            error->message = "match 分支过多 (最多64)";
+            error->filename = p->filename;
+            error->line = first->line; error->col = first->col;
+            return NULL;
+        }
+
+        // An arm after `_` is unreachable: the wildcard lowers to `if true`.
+        if (seen_wildcard) {
+            error->message = "通配符 '_' 必须是 match 的最后一个分支";
+            error->filename = p->filename;
+            error->line = first->line; error->col = first->col;
+            return NULL;
+        }
+
+        arm_tok[arm_count] = first;
+        if (first->kind == TOK_UNDERSCORE) {
+            seen_wildcard = true;
+            arm_pattern[arm_count] = NULL;
+            advance(p); // consume '_'
+        } else {
+            arm_pattern[arm_count] = parse_expr(p, error);
+            if (!arm_pattern[arm_count]) return NULL;
+        }
+
+        if (!expect(p, TOK_MATCH_ARROW, error, "match 分支缺少 '=>'")) return NULL;
+
+        // The arm body must be a block.
+        arm_body[arm_count] = parse_block(p, error);
+        if (!arm_body[arm_count]) return NULL;
+
+        arm_count++;
+
+        // Optional separator between arms.
+        if (peek(p)->kind == TOK_COMMA) advance(p);
+    }
+
+    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;
+
+    if (arm_count == 0) {
+        error->message = "match 表达式至少需要一个分支";
+        error->filename = p->filename;
+        error->line = match_tok->line; error->col = match_tok->col;
+        return NULL;
+    }
+
+    // Build the if-else chain back to front, so the last arm ends up as
+    // the innermost else branch.
+    AstNode* chain = NULL;
+    for (int i = arm_count - 1; i >= 0; i--) {
+        AstNode* cond;
+        if (!arm_pattern[i]) {
+            // Wildcard: always-true condition.
+            cond = ast_make_literal_bool(p->arena, true, tok_loc(arm_tok[i]));
+        } else {
+            // __match_val == pattern
+            cond = ast_make_binary(p->arena, OP_EQ,
+                ast_make_ident(p->arena, varname, tok_loc(arm_tok[i])),
+                arm_pattern[i], tok_loc(arm_tok[i]));
+        }
+        chain = ast_make_if(p->arena, cond, arm_body[i], chain, tok_loc(arm_tok[i]));
+    }
+
+    // let __match_val = EXPR;
+    AstNode* let_stmt = ast_make_let(p->arena, varname, TYPE_UNKNOWN,
+        false, false, matched, NULL, 0, NULL, 0, tok_loc(match_tok));
+
+    // Wrap both statements in a block: { let __match_val = EXPR; if ... }
+    AstNode* stmts_arr[2] = { let_stmt, chain };
+    AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*));
+    memcpy(stmts, stmts_arr, 2 * sizeof(AstNode*));
+    return ast_make_block(p->arena, stmts, 2, tok_loc(match_tok));
+}
+
 // === 语句解析 ===
 
 static AstNode* parse_block(Parser* p, ErrorInfo* error) {
@@ -507,6 +620,10 @@ static AstNode* 
parse_statement(Parser* p, ErrorInfo* error) { return ast_make_block(p->arena, stmts, 2, tok_loc(t)); } + if (t->kind == TOK_MATCH) { + return parse_match_stmt(p, error); + } + if (t->kind == TOK_RETURN) { advance(p); if (match(p, TOK_SEMICOLON)) { diff --git a/test/programs/20_match.l b/test/programs/20_match.l new file mode 100644 index 0000000..64152b4 --- /dev/null +++ b/test/programs/20_match.l @@ -0,0 +1,11 @@ +enum Color { Red, Green, Blue } + +fn main() -> i64 { + let c = Color::Green; + match c { + Color::Red => { print_i64(10); } + Color::Green => { print_i64(20); } + _ => { print_i64(0); } + } + return 0; +} diff --git a/test/programs/21_match_red.l b/test/programs/21_match_red.l new file mode 100644 index 0000000..42f84da --- /dev/null +++ b/test/programs/21_match_red.l @@ -0,0 +1,11 @@ +enum Color { Red, Green, Blue } + +fn main() -> i64 { + let c = Color::Red; + match c { + Color::Red => { print_i64(100); } + Color::Green => { print_i64(200); } + _ => { print_i64(0); } + } + return 0; +} diff --git a/test/programs/22_match_int.l b/test/programs/22_match_int.l new file mode 100644 index 0000000..5dc3aca --- /dev/null +++ b/test/programs/22_match_int.l @@ -0,0 +1,10 @@ +fn main() -> i64 { + let x = 5; + match x { + 1 => { print_i64(10); } + 2 => { print_i64(20); } + 5 => { print_i64(50); } + _ => { print_i64(99); } + } + return 0; +} diff --git a/test/programs/23_match_wildcard.l b/test/programs/23_match_wildcard.l new file mode 100644 index 0000000..eda4025 --- /dev/null +++ b/test/programs/23_match_wildcard.l @@ -0,0 +1,9 @@ +fn main() -> i64 { + let x = 99; + match x { + 1 => { print_i64(10); } + 2 => { print_i64(20); } + _ => { print_i64(999); } + } + return 0; +}