diff --git a/src/parser/desugar.c b/src/parser/desugar.c new file mode 100644 index 0000000..c3c3be1 --- /dev/null +++ b/src/parser/desugar.c @@ -0,0 +1,109 @@ +#include "desugar.h" +#include <stdio.h> +#include <string.h> + +// === match → let + if-else 链 === +AstNode* desugar_match(Parser* p, const Token* match_tok, + AstNode* matched_expr, + AstNode** arm_patterns, bool* arm_is_wildcard, + AstNode** arm_bodies, int arm_count) { + const char* varname = arena_strdup_impl(p->arena, "__match_val", 12); + + // 从最后一个分支往前构建 if-else 链 + AstNode* result = NULL; + for (int i = arm_count - 1; i >= 0; i--) { + if (arm_is_wildcard[i]) { + AstNode* true_cond = ast_make_literal_bool(p->arena, true, tok_loc(match_tok)); + result = ast_make_if(p->arena, true_cond, arm_bodies[i], result, tok_loc(match_tok)); + } else { + AstNode* cond = ast_make_binary(p->arena, OP_EQ, + ast_make_ident(p->arena, varname, tok_loc(match_tok)), + arm_patterns[i], tok_loc(match_tok)); + result = ast_make_if(p->arena, cond, arm_bodies[i], result, tok_loc(match_tok)); + } + } + + AstNode* let_stmt = ast_make_let(p->arena, varname, TYPE_UNKNOWN, + false, false, matched_expr, NULL, 0, NULL, 0, tok_loc(match_tok)); + + AstNode* stmts_arr[2] = { let_stmt, result }; + AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); + memcpy(stmts, stmts_arr, 2 * sizeof(AstNode*)); + return ast_make_block(p->arena, stmts, 2, tok_loc(match_tok)); +} + +// === guard → if !(cond) { body } === +AstNode* desugar_guard(Parser* p, const Token* guard_tok, + AstNode* cond, AstNode* body) { + AstNode* not_cond = ast_make_unary(p->arena, OP_NOT, cond, tok_loc(guard_tok)); + return ast_make_if(p->arena, not_cond, body, NULL, tok_loc(guard_tok)); +} + +// === for i in start to end { body } → { var i = start; while i < end { body; i = i + 1; } } === +AstNode* desugar_for(Parser* p, const Token* for_tok, + const char* var_name, AstNode* start_expr, + AstNode* end_expr, AstNode* body) { + // var i = start; + AstNode* let_stmt = 
ast_make_let(p->arena, var_name, TYPE_UNKNOWN, + false, true, start_expr, NULL, 0, NULL, 0, tok_loc(for_tok)); + + // i < end + AstNode* cond = ast_make_binary(p->arena, OP_LT, + ast_make_ident(p->arena, var_name, tok_loc(for_tok)), + end_expr, tok_loc(for_tok)); + + // i = i + 1 + AstNode* incr = ast_make_assign(p->arena, var_name, + ast_make_binary(p->arena, OP_ADD, + ast_make_ident(p->arena, var_name, tok_loc(for_tok)), + ast_make_literal_i64(p->arena, 1, tok_loc(for_tok)), + tok_loc(for_tok)), + tok_loc(for_tok)); + + // 增量追加到循环体末尾 + AstNode** new_stmts = arena_alloc_impl(p->arena, + (body->as.block.stmt_count + 1) * sizeof(AstNode*)); + memcpy(new_stmts, body->as.block.stmts, body->as.block.stmt_count * sizeof(AstNode*)); + new_stmts[body->as.block.stmt_count] = incr; + AstNode* new_body = ast_make_block(p->arena, new_stmts, + body->as.block.stmt_count + 1, body->loc); + + // while i < end { body; i = i + 1; } + AstNode* while_loop = ast_make_while(p->arena, cond, new_body, tok_loc(for_tok)); + + AstNode* stmts_arr[2] = { let_stmt, while_loop }; + AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); + memcpy(stmts, stmts_arr, 2 * sizeof(AstNode*)); + return ast_make_block(p->arena, stmts, 2, tok_loc(for_tok)); +} + +// === if let pattern = expr { then } else { else } === +AstNode* desugar_if_let(Parser* p, const Token* if_tok, + AstNode* pattern, AstNode* match_expr, + AstNode* then_block, AstNode* else_block) { + static int iflet_counter = 0; + char vname_buf[32]; + snprintf(vname_buf, sizeof(vname_buf), "__iflet_%d", iflet_counter++); + const char* vname = arena_strdup_impl(p->arena, vname_buf, strlen(vname_buf)); + + AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, + false, false, match_expr, NULL, 0, NULL, 0, tok_loc(if_tok)); + AstNode* cond = ast_make_binary(p->arena, OP_EQ, + ast_make_ident(p->arena, vname, tok_loc(if_tok)), + pattern, tok_loc(if_tok)); + AstNode* if_stmt = ast_make_if(p->arena, cond, then_block, 
else_block, tok_loc(if_tok)); + + AstNode* stmts[2] = { let_stmt, if_stmt }; + AstNode** arr = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); + memcpy(arr, stmts, 2 * sizeof(AstNode*)); + return ast_make_block(p->arena, arr, 2, tok_loc(if_tok)); +} + +// === ident += expr → ident = ident + expr === +AstNode* desugar_compound_assign(Parser* p, const Token* name_tok, + BinaryOp binop, AstNode* rhs) { + const char* vname = arena_strdup_impl(p->arena, name_tok->start, name_tok->length); + AstNode* lhs_ident = ast_make_ident(p->arena, vname, tok_loc(name_tok)); + AstNode* bin_expr = ast_make_binary(p->arena, binop, lhs_ident, rhs, tok_loc(name_tok)); + return ast_make_assign(p->arena, vname, bin_expr, tok_loc(name_tok)); +} diff --git a/src/parser/desugar.h b/src/parser/desugar.h new file mode 100644 index 0000000..959571d --- /dev/null +++ b/src/parser/desugar.h @@ -0,0 +1,30 @@ +#ifndef DESUGAR_H +#define DESUGAR_H + +#include "parse_internal.h" + +// match expr { pat1 => body1, ... } → let + if-else 链 +AstNode* desugar_match(Parser* p, const Token* match_tok, + AstNode* matched_expr, + AstNode** arm_patterns, bool* arm_is_wildcard, + AstNode** arm_bodies, int arm_count); + +// guard cond else { body } → if !(cond) { body } +AstNode* desugar_guard(Parser* p, const Token* guard_tok, + AstNode* cond, AstNode* body); + +// for i in start to end { body } → { var i = start; while i < end { body; i = i + 1; } } +AstNode* desugar_for(Parser* p, const Token* for_tok, + const char* var_name, AstNode* start_expr, + AstNode* end_expr, AstNode* body); + +// if let pattern = expr { then } else { else } → { let __match = expr; if __match == pattern ... 
} +AstNode* desugar_if_let(Parser* p, const Token* if_tok, + AstNode* pattern, AstNode* match_expr, + AstNode* then_block, AstNode* else_block); + +// ident <op>= expr → ident = ident <op> expr +AstNode* desugar_compound_assign(Parser* p, const Token* name_tok, + BinaryOp binop, AstNode* rhs); + +#endif diff --git a/src/parser/expr.c b/src/parser/expr.c index 53e08b0..620292c 100644 --- a/src/parser/expr.c +++ b/src/parser/expr.c @@ -1,4 +1,5 @@ #include "parse_internal.h" +#include "desugar.h" #include #include @@ -287,22 +288,7 @@ AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) { else_block = parse_block(p, error); if (!else_block) return NULL; } - // 去糖: { let __match = expr; if __match == pattern { then } else { else } } - static int iflet_counter = 0; - char vname_buf[32]; - snprintf(vname_buf, sizeof(vname_buf), "__iflet_%d", iflet_counter++); - const char* vname = arena_strdup_impl(p->arena, vname_buf, strlen(vname_buf)); - AstNode* let_stmt = ast_make_let(p->arena, - vname, TYPE_UNKNOWN, - false, false, match_expr, NULL, 0, NULL, 0, tok_loc(if_tok)); - AstNode* cond = ast_make_binary(p->arena, OP_EQ, - ast_make_ident(p->arena, vname, tok_loc(if_tok)), - pattern, tok_loc(if_tok)); - AstNode* if_stmt = ast_make_if(p->arena, cond, then_block, else_block, tok_loc(if_tok)); - AstNode* stmts[2] = { let_stmt, if_stmt }; - AstNode** arr = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); - memcpy(arr, stmts, 2 * sizeof(AstNode*)); - left = ast_make_block(p->arena, arr, 2, tok_loc(if_tok)); + left = desugar_if_let(p, if_tok, pattern, match_expr, then_block, else_block); } else { // if-expr: if cond { then } else { else } AstNode* cond = parse_expr(p, error); diff --git a/src/parser/parser.c b/src/parser/parser.c index 28d5036..f62b772 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -1,4 +1,5 @@ #include "parse_internal.h" +#include "desugar.h" #include #include #include @@ -48,9 +49,6 @@ static AstNode* parse_match_stmt(Parser* p, 
ErrorInfo* error) { if (!expect(p, TOK_LBRACE, error, "match 后缺少 '{'")) return NULL; - // 分配临时变量名 - const char* varname = arena_strdup_impl(p->arena, "__match_val", 12); - // 收集所有分支 enum { MAX_ARMS = 64 }; bool arm_is_wildcard[MAX_ARMS]; @@ -65,64 +63,29 @@ static AstNode* parse_match_stmt(Parser* p, ErrorInfo* error) { error->line = peek(p)->line; error->col = peek(p)->col; return NULL; } - if (peek(p)->kind == TOK_UNDERSCORE) { arm_is_wildcard[arm_count] = true; arm_pattern[arm_count] = NULL; - advance(p); // 跳过 '_' + advance(p); } else { arm_is_wildcard[arm_count] = false; arm_pattern[arm_count] = parse_expr(p, error); if (!arm_pattern[arm_count]) return NULL; } - - // 解析 '=>' if (!expect(p, TOK_MATCH_ARROW, error, "match 分支缺少 '=>'")) return NULL; - - // 解析分支体(必须是一个代码块) arm_body[arm_count] = parse_block(p, error); if (!arm_body[arm_count]) return NULL; - arm_count++; - - // 跳过可选逗号 if (peek(p)->kind == TOK_COMMA) advance(p); } - if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL; - if (arm_count == 0) { error->message = "match 表达式至少需要一个分支"; error->filename = p->filename; error->line = match_tok->line; error->col = match_tok->col; return NULL; } - - // 从最后一个分支往前构建 if-else 链(最后一个分支 = 最内层 else) - AstNode* result = NULL; - for (int i = arm_count - 1; i >= 0; i--) { - if (arm_is_wildcard[i]) { - // 通配符分支:if (true) { body } else { result } - AstNode* true_cond = ast_make_literal_bool(p->arena, true, tok_loc(match_tok)); - result = ast_make_if(p->arena, true_cond, arm_body[i], result, tok_loc(match_tok)); - } else { - // if (__match_val == pattern) { body } else { result } - AstNode* cond = ast_make_binary(p->arena, OP_EQ, - ast_make_ident(p->arena, varname, tok_loc(match_tok)), - arm_pattern[i], tok_loc(match_tok)); - result = ast_make_if(p->arena, cond, arm_body[i], result, tok_loc(match_tok)); - } - } - - // 构建 let __match_val = ; - AstNode* let_stmt = ast_make_let(p->arena, varname, TYPE_UNKNOWN, - false, false, matched, NULL, 0, NULL, 0, tok_loc(match_tok)); 
- - // 包装为代码块: { let __match_val = ; } - AstNode* stmts_arr[2] = { let_stmt, result }; - AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); - memcpy(stmts, stmts_arr, 2 * sizeof(AstNode*)); - return ast_make_block(p->arena, stmts, 2, tok_loc(match_tok)); + return desugar_match(p, match_tok, matched, arm_pattern, arm_is_wildcard, arm_body, arm_count); } // === 代码块解析 === @@ -201,67 +164,19 @@ AstNode* parse_statement(Parser* p, ErrorInfo* error) { } if (t->kind == TOK_FOR) { - advance(p); // 跳过 'for' - - // 解析循环变量名 + advance(p); const Token* var_name = expect(p, TOK_IDENT, error, "for 后应为变量名"); if (!var_name) return NULL; - - // 解析 'in' if (!expect(p, TOK_IN, error, "缺少 'in'")) return NULL; - - // 解析起始表达式 AstNode* start_expr = parse_expr(p, error); if (!start_expr) return NULL; - - // 解析 'to' if (!expect(p, TOK_TO, error, "缺少 'to'")) return NULL; - - // 解析结束表达式 AstNode* end_expr = parse_expr(p, error); if (!end_expr) return NULL; - - // 解析循环体 AstNode* body = parse_block(p, error); if (!body) return NULL; - - // 脱糖: for i in start to end { body; } - // → { var i = start; while i < end { body; i = i + 1; } } - const char* vname = arena_strdup_impl(p->arena, var_name->start, var_name->length); - - // 构建: var i = start; - AstNode* let_stmt = ast_make_let(p->arena, vname, TYPE_UNKNOWN, false, true, start_expr, NULL, 0, NULL, 0, tok_loc(var_name)); - - // 构建: i < end (while 条件) - AstNode* cond = ast_make_binary(p->arena, OP_LT, - ast_make_ident(p->arena, vname, tok_loc(var_name)), - end_expr, tok_loc(var_name)); - - // 构建: i = i + 1 (循环增量) - AstNode* incr = ast_make_assign(p->arena, vname, - ast_make_binary(p->arena, OP_ADD, - ast_make_ident(p->arena, vname, tok_loc(var_name)), - ast_make_literal_i64(p->arena, 1, tok_loc(var_name)), - tok_loc(var_name)), - tok_loc(var_name)); - - // 将增量追加到循环体末尾 - AstNode** new_stmts = arena_alloc_impl(p->arena, - (body->as.block.stmt_count + 1) * sizeof(AstNode*)); - memcpy(new_stmts, body->as.block.stmts, 
body->as.block.stmt_count * sizeof(AstNode*)); - new_stmts[body->as.block.stmt_count] = incr; - AstNode* new_body = ast_make_block(p->arena, new_stmts, - body->as.block.stmt_count + 1, body->loc); - - // 构建: while i < end { ... body ... ; i = i + 1; } - AstNode* while_loop = ast_make_while(p->arena, cond, new_body, tok_loc(t)); - - // 包装: { var i = start; while i < end { ... } } - AstNode* stmts_arr[2] = { let_stmt, while_loop }; - AstNode** stmts = arena_alloc_impl(p->arena, 2 * sizeof(AstNode*)); - memcpy(stmts, stmts_arr, 2 * sizeof(AstNode*)); - return ast_make_block(p->arena, stmts, 2, tok_loc(t)); + return desugar_for(p, t, arena_strdup_impl(p->arena, var_name->start, var_name->length), start_expr, end_expr, body); } if (t->kind == TOK_MATCH) { @@ -269,16 +184,13 @@ AstNode* parse_statement(Parser* p, ErrorInfo* error) { } if (t->kind == TOK_GUARD) { - // guard expr else { ... } → if !(expr) { ... } const Token* guard_tok = advance(p); AstNode* cond = parse_expr(p, error); if (!cond) return NULL; if (!expect(p, TOK_ELSE, error, "guard 缺少 'else'")) return NULL; AstNode* body = parse_block(p, error); if (!body) return NULL; - // 去糖: if !cond { body } - AstNode* not_cond = ast_make_unary(p->arena, OP_NOT, cond, tok_loc(guard_tok)); - return ast_make_if(p->arena, not_cond, body, NULL, tok_loc(guard_tok)); + return desugar_guard(p, guard_tok, cond, body); } if (t->kind == TOK_RETURN) { @@ -333,9 +245,8 @@ AstNode* parse_statement(Parser* p, ErrorInfo* error) { if (t->kind == TOK_IDENT) { TokenKind next_kind = (t + 1)->kind; if (next_kind >= TOK_PLUS_EQ && next_kind <= TOK_SLASH_EQ) { - const Token* name = advance(p); // 消费标识符 + const Token* name = advance(p); TokenKind comp_op = advance(p)->kind; - BinaryOp binop; switch (comp_op) { case TOK_PLUS_EQ: binop = OP_ADD; break; @@ -344,19 +255,10 @@ AstNode* parse_statement(Parser* p, ErrorInfo* error) { case TOK_SLASH_EQ: binop = OP_DIV; break; default: break; } - AstNode* rhs = parse_expr(p, error); if (!rhs) return NULL; if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) 
return NULL; - - AstNode* lhs_ident = ast_make_ident(p->arena, - arena_strdup_impl(p->arena, name->start, name->length), - tok_loc(name)); - AstNode* bin_expr = ast_make_binary(p->arena, binop, lhs_ident, rhs, - tok_loc(name)); - return ast_make_assign(p->arena, - arena_strdup_impl(p->arena, name->start, name->length), - bin_expr, tok_loc(name)); + return desugar_compound_assign(p, name, binop, rhs); } }