Files
l-language/docs/superpowers/plans/2026-06-04-l-lang-v0.1.md
Serendipity 3b7bab1e1b feat: L Language v0.1 编译器完整实现
5 阶段编译流水线: 词法分析 → 语法分析(Pratt) → 语义分析(类型推断) → LLVM IR → .exe

模块:
- lexer: 手写状态机, 40 种 Token, // 和 /* */ 注释
- parser: Pratt 表达式解析(9 级优先级) + 递归下降语句/函数
- ast: 14 种节点类型 + 工厂函数
- sema: 作用域链符号表 + 类型推断 + 类型检查
- codegen: AST → LLVM-C API, print_i64/f64/bool 内建
- driver: 命令行 + 流水线串联 + 错误报告
- util: Arena bump allocator (8MB)

测试: 65 单元测试(词法41+语法15+语义9) + 5 集成测试 全部通过

语言特性: i64/f64/bool/void, let不可变变量, if/else, while, 递归函数
2026-06-05 00:26:59 +08:00

78 KiB
Raw Permalink Blame History

L Language v0.1 实现计划

For agentic workers: REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (- [ ]) syntax for tracking.

Goal: 用 C + LLVM 构建 L Language 编译器的完整 v0.1(计算器级别 → 斐波那契可执行文件)

Architecture: 经典 5 阶段编译器流水线 — 词法分析(Token) → 语法解析(AST) → 语义分析(类型标注) → LLVM IR 生成 → 链接成可执行文件。手写递归下降+Pratt解析器,Arena 内存池简化内存管理。

Tech Stack: C17, GCC (MinGW), CMake ≥ 3.20, LLVM 19.x (C API), Windows 11


文件蓝图

L Language/
├── CMakeLists.txt                    构建系统
├── include/
│   └── l_lang.h                      全局类型定义(Token、AST、类型枚举共享)
├── src/
│   ├── util/
│   │   ├── arena.h                   内存池声明
│   │   └── arena.c                   内存池实现(bump allocator
│   ├── lexer/
│   │   ├── token.h                   Token 结构体 + 创建函数
│   │   ├── token.c                   Token 实现
│   │   ├── lexer.h                   词法分析器声明
│   │   └── lexer.c                   核心词法分析(状态机)
│   ├── ast/
│   │   ├── ast.h                     AST 节点类型枚举 + 结构体
│   │   └── ast.c                     节点创建/销毁
│   ├── parser/
│   │   ├── parser.h                  解析器声明
│   │   └── parser.c                  递归下降 + Pratt 表达式解析
│   ├── sema/
│   │   ├── symbol.h                  符号表声明
│   │   ├── symbol.c                  符号表实现(作用域链)
│   │   ├── sema.h                    语义分析声明
│   │   └── sema.c                    类型推断 + 类型检查
│   ├── codegen/
│   │   ├── codegen.h                 代码生成声明
│   │   └── codegen.c                 AST → LLVM IR
│   ├── driver/
│   │   ├── error.h                   错误报告声明
│   │   ├── error.c                   错误格式化输出
│   │   └── main.c                    入口 + 命令行 + 流水线串联
├── test/
│   ├── test_lexer.c                  词法分析单元测试
│   ├── test_parser.c                 语法分析单元测试
│   ├── test_sema.c                   语义分析单元测试
│   ├── test_utils.h                  测试断言宏
│   └── programs/
│       ├── 01_arithmetic.l           四则运算 + print_i64
│       ├── 02_if_else.l              if/else 控制流
│       ├── 03_while.l                递归 + if 控制流组合
│       ├── 04_fib_recursive.l        斐波那契递归
│       └── 05_float.l               浮点运算 + 多函数调用

Task 1: 项目骨架和构建系统

Files:

  • Create: CMakeLists.txt

  • Create: include/l_lang.h

  • Create: src/util/arena.h, src/util/arena.c

  • Create: src/driver/error.h, src/driver/error.c

  • Step 1: 编写 CMakeLists.txt

cmake_minimum_required(VERSION 3.20)
project(l_lang C)

set(CMAKE_C_STANDARD 17)
set(CMAKE_C_STANDARD_REQUIRED ON)

# 查找 LLVM
find_package(LLVM 19 REQUIRED CONFIG)

message(STATUS "LLVM found: ${LLVM_DIR}")
message(STATUS "LLVM includes: ${LLVM_INCLUDE_DIRS}")
message(STATUS "LLVM libraries: ${LLVM_AVAILABLE_LIBS}")

# 收集源文件
file(GLOB_RECURSE L_LANG_SOURCES "src/*.c")

# 编译器可执行文件
add_executable(l_lang ${L_LANG_SOURCES})

# 包含目录
target_include_directories(l_lang PRIVATE
    ${CMAKE_SOURCE_DIR}/include
    ${LLVM_INCLUDE_DIRS}
    src/util
    src/lexer
    src/ast
    src/parser
    src/sema
    src/codegen
    src/driver
)

target_link_libraries(l_lang
    LLVM
)

# 编译选项
target_compile_options(l_lang PRIVATE -Wall -Wextra -g)

# 测试可执行文件
file(GLOB TEST_SOURCES "test/test_*.c")
add_executable(l_lang_test ${TEST_SOURCES} ${L_LANG_SOURCES})
target_include_directories(l_lang_test PRIVATE
    ${CMAKE_SOURCE_DIR}/include
    ${LLVM_INCLUDE_DIRS}
    src/util src/lexer src/ast src/parser src/sema src/codegen src/driver
    test
)
target_link_libraries(l_lang_test LLVM)
  • Step 2: 编写公共头文件 include/l_lang.h
#ifndef L_LANG_H
#define L_LANG_H

#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>

// === 类型系统 ===
typedef enum {
    TYPE_I64,
    TYPE_F64,
    TYPE_BOOL,
    TYPE_VOID,
    TYPE_UNKNOWN,   // 尚未推断
    TYPE_ERROR,      // 类型错误
} TypeKind;

static inline const char* type_name(TypeKind kind) {
    switch (kind) {
        case TYPE_I64:  return "i64";
        case TYPE_F64:  return "f64";
        case TYPE_BOOL: return "bool";
        case TYPE_VOID: return "void";
        default:        return "<unknown>";
    }
}

// === 向前声明 ===
typedef struct Token Token;
typedef struct AstNode AstNode;
typedef struct Scope Scope;
typedef struct Arena Arena;

// === 跨模块分配器接口(避免循环依赖,各模块通过 void* 使用 arena===
void* arena_alloc_impl(void* alloc, size_t size);
char* arena_strdup_impl(void* alloc, const char* src, size_t len);

#endif
  • Step 3: 编写 Arena 内存池 src/util/arena.h
#ifndef ARENA_H
#define ARENA_H

#include <stddef.h>

typedef struct {
    char*  memory;
    size_t capacity;
    size_t offset;
} Arena;

Arena arena_create(size_t capacity_mb);
void  arena_destroy(Arena* a);
void* arena_alloc(Arena* a, size_t size);
char* arena_strdup(Arena* a, const char* src);

#endif
  • Step 4: 编写 Arena 实现 src/util/arena.c
#include "arena.h"
#include <stdlib.h>
#include <string.h>

Arena arena_create(size_t capacity_mb) {
    Arena a;
    a.capacity = capacity_mb * 1024 * 1024;
    a.memory = (char*)malloc(a.capacity);
    a.offset = 0;
    return a;
}

void arena_destroy(Arena* a) {
    free(a->memory);
    a->memory = NULL;
    a->capacity = 0;
    a->offset = 0;
}

void* arena_alloc(Arena* a, size_t size) {
    size = (size + 7) & ~7;  // 8 字节对齐
    if (a->offset + size > a->capacity) return NULL;
    void* ptr = a->memory + a->offset;
    a->offset += size;
    return ptr;
}

char* arena_strdup(Arena* a, const char* src) {
    size_t len = strlen(src) + 1;
    char* dst = arena_alloc(a, len);
    memcpy(dst, src, len);
    return dst;
}
  • Step 5: 编写错误报告 src/driver/error.h
#ifndef ERROR_H
#define ERROR_H

#include <stddef.h>

typedef struct {
    const char* message;
    const char* filename;
    int         line;
    int         col;
} ErrorInfo;

typedef struct {
    ErrorInfo* errors;
    size_t     count;
    size_t     capacity;
} ErrorList;

void error_init(ErrorList* list);
void error_add(ErrorList* list, const char* filename, int line, int col, const char* fmt, ...);
void error_print(const ErrorList* list);

#endif
  • Step 6: 编写错误实现 src/driver/error.c
#include "error.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>

void error_init(ErrorList* list) {
    list->capacity = 8;
    list->errors = malloc(list->capacity * sizeof(ErrorInfo));
    list->count = 0;
}

void error_add(ErrorList* list, const char* filename, int line, int col, const char* fmt, ...) {
    if (list->count >= list->capacity) {
        list->capacity *= 2;
        list->errors = realloc(list->errors, list->capacity * sizeof(ErrorInfo));
    }
    char buf[512];
    va_list args;
    va_start(args, fmt);
    vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);
    list->errors[list->count++] = (ErrorInfo){
        .message = strdup(buf),
        .filename = filename,
        .line = line,
        .col = col,
    };
}

void error_print(const ErrorList* list) {
    for (size_t i = 0; i < list->count; i++) {
        ErrorInfo* e = &list->errors[i];
        fprintf(stderr, "\033[1;31m错误:\033[0m %s:%d:%d: %s\n",
                e->filename, e->line, e->col, e->message);
    }
}
  • Step 7: 配置构建并验证编译
cd "D:\Code\doing_exercises\programs\L Language"
mkdir -p build && cd build
cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:\settings\Language\LLVM" 2>&1

Expected: cmake 配置成功,输出 "LLVM found"。

  • Step 8: 构建骨架项目
mingw32-make -j4

Expected: 编译通过,生成 l_lang.exe(暂时无功能)。


Task 2: Token 数据结构

Files:

  • Create: src/lexer/token.h, src/lexer/token.c

  • Step 1: 编写 src/lexer/token.h

#ifndef TOKEN_H
#define TOKEN_H

#include "l_lang.h"

// === Token 类型枚举 ===
typedef enum {
    // 关键字
    TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE, TOK_RETURN,
    // 类型关键字
    TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID,
    // 字面量
    TOK_INT_LIT, TOK_FLOAT_LIT, TOK_TRUE, TOK_FALSE,
    // 标识符
    TOK_IDENT,
    // 运算符
    TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
    TOK_EQ, TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
    TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG,
    TOK_ARROW,
    // 分隔符
    TOK_LPAREN, TOK_RPAREN, TOK_LBRACE, TOK_RBRACE,
    TOK_COMMA, TOK_COLON, TOK_SEMICOLON, TOK_ASSIGN,
    // 特殊
    TOK_EOF, TOK_ERROR,
} TokenKind;

// === Token 结构体 ===
struct Token {
    TokenKind   kind;
    const char* start;      // 指向源码中 token 起始位置
    int         length;     // token 文本长度
    int         line;
    int         col;
};

// === 工具函数 ===
const char* tok_name(TokenKind kind);
bool        tok_is_type(TokenKind kind);

// 从 Token 提取值(必须在同一个 arena 中)
int64_t  tok_int_value(const Token* tok);
double   tok_float_value(const Token* tok);

#endif
  • Step 2: 编写 src/lexer/token.c
#include "token.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>

static const char* NAMES[] = {
    [TOK_FN] = "fn", [TOK_LET] = "let", [TOK_IF] = "if",
    [TOK_ELSE] = "else", [TOK_WHILE] = "while", [TOK_RETURN] = "return",
    [TOK_I64] = "i64", [TOK_F64] = "f64", [TOK_BOOL] = "bool", [TOK_VOID] = "void",
    [TOK_INT_LIT] = "整数", [TOK_FLOAT_LIT] = "浮点数",
    [TOK_TRUE] = "true", [TOK_FALSE] = "false",
    [TOK_IDENT] = "标识符",
    [TOK_PLUS] = "+", [TOK_MINUS] = "-", [TOK_STAR] = "*",
    [TOK_SLASH] = "/", [TOK_PERCENT] = "%",
    [TOK_EQ] = "=", [TOK_EQ_EQ] = "==", [TOK_BANG_EQ] = "!=",
    [TOK_LT] = "<", [TOK_GT] = ">", [TOK_LT_EQ] = "<=", [TOK_GT_EQ] = ">=",
    [TOK_AND_AND] = "&&", [TOK_PIPE_PIPE] = "||", [TOK_BANG] = "!",
    [TOK_ARROW] = "->",
    [TOK_LPAREN] = "(", [TOK_RPAREN] = ")",
    [TOK_LBRACE] = "{", [TOK_RBRACE] = "}",
    [TOK_COMMA] = ",", [TOK_COLON] = ":", [TOK_SEMICOLON] = ";",
    [TOK_ASSIGN] = "=",
    [TOK_EOF] = "EOF", [TOK_ERROR] = "错误",
};

const char* tok_name(TokenKind kind) {
    return NAMES[kind];
}

bool tok_is_type(TokenKind kind) {
    return kind == TOK_I64 || kind == TOK_F64 || kind == TOK_BOOL || kind == TOK_VOID;
}

int64_t tok_int_value(const Token* tok) {
    char buf[32];
    memcpy(buf, tok->start, tok->length);
    buf[tok->length] = '\0';
    return strtoll(buf, NULL, 10);
}

double tok_float_value(const Token* tok) {
    char buf[64];
    memcpy(buf, tok->start, tok->length);
    buf[tok->length] = '\0';
    return strtod(buf, NULL);
}

Task 3: 词法分析器

Files:

  • Create: src/lexer/lexer.h, src/lexer/lexer.c

  • Step 1: 编写 src/lexer/lexer.h

#ifndef LEXER_H
#define LEXER_H

#include "token.h"
#include "arena.h"
#include "error.h"

// 返回 Token 数组(分配在 arena 中),*count 为数量。
// 如遇错误,error 被填充并返回 NULL。
Token* lex(Arena* a, const char* source, const char* filename,
           size_t* count, ErrorInfo* error);

#endif
  • Step 2: 实现核心词法分析器 src/lexer/lexer.c
#include "lexer.h"
#include <ctype.h>
#include <string.h>

typedef struct {
    const char* src;
    const char* filename;
    int         pos;
    int         line;
    int         col;
} Lexer;

static char peek(const Lexer* l) { return l->src[l->pos]; }
static char peek_next(const Lexer* l) { return l->src[l->pos + 1]; }
static void advance(Lexer* l) {
    if (l->src[l->pos] == '\n') { l->line++; l->col = 1; }
    else { l->col++; }
    l->pos++;
}
static void skip_whitespace(Lexer* l) {
    while (1) {
        char c = peek(l);
        if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { advance(l); continue; }
        if (c == '/' && peek_next(l) == '/') {
            while (peek(l) != '\n' && peek(l) != '\0') advance(l);
            continue;
        }
        if (c == '/' && peek_next(l) == '*') {
            advance(l); advance(l);
            while (peek(l) != '\0' && !(peek(l) == '*' && peek_next(l) == '/')) advance(l);
            if (peek(l) != '\0') { advance(l); advance(l); }  // skip */
            continue;
        }
        break;
    }
}

static Token make_token(Lexer* l, TokenKind kind, int start_pos, int len) {
    return (Token){.kind = kind, .start = l->src + start_pos,
                   .length = len, .line = l->line, .col = l->col};
}

static Token lex_number(Lexer* l) {
    int start = l->pos;
    bool is_float = false;
    TokenKind kind = TOK_INT_LIT;
    while (isdigit(peek(l))) advance(l);
    if (peek(l) == '.') {
        is_float = true; kind = TOK_FLOAT_LIT; advance(l);
        while (isdigit(peek(l))) advance(l);
    }
    return make_token(l, kind, start, l->pos - start);
}

static TokenKind check_keyword(const Token* tok) {
    #define KW(s, k) if (tok->length == sizeof(s)-1 && memcmp(tok->start, s, sizeof(s)-1) == 0) return k
    KW("fn",     TOK_FN);     KW("let",    TOK_LET);
    KW("if",     TOK_IF);     KW("else",   TOK_ELSE);
    KW("while",  TOK_WHILE);  KW("return", TOK_RETURN);
    KW("i64",    TOK_I64);    KW("f64",    TOK_F64);
    KW("bool",   TOK_BOOL);   KW("void",   TOK_VOID);
    KW("true",   TOK_TRUE);   KW("false",  TOK_FALSE);
    #undef KW
    return TOK_IDENT;
}

static Token lex_ident_or_keyword(Lexer* l) {
    int start = l->pos;
    while (isalnum(peek(l)) || peek(l) == '_') advance(l);
    Token t = make_token(l, TOK_IDENT, start, l->pos - start);
    t.kind = check_keyword(&t);
    return t;
}

Token* lex(Arena* a, const char* source, const char* filename,
           size_t* count, ErrorInfo* error) {
    Lexer l = {.src = source, .filename = filename, .pos = 0, .line = 1, .col = 1};
    // 预估容量:源码长度的 1/3
    size_t cap = strlen(source) / 3 + 16;
    Token* tokens = arena_alloc(a, cap * sizeof(Token));
    size_t idx = 0;

    while (peek(&l) != '\0') {
        skip_whitespace(&l);
        if (peek(&l) == '\0') break;

        int line = l.line, col = l.col;
        char c = peek(&l);

        #define TOK(k) (tokens[idx++] = make_token(&l, k, l.pos, 1), advance(&l))
        #define TOK2(k, len) (tokens[idx++] = make_token(&l, k, l.pos, len), advance(&l); advance(&l))

        if (isdigit(c)) { tokens[idx++] = lex_number(&l); }
        else if (isalpha(c) || c == '_') { tokens[idx++] = lex_ident_or_keyword(&l); }
        else if (c == '+' && peek_next(&l) != '=') TOK(TOK_PLUS)
        else if (c == '-' && peek_next(&l) != '>') TOK(TOK_MINUS)
        else if (c == '-' && peek_next(&l) == '>') TOK2(TOK_ARROW, 2)
        else if (c == '*') TOK(TOK_STAR)
        else if (c == '/') TOK(TOK_SLASH)
        else if (c == '%') TOK(TOK_PERCENT)
        else if (c == '=' && peek_next(&l) == '=') TOK2(TOK_EQ_EQ, 2)
        else if (c == '=') TOK(TOK_ASSIGN)
        else if (c == '!' && peek_next(&l) == '=') TOK2(TOK_BANG_EQ, 2)
        else if (c == '!') TOK(TOK_BANG)
        else if (c == '<' && peek_next(&l) == '=') TOK2(TOK_LT_EQ, 2)
        else if (c == '<') TOK(TOK_LT)
        else if (c == '>' && peek_next(&l) == '=') TOK2(TOK_GT_EQ, 2)
        else if (c == '>') TOK(TOK_GT)
        else if (c == '&' && peek_next(&l) == '&') TOK2(TOK_AND_AND, 2)
        else if (c == '|' && peek_next(&l) == '|') TOK2(TOK_PIPE_PIPE, 2)
        else if (c == '(') TOK(TOK_LPAREN)
        else if (c == ')') TOK(TOK_RPAREN)
        else if (c == '{') TOK(TOK_LBRACE)
        else if (c == '}') TOK(TOK_RBRACE)
        else if (c == ',') TOK(TOK_COMMA)
        else if (c == ':') TOK(TOK_COLON)
        else if (c == ';') TOK(TOK_SEMICOLON)
        else {
            *error = (ErrorInfo){
                .message = "无法识别的字符",
                .filename = filename, .line = line, .col = col
            };
            return NULL;
        }
        #undef TOK
        #undef TOK2
    }
    tokens[idx++] = make_token(&l, TOK_EOF, l.pos, 0);
    *count = idx;
    return tokens;
}
  • Step 3: 编写词法分析测试 test/test_lexer.c
#include "test_utils.h"
#include "lexer.h"
#include "arena.h"

void test_simple_tokens() {
    Arena a = arena_create(1);
    const char* src = "fn main() { return 42; }";
    size_t count; ErrorInfo error = {0};
    Token* tokens = lex(&a, src, "test", &count, &error);
    ASSERT(tokens != NULL);
    ASSERT(count >= 8);
    ASSERT(tokens[0].kind == TOK_FN);
    ASSERT(tokens[1].kind == TOK_IDENT);
    ASSERT(tokens[2].kind == TOK_LPAREN);
    ASSERT(tokens[3].kind == TOK_RPAREN);
    ASSERT(tokens[4].kind == TOK_LBRACE);
    ASSERT(tokens[5].kind == TOK_RETURN);
    ASSERT(tokens[6].kind == TOK_INT_LIT);
    ASSERT(tok_int_value(&tokens[6]) == 42);
    arena_destroy(&a);
}

void test_keywords() {
    Arena a = arena_create(1);
    const char* src = "fn let if else while return i64 f64 bool void true false";
    TokenKind expected[] = {TOK_FN, TOK_LET, TOK_IF, TOK_ELSE, TOK_WHILE,
        TOK_RETURN, TOK_I64, TOK_F64, TOK_BOOL, TOK_VOID, TOK_TRUE, TOK_FALSE, TOK_EOF};
    size_t count; ErrorInfo error = {0};
    Token* tokens = lex(&a, src, "test", &count, &error);
    ASSERT(tokens != NULL);
    for (int i = 0; i < 13; i++) ASSERT(tokens[i].kind == expected[i]);
    arena_destroy(&a);
}

void test_operators() {
    Arena a = arena_create(1);
    const char* src = "+ - * / % == != < > <= >= && || ! ->";
    TokenKind expected[] = {TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_PERCENT,
        TOK_EQ_EQ, TOK_BANG_EQ, TOK_LT, TOK_GT, TOK_LT_EQ, TOK_GT_EQ,
        TOK_AND_AND, TOK_PIPE_PIPE, TOK_BANG, TOK_ARROW, TOK_EOF};
    size_t count; ErrorInfo error = {0};
    Token* tokens = lex(&a, src, "test", &count, &error);
    ASSERT(tokens != NULL);
    for (int i = 0; i < 16; i++) ASSERT(tokens[i].kind == expected[i]);
    arena_destroy(&a);
}

int main(void) {
    TEST_RUN(test_simple_tokens);
    TEST_RUN(test_keywords);
    TEST_RUN(test_operators);
    return test_summary();
}
  • Step 4: 编写测试工具宏 test/test_utils.h
#ifndef TEST_UTILS_H
#define TEST_UTILS_H
#include <stdio.h>
#include <stdlib.h>

static int _tests_run = 0;
static int _tests_failed = 0;

#define ASSERT(expr) do { \
    _tests_run++; \
    if (!(expr)) { \
        fprintf(stderr, "\033[1;31mFAIL\033[0m %s:%d: %s\n", __FILE__, __LINE__, #expr); \
        _tests_failed++; \
    } \
} while(0)

#define TEST_RUN(func) do { \
    fprintf(stderr, "  RUN  %s\n", #func); \
    func(); \
} while(0)

static inline int test_summary(void) {
    fprintf(stderr, "\n%d tests, %d passed, %d failed\n",
            _tests_run, _tests_run - _tests_failed, _tests_failed);
    return _tests_failed > 0 ? 1 : 0;
}
#endif
  • Step 5: 构建并运行词法测试
cd "D:\Code\doing_exercises\programs\L Language\build"
cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:\settings\Language\LLVM"
mingw32-make -j4
./l_lang_test.exe

Expected: 3 个测试全部 PASS。


Task 4: AST 数据结构

Files:

  • Create: src/ast/ast.h, src/ast/ast.c

  • Step 1: 编写 src/ast/ast.h

#ifndef AST_H
#define AST_H

#include "l_lang.h"
#include <stddef.h>

typedef enum {
    AST_PROGRAM,
    AST_FUNCTION,
    AST_PARAMETER,
    AST_BLOCK,
    AST_LET_STMT,
    AST_IF_STMT,
    AST_WHILE_STMT,
    AST_RETURN_STMT,
    AST_EXPR_STMT,
    AST_BINARY_EXPR,
    AST_UNARY_EXPR,
    AST_CALL_EXPR,
    AST_LITERAL_EXPR,
    AST_IDENT_EXPR,
} AstKind;

typedef enum {
    OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD,
    OP_EQ, OP_NE, OP_LT, OP_GT, OP_LE, OP_GE,
    OP_AND, OP_OR,
    OP_NEG, OP_NOT,
} BinaryOp;

// 类型信息(语义分析阶段填充)
typedef struct {
    TypeKind kind;
} TypeInfo;

// AST 节点
struct AstNode {
    AstKind   kind;
    TypeInfo  type;         // 语义分析后填充,默认为 TYPE_UNKNOWN
    int       line;         // 源文件行号
    int       col;          // 源文件列号

    // 节点特有数据(按 kind 解释)
    union {
        // AST_PROGRAM
        struct { AstNode** functions; size_t fn_count; } program;
        // AST_FUNCTION
        struct { const char* name; AstNode** params; size_t param_count;
                 TypeKind return_type; AstNode* body; } function;
        // AST_PARAMETER
        struct { const char* name; TypeKind type; } parameter;
        // AST_BLOCK
        struct { AstNode** stmts; size_t stmt_count; } block;
        // AST_LET_STMT
        struct { const char* name; AstNode* init; } let_stmt;
        // AST_IF_STMT
        struct { AstNode* cond; AstNode* then_block; AstNode* else_block; } if_stmt;
        // AST_WHILE_STMT
        struct { AstNode* cond; AstNode* body; } while_stmt;
        // AST_RETURN_STMT
        struct { AstNode* expr; } return_stmt;
        // AST_EXPR_STMT
        struct { AstNode* expr; } expr_stmt;
        // AST_BINARY_EXPR
        struct { BinaryOp op; AstNode* left; AstNode* right; } binary;
        // AST_UNARY_EXPR
        struct { BinaryOp op; AstNode* operand; } unary;
        // AST_CALL_EXPR
        struct { const char* name; AstNode** args; size_t arg_count; } call;
        // AST_LITERAL_EXPR
        struct { TypeKind lit_type; union { int64_t i64_val; double f64_val; bool bool_val; }; } literal;
        // AST_IDENT_EXPR
        struct { const char* name; } ident;
    } as;
};

// 创建节点的辅助函数(内存来自 arena)
AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col);
AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount,
                           TypeKind ret, AstNode* body, int line, int col);
AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col);
AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col);
AstNode* ast_make_let(void* alloc, const char* name, AstNode* init, int line, int col);
AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col);
AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col);
AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col);
AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col);
AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col);
AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col);
AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col);
AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col);
AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col);
AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col);
AstNode* ast_make_ident(void* alloc, const char* name, int line, int col);

#endif
  • Step 2: 编写 src/ast/ast.c
#include "ast.h"
#include "arena.h"
#include <string.h>

// === 跨模块分配器(供 parser.c、symbol.c 等复用)===
void* arena_alloc_impl(void* alloc, size_t sz) {
    return arena_alloc((Arena*)alloc, sz);
}

char* arena_strdup_impl(void* alloc, const char* src, size_t len) {
    char* dst = arena_alloc_impl(alloc, len + 1);
    memcpy(dst, src, len);
    dst[len] = '\0';
    return dst;
}

// 使用宏简化节点创建
#define NEW(alloc, kind) AstNode* n = (AstNode*)arena_alloc_impl(alloc, sizeof(AstNode)); \
                         n->kind = (kind); n->type.kind = TYPE_UNKNOWN; \
                         n->line = line; n->col = col

static void* arena_alloc_impl(void* alloc, size_t sz);  // forward

AstNode* ast_make_program(void* alloc, AstNode** fns, size_t count, int line, int col) {
    NEW(alloc, AST_PROGRAM);
    n->as.program.functions = fns;
    n->as.program.fn_count = count;
    return n;
}

AstNode* ast_make_function(void* alloc, const char* name, AstNode** params, size_t pcount,
                           TypeKind ret, AstNode* body, int line, int col) {
    NEW(alloc, AST_FUNCTION);
    n->as.function.name = name; n->as.function.params = params;
    n->as.function.param_count = pcount; n->as.function.return_type = ret;
    n->as.function.body = body;
    return n;
}

AstNode* ast_make_parameter(void* alloc, const char* name, TypeKind type, int line, int col) {
    NEW(alloc, AST_PARAMETER);
    n->as.parameter.name = name; n->as.parameter.type = type;
    return n;
}

AstNode* ast_make_block(void* alloc, AstNode** stmts, size_t count, int line, int col) {
    NEW(alloc, AST_BLOCK);
    n->as.block.stmts = stmts; n->as.block.stmt_count = count;
    return n;
}

AstNode* ast_make_let(void* alloc, const char* name, AstNode* init, int line, int col) {
    NEW(alloc, AST_LET_STMT);
    n->as.let_stmt.name = name; n->as.let_stmt.init = init;
    return n;
}

AstNode* ast_make_if(void* alloc, AstNode* cond, AstNode* then_b, AstNode* else_b, int line, int col) {
    NEW(alloc, AST_IF_STMT);
    n->as.if_stmt.cond = cond; n->as.if_stmt.then_block = then_b;
    n->as.if_stmt.else_block = else_b;
    return n;
}

AstNode* ast_make_while(void* alloc, AstNode* cond, AstNode* body, int line, int col) {
    NEW(alloc, AST_WHILE_STMT);
    n->as.while_stmt.cond = cond; n->as.while_stmt.body = body;
    return n;
}

AstNode* ast_make_return(void* alloc, AstNode* expr, int line, int col) {
    NEW(alloc, AST_RETURN_STMT);
    n->as.return_stmt.expr = expr;
    return n;
}

AstNode* ast_make_expr_stmt(void* alloc, AstNode* expr, int line, int col) {
    NEW(alloc, AST_EXPR_STMT);
    n->as.expr_stmt.expr = expr;
    return n;
}

AstNode* ast_make_binary(void* alloc, BinaryOp op, AstNode* left, AstNode* right, int line, int col) {
    NEW(alloc, AST_BINARY_EXPR);
    n->as.binary.op = op; n->as.binary.left = left; n->as.binary.right = right;
    return n;
}

AstNode* ast_make_unary(void* alloc, BinaryOp op, AstNode* operand, int line, int col) {
    NEW(alloc, AST_UNARY_EXPR);
    n->as.unary.op = op; n->as.unary.operand = operand;
    return n;
}

AstNode* ast_make_call(void* alloc, const char* name, AstNode** args, size_t count, int line, int col) {
    NEW(alloc, AST_CALL_EXPR);
    n->as.call.name = name; n->as.call.args = args; n->as.call.arg_count = count;
    return n;
}

AstNode* ast_make_literal_i64(void* alloc, int64_t val, int line, int col) {
    NEW(alloc, AST_LITERAL_EXPR);
    n->as.literal.lit_type = TYPE_I64; n->as.literal.i64_val = val;
    n->type.kind = TYPE_I64;
    return n;
}

AstNode* ast_make_literal_f64(void* alloc, double val, int line, int col) {
    NEW(alloc, AST_LITERAL_EXPR);
    n->as.literal.lit_type = TYPE_F64; n->as.literal.f64_val = val;
    n->type.kind = TYPE_F64;
    return n;
}

AstNode* ast_make_literal_bool(void* alloc, bool val, int line, int col) {
    NEW(alloc, AST_LITERAL_EXPR);
    n->as.literal.lit_type = TYPE_BOOL; n->as.literal.bool_val = val;
    n->type.kind = TYPE_BOOL;
    return n;
}

AstNode* ast_make_ident(void* alloc, const char* name, int line, int col) {
    NEW(alloc, AST_IDENT_EXPR);
    n->as.ident.name = name;
    return n;
}
  • Step 3: 验证编译
cd "D:\Code\doing_exercises\programs\L Language\build"
mingw32-make -j4

Expected: 编译通过(含 AST 模块)。


Task 5: 语法分析器 — 表达式(Pratt Parser

Files:

  • Create: src/parser/parser.h, src/parser/parser.c

  • Step 1: 编写 src/parser/parser.h

#ifndef PARSER_H
#define PARSER_H

#include "ast.h"
#include "token.h"
#include "error.h"

// 解析 Token 数组,返回 Program 节点(内存来自 arena)。
// 出错时 error 被填充并返回 NULL。
AstNode* parse(Arena* a, const Token* tokens, size_t count,
               const char* filename, ErrorInfo* error);

#endif
  • Step 2: 实现解析器 src/parser/parser.c(第一部分:Pratt 表达式解析)
#include "parser.h"
#include <string.h>
#include <stdlib.h>

typedef struct {
    const Token* tokens;
    size_t       count;
    size_t       pos;
    const char*  filename;
    Arena*       arena;
} Parser;

// === 向前看 ===
static const Token* peek(const Parser* p) { return &p->tokens[p->pos]; }
static const Token* peek_n(const Parser* p, int n) { return &p->tokens[p->pos + n]; }
static const Token* advance(Parser* p) { return &p->tokens[p->pos++]; }
static bool match(Parser* p, TokenKind k) {
    if (peek(p)->kind == k) { p->pos++; return true; }
    return false;
}
static const Token* expect(Parser* p, TokenKind k, ErrorInfo* e, const char* msg) {
    if (peek(p)->kind == k) return advance(p);
    e->message = msg; e->filename = p->filename;
    e->line = peek(p)->line; e->col = peek(p)->col;
    return NULL;
}

// === 运算符优先级定义 ===
typedef enum {
    PREC_NONE = 0,
    PREC_ASSIGN = 10,
    PREC_OR = 20,
    PREC_AND = 30,
    PREC_COMPARE = 40,   // == != < > <= >=
    PREC_TERM = 50,      // + -
    PREC_FACTOR = 60,    // * / %
    PREC_UNARY = 70,     // - !
    PREC_CALL = 80,
} Precedence;

static Precedence tok_to_prec(TokenKind kind) {
    switch (kind) {
        case TOK_PIPE_PIPE: return PREC_OR;
        case TOK_AND_AND:   return PREC_AND;
        case TOK_EQ_EQ: case TOK_BANG_EQ:
        case TOK_LT: case TOK_GT: case TOK_LT_EQ: case TOK_GT_EQ: return PREC_COMPARE;
        case TOK_PLUS: case TOK_MINUS: return PREC_TERM;
        case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return PREC_FACTOR;
        default: return PREC_NONE;
    }
}

static BinaryOp tok_to_binop(TokenKind kind) {
    switch (kind) {
        case TOK_PLUS: return OP_ADD; case TOK_MINUS: return OP_SUB;
        case TOK_STAR: return OP_MUL; case TOK_SLASH: return OP_DIV;
        case TOK_PERCENT: return OP_MOD;
        case TOK_EQ_EQ: return OP_EQ; case TOK_BANG_EQ: return OP_NE;
        case TOK_LT: return OP_LT; case TOK_GT: return OP_GT;
        case TOK_LT_EQ: return OP_LE; case TOK_GT_EQ: return OP_GE;
        case TOK_AND_AND: return OP_AND; case TOK_PIPE_PIPE: return OP_OR;
        default: return OP_ADD;
    }
}

// 向前声明
static AstNode* parse_expr(Parser* p, ErrorInfo* error);
static AstNode* parse_expr_prec(Parser* p, Precedence prec, ErrorInfo* error);
static AstNode* parse_block(Parser* p, ErrorInfo* error);

// === 前缀解析:nil、前缀一元运算符、基本表达式 ===
static AstNode* parse_unary(Parser* p, ErrorInfo* error) {
    const Token* op = advance(p);
    AstNode* operand = parse_expr_prec(p, PREC_UNARY, error);
    if (!operand) return NULL;
    BinaryOp uop = (op->kind == TOK_MINUS) ? OP_NEG : OP_NOT;
    return ast_make_unary(p->arena, uop, operand, op->line, op->col);
}

static AstNode* parse_group(Parser* p, ErrorInfo* error) {
    advance(p);  // 跳过 (
    AstNode* expr = parse_expr(p, error);
    if (!expr) return NULL;
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
    return expr;
}

static AstNode* parse_literal(Parser* p) {
    const Token* t = advance(p);
    switch (t->kind) {
        case TOK_INT_LIT:   return ast_make_literal_i64(p->arena, tok_int_value(t), t->line, t->col);
        case TOK_FLOAT_LIT: return ast_make_literal_f64(p->arena, tok_float_value(t), t->line, t->col);
        case TOK_TRUE:      return ast_make_literal_bool(p->arena, true, t->line, t->col);
        case TOK_FALSE:     return ast_make_literal_bool(p->arena, false, t->line, t->col);
        default: return NULL;
    }
}

static AstNode* parse_ident_or_call(Parser* p, ErrorInfo* error) {
    const Token* name = advance(p);
    if (match(p, TOK_LPAREN)) {
        // 函数调用
        AstNode* args[16]; int arg_count = 0;
        while (peek(p)->kind != TOK_RPAREN && !error->message) {
            args[arg_count] = parse_expr(p, error);
            arg_count++;
            if (peek(p)->kind == TOK_COMMA) advance(p);
        }
        if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;
        AstNode** arg_arr = arena_alloc_impl(p->arena, arg_count * sizeof(AstNode*));
        memcpy(arg_arr, args, arg_count * sizeof(AstNode*));
        return ast_make_call(p->arena, arena_strdup_impl(p->arena, name->start, name->length),
                            arg_arr, arg_count, name->line, name->col);
    }
    return ast_make_ident(p->arena,
        arena_strdup_impl(p->arena, name->start, name->length),
        name->line, name->col);
}

// === Pratt 主循环 ===
static AstNode* parse_expr_prec(Parser* p, Precedence min_prec, ErrorInfo* error) {
    const Token* tok = peek(p);
    AstNode* left = NULL;

    // 前缀解析
    if (tok->kind == TOK_MINUS || tok->kind == TOK_BANG) {
        left = parse_unary(p, error);
    } else if (tok->kind == TOK_LPAREN) {
        left = parse_group(p, error);
    } else if (tok->kind == TOK_INT_LIT || tok->kind == TOK_FLOAT_LIT ||
               tok->kind == TOK_TRUE || tok->kind == TOK_FALSE) {
        left = parse_literal(p);
    } else if (tok->kind == TOK_IDENT) {
        left = parse_ident_or_call(p, error);
    } else {
        error->message = "无法识别的表达式"; error->filename = p->filename;
        error->line = tok->line; error->col = tok->col;
        return NULL;
    }
    if (!left) return NULL;

    // 中缀解析循环
    while (!error->message) {
        TokenKind kind = peek(p)->kind;
        Precedence prec = tok_to_prec(kind);
        if (prec <= min_prec) break;

        const Token* op = advance(p);
        AstNode* right = parse_expr_prec(p, prec, error);
        if (!right) return NULL;
        left = ast_make_binary(p->arena, tok_to_binop(kind), left, right, op->line, op->col);
    }

    return left;
}

static AstNode* parse_expr(Parser* p, ErrorInfo* error) {
    return parse_expr_prec(p, PREC_NONE, error);
}
  • Step 3: 解析器第二部分:语句解析
// === 语句解析(续 parser.c===

static bool is_type_token(TokenKind k) {
    return k == TOK_I64 || k == TOK_F64 || k == TOK_BOOL || k == TOK_VOID;
}

static TypeKind token_to_type(TokenKind k) {
    switch (k) { case TOK_I64: return TYPE_I64; case TOK_F64: return TYPE_F64;
                 case TOK_BOOL: return TYPE_BOOL; default: return TYPE_VOID; }
}

static AstNode* parse_statement(Parser* p, ErrorInfo* error);

static AstNode* parse_block(Parser* p, ErrorInfo* error) {
    const Token* open = peek(p);
    if (!expect(p, TOK_LBRACE, error, "缺少 '{'")) return NULL;
    AstNode* stmts[256]; int count = 0;
    while (peek(p)->kind != TOK_RBRACE && peek(p)->kind != TOK_EOF && !error->message) {
        // 块级表达式作为最后一条语句
        if (peek(p)->kind == TOK_RBRACE) break;
        AstNode* s = parse_statement(p, error);
        if (!s) return NULL;
        stmts[count++] = s;
    }
    if (!expect(p, TOK_RBRACE, error, "缺少 '}'")) return NULL;
    AstNode** arr = arena_alloc_impl(p->arena, count * sizeof(AstNode*));
    memcpy(arr, stmts, count * sizeof(AstNode*));
    return ast_make_block(p->arena, arr, count, open->line, open->col);
}

static AstNode* parse_statement(Parser* p, ErrorInfo* error) {
    const Token* t = peek(p);

    if (t->kind == TOK_LET) {
        advance(p);
        const Token* name = expect(p, TOK_IDENT, error, "let 后应为变量名");
        if (!name) return NULL;
        if (!expect(p, TOK_ASSIGN, error, "缺少 '='")) return NULL;
        AstNode* init = parse_expr(p, error);
        if (!init) return NULL;
        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
        return ast_make_let(p->arena,
            arena_strdup_impl(p->arena, name->start, name->length),
            init, t->line, t->col);
    }

    if (t->kind == TOK_IF) {
        advance(p);
        AstNode* cond = parse_expr(p, error);
        if (!cond) return NULL;
        AstNode* then_block = parse_block(p, error);
        if (!then_block) return NULL;
        AstNode* else_block = NULL;
        if (match(p, TOK_ELSE)) {
            if (peek(p)->kind == TOK_IF) {
                else_block = parse_statement(p, error);
            } else {
                else_block = parse_block(p, error);
            }
            if (!else_block) return NULL;
        }
        return ast_make_if(p->arena, cond, then_block, else_block, t->line, t->col);
    }

    if (t->kind == TOK_WHILE) {
        advance(p);
        AstNode* cond = parse_expr(p, error);
        if (!cond) return NULL;
        AstNode* body = parse_block(p, error);
        if (!body) return NULL;
        return ast_make_while(p->arena, cond, body, t->line, t->col);
    }

    if (t->kind == TOK_RETURN) {
        advance(p);
        // void return
        if (match(p, TOK_SEMICOLON)) {
            return ast_make_return(p->arena, NULL, t->line, t->col);
        }
        AstNode* expr = parse_expr(p, error);
        if (!expr) return NULL;
        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
        return ast_make_return(p->arena, expr, t->line, t->col);
    }

    // 表达式语句
    if (peek(p)->kind == TOK_IDENT && peek_n(p, 1)->kind == TOK_LPAREN) {
        // 函数调用表达式语句
        AstNode* expr = parse_expr(p, error);
        if (!expr) return NULL;
        if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
        return ast_make_expr_stmt(p->arena, expr, t->line, t->col);
    }

    // 表达式语句(不常见的非函数调用表达式语句)
    AstNode* expr = parse_expr(p, error);
    if (!expr) return NULL;
    if (!expect(p, TOK_SEMICOLON, error, "缺少 ';'")) return NULL;
    return ast_make_expr_stmt(p->arena, expr, t->line, t->col);
}

// === 函数和程序解析 ===

static AstNode* parse_function(Parser* p, ErrorInfo* error) {
    const Token* fn_tok = advance(p);  // fn
    const Token* name = expect(p, TOK_IDENT, error, "fn 后应为函数名");
    if (!name) return NULL;
    if (!expect(p, TOK_LPAREN, error, "缺少 '('")) return NULL;

    // 参数列表
    AstNode* params[64]; int pcount = 0;
    while (peek(p)->kind != TOK_RPAREN && !error->message) {
        const Token* pname = expect(p, TOK_IDENT, error, "参数名");
        if (!pname) return NULL;
        if (!expect(p, TOK_COLON, error, "缺少 ':'")) return NULL;
        const Token* ptype = advance(p);
        if (!is_type_token(ptype->kind)) {
            error->message = "无效的参数类型"; error->filename = p->filename;
            error->line = ptype->line; error->col = ptype->col; return NULL;
        }
        params[pcount++] = ast_make_parameter(p->arena,
            arena_strdup_impl(p->arena, pname->start, pname->length),
            token_to_type(ptype->kind), pname->line, pname->col);
        if (match(p, TOK_COMMA)) continue;
    }
    if (!expect(p, TOK_RPAREN, error, "缺少 ')'")) return NULL;

    // 返回类型
    TypeKind ret = TYPE_VOID;
    if (match(p, TOK_ARROW)) {
        const Token* rt = advance(p);
        if (!is_type_token(rt->kind)) {
            error->message = "无效的返回类型"; error->filename = p->filename;
            error->line = rt->line; error->col = rt->col; return NULL;
        }
        ret = token_to_type(rt->kind);
    }

    AstNode* body = parse_block(p, error);
    if (!body) return NULL;

    AstNode** parr = arena_alloc_impl(p->arena, pcount * sizeof(AstNode*));
    memcpy(parr, params, pcount * sizeof(AstNode*));
    return ast_make_function(p->arena,
        arena_strdup_impl(p->arena, name->start, name->length),
        parr, pcount, ret, body, fn_tok->line, fn_tok->col);
}

AstNode* parse(Arena* a, const Token* tokens, size_t count,
               const char* filename, ErrorInfo* error) {
    Parser p = {.tokens = tokens, .count = count, .pos = 0,
                .filename = filename, .arena = a};
    AstNode* functions[256]; int fn_count = 0;
    while (peek(&p)->kind != TOK_EOF && !error->message) {
        functions[fn_count++] = parse_function(&p, error);
    }
    if (error->message) return NULL;
    AstNode** arr = arena_alloc_impl(a, fn_count * sizeof(AstNode*));
    memcpy(arr, functions, fn_count * sizeof(AstNode*));
    return ast_make_program(a, arr, fn_count, 0, 0);
}
  • Step 4: 编写解析器测试 test/test_parser.c
#include "test_utils.h"
#include "parser.h"
#include "lexer.h"
#include "arena.h"

static AstNode* parse_string(const char* src, ErrorInfo* error) {
    Arena a = arena_create(1);
    size_t tcount;
    Token* tokens = lex(&a, src, "test", &tcount, error);
    if (!tokens) { arena_destroy(&a); return NULL; }
    AstNode* ast = parse(&a, tokens, tcount, "test", error);
    // Note: arena 必须保持存活直到 AST 不再需要
    return ast;
}

void test_simple_function() {
    ErrorInfo error = {0};
    AstNode* ast = parse_string("fn main() { return 42; }", &error);
    ASSERT(ast != NULL);
    ASSERT(ast->kind == AST_PROGRAM);
    ASSERT(ast->as.program.fn_count == 1);
    AstNode* fn = ast->as.program.functions[0];
    ASSERT(fn->kind == AST_FUNCTION);
}

void test_arithmetic_expr() {
    ErrorInfo error = {0};
    AstNode* ast = parse_string("fn main() { return 1 + 2 * 3; }", &error);
    ASSERT(ast != NULL);
    AstNode* body = ast->as.program.functions[0]->as.function.body;
    AstNode* ret = body->as.block.stmts[0];
    ASSERT(ret->kind == AST_RETURN_STMT);
    AstNode* expr = ret->as.return_stmt.expr;
    ASSERT(expr->kind == AST_BINARY_EXPR);
    ASSERT(expr->as.binary.op == OP_ADD);
}

void test_if_statement() {
    ErrorInfo error = {0};
    AstNode* ast = parse_string("fn main() { if true { return 1; } else { return 0; } }", &error);
    ASSERT(ast != NULL);
}

int main(void) {
    TEST_RUN(test_simple_function);
    TEST_RUN(test_arithmetic_expr);
    TEST_RUN(test_if_statement);
    return test_summary();
}
  • Step 5: 构建并运行解析测试
cd "D:\Code\doing_exercises\programs\L Language\build"
mingw32-make -j4
./l_lang_test.exe

Expected: 词法 3 个 + 解析 3 个 = 6 个测试 PASS。


Task 6: 语义分析 — 符号表

Files:

  • Create: src/sema/symbol.h, src/sema/symbol.c

  • Step 1: 编写 src/sema/symbol.h

#ifndef SYMBOL_H
#define SYMBOL_H

#include "l_lang.h"
#include "ast.h"

typedef enum { SYM_VARIABLE, SYM_PARAMETER, SYM_FUNCTION } SymbolKind;

typedef struct Symbol {
    const char* name;
    SymbolKind  kind;
    TypeKind    type;          // 变量/参数的类型
    // 函数特有
    TypeKind    return_type;
    TypeKind*   param_types;
    size_t       param_count;
    // 链表(同一作用域内的下一个符号)
    struct Symbol* next;
} Symbol;

typedef struct Scope {
    Symbol*       head;        // 符号链表头
    struct Scope* parent;      // 上级作用域
} Scope;

// 创建新作用域(子作用域)
Scope* scope_new(void* alloc, Scope* parent);

// 在当前作用域及其父作用域中查找符号
Symbol* scope_lookup(const Scope* scope, const char* name);

// 在当前作用域中插入符号(重复插入返回 NULL)
Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
                     SymbolKind kind, TypeKind type);

// 插入函数符号
Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
                              TypeKind ret, TypeKind* pt, size_t pc);

#endif
  • Step 2: 实现 src/sema/symbol.c
#include "symbol.h"
#include <string.h>

Scope* scope_new(void* alloc, Scope* parent) {
    Scope* s = (Scope*)arena_alloc_impl(alloc, sizeof(Scope));
    s->head = NULL;
    s->parent = parent;
    return s;
}

Symbol* scope_lookup(const Scope* scope, const char* name) {
    for (const Scope* s = scope; s; s = s->parent) {
        for (Symbol* sym = s->head; sym; sym = sym->next) {
            if (strcmp(sym->name, name) == 0) return sym;
        }
    }
    return NULL;
}

Symbol* scope_insert(Scope* scope, void* alloc, const char* name,
                     SymbolKind kind, TypeKind type) {
    if (scope_lookup(scope, name)) return NULL;  // 检查当前 scope 链
    Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol));
    sym->name = name; sym->kind = kind; sym->type = type;
    sym->next = scope->head;
    scope->head = sym;
    return sym;
}

Symbol* scope_insert_function(Scope* scope, void* alloc, const char* name,
                              TypeKind ret, TypeKind* pt, size_t pc) {
    if (scope_lookup(scope, name)) return NULL;
    Symbol* sym = (Symbol*)arena_alloc_impl(alloc, sizeof(Symbol));
    sym->name = name; sym->kind = SYM_FUNCTION; sym->type = TYPE_VOID;
    sym->return_type = ret; sym->param_types = pt; sym->param_count = pc;
    sym->next = scope->head;
    scope->head = sym;
    return sym;
}

Task 7: 语义分析 — 类型推断和检查

Files:

  • Create: src/sema/sema.h, src/sema/sema.c

  • Step 1: 编写 src/sema/sema.h

#ifndef SEMA_H
#define SEMA_H

#include "ast.h"
#include "error.h"
#include "symbol.h"

// 对 AST 进行语义分析(类型推断 + 类型检查)
// 为每个节点填充 type 字段,错误收集到 errors 列表中。
// 参数 arena 用于作用域分配。
void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena);

#endif
  • Step 2: 实现语义分析 src/sema/sema.c
#include "sema.h"
#include <string.h>

// === 类型关系 ===
static TypeKind promote(TypeKind a, TypeKind b) {
    if (a == TYPE_F64 || b == TYPE_F64) return TYPE_F64;
    if (a == TYPE_I64 || b == TYPE_I64) return TYPE_I64;
    if (a == TYPE_BOOL || b == TYPE_BOOL) return TYPE_BOOL;
    return TYPE_ERROR;
}

static bool is_numeric(TypeKind t) { return t == TYPE_I64 || t == TYPE_F64; }
static bool is_comparable(TypeKind a, TypeKind b) { return a == b; }

// === 向前声明 ===
static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a);

// === 检查单个节点 ===
static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    switch (node->kind) {
    case AST_LITERAL_EXPR:
        // 类型已在创建时设置,无需处理
        break;

    case AST_IDENT_EXPR: {
        Symbol* sym = scope_lookup(scope, node->as.ident.name);
        if (!sym) {
            error_add(errors, "<sema>", node->line, node->col,
                      "未定义的变量 '%s'", node->as.ident.name);
            node->type.kind = TYPE_ERROR;
        } else {
            node->type.kind = sym->type;
        }
        break;
    }

    case AST_UNARY_EXPR: {
        analyze_expr(node->as.unary.operand, scope, errors, a);
        TypeKind inner = node->as.unary.operand->type.kind;
        if (node->as.unary.op == OP_NEG && !is_numeric(inner)) {
            error_add(errors, "<sema>", node->line, node->col,
                      "一元 '-' 只能用于数值类型");
            node->type.kind = TYPE_ERROR;
        } else if (node->as.unary.op == OP_NOT && inner != TYPE_BOOL) {
            error_add(errors, "<sema>", node->line, node->col,
                      "'!' 只能用于布尔类型");
            node->type.kind = TYPE_ERROR;
        } else {
            node->type.kind = inner;
        }
        break;
    }

    case AST_BINARY_EXPR: {
        analyze_expr(node->as.binary.left, scope, errors, a);
        analyze_expr(node->as.binary.right, scope, errors, a);
        TypeKind l = node->as.binary.left->type.kind;
        TypeKind r = node->as.binary.right->type.kind;

        switch (node->as.binary.op) {
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
            if (!is_numeric(l) || !is_numeric(r)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "算术运算需要数值类型");
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = promote(l, r);
            }
            break;
        case OP_EQ: case OP_NE: case OP_LT: case OP_GT: case OP_LE: case OP_GE:
            if (!is_comparable(l, r)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "类型 '%s' 和 '%s' 无法比较", type_name(l), type_name(r));
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = TYPE_BOOL;
            }
            break;
        case OP_AND: case OP_OR:
            if (l != TYPE_BOOL || r != TYPE_BOOL) {
                error_add(errors, "<sema>", node->line, node->col,
                          "逻辑运算需要布尔类型");
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = TYPE_BOOL;
            }
            break;
        default: break;
        }
        break;
    }

    case AST_CALL_EXPR: {
        Symbol* sym = scope_lookup(scope, node->as.call.name);
        if (!sym || sym->kind != SYM_FUNCTION) {
            error_add(errors, "<sema>", node->line, node->col,
                      "未定义的函数 '%s'", node->as.call.name);
            node->type.kind = TYPE_ERROR;
            break;
        }
        if (node->as.call.arg_count != sym->param_count) {
            error_add(errors, "<sema>", node->line, node->col,
                      "函数 '%s' 需要 %zu 个参数,但提供了 %zu 个",
                      node->as.call.name, sym->param_count, node->as.call.arg_count);
            node->type.kind = TYPE_ERROR;
            break;
        }
        for (size_t i = 0; i < node->as.call.arg_count; i++) {
            analyze_expr(node->as.call.args[i], scope, errors, a);
            if (node->as.call.args[i]->type.kind != sym->param_types[i]) {
                error_add(errors, "<sema>", node->line, node->col,
                          "参数 %zu 类型不匹配: 期望 '%s',得到 '%s'",
                          i + 1, type_name(sym->param_types[i]),
                          type_name(node->as.call.args[i]->type.kind));
            }
        }
        node->type.kind = sym->return_type;
        break;
    }

    default: break;
    }
}

static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    if (!node) return;

    switch (node->kind) {
    case AST_PROGRAM:
        // 第一遍:收集所有函数签名
        for (size_t i = 0; i < node->as.program.fn_count; i++) {
            AstNode* fn = node->as.program.functions[i];
            TypeKind* pts = (TypeKind*)arena_alloc_impl(a, fn->as.function.param_count * sizeof(TypeKind));
            for (size_t j = 0; j < fn->as.function.param_count; j++) {
                pts[j] = fn->as.function.params[j]->as.parameter.type;
            }
            scope_insert_function(scope, a, fn->as.function.name,
                                  fn->as.function.return_type, pts,
                                  fn->as.function.param_count);
        }
        // 第二遍:分析每个函数体
        for (size_t i = 0; i < node->as.program.fn_count; i++) {
            analyze_node(node->as.program.functions[i], scope, errors, a);
        }
        break;

    case AST_FUNCTION: {
        Scope* fn_scope = scope_new(a, scope);
        // 注册参数
        for (size_t i = 0; i < node->as.function.param_count; i++) {
            AstNode* p = node->as.function.params[i];
            scope_insert(fn_scope, a, p->as.parameter.name, SYM_PARAMETER, p->as.parameter.type);
        }
        analyze_node(node->as.function.body, fn_scope, errors, a);
        break;
    }

    case AST_BLOCK:
        for (size_t i = 0; i < node->as.block.stmt_count; i++) {
            analyze_node(node->as.block.stmts[i], scope, errors, a);
        }
        break;

    case AST_LET_STMT: {
        analyze_expr(node->as.let_stmt.init, scope, errors, a);
        TypeKind inferred = node->as.let_stmt.init->type.kind;
        node->type.kind = inferred;
        if (!scope_insert(scope, a, node->as.let_stmt.name, SYM_VARIABLE, inferred)) {
            error_add(errors, "<sema>", node->line, node->col,
                      "变量 '%s' 重复定义", node->as.let_stmt.name);
        }
        break;
    }

    case AST_IF_STMT:
        analyze_expr(node->as.if_stmt.cond, scope, errors, a);
        if (node->as.if_stmt.cond->type.kind != TYPE_BOOL) {
            error_add(errors, "<sema>", node->line, node->col, "if 条件必须是布尔类型");
        }
        analyze_node(node->as.if_stmt.then_block, scope, errors, a);
        if (node->as.if_stmt.else_block) {
            analyze_node(node->as.if_stmt.else_block, scope, errors, a);
        }
        break;

    case AST_WHILE_STMT:
        analyze_expr(node->as.while_stmt.cond, scope, errors, a);
        if (node->as.while_stmt.cond->type.kind != TYPE_BOOL) {
            error_add(errors, "<sema>", node->line, node->col, "while 条件必须是布尔类型");
        }
        analyze_node(node->as.while_stmt.body, scope, errors, a);
        break;

    case AST_RETURN_STMT:
        if (node->as.return_stmt.expr) {
            analyze_expr(node->as.return_stmt.expr, scope, errors, a);
            node->type.kind = node->as.return_stmt.expr->type.kind;
        }
        break;

    case AST_EXPR_STMT:
        analyze_expr(node->as.expr_stmt.expr, scope, errors, a);
        break;

    default:
        analyze_expr(node, scope, errors, a);
        break;
    }
}

void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) {
    Scope* global = scope_new(arena, NULL);

    // 注册内置函数
    TypeKind params_i64[] = {TYPE_I64};
    scope_insert_function(global, arena, "print_i64", TYPE_VOID, params_i64, 1);
    TypeKind params_f64[] = {TYPE_F64};
    scope_insert_function(global, arena, "print_f64", TYPE_VOID, params_f64, 1);
    TypeKind params_bool[] = {TYPE_BOOL};
    scope_insert_function(global, arena, "print_bool", TYPE_VOID, params_bool, 1);

    analyze_node(ast, global, errors, arena);
}
  • Step 3: 编写语义分析测试 test/test_sema.c
#include "test_utils.h"
#include "parser.h"
#include "lexer.h"
#include "sema.h"
#include "arena.h"

void test_type_error() {
    Arena a = arena_create(1);
    size_t tc; ErrorInfo lex_err = {0};
    Token* toks = lex(&a, "fn main() { let x = 1; let y = x + true; return; }",
                      "test", &tc, &lex_err);
    ASSERT(toks != NULL);
    ErrorInfo parse_err = {0};
    AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
    ASSERT(ast != NULL);

    ErrorList errors; error_init(&errors);
    sema_analyze(ast, &errors, &a);
    ASSERT(errors.count > 0);
    arena_destroy(&a);
}

void test_undefined_var() {
    Arena a = arena_create(1);
    size_t tc; ErrorInfo lex_err = {0};
    Token* toks = lex(&a, "fn main() { let x = y; return; }", "test", &tc, &lex_err);
    ASSERT(toks != NULL);
    ErrorInfo parse_err = {0};
    AstNode* ast = parse(&a, toks, tc, "test", &parse_err);
    ASSERT(ast != NULL);

    ErrorList errors; error_init(&errors);
    sema_analyze(ast, &errors, &a);
    ASSERT(errors.count > 0);
    arena_destroy(&a);
}

int main(void) {
    TEST_RUN(test_type_error);
    TEST_RUN(test_undefined_var);
    return test_summary();
}
  • Step 4: 构建并运行语义测试
cd "D:\Code\doing_exercises\programs\L Language\build"
mingw32-make -j4
./l_lang_test.exe

Expected: 词法 3 + 解析 3 + 语义 2 = 8 个测试 PASS。


Task 8: LLVM 代码生成 — 基础设施和表达式

Files:

  • Create: src/codegen/codegen.h, src/codegen/codegen.c

  • Step 1: 编写 src/codegen/codegen.h

#ifndef CODEGEN_H
#define CODEGEN_H

#include "ast.h"

// 生成 LLVM Module。模块已 verify,可直接 dump 或写入文件。
// 出错时返回 NULL 并设置 *error_msg。
LLVMModuleRef codegen_module(AstNode* ast, const char* module_name,
                             const char** error_msg);

#endif
  • Step 2: 实现代码生成 src/codegen/codegen.c
#include "codegen.h"
#include <llvm-c/Core.h>
#include <llvm-c/Analysis.h>
#include <llvm-c/Types.h>
#include <string.h>
#include <stdio.h>

// === 内部状态 ===
typedef struct {
    LLVMModuleRef        module;
    LLVMBuilderRef       builder;
    // 符号表:变量名 → alloca 地址
    struct VarEntry {
        const char*    name;
        LLVMValueRef   alloca;
        struct VarEntry* next;
    } *var_table;
    const char*          error;
    // 内置函数声明
    LLVMValueRef         fn_print_i64;
    LLVMValueRef         fn_print_f64;
    LLVMValueRef         fn_print_bool;
    // 已声明的所有 L 函数(名称→LLVMValue
    struct FnEntry {
        const char*    name;
        LLVMValueRef   fn;
        TypeKind       ret;
        TypeKind*      params;
        size_t         pc;
        struct FnEntry* next;
    } *fn_table;
} CgCtx;

// === 类型映射 ===
static LLVMTypeRef to_llvm_type(TypeKind kind) {
    switch (kind) {
        case TYPE_I64:  return LLVMInt64Type();
        case TYPE_F64:  return LLVMDoubleType();
        case TYPE_BOOL: return LLVMInt1Type();
        default:        return LLVMVoidType();
    }
}

static LLVMValueRef to_llvm_const(LLVMTypeRef ty, AstNode* lit) {
    switch (lit->as.literal.lit_type) {
        case TYPE_I64:  return LLVMConstInt(ty, (unsigned long long)lit->as.literal.i64_val, true);
        case TYPE_F64:  return LLVMConstReal(ty, lit->as.literal.f64_val);
        case TYPE_BOOL: return LLVMConstInt(ty, lit->as.literal.bool_val ? 1 : 0, false);
        default:        return NULL;
    }
}

// === 变量表 ===
static LLVMValueRef find_var(CgCtx* ctx, const char* name) {
    for (struct VarEntry* e = ctx->var_table; e; e = e->next)
        if (strcmp(e->name, name) == 0) return e->alloca;
    return NULL;
}

static void add_var(CgCtx* ctx, const char* name, LLVMValueRef alloca) {
    struct VarEntry* e = malloc(sizeof(*e));
    e->name = name; e->alloca = alloca; e->next = ctx->var_table;
    ctx->var_table = e;
}

// === 函数表 ===
static LLVMValueRef find_fn(CgCtx* ctx, const char* name) {
    for (struct FnEntry* e = ctx->fn_table; e; e = e->next)
        if (strcmp(e->name, name) == 0) return e->fn;
    return NULL;
}

// === 向前声明 ===
static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node);
static void codegen_stmt(CgCtx* ctx, AstNode* node);

// === 注册内置函数 ===
static void register_builtins(CgCtx* ctx) {
    // print_i64
    LLVMTypeRef pi64_args[] = {LLVMInt64Type()};
    LLVMTypeRef pi64_ty = LLVMFunctionType(LLVMVoidType(), pi64_args, 1, false);
    LLVMValueRef pi64 = LLVMAddFunction(ctx->module, "__builtin_print_i64", pi64_ty);

    // print_f64
    LLVMTypeRef pf64_args[] = {LLVMDoubleType()};
    LLVMTypeRef pf64_ty = LLVMFunctionType(LLVMVoidType(), pf64_args, 1, false);
    LLVMValueRef pf64 = LLVMAddFunction(ctx->module, "__builtin_print_f64", pf64_ty);

    // print_bool
    LLVMTypeRef pb_args[] = {LLVMInt1Type()};
    LLVMTypeRef pb_ty = LLVMFunctionType(LLVMVoidType(), pb_args, 1, false);
    LLVMValueRef pb = LLVMAddFunction(ctx->module, "__builtin_print_bool", pb_ty);

    ctx->fn_print_i64 = pi64;
    ctx->fn_print_f64 = pf64;
    ctx->fn_print_bool = pb;
}

// === 表达式代码生成 ===
static LLVMValueRef codegen_expr(CgCtx* ctx, AstNode* node) {
    switch (node->kind) {
    case AST_LITERAL_EXPR:
        return to_llvm_const(to_llvm_type(node->type.kind), node);

    case AST_IDENT_EXPR: {
        LLVMValueRef ptr = find_var(ctx, node->as.ident.name);
        return LLVMBuildLoad2(ctx->builder, to_llvm_type(node->type.kind), ptr, "load");
    }

    case AST_UNARY_EXPR: {
        LLVMValueRef operand = codegen_expr(ctx, node->as.unary.operand);
        if (node->as.unary.op == OP_NEG) {
            if (node->type.kind == TYPE_F64)
                return LLVMBuildFNeg(ctx->builder, operand, "fneg");
            else
                return LLVMBuildNeg(ctx->builder, operand, "ineg");
        } else {
            return LLVMBuildNot(ctx->builder, operand, "not");
        }
    }

    case AST_BINARY_EXPR: {
        LLVMValueRef l = codegen_expr(ctx, node->as.binary.left);
        LLVMValueRef r = codegen_expr(ctx, node->as.binary.right);
        bool is_float = node->type.kind == TYPE_F64;
        #define B(op_name, iop, fop) \
            if (is_float) return LLVMBuild##fop(ctx->builder, l, r, op_name); \
            else return LLVMBuild##iop(ctx->builder, l, r, op_name)

        switch (node->as.binary.op) {
            case OP_ADD: B("add",  Add,  FAdd);
            case OP_SUB: B("sub",  Sub,  FSub);
            case OP_MUL: B("mul",  Mul,  FMul);
            case OP_DIV:
                if (is_float) return LLVMBuildFDiv(ctx->builder, l, r, "fdiv");
                else return LLVMBuildSDiv(ctx->builder, l, r, "sdiv");
            case OP_MOD:
                return LLVMBuildSRem(ctx->builder, l, r, "srem");
            case OP_EQ:  B("eq",  ICmp, FCmp);
            case OP_NE:  B("ne",  ICmp, FCmp);
            case OP_LT:  B("lt",  ICmp, FCmp);
            case OP_GT:  B("gt",  ICmp, FCmp);
            case OP_LE:  B("le",  ICmp, FCmp);
            case OP_GE:  B("ge",  ICmp, FCmp);
            case OP_AND:
                return LLVMBuildAnd(ctx->builder, l, r, "and");
            case OP_OR:
                return LLVMBuildOr(ctx->builder, l, r, "or");
            default: return NULL;
        }
        #undef B
    }

    case AST_CALL_EXPR: {
        LLVMValueRef fn = find_fn(ctx, node->as.call.name);
        LLVMValueRef args[32];
        for (size_t i = 0; i < node->as.call.arg_count; i++) {
            args[i] = codegen_expr(ctx, node->as.call.args[i]);
        }
        return LLVMBuildCall2(ctx->builder,
            LLVMGetReturnType(LLVMTypeOf(fn)) == LLVMVoidType()
                ? LLVMVoidType() : LLVMGlobalGetValueType(fn),
            fn, args, (unsigned)node->as.call.arg_count, "call");
    }

    default: return NULL;
    }
}

// === 语句代码生成 ===
static void codegen_stmt(CgCtx* ctx, AstNode* node) {
    switch (node->kind) {
    case AST_LET_STMT: {
        LLVMValueRef init_val = codegen_expr(ctx, node->as.let_stmt.init);
        LLVMValueRef alloca = LLVMBuildAlloca(ctx->builder,
            to_llvm_type(node->type.kind), node->as.let_stmt.name);
        LLVMBuildStore(ctx->builder, init_val, alloca);
        add_var(ctx, node->as.let_stmt.name, alloca);
        break;
    }

    case AST_EXPR_STMT:
        codegen_expr(ctx, node->as.expr_stmt.expr);
        break;

    case AST_RETURN_STMT:
        if (node->as.return_stmt.expr) {
            LLVMValueRef val = codegen_expr(ctx, node->as.return_stmt.expr);
            LLVMBuildRet(ctx->builder, val);
        } else {
            LLVMBuildRetVoid(ctx->builder);
        }
        break;

    case AST_BLOCK:
        for (size_t i = 0; i < node->as.block.stmt_count; i++) {
            codegen_stmt(ctx, node->as.block.stmts[i]);
        }
        break;

    case AST_IF_STMT: {
        LLVMValueRef cond = codegen_expr(ctx, node->as.if_stmt.cond);
        LLVMBasicBlockRef then_bb = LLVMAppendBasicBlock(
            LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "then");
        LLVMBasicBlockRef else_bb = node->as.if_stmt.else_block
            ? LLVMAppendBasicBlock(LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "else")
            : NULL;
        LLVMBasicBlockRef merge_bb = LLVMAppendBasicBlock(
            LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "if_merge");

        if (else_bb)
            LLVMBuildCondBr(ctx->builder, cond, then_bb, else_bb);
        else
            LLVMBuildCondBr(ctx->builder, cond, then_bb, merge_bb);

        LLVMPositionBuilderAtEnd(ctx->builder, then_bb);
        codegen_stmt(ctx, node->as.if_stmt.then_block);
        LLVMBuildBr(ctx->builder, merge_bb);

        if (else_bb) {
            LLVMPositionBuilderAtEnd(ctx->builder, else_bb);
            codegen_stmt(ctx, node->as.if_stmt.else_block);
            LLVMBuildBr(ctx->builder, merge_bb);
        }

        LLVMPositionBuilderAtEnd(ctx->builder, merge_bb);
        break;
    }

    case AST_WHILE_STMT: {
        LLVMBasicBlockRef cond_bb = LLVMAppendBasicBlock(
            LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "while_cond");
        LLVMBasicBlockRef body_bb = LLVMAppendBasicBlock(
            LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "while_body");
        LLVMBasicBlockRef exit_bb = LLVMAppendBasicBlock(
            LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)), "while_exit");

        LLVMBuildBr(ctx->builder, cond_bb);
        LLVMPositionBuilderAtEnd(ctx->builder, cond_bb);
        LLVMValueRef cond = codegen_expr(ctx, node->as.while_stmt.cond);
        LLVMBuildCondBr(ctx->builder, cond, body_bb, exit_bb);

        LLVMPositionBuilderAtEnd(ctx->builder, body_bb);
        codegen_stmt(ctx, node->as.while_stmt.body);
        LLVMBuildBr(ctx->builder, cond_bb);

        LLVMPositionBuilderAtEnd(ctx->builder, exit_bb);
        break;
    }

    default: break;
    }
}

// === 程序级代码生成 ===
LLVMModuleRef codegen_module(AstNode* ast, const char* name, const char** error_msg) {
    CgCtx ctx = {0};
    ctx.module = LLVMModuleCreateWithName(name);
    ctx.builder = LLVMCreateBuilder();

    register_builtins(&ctx);

    // 第一遍:声明所有 L 函数
    for (size_t i = 0; i < ast->as.program.fn_count; i++) {
        AstNode* fn = ast->as.program.functions[i];
        LLVMTypeRef* ptypes = malloc(fn->as.function.param_count * sizeof(LLVMTypeRef));
        for (size_t j = 0; j < fn->as.function.param_count; j++)
            ptypes[j] = to_llvm_type(fn->as.function.params[j]->as.parameter.type);
        LLVMTypeRef fty = LLVMFunctionType(to_llvm_type(fn->as.function.return_type),
                                           ptypes, (unsigned)fn->as.function.param_count, false);
        LLVMValueRef lfn = LLVMAddFunction(ctx.module, fn->as.function.name, fty);
        struct FnEntry* entry = malloc(sizeof(*entry));
        entry->name = fn->as.function.name; entry->fn = lfn;
        entry->ret = fn->as.function.return_type;
        entry->next = ctx.fn_table;
        ctx.fn_table = entry;
        free(ptypes);
    }

    // 第二遍:生成各函数体
    for (size_t i = 0; i < ast->as.program.fn_count; i++) {
        AstNode* fn = ast->as.program.functions[i];
        LLVMValueRef lfn = find_fn(&ctx, fn->as.function.name);
        LLVMBasicBlockRef entry = LLVMAppendBasicBlock(lfn, "entry");
        LLVMPositionBuilderAtEnd(ctx.builder, entry);

        // 清空变量表
        ctx.var_table = NULL;

        // 注册参数变量
        for (size_t j = 0; j < fn->as.function.param_count; j++) {
            LLVMValueRef param = LLVMGetParam(lfn, (unsigned)j);
            LLVMValueRef alloca = LLVMBuildAlloca(ctx.builder,
                to_llvm_type(fn->as.function.params[j]->as.parameter.type),
                fn->as.function.params[j]->as.parameter.name);
            LLVMBuildStore(ctx.builder, param, alloca);
            add_var(&ctx, fn->as.function.params[j]->as.parameter.name, alloca);
        }

        codegen_stmt(&ctx, fn->as.function.body);
    }

    // 验证模块
    char* verify_err = NULL;
    LLVMVerifyModule(ctx.module, LLVMPrintMessageAction, &verify_err);
    if (verify_err) {
        *error_msg = verify_err;
        LLVMDisposeBuilder(ctx.builder);
        LLVMDisposeModule(ctx.module);
        return NULL;
    }

    LLVMDisposeBuilder(ctx.builder);
    return ctx.module;
}
  • Step 3: 更新 CMakeLists.txt 以链接 LLVM 所有必要库

  • Step 4: 更新 CMakeLists.txt 以链接 LLVM 所有必要库

# 替换原先的 LLVM 链接为:
llvm_map_components_to_libnames(LLVM_LIBS core analysis native)
target_link_libraries(l_lang ${LLVM_LIBS})
target_link_libraries(l_lang_test ${LLVM_LIBS})
  • Step 5: 构建验证
cd "D:\Code\doing_exercises\programs\L Language\build"
cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:\settings\Language\LLVM"
mingw32-make -j4

Expected: 编译通过。


Task 9: 驱动程序 — 入口和命令行

Files:

  • Create: src/driver/main.c

  • Step 1: 编写 src/driver/main.c

#include "l_lang.h"
#include "lexer.h"
#include "parser.h"
#include "sema.h"
#include "codegen.h"
#include "error.h"
#include "arena.h"

#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Target.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// 读取整个文件到内存
static char* read_file(const char* path, size_t* size) {
    FILE* f = fopen(path, "rb");
    if (!f) { fprintf(stderr, "无法打开文件: %s\n", path); return NULL; }
    fseek(f, 0, SEEK_END);
    *size = ftell(f);
    fseek(f, 0, SEEK_SET);
    char* buf = malloc(*size + 1);
    fread(buf, 1, *size, f);
    buf[*size] = '\0';
    fclose(f);
    return buf;
}

// 写入字符串到文件
static bool write_file(const char* path, const char* data) {
    FILE* f = fopen(path, "w");
    if (!f) return false;
    fputs(data, f);
    fclose(f);
    return true;
}

int main(int argc, char** argv) {
    const char* input = NULL;
    const char* output = "a.exe";
    bool emit_ir = false;

    // 解析命令行参数
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--emit-ir") == 0) { emit_ir = true; }
        else if (strcmp(argv[i], "-o") == 0 && i + 1 < argc) { output = argv[++i]; }
        else if (argv[i][0] != '-') { input = argv[i]; }
    }

    if (!input) {
        fprintf(stderr, "用法: l_lang <文件.l> [-o <输出>] [--emit-ir]\n");
        return 1;
    }

    // 1. 读取源文件
    size_t src_size;
    char* source = read_file(input, &src_size);
    if (!source) return 1;

    // 2. 初始化
    Arena arena = arena_create(8);  // 8 MB
    ErrorInfo error = {0};
    ErrorList error_list; error_init(&error_list);

    // 3. 词法分析
    size_t token_count;
    Token* tokens = lex(&arena, source, input, &token_count, &error);
    if (!tokens) {
        fprintf(stderr, "词法错误: %s:%d:%d: %s\n",
                error.filename, error.line, error.col, error.message);
        free(source); arena_destroy(&arena);
        return 1;
    }

    // 4. 语法分析
    AstNode* ast = parse(&arena, tokens, token_count, input, &error);
    if (!ast) {
        fprintf(stderr, "语法错误: %s:%d:%d: %s\n",
                error.filename, error.line, error.col, error.message);
        free(source); arena_destroy(&arena);
        return 1;
    }

    // 5. 语义分析
    sema_analyze(ast, &error_list, &arena);
    if (error_list.count > 0) {
        error_print(&error_list);
        free(source); arena_destroy(&arena);
        return 1;
    }

    // 6. LLVM IR 生成
    const char* codegen_error = NULL;
    LLVMModuleRef module = codegen_module(ast, "l_module", &codegen_error);
    if (!module) {
        fprintf(stderr, "IR 生成错误: %s\n", codegen_error);
        free(source); arena_destroy(&arena);
        return 1;
    }

    if (emit_ir) {
        // 输出 LLVM IR 文本
        char* ir = LLVMPrintModuleToString(module);
        char ir_path[512];
        snprintf(ir_path, sizeof(ir_path), "%s.ll", input);
        write_file(ir_path, ir);
        printf("IR 已输出到: %s\n", ir_path);
        LLVMDisposeMessage(ir);
    } else {
        // 生成目标文件和可执行文件
        LLVMInitializeNativeTarget();
        LLVMInitializeNativeAsmPrinter();

        char* triple = LLVMGetDefaultTargetTriple();
        LLVMTargetRef target;
        char* target_error = NULL;
        if (LLVMGetTargetFromTriple(triple, &target, &target_error)) {
            fprintf(stderr, "目标平台错误: %s\n", target_error);
            LLVMDisposeMessage(target_error); LLVMDisposeMessage(triple);
            free(source); arena_destroy(&arena); LLVMDisposeModule(module);
            return 1;
        }

        LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
            target, triple, "generic", "",
            LLVMCodeGenLevelDefault, LLVMRelocDefault,
            LLVMCodeModelDefault);
        LLVMDisposeMessage(triple);

        // 输出目标文件
        char obj_path[512];
        snprintf(obj_path, sizeof(obj_path), "%s.o", input);
        char* obj_error = NULL;
        if (LLVMTargetMachineEmitToFile(tm, module, obj_path,
                                        LLVMObjectFile, &obj_error)) {
            fprintf(stderr, "目标代码生成错误: %s\n", obj_error);
            LLVMDisposeMessage(obj_error);
            free(source); arena_destroy(&arena);
            LLVMDisposeTargetMachine(tm); LLVMDisposeModule(module);
            return 1;
        }

        // 调用 clang 链接
        char cmd[1024];
        snprintf(cmd, sizeof(cmd),
                 "\"D:\\settings\\Language\\LLVM\\bin\\clang.exe\" \"%s\" -o \"%s\" -fuse-ld=lld",
                 obj_path, output);
        int ret = system(cmd);
        if (ret != 0) {
            fprintf(stderr, "链接失败 (exit code %d)\n", ret);
        } else {
            printf("编译成功: %s\n", output);
        }
        LLVMDisposeTargetMachine(tm);
    }

    // 清理
    LLVMDisposeModule(module);
    free(source);
    arena_destroy(&arena);
    return 0;
}
  • Step 2: 编写第一个测试程序 test/programs/01_arithmetic.l
fn main() -> i64 {
    let x = 1 + 2 * 3;
    print_i64(x);
    return 0;
}
  • Step 3: 构建并端到端测试
cd "D:\Code\doing_exercises\programs\L Language\build"
mingw32-make -j4
./l_lang.exe ../test/programs/01_arithmetic.l -o test_out.exe
./test_out.exe

Expected: 输出 7


Task 10: 集成测试 — 全部语言特性

Files:

  • Create: test/programs/02_if_else.l

  • Create: test/programs/03_while.l

  • Create: test/programs/04_fib_recursive.l

  • Create: test/programs/05_fib_iterative.l

  • Step 1: test/programs/02_if_else.l — if/else 控制流

fn main() -> i64 {
    let x = 10;
    if x > 5 {
        print_i64(1);
    } else {
        print_i64(0);
    }
    return 0;
}

Expected output: 1

  • Step 2: test/programs/03_while.l — while 循环 + 比较运算
fn countdown(n: i64) -> i64 {
    let remaining = n;
    if remaining > 0 {
        print_i64(remaining);
        return countdown(remaining - 1);
    }
    return 0;
}

fn main() -> i64 {
    print_i64(100);
    return countdown(5);
}

Expected output: 100 5 4 3 2 1(每行一个,测试函数递归 + if/else)

  • Step 3: test/programs/04_fib_recursive.l — 斐波那契递归
fn fib(n: i64) -> i64 {
    if n < 2 {
        return n;
    }
    return fib(n - 1) + fib(n - 2);
}

fn main() -> i64 {
    let result = fib(10);
    print_i64(result);
    return 0;
}

Expected output: 55

  • Step 4: test/programs/05_float.l — 浮点运算 + 多函数
fn square(x: f64) -> f64 {
    return x * x;
}

fn add_floats(a: f64, b: f64) -> f64 {
    return a + b;
}

fn main() -> i64 {
    let s = square(3.0);
    let sum = add_floats(s, 4.0);
    print_f64(sum);
    return 0;
}

Expected output: 13.0000003.0² + 4.0 = 13.0,测试浮点运算 + 多函数调用)

  • Step 5: 批量运行所有集成测试
cd "D:\Code\doing_exercises\programs\L Language"
for f in test/programs/01_arithmetic.l test/programs/02_if_else.l \
         test/programs/03_while.l test/programs/04_fib_recursive.l \
         test/programs/05_float.l; do
    echo "=== $f ==="
    ./build/l_lang.exe "$f" -o "./build/test_out.exe" 2>&1 && ./build/test_out.exe 2>&1
    echo ""
done

Expected: 5 个程序全部编译运行,输出正确——四则运算 7if/else 1,递归 5 4 3 2 1,斐波那契 55,浮点 13.000000


Task 11: 完成和文档

Files:

  • Create: README.md

  • Step 1: 编写 README.md

# L Language

一门用 C 语言实现的编译型编程语言,静态类型 + 类型推断,Rust 风格语法。

## 构建

```bash
mkdir build && cd build
cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:/settings/Language/LLVM"
mingw32-make -j4

使用

./l_lang.exe <源文件.l> [-o <输出.exe>] [--emit-ir]

语言特性 (v0.1)

  • 类型: i64, f64, bool, void
  • 控制流: if/else, while
  • 函数: 递归 + 多参数
  • 变量: let 不可变声明,类型推断
  • 内置函数: print_i64, print_f64, print_bool

示例

fn fib(n: i64) -> i64 {
    if n < 2 { return n; }
    return fib(n - 1) + fib(n - 2);
}

fn main() -> i64 {
    print_i64(fib(10));
    return 0;
}

- [ ] **Step 2: 最终验证**

```bash
cd "D:\Code\doing_exercises\programs\L Language\build"
# 清理重建
rm -rf *
cmake .. -G "MinGW Makefiles" -DCMAKE_PREFIX_PATH="D:\settings\Language\LLVM"
mingw32-make -j4
# 运行单元测试
./l_lang_test.exe
# 运行集成测试
./l_lang.exe ../test/programs/04_fib_recursive.l -o fib.exe && ./fib.exe

Expected:

  • l_lang_test.exe 8 个测试全部 PASS
  • fib.exe 输出 55
  • 5 个集成测试全部通过:算术 ✓ | if/else ✓ | 递归 ✓ | 斐波那契 ✓ | 浮点多函数 ✓

依赖关系图

Task 1 (CMake + 骨架)
  └─ Task 2 (Token 数据结构)
       └─ Task 3 (词法分析器)
            └─ Task 4 (AST 数据结构)
                 ├─ Task 5 (解析器)
                 │    ├─ Task 6 (符号表)
                 │    └─ Task 7 (语义分析)
                 ├─ Task 8 (代码生成)
                 └─ Task 9 (驱动)
                      └─ Task 10 (集成测试)
                           └─ Task 11 (文档)

Tasks 1-4 严格顺序依赖。Task 5 和 Task 6 可并行(Task 5 依赖 4Task 6 独立)。Task 7 依赖 5+6。Task 8 依赖 Task 4(可独立于 5/6/7 先用硬编码 AST 测试 LLVM API)。Task 9 依赖全部前置任务。

积压问题(已知,v0.2 解决)

  1. 变量不可重新赋值 — v0.1 的 let 变量是不可变的,无法做 x = x + 1 这类赋值。因此 while 循环体无法修改循环变量。迭代算法需要改用递归实现。
  2. 作用域泄漏 — 变量表使用链表实现,离开作用域后未清理。