feat: L Language v0.1 编译器完整实现
5 阶段编译流水线: 词法分析 → 语法分析(Pratt) → 语义分析(类型推断) → LLVM IR → .exe 模块: - lexer: 手写状态机, 40 种 Token, // 和 /* */ 注释 - parser: Pratt 表达式解析(9 级优先级) + 递归下降语句/函数 - ast: 14 种节点类型 + 工厂函数 - sema: 作用域链符号表 + 类型推断 + 类型检查 - codegen: AST → LLVM-C API, print_i64/f64/bool 内建 - driver: 命令行 + 流水线串联 + 错误报告 - util: Arena bump allocator (8MB) 测试: 65 单元测试(词法41+语法15+语义9) + 5 集成测试 全部通过 语言特性: i64/f64/bool/void, let不可变变量, if/else, while, 递归函数
This commit is contained in:
+264
@@ -0,0 +1,264 @@
|
||||
#include "sema.h"
|
||||
#include <string.h>
|
||||
|
||||
// === 类型关系 ===
|
||||
// Picks the result type of combining two operand types.
// Precedence is f64 > i64 > bool: the first of these that matches either
// operand wins. Returns TYPE_ERROR when neither operand is one of the three.
static TypeKind promote(TypeKind a, TypeKind b) {
    static const TypeKind by_precedence[] = {TYPE_F64, TYPE_I64, TYPE_BOOL};
    const size_t count = sizeof by_precedence / sizeof by_precedence[0];

    for (size_t idx = 0; idx < count; idx++) {
        TypeKind candidate = by_precedence[idx];
        if (a == candidate || b == candidate) {
            return candidate;
        }
    }
    return TYPE_ERROR;
}
|
||||
|
||||
// True when `t` is one of the arithmetic types (i64 or f64).
static bool is_numeric(TypeKind t) {
    switch (t) {
        case TYPE_I64:
        case TYPE_F64:
            return true;
        default:
            return false;
    }
}
|
||||
// Reports whether two operand types may appear on either side of a
// comparison operator (==, !=, <, >, <=, >=).
// Operands must have the same type. `void` is rejected explicitly: a void
// expression carries no value, so `f() == g()` on two void calls must not
// be typed as bool.
static bool is_comparable(TypeKind a, TypeKind b) {
    if (a == TYPE_VOID || b == TYPE_VOID) {
        return false;
    }
    return a == b;
}
|
||||
|
||||
// === 向前声明 ===
|
||||
// Forward declaration: analyze_node is defined further down, after analyze_expr.
static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a);
|
||||
|
||||
// === 检查表达式 ===
|
||||
static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a);

// Resolves an identifier and copies the symbol's type onto the node.
// Unknown names and function names used as values are reported and typed
// as TYPE_ERROR.
static void check_ident_expr(AstNode* node, Scope* scope, ErrorList* errors) {
    Symbol* sym = scope_lookup(scope, node->as.ident.name);
    if (!sym) {
        error_add(errors, "<sema>", node->line, node->col,
                  "未定义的变量 '%s'", node->as.ident.name);
        node->type.kind = TYPE_ERROR;
        return;
    }
    if (sym->kind == SYM_FUNCTION) {
        error_add(errors, "<sema>", node->line, node->col,
                  "'%s' 是函数,不能作为表达式使用", node->as.ident.name);
        node->type.kind = TYPE_ERROR;
        return;
    }
    node->type.kind = sym->type;
}

// Types a unary expression: '-' needs a numeric operand and keeps its type,
// any other operator is treated as '!' and needs a bool operand.
static void check_unary_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    analyze_expr(node->as.unary.operand, scope, errors, a);
    TypeKind inner = node->as.unary.operand->type.kind;

    // The operand has already been diagnosed; propagate the error type
    // silently instead of stacking a second, misleading message on top.
    // This mirrors what the binary-operator path does.
    if (inner == TYPE_ERROR) {
        node->type.kind = TYPE_ERROR;
        return;
    }

    if (node->as.unary.op == OP_NEG) {
        if (!is_numeric(inner)) {
            error_add(errors, "<sema>", node->line, node->col,
                      "一元 '-' 只能用于数值类型");
            node->type.kind = TYPE_ERROR;
        } else {
            node->type.kind = inner;
        }
    } else { // OP_NOT
        if (inner != TYPE_BOOL) {
            error_add(errors, "<sema>", node->line, node->col,
                      "'!' 只能用于布尔类型,得到 '%s'", type_name(inner));
            node->type.kind = TYPE_ERROR;
        } else {
            node->type.kind = TYPE_BOOL;
        }
    }
}

// Types a binary expression. Both operands are analyzed first; if either is
// already TYPE_ERROR the node becomes TYPE_ERROR without a new diagnostic.
static void check_binary_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    analyze_expr(node->as.binary.left, scope, errors, a);
    analyze_expr(node->as.binary.right, scope, errors, a);
    TypeKind l = node->as.binary.left->type.kind;
    TypeKind r = node->as.binary.right->type.kind;
    if (l == TYPE_ERROR || r == TYPE_ERROR) {
        node->type.kind = TYPE_ERROR;
        return;
    }

    switch (node->as.binary.op) {
        // Arithmetic: both sides numeric, result is the promoted type.
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
            if (!is_numeric(l) || !is_numeric(r)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "算术运算需要数值类型");
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = promote(l, r);
            }
            break;

        // Comparison: operand types must be comparable, result is bool.
        case OP_EQ: case OP_NE: case OP_LT: case OP_GT: case OP_LE: case OP_GE:
            if (!is_comparable(l, r)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "类型 '%s' 和 '%s' 无法比较", type_name(l), type_name(r));
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = TYPE_BOOL;
            }
            break;

        // Logical: both sides bool, result is bool.
        case OP_AND: case OP_OR:
            if (l != TYPE_BOOL || r != TYPE_BOOL) {
                error_add(errors, "<sema>", node->line, node->col,
                          "逻辑运算需要布尔类型");
                node->type.kind = TYPE_ERROR;
            } else {
                node->type.kind = TYPE_BOOL;
            }
            break;

        default:
            break;
    }
}

// Analyzes every argument of a call without checking it against a signature.
// Used on the error paths so that problems inside arguments are still reported.
static void analyze_call_args(AstNode* call, Scope* scope, ErrorList* errors, Arena* a) {
    for (size_t i = 0; i < call->as.call.arg_count; i++) {
        analyze_expr(call->as.call.args[i], scope, errors, a);
    }
}

// Types a call expression: the callee must resolve to a function symbol,
// the argument count must match, and each argument type must equal the
// declared parameter type. On success the node takes the function's return type.
static void check_call_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    Symbol* sym = scope_lookup(scope, node->as.call.name);
    if (!sym || sym->kind != SYM_FUNCTION) {
        error_add(errors, "<sema>", node->line, node->col,
                  "未定义的函数 '%s'", node->as.call.name);
        node->type.kind = TYPE_ERROR;
        // Still analyze the arguments: they may contain further errors.
        analyze_call_args(node, scope, errors, a);
        return;
    }

    if (node->as.call.arg_count != sym->param_count) {
        error_add(errors, "<sema>", node->line, node->col,
                  "函数 '%s' 需要 %zu 个参数,但提供了 %zu 个",
                  node->as.call.name, sym->param_count, node->as.call.arg_count);
        node->type.kind = TYPE_ERROR;
        // Still analyze the arguments that were supplied.
        analyze_call_args(node, scope, errors, a);
        return;
    }

    for (size_t i = 0; i < node->as.call.arg_count; i++) {
        analyze_expr(node->as.call.args[i], scope, errors, a);
        TypeKind actual = node->as.call.args[i]->type.kind;
        TypeKind expected = sym->param_types[i];
        // An argument that is already TYPE_ERROR has been reported; skip it.
        if (actual != TYPE_ERROR && actual != expected) {
            error_add(errors, "<sema>", node->line, node->col,
                      "参数 %zu 类型不匹配: 期望 '%s',得到 '%s'",
                      i + 1, type_name(expected), type_name(actual));
        }
    }
    node->type.kind = sym->return_type;
}

// Type-checks an expression node and stores the resulting type in
// node->type.kind. Diagnostics are appended to `errors`; a node whose type
// could not be determined is marked TYPE_ERROR.
// Node kinds not listed in the switch are left untouched.
static void analyze_expr(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    switch (node->kind) {
        case AST_LITERAL_EXPR:
            break; // type was already set when the literal node was created

        case AST_IDENT_EXPR:
            check_ident_expr(node, scope, errors);
            break;

        case AST_UNARY_EXPR:
            check_unary_expr(node, scope, errors, a);
            break;

        case AST_BINARY_EXPR:
            check_binary_expr(node, scope, errors, a);
            break;

        case AST_CALL_EXPR:
            check_call_expr(node, scope, errors, a);
            break;

        default:
            break;
    }
}
|
||||
|
||||
// Worker behind analyze_node. `fn` is the AST_FUNCTION node whose body is
// currently being analyzed, or NULL outside any function; it is what lets
// `return` statements be checked against the declared return type.
static void analyze_in_function(AstNode* node, Scope* scope, ErrorList* errors, Arena* a,
                                const AstNode* fn) {
    if (!node) return;

    switch (node->kind) {
        case AST_PROGRAM:
            // Pass 1: collect every function signature so that calls can
            // refer to functions defined later in the file.
            // NOTE(review): redefinition of an existing function name is not
            // diagnosed here — confirm what scope_insert_function does on a clash.
            for (size_t i = 0; i < node->as.program.fn_count; i++) {
                AstNode* decl = node->as.program.functions[i];
                TypeKind* pts = (TypeKind*)arena_alloc_impl(a, decl->as.function.param_count * sizeof(TypeKind));
                for (size_t j = 0; j < decl->as.function.param_count; j++) {
                    pts[j] = decl->as.function.params[j]->as.parameter.type;
                }
                scope_insert_function(scope, a, decl->as.function.name,
                                      decl->as.function.return_type, pts,
                                      decl->as.function.param_count);
            }
            // Pass 2: analyze each function body.
            for (size_t i = 0; i < node->as.program.fn_count; i++) {
                analyze_in_function(node->as.program.functions[i], scope, errors, a, NULL);
            }
            break;

        case AST_FUNCTION: {
            Scope* fn_scope = scope_new(a, scope);
            // Register the parameters in the function's own scope. A failed
            // insert is reported the same way the `let` case reports one.
            for (size_t i = 0; i < node->as.function.param_count; i++) {
                AstNode* p = node->as.function.params[i];
                if (!scope_insert(fn_scope, a, p->as.parameter.name, SYM_PARAMETER, p->as.parameter.type)) {
                    error_add(errors, "<sema>", p->line, p->col,
                              "参数 '%s' 重复定义", p->as.parameter.name);
                }
            }
            analyze_in_function(node->as.function.body, fn_scope, errors, a, node);
            break;
        }

        case AST_BLOCK:
            // NOTE(review): statements run in the scope passed in; a block does
            // not open a nested scope of its own — confirm this is intended.
            for (size_t i = 0; i < node->as.block.stmt_count; i++) {
                analyze_in_function(node->as.block.stmts[i], scope, errors, a, fn);
            }
            break;

        case AST_LET_STMT: {
            analyze_expr(node->as.let_stmt.init, scope, errors, a);
            TypeKind inferred = node->as.let_stmt.init->type.kind;
            TypeKind var_type;

            if (node->as.let_stmt.has_type_annot) {
                // The explicit annotation wins; the initializer must agree with it.
                var_type = node->as.let_stmt.annot_type;
                if (inferred != TYPE_ERROR && inferred != var_type) {
                    error_add(errors, "<sema>", node->line, node->col,
                              "变量 '%s' 类型标注为 '%s',但初始化表达式类型为 '%s'",
                              node->as.let_stmt.name, type_name(var_type), type_name(inferred));
                }
            } else {
                // No annotation: take the initializer's type. An error or void
                // initializer cannot give the variable a type, so the variable
                // is not registered at all.
                if (inferred == TYPE_ERROR || inferred == TYPE_VOID) {
                    error_add(errors, "<sema>", node->line, node->col,
                              "无法从表达式推断变量 '%s' 的类型", node->as.let_stmt.name);
                    break;
                }
                var_type = inferred;
            }

            node->type.kind = var_type;
            if (!scope_insert(scope, a, node->as.let_stmt.name, SYM_VARIABLE, var_type)) {
                error_add(errors, "<sema>", node->line, node->col,
                          "变量 '%s' 重复定义", node->as.let_stmt.name);
            }
            break;
        }

        case AST_IF_STMT:
            analyze_expr(node->as.if_stmt.cond, scope, errors, a);
            // A condition that is already TYPE_ERROR has been reported.
            if (node->as.if_stmt.cond->type.kind != TYPE_BOOL &&
                node->as.if_stmt.cond->type.kind != TYPE_ERROR) {
                error_add(errors, "<sema>", node->line, node->col, "if 条件必须是布尔类型");
            }
            analyze_in_function(node->as.if_stmt.then_block, scope, errors, a, fn);
            if (node->as.if_stmt.else_block) {
                analyze_in_function(node->as.if_stmt.else_block, scope, errors, a, fn);
            }
            break;

        case AST_WHILE_STMT:
            analyze_expr(node->as.while_stmt.cond, scope, errors, a);
            // A condition that is already TYPE_ERROR has been reported.
            if (node->as.while_stmt.cond->type.kind != TYPE_BOOL &&
                node->as.while_stmt.cond->type.kind != TYPE_ERROR) {
                error_add(errors, "<sema>", node->line, node->col, "while 条件必须是布尔类型");
            }
            analyze_in_function(node->as.while_stmt.body, scope, errors, a, fn);
            break;

        case AST_RETURN_STMT: {
            // A bare `return` yields no value, which is treated as void.
            TypeKind returned = TYPE_VOID;
            if (node->as.return_stmt.expr) {
                analyze_expr(node->as.return_stmt.expr, scope, errors, a);
                returned = node->as.return_stmt.expr->type.kind;
                node->type.kind = returned;
            }
            // The returned type must equal the declared return type exactly,
            // the same rule applied to call arguments and annotated `let`.
            // An expression that is already TYPE_ERROR has been reported.
            if (fn && returned != TYPE_ERROR && returned != fn->as.function.return_type) {
                error_add(errors, "<sema>", node->line, node->col,
                          "函数 '%s' 返回类型不匹配: 期望 '%s',得到 '%s'",
                          fn->as.function.name,
                          type_name(fn->as.function.return_type), type_name(returned));
            }
            break;
        }

        case AST_EXPR_STMT:
            analyze_expr(node->as.expr_stmt.expr, scope, errors, a);
            break;

        default:
            // Anything else is handed to the expression analyzer.
            analyze_expr(node, scope, errors, a);
            break;
    }
}

// Analyzes a program, function, block or statement node, appending
// diagnostics to `errors`. A NULL node is ignored. Symbols declared along
// the way are inserted into `scope` (or a child scope for function bodies),
// with storage taken from arena `a`.
static void analyze_node(AstNode* node, Scope* scope, ErrorList* errors, Arena* a) {
    analyze_in_function(node, scope, errors, a, NULL);
}
|
||||
|
||||
// Entry point of semantic analysis.
// Creates a root scope with no parent, registers the built-in functions
// print_i64 / print_f64 / print_bool (each takes one argument of the
// matching type and returns void), then analyzes `ast` in that scope.
// Diagnostics are appended to `errors`; allocations come from `arena`.
void sema_analyze(AstNode* ast, ErrorList* errors, Arena* arena) {
    Scope* root = scope_new(arena, NULL);

    // The parameter lists are compound literals with automatic storage in
    // this function's body, so they stay valid for the analyze_node call below.
    scope_insert_function(root, arena, "print_i64", TYPE_VOID, (TypeKind[]){TYPE_I64}, 1);
    scope_insert_function(root, arena, "print_f64", TYPE_VOID, (TypeKind[]){TYPE_F64}, 1);
    scope_insert_function(root, arena, "print_bool", TYPE_VOID, (TypeKind[]){TYPE_BOOL}, 1);

    analyze_node(ast, root, errors, arena);
}
|
||||
Reference in New Issue
Block a user