9a53d97274
- 新增 TYPE_STR 类型 (i8* 指针)
- lexer: 双引号字符串字面量 + str 关键字
- parser: TOK_STR_LIT → AST_LITERAL_EXPR(str_val)
- sema: print_str 内置函数注册 + 字符串拼接类型检查
- codegen: GlobalStringPtr 生成字符串常量,print_str → printf("%s")
- 新增集成测试 07_hello_str.l
基于 Codex 分析报告 P0 建议。
142 lines
6.4 KiB
C
142 lines
6.4 KiB
C
#include "lexer.h"
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
|
|
// Cursor state for one pass over a single NUL-terminated source buffer.
typedef struct {
    const char* src;       // source text being scanned
    const char* filename;  // file name supplied by the caller of lex()
    int pos;               // index of the current character within src
    int line;              // current line number; lex() starts it at 1
    int col;               // current column number; lex() starts it at 1
} Lexer;
|
|
|
|
// Returns the character under the cursor without consuming it.
static char peek(const Lexer* l) {
    const char* cursor = l->src + l->pos;
    return *cursor;
}
|
|
// Returns the character one position past the cursor without consuming
// anything. When the cursor already sits on the terminating '\0' this
// returns '\0' instead of reading the byte after the terminator.
static char peek_next(const Lexer* l) {
    if (l->src[l->pos] == '\0') return '\0';
    return l->src[l->pos + 1];
}
|
|
// Consumes the current character and updates the line/column bookkeeping:
// a newline bumps the line and resets the column to 1, anything else moves
// the column one to the right. Callers are expected not to advance past the
// terminating '\0'; this function does not check for it.
static void advance(Lexer* l) {
    if (l->src[l->pos] == '\n') {
        l->line++;
        l->col = 1;
    } else {
        l->col++;
    }
    l->pos++;
}
|
|
// Skips everything that carries no token: spaces, tabs, CR/LF, `//` line
// comments and `/* ... */` block comments. On return the cursor is on the
// first character of the next token or on the terminating '\0'.
//
// An unterminated block comment is consumed up to the end of input without
// any diagnostic, because this function has no way to report an error.
static void skip_whitespace(Lexer* l) {
    for (;;) {
        char c = peek(l);

        if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
            advance(l);
            continue;
        }

        if (c == '/' && peek_next(l) == '/') {
            // Stop on the newline itself; the next iteration consumes it as
            // ordinary whitespace so the line counter stays correct.
            while (peek(l) != '\n' && peek(l) != '\0') advance(l);
            continue;
        }

        if (c == '/' && peek_next(l) == '*') {
            // Consume the two-character comment opener.
            advance(l);
            advance(l);
            while (peek(l) != '\0' && !(peek(l) == '*' && peek_next(l) == '/')) advance(l);
            // Consume the two-character terminator, unless input ended first.
            if (peek(l) != '\0') {
                advance(l);
                advance(l);
            }
            continue;
        }

        break;
    }
}
|
|
|
|
// Builds a token of `kind` whose text is the `len` characters of the source
// starting at index `start_pos`. The token points into the source buffer; no
// text is copied.
//
// The line/column stored in the token are the lexer's values at the moment of
// the call, so a caller that has already consumed the token's characters gets
// the position just past the token rather than its start.
static Token make_token(Lexer* l, TokenKind kind, int start_pos, int len) {
    Token t = {
        .kind = kind,
        .start = l->src + start_pos,
        .length = len,
        .line = l->line,
        .col = l->col,
    };
    return t;
}
|
|
|
|
// Scans a numeric literal starting at the cursor: a run of decimal digits,
// optionally followed by '.' and another (possibly empty) run of digits.
// Returns TOK_FLOAT_LIT when a '.' was consumed, TOK_INT_LIT otherwise.
// Note that "1." is accepted as a float literal.
//
// The token is built after the literal has been consumed, so the line/column
// recorded by make_token() are those just past the literal.
static Token lex_number(Lexer* l) {
    int start = l->pos;
    TokenKind kind = TOK_INT_LIT;

    // <ctype.h> classifiers require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    while (isdigit((unsigned char)peek(l))) advance(l);

    if (peek(l) == '.') {
        kind = TOK_FLOAT_LIT;
        advance(l);
        while (isdigit((unsigned char)peek(l))) advance(l);
    }

    return make_token(l, kind, start, l->pos - start);
}
|
|
|
|
static TokenKind check_keyword(const Token* tok) {
|
|
#define KW(s, k) if (tok->length == sizeof(s)-1 && memcmp(tok->start, s, sizeof(s)-1) == 0) return k
|
|
KW("fn", TOK_FN); KW("let", TOK_LET);
|
|
KW("mut", TOK_MUT);
|
|
KW("if", TOK_IF); KW("else", TOK_ELSE);
|
|
KW("while", TOK_WHILE); KW("return", TOK_RETURN);
|
|
KW("i64", TOK_I64); KW("f64", TOK_F64);
|
|
KW("bool", TOK_BOOL); KW("str", TOK_STR);
|
|
KW("void", TOK_VOID);
|
|
KW("true", TOK_TRUE); KW("false", TOK_FALSE);
|
|
#undef KW
|
|
return TOK_IDENT;
|
|
}
|
|
|
|
// Scans a run of letters, digits and underscores starting at the cursor and
// returns it as a keyword token when it matches a reserved word, or as
// TOK_IDENT otherwise. The caller is responsible for checking that the first
// character may start an identifier.
//
// The token is built after the text has been consumed, so the line/column
// recorded by make_token() are those just past the identifier.
static Token lex_ident_or_keyword(Lexer* l) {
    int start = l->pos;

    // <ctype.h> classifiers require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    while (isalnum((unsigned char)peek(l)) || peek(l) == '_') advance(l);

    Token t = make_token(l, TOK_IDENT, start, l->pos - start);
    t.kind = check_keyword(&t);
    return t;
}
|
|
|
|
Token* lex(Arena* a, const char* source, const char* filename,
|
|
size_t* count, ErrorInfo* error) {
|
|
Lexer l = {.src = source, .filename = filename, .pos = 0, .line = 1, .col = 1};
|
|
// 预估容量:源码长度的 1/3
|
|
size_t cap = strlen(source) / 3 + 16;
|
|
Token* tokens = arena_alloc(a, cap * sizeof(Token));
|
|
if (!tokens) { *count = 0; return NULL; }
|
|
size_t idx = 0;
|
|
|
|
while (peek(&l) != '\0') {
|
|
skip_whitespace(&l);
|
|
if (peek(&l) == '\0') break;
|
|
|
|
int line = l.line, col = l.col;
|
|
char c = peek(&l);
|
|
|
|
if (isdigit(c)) { tokens[idx++] = lex_number(&l); }
|
|
else if (c == '"') {
|
|
advance(&l); // 跳过开头的 "
|
|
int start = l.pos;
|
|
while (peek(&l) != '"' && peek(&l) != '\0' && peek(&l) != '\n') advance(&l);
|
|
int len = l.pos - start;
|
|
if (peek(&l) != '"') {
|
|
*error = (ErrorInfo){.message="未闭合的字符串", .filename=filename, .line=line, .col=col};
|
|
return NULL;
|
|
}
|
|
advance(&l); // 跳过结尾的 "
|
|
tokens[idx++] = make_token(&l, TOK_STR_LIT, start, len);
|
|
}
|
|
else if (isalpha(c) || c == '_') { tokens[idx++] = lex_ident_or_keyword(&l); }
|
|
else if (c == '+' && peek_next(&l) != '=') { tokens[idx++] = make_token(&l, TOK_PLUS, l.pos, 1); advance(&l); }
|
|
else if (c == '-' && peek_next(&l) != '>') { tokens[idx++] = make_token(&l, TOK_MINUS, l.pos, 1); advance(&l); }
|
|
else if (c == '-' && peek_next(&l) == '>') { tokens[idx++] = make_token(&l, TOK_ARROW, l.pos, 2); advance(&l); advance(&l); }
|
|
else if (c == '*') { tokens[idx++] = make_token(&l, TOK_STAR, l.pos, 1); advance(&l); }
|
|
else if (c == '/') { tokens[idx++] = make_token(&l, TOK_SLASH, l.pos, 1); advance(&l); }
|
|
else if (c == '%') { tokens[idx++] = make_token(&l, TOK_PERCENT, l.pos, 1); advance(&l); }
|
|
else if (c == '=' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_EQ_EQ, l.pos, 2); advance(&l); advance(&l); }
|
|
else if (c == '=') { tokens[idx++] = make_token(&l, TOK_ASSIGN, l.pos, 1); advance(&l); }
|
|
else if (c == '!' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_BANG_EQ, l.pos, 2); advance(&l); advance(&l); }
|
|
else if (c == '!') { tokens[idx++] = make_token(&l, TOK_BANG, l.pos, 1); advance(&l); }
|
|
else if (c == '<' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_LT_EQ, l.pos, 2); advance(&l); advance(&l); }
|
|
else if (c == '<') { tokens[idx++] = make_token(&l, TOK_LT, l.pos, 1); advance(&l); }
|
|
else if (c == '>' && peek_next(&l) == '=') { tokens[idx++] = make_token(&l, TOK_GT_EQ, l.pos, 2); advance(&l); advance(&l); }
|
|
else if (c == '>') { tokens[idx++] = make_token(&l, TOK_GT, l.pos, 1); advance(&l); }
|
|
else if (c == '&' && peek_next(&l) == '&') { tokens[idx++] = make_token(&l, TOK_AND_AND, l.pos, 2); advance(&l); advance(&l); }
|
|
else if (c == '|' && peek_next(&l) == '|') { tokens[idx++] = make_token(&l, TOK_PIPE_PIPE, l.pos, 2); advance(&l); advance(&l); }
|
|
else if (c == '(') { tokens[idx++] = make_token(&l, TOK_LPAREN, l.pos, 1); advance(&l); }
|
|
else if (c == ')') { tokens[idx++] = make_token(&l, TOK_RPAREN, l.pos, 1); advance(&l); }
|
|
else if (c == '{') { tokens[idx++] = make_token(&l, TOK_LBRACE, l.pos, 1); advance(&l); }
|
|
else if (c == '}') { tokens[idx++] = make_token(&l, TOK_RBRACE, l.pos, 1); advance(&l); }
|
|
else if (c == ',') { tokens[idx++] = make_token(&l, TOK_COMMA, l.pos, 1); advance(&l); }
|
|
else if (c == ':') { tokens[idx++] = make_token(&l, TOK_COLON, l.pos, 1); advance(&l); }
|
|
else if (c == ';') { tokens[idx++] = make_token(&l, TOK_SEMICOLON, l.pos, 1); advance(&l); }
|
|
else {
|
|
*error = (ErrorInfo){
|
|
.message = "无法识别的字符",
|
|
.filename = filename, .line = line, .col = col
|
|
};
|
|
return NULL;
|
|
}
|
|
}
|
|
tokens[idx++] = make_token(&l, TOK_EOF, l.pos, 0);
|
|
*count = idx;
|
|
return tokens;
|
|
}
|