Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
travisdoor committed Mar 5, 2025
1 parent 08422c9 commit 5a495e6
Show file tree
Hide file tree
Showing 7 changed files with 294 additions and 90 deletions.
5 changes: 5 additions & 0 deletions lib/bl/api/std/math/math.bl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ F32_HALF_PI :: cast(f32) HALF_PI;
F32_EPSILON :: 0.00001f;
F64_EPSILON :: 0.00001;

F32_MAX :: 3.402823466e+38f;
F32_MIN :: 1.175494351e-38f;
F64_MAX :: 1.7976931348623158e+308;
F64_MIN :: 2.2250738585072014e-308;

S8_MIN : s8 : -128;
S8_MAX : s8 : 127;
S16_MIN : s16 : -32768;
Expand Down
243 changes: 153 additions & 90 deletions src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,6 @@

#define is_ident(c) (isalnum(c) || (c) == '_')

#define report_error(code, format, ...) \
{ \
builder_msg(MSG_ERR, ERR_##code, NULL, CARET_NONE, (format), ##__VA_ARGS__); \
longjmp((ctx)->jmp_error, ERR_##code); \
} \
(void)0

struct context {
struct assembly *assembly;
struct unit *unit;
Expand Down Expand Up @@ -47,7 +40,7 @@ static inline u32 add_token_value(struct context *ctx, union token_value value)
return index;
}

#define report_error2(ctx, code, ln, cl, len, cursor_position, format, ...) \
#define report_error(ctx, code, ln, cl, len, cursor_position, format, ...) \
{ \
_report((ctx), MSG_ERR, ERR_##code, (ln), (cl), (s32)(len), (cursor_position), (format), ##__VA_ARGS__); \
longjmp((ctx)->jmp_error, ERR_##code); \
Expand All @@ -69,7 +62,7 @@ static inline void _report(struct context *ctx, enum builder_msg_type type, s32

bool scan_comment(struct context *ctx, struct token *tok, const char *termminator) {
if (tok->sym == SYM_SHEBANG && ctx->line != 1) {
report_error2(ctx, INVALID_TOKEN, ctx->line, ctx->col, 1, CARET_WORD, "Shebang is allowed only on the first line of the file.");
report_error(ctx, INVALID_TOKEN, ctx->line, ctx->col, 1, CARET_WORD, "Shebang is allowed only on the first line of the file.");
}

const s32 start_ln = ctx->line;
Expand All @@ -86,7 +79,7 @@ bool scan_comment(struct context *ctx, struct token *tok, const char *termminato
return true;
}

report_error2(ctx, UNTERMINATED_COMMENT, start_ln, start_cl, 1, CARET_WORD, "Unterminated comment block starting here:");
report_error(ctx, UNTERMINATED_COMMENT, start_ln, start_cl, 1, CARET_WORD, "Unterminated comment block starting here:");
}

if (strncmp(ctx->c, termminator, terminator_len) == 0) break;
Expand Down Expand Up @@ -245,7 +238,7 @@ static char scan_specch(struct context *ctx) {
const s32 len = ctx->col - start_col;

if (base == 16) {
report_error2(
report_error(
ctx,
NUM_LIT_OVERFLOW,
ctx->line,
Expand All @@ -255,7 +248,7 @@ static char scan_specch(struct context *ctx) {
"Hexadecimal character notation overflow, maximum possible value is 0xFF.",
v);
} else if (base == 8) {
report_error2(
report_error(
ctx,
NUM_LIT_OVERFLOW,
ctx->line,
Expand Down Expand Up @@ -300,7 +293,7 @@ bool scan_string(struct context *ctx, struct token *tok) {
goto DONE;
}
case SYM_EOF: {
report_error2(ctx, UNTERMINATED_STRING, ctx->line, start_col, 1, CARET_WORD, "Unterminated string.");
report_error(ctx, UNTERMINATED_STRING, ctx->line, start_col, 1, CARET_WORD, "Unterminated string.");
}
case '\\':
++ctx->c; // Eat backslash.
Expand Down Expand Up @@ -337,12 +330,12 @@ bool scan_char(struct context *ctx, struct token *tok) {

switch (*ctx->c) {
case '\'': {
report_error2(ctx, EMPTY, ctx->line, ctx->col, 2, CARET_WORD, "Expected character in quotes.");
report_error(ctx, EMPTY, ctx->line, ctx->col, 2, CARET_WORD, "Expected character in quotes.");
}
case '\0': {
report_error2(ctx, UNTERMINATED_STRING, ctx->line, ctx->col, 1, CARET_WORD, "Unterminated character.");
report_error(ctx, UNTERMINATED_STRING, ctx->line, ctx->col, 1, CARET_WORD, "Unterminated character.");
}
case '\\':
case '\\': {
// special character
s32 start_col = ctx->col;

Expand All @@ -355,6 +348,7 @@ bool scan_char(struct context *ctx, struct token *tok) {

tok->location.len += ctx->col - start_col;
break;
}
default:
tok->value_index = add_token_value(ctx, (union token_value){.character = *ctx->c});
tok->location.len += 1;
Expand All @@ -363,7 +357,7 @@ bool scan_char(struct context *ctx, struct token *tok) {

// eat '
if (*ctx->c != '\'') {
report_error2(ctx, UNTERMINATED_STRING, ctx->line, ctx->col, 1, CARET_WORD, "Unterminated character.");
report_error(ctx, UNTERMINATED_STRING, ctx->line, ctx->col, 1, CARET_WORD, "Unterminated character.");
}
tok->location.len += 1;
++ctx->c;
Expand Down Expand Up @@ -408,112 +402,181 @@ inline s32 c_to_number(char c, s32 base) {
#endif
}

static bool is_real(char *c) {
while (*c != SYM_EOF) {
if (*c == '.') return true;
if (!isdigit(*c)) return false;
++c;
}
return false;
}

bool scan_number(struct context *ctx, struct token *tok) {
tok->location.line = ctx->line;
tok->location.col = ctx->col;

s32 start_col = ctx->col;

u64 n = 0;
s32 base = 10;
s32 buf = 0;

if (strncmp(ctx->c, "0x", 2) == 0) {
base = 16;
ctx->c += 2;
ctx->col += 2;
} else if (strncmp(ctx->c, "0b", 2) == 0) {
base = 2;
ctx->c += 2;
ctx->col += 2;
}
// Prescan to findout if we deal with floating point number.
const bool is_floating_point = is_real(ctx->c);

bool overflow = false;
if (is_floating_point) {

const u64 max_base = ULLONG_MAX / base;
double n = 0.0;
double fraction = 0.0;
double divider = 1.0;

while (true) {
if (*(ctx->c) == '.') {
if (!isdigit(*(ctx->c)) && *(ctx->c) != '.') {
return false;
}

// Integral part.
while (true) {
s32 d = c_to_number(*(ctx->c), 10);
if (d == -1) break;
n = n * 10.0 + (double)d;
++ctx->c;
++ctx->col;
}

bassert(*(ctx->c) == '.' && "Expected floating point dot character!");
++ctx->c;
++ctx->col;

if (base != 10) {
report_error2(ctx, INVALID_TOKEN, ctx->line, ctx->col, 1, CARET_WORD, "Unexpected decimal point in number.");
// Fractional part.
while (true) {
s32 d = c_to_number(*(ctx->c), 10);
if (d == -1) break;

fraction = fraction * 10.0 + (double)d;
divider *= 10.0;
++ctx->c;
++ctx->col;
}

// Exponent.
s32 exp_sign = 0;
u64 exp = 0;

if (*(ctx->c) == 'e' || *(ctx->c) == 'E') {
++ctx->c;
++ctx->col;

if (*(ctx->c) == '-') {
exp_sign = -1;
} else if (*(ctx->c) == '+') {
exp_sign = 1;
} else {
report_error(ctx, INVALID_TOKEN, ctx->line, ctx->col, 1, CARET_WORD, "Expected sign of floating point number exponent.");
}

++ctx->c;
++ctx->col;
goto SCAN_DOUBLE;

bool overflow = false;
const u64 max_div = ULLONG_MAX / 10;
const u64 max_mod = ULLONG_MAX % 10;

const s32 exp_start_col = ctx->col;

while (true) {
s32 d = c_to_number(*(ctx->c), 10);
if (d == -1) break;

if (exp > max_div || (exp == max_div && (u64)d > max_mod)) {
exp = 1;
overflow = true;
} else {
exp = exp * 10 + d;
}

++ctx->c;
++ctx->col;
}

const s32 exp_len = ctx->col - exp_start_col;
if (exp_len < 1) {
report_error(ctx, INVALID_TOKEN, ctx->line, exp_start_col, 1, CARET_WORD, "Expected floating point exponent value.");
}

if (overflow) {
report_error(ctx, NUM_LIT_OVERFLOW, ctx->line, exp_start_col, exp_len, CARET_WORD, "Exponent value is too big.");
}
}

buf = c_to_number(*(ctx->c), base);
if (buf == -1) break;
// Suffix.
if (*(ctx->c) == 'f') {
++ctx->c;
++ctx->col;

if (n > max_base || (n == max_base && (u64)buf > ULLONG_MAX % base)) {
n = ULLONG_MAX;
overflow = true;
tok->sym = SYM_FLOAT;
} else {
n = n * base + buf;
tok->sym = SYM_DOUBLE;
}

++ctx->c;
++ctx->col;
}
double number = n + fraction / divider;
if (exp_sign != 0) {
number *= pow(10.0, (double)exp_sign * (double)exp);
}
tok->value_index = add_token_value(ctx, (union token_value){.double_number = number});

const s32 len = ctx->col - start_col;
if (len < 1) return false;
tok->location.len = ctx->col - start_col;

tok->location.len = len;
tok->sym = SYM_NUM;
tok->value_index = add_token_value(ctx, (union token_value){.number = n});
return true;

if (overflow) {
builder_msg(MSG_ERR, ERR_NUM_LIT_OVERFLOW, &tok->location, CARET_WORD, "Number literal exceeds 64bit boundary.");
}
} else {

return true;
s32 base = 10;

if (strncmp(ctx->c, "0x", 2) == 0) {
base = 16;
ctx->c += 2;
ctx->col += 2;
} else if (strncmp(ctx->c, "0b", 2) == 0) {
base = 2;
ctx->c += 2;
ctx->col += 2;
} else if (*ctx->c == '0') {
base = 8;
++ctx->c;
++ctx->col;
}

SCAN_DOUBLE: {
u64 e = 1;
u64 prev_n = 0;
bool overflow = false;
const u64 max_div = ULLONG_MAX / base;

while (true) {
buf = c_to_number(*(ctx->c), 10);
if (buf == -1) break;
u64 n = 0;

prev_n = n;
n = n * 10 + buf;
e *= 10;
while (true) {
s32 d = c_to_number(*(ctx->c), base);
if (d == -1) break;

++ctx->c;
++ctx->col;
if (n > max_div || (n == max_div && (u64)d > ULLONG_MAX % base)) {
n = ULLONG_MAX;
overflow = true;
} else {
n = n * base + d;
}

if (n < prev_n) overflow = true;
}
++ctx->c;
++ctx->col;
}

// valid d. or .d -> minimal 2 characters
const s32 len = ctx->col - start_col;
if (len < 2) return false;
const s32 len = ctx->col - start_col;
if (len < 1) return false;

if (*(ctx->c) == 'f') {
++ctx->c;
++ctx->col;
tok->location.len = len;
tok->sym = SYM_NUM;
tok->value_index = add_token_value(ctx, (union token_value){.number = n});

tok->sym = SYM_FLOAT;
} else {
tok->sym = SYM_DOUBLE;
}
if (overflow) {
builder_msg(MSG_ERR, ERR_NUM_LIT_OVERFLOW, &tok->location, CARET_WORD, "Number literal is too big for u64 type.");
}

const double number = n / (f64)e;
if (number > FLT_MAX) overflow = true;
if (overflow) {
builder_msg(MSG_ERR, ERR_NUM_LIT_OVERFLOW, &tok->location, CARET_WORD, "Number value overflow.");
return true;
}
tok->value_index = add_token_value(ctx, (union token_value){.double_number = number});

tok->location.len = len;
ctx->col += len;

return true;
}
return false;
}

void scan(struct context *ctx) {
Expand Down Expand Up @@ -587,7 +650,7 @@ void scan(struct context *ctx) {
scan_comment(ctx, &tok, sym_strings[SYM_RBCOMMENT]);
goto SCAN;
case SYM_RBCOMMENT: {
report_error(INVALID_TOKEN, STR_FMT " %d:%d unexpected token.", STR_ARG(ctx->unit->name), ctx->line, ctx->col);
report_error(ctx, INVALID_TOKEN, ctx->line, ctx->col, 1, CARET_WORD, "Unexpected end of the comment block.");
}
default:
ctx->col += (s32)len;
Expand All @@ -606,7 +669,7 @@ void scan(struct context *ctx) {
if (scan_char(ctx, &tok)) goto PUSH_TOKEN;

// When symbol is unknown report error
report_error(INVALID_TOKEN, STR_FMT " %d:%d Unexpected token '%c' (%d)", STR_ARG(ctx->unit->name), ctx->line, ctx->col, *ctx->c, *ctx->c);
report_error(ctx, INVALID_TOKEN, ctx->line, ctx->col, 1, CARET_WORD, "Unexpected symbol.");
PUSH_TOKEN:
tok.location.unit = ctx->unit;
tokens_push(ctx->tokens, tok);
Expand Down
5 changes: 5 additions & 0 deletions tests/src/expect_fail/float_invalid1.bl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// @ERR_INVALID_TOKEN@
main :: fn () s32 {
1.0e10;
return 0;
}
5 changes: 5 additions & 0 deletions tests/src/expect_fail/float_invalid2.bl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// @ERR_INVALID_TOKEN@
main :: fn () s32 {
1.0e;
return 0;
}
Loading

0 comments on commit 5a495e6

Please sign in to comment.