-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathirc_lexer.c
218 lines (175 loc) · 5.9 KB
/
irc_lexer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#include <sts_queue/sts_queue.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <assert.h>
#include <stdio.h>
#include "irc_lexer.h"
#include "irc_token.h"
#include "irc_logger.h"
/*
 * Lexer state: the queue that lines are pulled from, plus a cursor into the
 * line currently being tokenized.
 */
struct irc_lexer {
/* Queue that irc_lexer_get_next_token() pops input lines from. */
StsHeader *line_buffer;
/* Line currently being tokenized; NULL while no line is loaded. */
char *current_line;
/* Character under the cursor; set to '\0' once the cursor passes the line. */
char current_character;
/* strlen() of current_line; reset to 0 when the line is released. */
size_t current_line_length;
/* Zero-based index of the cursor within current_line. */
size_t current_column;
};
/*
 * Forward declarations of the file-local helpers defined further down:
 * single-character classifiers used by irc_lexer_get_next_token(), and the
 * cursor-advance routine.
 */
static bool __is_space(char character);
static bool __is_colon(char character);
static bool __is_digit(char character);
static bool __is_nospcrlfcl(char character);
static bool __is_letter(char character);
static bool __is_cr(char character);
static bool __is_ln(char character);
static void __advance(struct irc_lexer *lexer);
/*
 * Create a lexer that reads its input lines from `line_buffer`.
 *
 * The lexer starts with no line loaded; a line is popped from the queue by
 * irc_lexer_get_next_token() when one is needed.  The queue is destroyed by
 * deallocate_irc_lexer().
 *
 * Returns the new lexer, or NULL if memory could not be allocated.
 */
struct irc_lexer *allocate_irc_lexer(StsHeader *line_buffer) {
    struct irc_lexer *lexer = malloc(sizeof *lexer);

    if (lexer == NULL) {
        return NULL;
    }

    lexer->line_buffer = line_buffer;
    lexer->current_line = NULL;
    /*
     * Give the cursor character a defined value: irc_lexer_peek_next_token()
     * snapshots it, possibly before any line has been loaded.
     */
    lexer->current_character = '\0';
    lexer->current_line_length = 0;
    lexer->current_column = 0;
    return lexer;
}
/*
 * Release a lexer created by allocate_irc_lexer(), including the line it is
 * currently holding and the line queue it was constructed with.
 *
 * Passing NULL is a no-op, mirroring free(), so callers can clean up
 * unconditionally after a failed allocate_irc_lexer().
 */
void deallocate_irc_lexer(struct irc_lexer *lexer) {
    if (lexer == NULL) {
        return;
    }

    free(lexer->current_line); /* free(NULL) is fine when no line is loaded */
    StsQueue.destroy(lexer->line_buffer);
    free(lexer);
}
/*
 * Produce the next token from the input.
 *
 * Tokens are single characters classified in this order: colon, digit,
 * letter, other nospcrlfcl character, space, CR(LF).  Anything else yields
 * IRC_TOKEN_NONE.  When the cursor has consumed the whole current line the
 * line is released and IRC_TOKEN_EOL is returned; when no line is loaded the
 * next one is popped from the queue, and IRC_TOKEN_EOF is returned if the
 * queue has nothing to give.
 *
 * Returns whatever allocate_irc_token() returns for the chosen token.
 */
struct irc_token *irc_lexer_get_next_token(struct irc_lexer *lexer) {
    union irc_token_value tok_value;

    /*
     * End of the current line.  The test is `column >= length` rather than
     * `column > length - 1`: the subtraction wraps to SIZE_MAX for a
     * zero-length line, which made the lexer return IRC_TOKEN_NONE forever
     * instead of ever finishing an empty line.
     */
    if (lexer->current_line != NULL &&
        lexer->current_column >= lexer->current_line_length) {
        free(lexer->current_line);
        lexer->current_line = NULL;
        lexer->current_column = 0;
        lexer->current_line_length = 0;
        tok_value.character = '\n';
        return allocate_irc_token(IRC_TOKEN_EOL, tok_value);
    }

    /* No line in progress: load the next one, or report end of input. */
    if (lexer->current_line == NULL) {
        char *next_line = StsQueue.pop(lexer->line_buffer);

        if (next_line == NULL) {
            tok_value.character = '\0';
            return allocate_irc_token(IRC_TOKEN_EOF, tok_value);
        }
        lexer->current_line = next_line;
        lexer->current_line_length = strlen(next_line);
        lexer->current_character = next_line[0];
    }

    if (__is_colon(lexer->current_character)) {
        tok_value.character = lexer->current_character;
        __advance(lexer);
        return allocate_irc_token(IRC_TOKEN_COLON, tok_value);
    }

    if (__is_digit(lexer->current_character)) {
        /* Digits carry their numeric value, not the character code. */
        tok_value.integer = (int) lexer->current_character - (int) '0';
        __advance(lexer);
        return allocate_irc_token(IRC_TOKEN_DIGIT, tok_value);
    }

    /* Letters are tested before the broader nospcrlfcl class, which also
     * contains them. */
    if (__is_letter(lexer->current_character)) {
        tok_value.character = lexer->current_character;
        __advance(lexer);
        return allocate_irc_token(IRC_TOKEN_LETTER, tok_value);
    }

    if (__is_nospcrlfcl(lexer->current_character)) {
        tok_value.character = lexer->current_character;
        __advance(lexer);
        return allocate_irc_token(IRC_TOKEN_NOSPCRLFCL, tok_value);
    }

    if (__is_space(lexer->current_character)) {
        tok_value.character = '\x20';
        __advance(lexer);
        return allocate_irc_token(IRC_TOKEN_SPACE, tok_value);
    }

    if (__is_cr(lexer->current_character)) {
        __advance(lexer);
        if (__is_ln(lexer->current_character)) {
            /*
             * NOTE(review): the LF itself is not consumed here, so the next
             * call sees '\n' as the current character -- confirm whether
             * queued lines are expected to contain CRLF at all.
             */
            tok_value.character = '\0';
            return allocate_irc_token(IRC_TOKEN_EOL, tok_value);
        } else {
            log_error("Lexer: Expecting EOL\n");
            /*
             * NOTE(review): falls through to the IRC_TOKEN_NONE path below,
             * which advances again and so skips the character after the CR.
             */
        }
    }

    /* Unclassified character (or a lone CR): skip it and report NONE. */
    tok_value.character = '\0';
    __advance(lexer);
    return allocate_irc_token(IRC_TOKEN_NONE, tok_value);
}
/*
 * Return a freshly allocated, NUL-terminated copy of the line currently being
 * tokenized.  If no line is loaded the copy is an empty string.
 *
 * The caller owns the returned buffer and must free() it.
 * Returns NULL if memory could not be allocated.
 */
char *irc_lexer_get_current_line(struct irc_lexer *lexer) {
    /* calloc zero-fills, which supplies the terminating NUL. */
    char *dest = calloc(lexer->current_line_length + 1, sizeof *dest);

    if (dest == NULL) {
        return NULL;
    }

    /* current_line is NULL between lines; never hand NULL to a copy routine. */
    if (lexer->current_line != NULL) {
        memcpy(dest, lexer->current_line, lexer->current_line_length);
    }
    return dest;
}
/*
 * Return the token that the next irc_lexer_get_next_token() call will
 * produce, without consuming it.
 *
 * If a line is loaded, the cursor state is snapshotted, one token is read,
 * and the snapshot is restored (including the line itself, should reading the
 * token have released it at end of line).
 *
 * If no line is loaded, reading the token may pop a fresh line from the
 * queue.  The queue is only ever popped from here, so that line cannot be put
 * back; it is instead kept as the current line with the cursor rewound to its
 * first character.  The previous implementation freed it, silently dropping a
 * whole line of input.
 *
 * The caller owns the returned token.  Returns NULL if the snapshot could not
 * be allocated (in which case no input is consumed), or whatever
 * irc_lexer_get_next_token() returns otherwise.
 */
struct irc_token *irc_lexer_peek_next_token(struct irc_lexer *lexer) {
    struct irc_token *next_token;

    if (lexer->current_line == NULL) {
        next_token = irc_lexer_get_next_token(lexer);
        if (lexer->current_line != NULL) {
            /* A line was just loaded: un-consume the token we looked at. */
            lexer->current_column = 0;
            lexer->current_character = lexer->current_line[0];
        }
        return next_token;
    }

    /* Snapshot the cursor and the line before reading the token. */
    size_t saved_length = lexer->current_line_length;
    size_t saved_column = lexer->current_column;
    char saved_character = lexer->current_character;
    char *saved_line = malloc(saved_length + 1);

    if (saved_line == NULL) {
        return NULL;
    }
    /* +1 copies the terminating NUL as well. */
    memcpy(saved_line, lexer->current_line, saved_length + 1);

    next_token = irc_lexer_get_next_token(lexer);

    /*
     * Drop whatever line the lexer holds now (NULL if the token was the
     * end-of-line marker, which already released it) and reinstate the
     * snapshot.
     */
    free(lexer->current_line);
    lexer->current_line = saved_line;
    lexer->current_line_length = saved_length;
    lexer->current_column = saved_column;
    lexer->current_character = saved_character;
    return next_token;
}
/* Report the zero-based cursor position within the current line. */
size_t irc_lexer_get_current_column(struct irc_lexer *lexer) {
    const size_t column = lexer->current_column;

    return column;
}
/* Report the length recorded for the current line. */
size_t irc_lexer_get_current_line_length(struct irc_lexer *lexer) {
    const size_t length = lexer->current_line_length;

    return length;
}
/* True only for the ASCII space character (0x20); tabs do not count. */
static bool __is_space(char character) {
    const char space = ' ';

    return space == character;
}
/* True only for ':' (0x3A). */
static bool __is_colon(char character) {
    const char colon = '\x3A';

    return colon == character;
}
/* True for the ASCII decimal digits '0' through '9'. */
static bool __is_digit(char character) {
    if (character < '0') {
        return false;
    }
    return character <= '9';
}
/*
 * True for any octet in the RFC 2812 `nospcrlfcl` class:
 *
 *   nospcrlfcl = %x01-09 / %x0B-0C / %x0E-1F / %x21-39 / %x3B-FF
 *
 * i.e. every octet except NUL, LF, CR, space and colon.
 *
 * The character is inspected as an unsigned octet so that bytes 0x80-0xFF
 * (for example UTF-8 sequences) are accepted regardless of whether plain
 * `char` is signed; the earlier range check stopped at 0x7F and rejected
 * them.
 */
static bool __is_nospcrlfcl(char character) {
    const unsigned char octet = (unsigned char) character;

    switch (octet) {
    case 0x00: /* NUL   */
    case 0x0A: /* LF    */
    case 0x0D: /* CR    */
    case 0x20: /* space */
    case 0x3A: /* colon */
        return false;
    default:
        return true;
    }
}
/* True for the ASCII letters 'A'-'Z' and 'a'-'z'. */
static bool __is_letter(char character) {
    const bool is_upper = character >= 'A' && character <= 'Z';
    const bool is_lower = character >= 'a' && character <= 'z';

    return is_upper || is_lower;
}
/*
 * Move the cursor one column to the right and refresh current_character.
 * Once the cursor is beyond the line's recorded length the current character
 * is reported as '\0' without touching the line buffer.
 */
void __advance(struct irc_lexer *lexer) {
    const size_t next_column = lexer->current_column + 1;

    lexer->current_column = next_column;
    lexer->current_character = (next_column > lexer->current_line_length)
        ? '\0'
        : lexer->current_line[next_column];
}
/* True only for carriage return (0x0D). */
bool __is_cr(char character) {
    const char carriage_return = '\x0D';

    return carriage_return == character;
}
/* True only for line feed (0x0A). */
bool __is_ln(char character) {
    const char line_feed = '\x0A';

    return line_feed == character;
}