|
| 1 | +/* |
| 2 | +* Copyright (c) 2003, Peter Strand <peter@zarquon.se> |
| 3 | +* |
| 4 | +* This source code is released for free distribution under the terms of the |
| 5 | +* GNU General Public License version 2 or (at your opinion) any later version. |
| 6 | +* |
| 7 | +* This module contains functions for generating tags for Haskell language |
| 8 | +* files (https://en.wikipedia.org/wiki/Haskell_(programming_language)). |
| 9 | +* |
| 10 | +* Does not handle operators or infix definitions like: |
| 11 | +* a `f` b = ... |
| 12 | +* |
| 13 | +*/ |
| 14 | + |
| 15 | + |
| 16 | +/* |
| 17 | +* INCLUDE FILES |
| 18 | +*/ |
| 19 | + |
| 20 | +#include "general.h" /* must always come first */ |
| 21 | + |
| 22 | +#include <string.h> |
| 23 | + |
| 24 | +#include "parse.h" |
| 25 | +#include "read.h" |
| 26 | +#include "vstring.h" |
| 27 | +#include "routines.h" |
| 28 | + |
| 29 | + |
| 30 | +/* |
| 31 | +* DATA DEFINITIONS |
| 32 | +*/ |
| 33 | +typedef enum { |
| 34 | + K_TYPE, K_CONSTRUCTOR, K_FUNCTION, K_MODULE |
| 35 | +} haskellKind; |
| 36 | + |
| 37 | +static kindDefinition HaskellKinds [] = { |
| 38 | + { true, 't', "type", "types" }, |
| 39 | + { true, 'c', "constructor", "type constructors" }, |
| 40 | + { true, 'f', "function", "functions" }, |
| 41 | + { true, 'm', "module", "modules"} |
| 42 | +}; |
| 43 | + |
| 44 | + |
| 45 | +typedef const unsigned char *custr; |
| 46 | + |
| 47 | +/* |
| 48 | +* FUNCTION DEFINITIONS |
| 49 | +*/ |
| 50 | + |
| 51 | + |
| 52 | +static void skip_rest_of_line(void) |
| 53 | +{ |
| 54 | + int c; |
| 55 | + do { |
| 56 | + c = getcFromInputFile(); |
| 57 | + } while (c != EOF && c != '\n'); |
| 58 | +} |
| 59 | + |
| 60 | +static int get_line(char *buf) |
| 61 | +{ |
| 62 | + int i = 0; |
| 63 | + int c; |
| 64 | + do { |
| 65 | + c = getcFromInputFile(); |
| 66 | + buf[i++] = c; |
| 67 | + } while (c != EOF && c != '\n' && i < 1000); |
| 68 | + buf[i] = '\0'; |
| 69 | + return i; |
| 70 | +} |
| 71 | + |
| 72 | +static int get_next_char(void) |
| 73 | +{ |
| 74 | + int c, nxt; |
| 75 | + c = getcFromInputFile(); |
| 76 | + if (c == EOF) |
| 77 | + return c; |
| 78 | + nxt = getcFromInputFile(); |
| 79 | + if (nxt == EOF) |
| 80 | + return c; |
| 81 | + ungetcToInputFile(nxt); |
| 82 | + |
| 83 | + if (c == '-' && nxt == '-') { |
| 84 | + skip_rest_of_line(); |
| 85 | + return get_next_char(); |
| 86 | + } |
| 87 | + if (c == '{' && nxt == '-') { |
| 88 | + int last = '\0'; |
| 89 | + do { |
| 90 | + last = c; |
| 91 | + c = get_next_char(); |
| 92 | + } while (! (c == EOF || (last == '-' && c == '}'))); |
| 93 | + return get_next_char(); |
| 94 | + } |
| 95 | + return c; |
| 96 | +} |
| 97 | + |
| 98 | +static void add_tag(const char *token, haskellKind kind, vString *name) |
| 99 | +{ |
| 100 | + int i; |
| 101 | + for (i = 0; token[i] != '\0'; ++i) |
| 102 | + vStringPut(name, token[i]); |
| 103 | + |
| 104 | + makeSimpleTag(name, kind); |
| 105 | + vStringClear(name); |
| 106 | +} |
| 107 | + |
| 108 | +static int isident(char c) |
| 109 | +{ |
| 110 | + return isalnum(c) || c == '_' || c == '\'' || c == '$'; |
| 111 | +} |
| 112 | + |
| 113 | +static int get_token(char *token, int n) |
| 114 | +{ |
| 115 | + int c = getcFromInputFile(); |
| 116 | + int i = n; |
| 117 | + while (c != EOF && isident(c) && i < 1000) { |
| 118 | + token[i] = c; |
| 119 | + i++; |
| 120 | + c = getcFromInputFile(); |
| 121 | + } |
| 122 | + token[i] = '\0'; |
| 123 | + if (c == EOF) |
| 124 | + return 0; |
| 125 | + if (i != n) { |
| 126 | + ungetcToInputFile(c); |
| 127 | + return 1; |
| 128 | + } else { |
| 129 | + return 0; |
| 130 | + } |
| 131 | +} |
| 132 | + |
| 133 | +enum Find_State { Find_Eq, Find_Constr, Get_Extr, Find_Extr, Find_Bar }; |
| 134 | + |
| 135 | +static int inside_datatype(vString *name) |
| 136 | +{ |
| 137 | + enum Find_State st = Find_Eq; |
| 138 | + int c; |
| 139 | + char token[1001]; |
| 140 | + |
| 141 | + while (1) { |
| 142 | + if (st == Find_Eq) |
| 143 | + { |
| 144 | + do { |
| 145 | + c = get_next_char(); |
| 146 | + if (c == '\n') { |
| 147 | + c = get_next_char(); |
| 148 | + if (! (c == ' ' || c == '\t')) { |
| 149 | + return c; |
| 150 | + } |
| 151 | + } |
| 152 | + } while (c != EOF && c != '='); |
| 153 | + st = Find_Constr; |
| 154 | + } |
| 155 | + else if (st == Find_Constr) |
| 156 | + { |
| 157 | + do { |
| 158 | + c = get_next_char(); |
| 159 | + } while (isspace(c)); |
| 160 | + if (!isupper(c)) { |
| 161 | + skip_rest_of_line(); |
| 162 | + return '\n'; |
| 163 | + } |
| 164 | + token[0] = c; |
| 165 | + if (!get_token(token, 1)) |
| 166 | + return '\n'; |
| 167 | + add_tag(token, K_CONSTRUCTOR, name); |
| 168 | + st = Find_Extr; |
| 169 | + } |
| 170 | + else if (st == Find_Extr) |
| 171 | + { |
| 172 | + c = get_next_char(); |
| 173 | + if (c == '{') |
| 174 | + st = Get_Extr; |
| 175 | + else if (c == '|') |
| 176 | + st = Find_Constr; |
| 177 | + else if (c == '\n') { |
| 178 | + c = get_next_char(); |
| 179 | + if (! (c == ' ' || c == '\t')) { |
| 180 | + return c; |
| 181 | + } |
| 182 | + } |
| 183 | + else if (!isspace(c)) |
| 184 | + st = Find_Bar; |
| 185 | + } |
| 186 | + else if (st == Get_Extr) |
| 187 | + { |
| 188 | + do { |
| 189 | + c = get_next_char(); |
| 190 | + } while (isspace(c)); |
| 191 | + if (c == EOF) |
| 192 | + return c; |
| 193 | + token[0] = c; |
| 194 | + get_token(token, 1); |
| 195 | + add_tag(token, K_FUNCTION, name); |
| 196 | + do { |
| 197 | + c = get_next_char(); |
| 198 | + if (c == '}') { |
| 199 | + st = Find_Bar; |
| 200 | + break; |
| 201 | + } |
| 202 | + } while (c != EOF && c != ','); |
| 203 | + } |
| 204 | + else if (st == Find_Bar) |
| 205 | + { |
| 206 | + do { |
| 207 | + c = get_next_char(); |
| 208 | + if (c == '\n') { |
| 209 | + c = get_next_char(); |
| 210 | + if (! (c == ' ' || c == '\t')) { |
| 211 | + return c; |
| 212 | + } |
| 213 | + } |
| 214 | + } while (c != EOF && c != '|'); |
| 215 | + st = Find_Constr; |
| 216 | + } |
| 217 | + } |
| 218 | + return '\n'; |
| 219 | +} |
| 220 | + |
| 221 | +static void findHaskellTags (int is_literate) |
| 222 | +{ |
| 223 | + vString *name = vStringNew (); |
| 224 | + char token[1001], arg[1001]; |
| 225 | + int c; |
| 226 | + int in_tex_lit_code = 0; |
| 227 | + c = get_next_char(); |
| 228 | + |
| 229 | + while (c != EOF) |
| 230 | + { |
| 231 | + if (c == '\n') { |
| 232 | + c = get_next_char(); |
| 233 | + continue; |
| 234 | + } |
| 235 | + |
| 236 | + if (isspace(c)) { |
| 237 | + skip_rest_of_line(); |
| 238 | + c = get_next_char(); |
| 239 | + continue; |
| 240 | + } |
| 241 | + if (is_literate && !in_tex_lit_code) { |
| 242 | + if (c == '>') { |
| 243 | + c = getcFromInputFile(); |
| 244 | + if (c == ' ') { |
| 245 | + c = get_next_char(); |
| 246 | + if (!isident(c)) { |
| 247 | + skip_rest_of_line(); |
| 248 | + c = get_next_char(); |
| 249 | + continue; |
| 250 | + } |
| 251 | + } else { |
| 252 | + skip_rest_of_line(); |
| 253 | + c = get_next_char(); |
| 254 | + continue; |
| 255 | + } |
| 256 | + } else if (c == '\\') { |
| 257 | + int n = get_line(token); |
| 258 | + if (strncmp(token, "begin{code}", 11) == 0) { |
| 259 | + in_tex_lit_code = 1; |
| 260 | + c = get_next_char(); |
| 261 | + continue; |
| 262 | + } else { |
| 263 | + if (n > 0 && token[n-1] != '\n') |
| 264 | + skip_rest_of_line(); |
| 265 | + else |
| 266 | + c = get_next_char(); |
| 267 | + } |
| 268 | + continue; |
| 269 | + } else { |
| 270 | + skip_rest_of_line(); |
| 271 | + c = get_next_char(); |
| 272 | + continue; |
| 273 | + } |
| 274 | + } |
| 275 | + if (is_literate && in_tex_lit_code && c == '\\') { |
| 276 | + if (strncmp(token, "end{code}", 9) == 0) { |
| 277 | + in_tex_lit_code = 0; |
| 278 | + c = get_next_char(); |
| 279 | + continue; |
| 280 | + } |
| 281 | + } |
| 282 | + token[0] = c; |
| 283 | + if (!isident(c)) { |
| 284 | + skip_rest_of_line(); |
| 285 | + c = get_next_char(); |
| 286 | + continue; |
| 287 | + } |
| 288 | + if (!get_token(token, 1)) { |
| 289 | + c = get_next_char(); |
| 290 | + continue; |
| 291 | + } |
| 292 | + do { |
| 293 | + if ((c = getcFromInputFile()) == EOF) |
| 294 | + return; |
| 295 | + } while (c == ' ' || c == '\t'); |
| 296 | + arg[0] = c; |
| 297 | + get_token(arg, 1); |
| 298 | + if (strcmp(token, "data") == 0 || strcmp(token, "newtype") == 0) { |
| 299 | + add_tag(arg, K_TYPE, name); |
| 300 | + c = inside_datatype(name); |
| 301 | + continue; |
| 302 | + } |
| 303 | + if (strcmp(token, "type") == 0) |
| 304 | + add_tag(arg, K_TYPE, name); |
| 305 | + else if (strcmp(token, "module") == 0) |
| 306 | + add_tag(arg, K_MODULE, name); |
| 307 | + else if (strcmp(token, "instance") == 0 || |
| 308 | + strcmp(token, "foreign") == 0 || |
| 309 | + strcmp(token, "import") == 0) |
| 310 | + ; |
| 311 | + else { |
| 312 | + if (arg[0] != ':') |
| 313 | + add_tag(token, K_FUNCTION, name); |
| 314 | + } |
| 315 | + skip_rest_of_line(); |
| 316 | + c = get_next_char(); |
| 317 | + } |
| 318 | + vStringDelete(name); |
| 319 | +} |
| 320 | + |
| 321 | +static void findNormalHaskellTags (void) |
| 322 | +{ |
| 323 | + findHaskellTags (0); |
| 324 | +} |
| 325 | + |
| 326 | +static void findLiterateHaskellTags (void) |
| 327 | +{ |
| 328 | + findHaskellTags (1); |
| 329 | +} |
| 330 | + |
| 331 | +extern parserDefinition* HaskellParser (void) |
| 332 | +{ |
| 333 | + static const char *const extensions [] = { "hs", NULL }; |
| 334 | + parserDefinition* def = parserNew ("Haskell"); |
| 335 | + |
| 336 | + def->kindTable = HaskellKinds; |
| 337 | + def->kindCount = ARRAY_SIZE(HaskellKinds); |
| 338 | + def->extensions = extensions; |
| 339 | + def->parser = findNormalHaskellTags; |
| 340 | + return def; |
| 341 | +} |
| 342 | + |
| 343 | +extern parserDefinition* LiterateHaskellParser (void) |
| 344 | +{ |
| 345 | + static const char *const extensions [] = { "lhs", NULL }; |
| 346 | + parserDefinition* def = parserNew ("LiterateHaskell"); |
| 347 | + def->kindTable = HaskellKinds; |
| 348 | + def->kindCount = ARRAY_SIZE(HaskellKinds); |
| 349 | + def->extensions = extensions; |
| 350 | + def->parser = findLiterateHaskellTags; |
| 351 | + return def; |
| 352 | +} |
0 commit comments