Line data Source code
1 : /*
2 : ** $Id: llex.c,v 2.96.1.1 2017/04/19 17:20:42 roberto Exp $
3 : ** Lexical Analyzer
4 : ** See Copyright Notice in lua.h
5 : */
6 :
7 : #define llex_c
8 : #define LUA_CORE
9 :
10 : #include "lprefix.h"
11 :
12 :
13 : #include <locale.h>
14 : #include <string.h>
15 :
16 : #include "lua.h"
17 :
18 : #include "lctype.h"
19 : #include "ldebug.h"
20 : #include "ldo.h"
21 : #include "lgc.h"
22 : #include "llex.h"
23 : #include "lobject.h"
24 : #include "lparser.h"
25 : #include "lstate.h"
26 : #include "lstring.h"
27 : #include "ltable.h"
28 : #include "lzio.h"
29 :
30 :
31 :
/*
** Advance the lexer: fetch the next character from the input stream
** 'ls->z' with 'zgetc' and store it in 'ls->current'. The value of the
** expression is the character just read.
** The parameter is parenthesized so that any pointer expression (not
** only a plain identifier) can be passed safely.
*/
#define next(ls)	((ls)->current = zgetc((ls)->z))


/*
** True when the current character starts a line break ('\n' or '\r').
** Note that 'ls' is evaluated twice; pass an expression without side
** effects.
*/
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
37 :
38 :
/* ORDER RESERVED */
/*
** Printable text of every token with a code of FIRST_RESERVED or
** above, indexed by (token - FIRST_RESERVED). The reserved words come
** first (they are the entries 'luaX_init' registers), followed by the
** multi-character operators, the end-of-stream marker, and finally the
** generic descriptions used for numerals, names and strings.
*/
static const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do", "else", "elseif", "end",
    "false", "for", "function", "goto", "if", "in",
    "local", "nil", "not", "or", "repeat", "return",
    "then", "true", "until", "while",
    /* multi-character operators */
    "//", "..", "...",
    "==", ">=", "<=", "~=",
    "<<", ">>", "::",
    /* end of stream */
    "<eof>",
    /* tokens whose text varies */
    "<number>", "<integer>", "<name>", "<string>"
};
49 :
50 :
/*
** Append the current character to the token buffer (via 'save') and
** then advance to the next input character (via 'next'). The value of
** the expression is the value of 'next'.
** The parameter is parenthesized so that any pointer expression can be
** passed; it is evaluated more than once, so it must be free of side
** effects.
*/
#define save_and_next(ls)	(save((ls), (ls)->current), next(ls))
52 :
53 :
54 : static l_noret lexerror (LexState *ls, const char *msg, int token);
55 :
56 :
57 345315 : static void save (LexState *ls, int c) {
58 345315 : Mbuffer *b = ls->buff;
59 345315 : if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
60 : size_t newsize;
61 76 : if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
62 0 : lexerror(ls, "lexical element too long", 0);
63 76 : newsize = luaZ_sizebuffer(b) * 2;
64 76 : luaZ_resizebuffer(ls->L, b, newsize);
65 : }
66 345315 : b->buffer[luaZ_bufflen(b)++] = cast(char, c);
67 345315 : }
68 :
69 :
70 106 : void luaX_init (lua_State *L) {
71 : int i;
72 106 : TString *e = luaS_newliteral(L, LUA_ENV); /* create env name */
73 106 : luaC_fix(L, obj2gco(e)); /* never collect this name */
74 2438 : for (i=0; i<NUM_RESERVED; i++) {
75 2332 : TString *ts = luaS_new(L, luaX_tokens[i]);
76 2332 : luaC_fix(L, obj2gco(ts)); /* reserved words are never collected */
77 2332 : ts->extra = cast_byte(i+1); /* reserved word */
78 : }
79 106 : }
80 :
81 :
82 35 : const char *luaX_token2str (LexState *ls, int token) {
83 35 : if (token < FIRST_RESERVED) { /* single-byte symbols? */
84 : lua_assert(token == cast_uchar(token));
85 13 : return luaO_pushfstring(ls->L, "'%c'", token);
86 : }
87 : else {
88 22 : const char *s = luaX_tokens[token - FIRST_RESERVED];
89 22 : if (token < TK_EOS) /* fixed format (symbols and reserved words)? */
90 11 : return luaO_pushfstring(ls->L, "'%s'", s);
91 : else /* names, strings, and numerals */
92 11 : return s;
93 : }
94 : }
95 :
96 :
97 37 : static const char *txtToken (LexState *ls, int token) {
98 37 : switch (token) {
99 14 : case TK_NAME: case TK_STRING:
100 : case TK_FLT: case TK_INT:
101 14 : save(ls, '\0');
102 14 : return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
103 23 : default:
104 23 : return luaX_token2str(ls, token);
105 : }
106 : }
107 :
108 :
109 41 : static l_noret lexerror (LexState *ls, const char *msg, int token) {
110 41 : msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
111 41 : if (token)
112 37 : luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
113 41 : luaD_throw(ls->L, LUA_ERRSYNTAX);
114 : }
115 :
116 :
117 26 : l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
118 26 : lexerror(ls, msg, ls->t.token);
119 : }
120 :
121 :
122 : /*
123 : ** creates a new string and anchors it in scanner's table so that
124 : ** it will not be collected until the end of the compilation
125 : ** (by that time it should be anchored somewhere)
126 : */
127 55514 : TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
128 55514 : lua_State *L = ls->L;
129 : TValue *o; /* entry for 'str' */
130 55514 : TString *ts = luaS_newlstr(L, str, l); /* create new string */
131 55514 : setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */
132 55514 : o = luaH_set(L, ls->h, L->top - 1);
133 55514 : if (ttisnil(o)) { /* not in use yet? */
134 : /* boolean value does not need GC barrier;
135 : table has no metatable, so it does not need to invalidate cache */
136 14702 : setbvalue(o, 1); /* t[string] = true */
137 14702 : luaC_checkGC(L);
138 : }
139 : else { /* string already present */
140 40812 : ts = tsvalue(keyfromval(o)); /* re-use value previously stored */
141 : }
142 55514 : L->top--; /* remove string from stack */
143 55514 : return ts;
144 : }
145 :
146 :
147 : /*
148 : ** increment line number and skips newline sequence (any of
149 : ** \n, \r, \n\r, or \r\n)
150 : */
151 26264 : static void inclinenumber (LexState *ls) {
152 26264 : int old = ls->current;
153 : lua_assert(currIsNewline(ls));
154 26264 : next(ls); /* skip '\n' or '\r' */
155 26264 : if (currIsNewline(ls) && ls->current != old)
156 0 : next(ls); /* skip '\n\r' or '\r\n' */
157 26264 : if (++ls->linenumber >= MAX_INT)
158 0 : lexerror(ls, "chunk has too many lines", 0);
159 26264 : }
160 :
161 :
162 508 : void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
163 : int firstchar) {
164 508 : ls->t.token = 0;
165 508 : ls->L = L;
166 508 : ls->current = firstchar;
167 508 : ls->lookahead.token = TK_EOS; /* no look-ahead token */
168 508 : ls->z = z;
169 508 : ls->fs = NULL;
170 508 : ls->linenumber = 1;
171 508 : ls->lastline = 1;
172 508 : ls->source = source;
173 508 : ls->envn = luaS_newliteral(L, LUA_ENV); /* get env name */
174 508 : luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
175 508 : }
176 :
177 :
178 :
179 : /*
180 : ** =======================================================
181 : ** LEXICAL ANALYZER
182 : ** =======================================================
183 : */
184 :
185 :
186 13007 : static int check_next1 (LexState *ls, int c) {
187 13007 : if (ls->current == c) {
188 3199 : next(ls);
189 3199 : return 1;
190 : }
191 9808 : else return 0;
192 : }
193 :
194 :
195 : /*
196 : ** Check whether current char is in set 'set' (with two chars) and
197 : ** saves it
198 : */
199 6384 : static int check_next2 (LexState *ls, const char *set) {
200 : lua_assert(set[2] == '\0');
201 6384 : if (ls->current == set[0] || ls->current == set[1]) {
202 121 : save_and_next(ls);
203 121 : return 1;
204 : }
205 6263 : else return 0;
206 : }
207 :
208 :
209 : /* LUA_NUMBER */
210 : /*
211 : ** this function is quite liberal in what it accepts, as 'luaO_str2num'
212 : ** will reject ill-formed numerals.
213 : */
214 3710 : static int read_numeral (LexState *ls, SemInfo *seminfo) {
215 : TValue obj;
216 3710 : const char *expo = "Ee";
217 3710 : int first = ls->current;
218 : lua_assert(lisdigit(ls->current));
219 3710 : save_and_next(ls);
220 3710 : if (first == '0' && check_next2(ls, "xX")) /* hexadecimal? */
221 108 : expo = "Pp";
222 : for (;;) {
223 5466 : if (check_next2(ls, expo)) /* exponent part? */
224 10 : check_next2(ls, "-+"); /* optional exponent sign */
225 5466 : if (lisxdigit(ls->current))
226 1546 : save_and_next(ls);
227 3920 : else if (ls->current == '.')
228 210 : save_and_next(ls);
229 3710 : else break;
230 : }
231 3710 : save(ls, '\0');
232 3710 : if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0) /* format error? */
233 1 : lexerror(ls, "malformed number", TK_FLT);
234 3709 : if (ttisinteger(&obj)) {
235 3493 : seminfo->i = ivalue(&obj);
236 3493 : return TK_INT;
237 : }
238 : else {
239 : lua_assert(ttisfloat(&obj));
240 216 : seminfo->r = fltvalue(&obj);
241 216 : return TK_FLT;
242 : }
243 : }
244 :
245 :
246 : /*
247 : ** reads a sequence '[=*[' or ']=*]', leaving the last bracket.
248 : ** If sequence is well formed, return its number of '='s + 2; otherwise,
249 : ** return 1 if there is no '='s or 0 otherwise (an unfinished '[==...').
250 : */
251 2043 : static size_t skip_sep (LexState *ls) {
252 2043 : size_t count = 0;
253 2043 : int s = ls->current;
254 : lua_assert(s == '[' || s == ']');
255 2043 : save_and_next(ls);
256 2056 : while (ls->current == '=') {
257 13 : save_and_next(ls);
258 13 : count++;
259 : }
260 2043 : return (ls->current == s) ? count + 2
261 2796 : : (count == 0) ? 1
262 753 : : 0;
263 :
264 : }
265 :
266 :
267 645 : static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
268 645 : int line = ls->linenumber; /* initial line (for error message) */
269 645 : save_and_next(ls); /* skip 2nd '[' */
270 645 : if (currIsNewline(ls)) /* string starts with a newline? */
271 273 : inclinenumber(ls); /* skip it */
272 : for (;;) {
273 47176 : switch (ls->current) {
274 2 : case EOZ: { /* error */
275 2 : const char *what = (seminfo ? "string" : "comment");
276 2 : const char *msg = luaO_pushfstring(ls->L,
277 : "unfinished long %s (starting at line %d)", what, line);
278 2 : lexerror(ls, msg, TK_EOS);
279 : break; /* to avoid warnings */
280 : }
281 650 : case ']': {
282 650 : if (skip_sep(ls) == sep) {
283 643 : save_and_next(ls); /* skip 2nd ']' */
284 643 : goto endloop;
285 : }
286 7 : break;
287 : }
288 2015 : case '\n': case '\r': {
289 2015 : save(ls, '\n');
290 2015 : inclinenumber(ls);
291 2015 : if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */
292 2015 : break;
293 : }
294 44509 : default: {
295 44509 : if (seminfo) save_and_next(ls);
296 38555 : else next(ls);
297 : }
298 : }
299 643 : } endloop:
300 643 : if (seminfo)
301 157 : seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
302 157 : luaZ_bufflen(ls->buff) - 2 * sep);
303 643 : }
304 :
305 :
306 585 : static void esccheck (LexState *ls, int c, const char *msg) {
307 585 : if (!c) {
308 6 : if (ls->current != EOZ)
309 6 : save_and_next(ls); /* add current to buffer for error message */
310 6 : lexerror(ls, msg, TK_STRING);
311 : }
312 579 : }
313 :
314 :
315 187 : static int gethexa (LexState *ls) {
316 187 : save_and_next(ls);
317 187 : esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
318 185 : return luaO_hexavalue(ls->current);
319 : }
320 :
321 :
322 66 : static int readhexaesc (LexState *ls) {
323 66 : int r = gethexa(ls);
324 65 : r = (r << 4) + gethexa(ls);
325 65 : luaZ_buffremove(ls->buff, 2); /* remove saved chars from buffer */
326 65 : return r;
327 : }
328 :
329 :
330 56 : static unsigned long readutf8esc (LexState *ls) {
331 : unsigned long r;
332 56 : int i = 4; /* chars to be removed: '\', 'u', '{', and first digit */
333 56 : save_and_next(ls); /* skip 'u' */
334 56 : esccheck(ls, ls->current == '{', "missing '{'");
335 56 : r = gethexa(ls); /* must have at least one digit */
336 211 : while ((save_and_next(ls), lisxdigit(ls->current))) {
337 157 : i++;
338 157 : r = (r << 4) + luaO_hexavalue(ls->current);
339 157 : esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
340 : }
341 54 : esccheck(ls, ls->current == '}', "missing '}'");
342 53 : next(ls); /* skip '}' */
343 53 : luaZ_buffremove(ls->buff, i); /* remove saved chars from buffer */
344 53 : return r;
345 : }
346 :
347 :
348 56 : static void utf8esc (LexState *ls) {
349 : char buff[UTF8BUFFSZ];
350 56 : int n = luaO_utf8esc(buff, readutf8esc(ls));
351 204 : for (; n > 0; n--) /* add 'buff' to string */
352 151 : save(ls, buff[UTF8BUFFSZ - n]);
353 53 : }
354 :
355 :
356 65 : static int readdecesc (LexState *ls) {
357 : int i;
358 65 : int r = 0; /* result accumulator */
359 172 : for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */
360 107 : r = 10*r + ls->current - '0';
361 107 : save_and_next(ls);
362 : }
363 65 : esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
364 64 : luaZ_buffremove(ls->buff, i); /* remove read digits from buffer */
365 64 : return r;
366 : }
367 :
368 :
369 7610 : static void read_string (LexState *ls, int del, SemInfo *seminfo) {
370 7610 : save_and_next(ls); /* keep delimiter (for error messages) */
371 91925 : while (ls->current != del) {
372 84326 : switch (ls->current) {
373 3 : case EOZ:
374 3 : lexerror(ls, "unfinished string", TK_EOS);
375 : break; /* to avoid warnings */
376 2 : case '\n':
377 : case '\r':
378 2 : lexerror(ls, "unfinished string", TK_STRING);
379 : break; /* to avoid warnings */
380 511 : case '\\': { /* escape sequences */
381 : int c; /* final character to be saved */
382 511 : save_and_next(ls); /* keep '\\' for error messages */
383 511 : switch (ls->current) {
384 2 : case 'a': c = '\a'; goto read_save;
385 8 : case 'b': c = '\b'; goto read_save;
386 12 : case 'f': c = '\f'; goto read_save;
387 59 : case 'n': c = '\n'; goto read_save;
388 17 : case 'r': c = '\r'; goto read_save;
389 182 : case 't': c = '\t'; goto read_save;
390 2 : case 'v': c = '\v'; goto read_save;
391 66 : case 'x': c = readhexaesc(ls); goto read_save;
392 56 : case 'u': utf8esc(ls); goto no_save;
393 1 : case '\n': case '\r':
394 1 : inclinenumber(ls); c = '\n'; goto only_save;
395 37 : case '\\': case '\"': case '\'':
396 37 : c = ls->current; goto read_save;
397 1 : case EOZ: goto no_save; /* will raise an error next loop */
398 2 : case 'z': { /* zap following span of spaces */
399 2 : luaZ_buffremove(ls->buff, 1); /* remove '\\' */
400 2 : next(ls); /* skip the 'z' */
401 8 : while (lisspace(ls->current)) {
402 6 : if (currIsNewline(ls)) inclinenumber(ls);
403 5 : else next(ls);
404 : }
405 2 : goto no_save;
406 : }
407 66 : default: {
408 66 : esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
409 65 : c = readdecesc(ls); /* digital escape '\ddd' */
410 64 : goto only_save;
411 : }
412 : }
413 384 : read_save:
414 384 : next(ls);
415 : /* go through */
416 449 : only_save:
417 449 : luaZ_buffremove(ls->buff, 1); /* remove '\\' */
418 449 : save(ls, c);
419 : /* go through */
420 505 : no_save: break;
421 : }
422 83810 : default:
423 83810 : save_and_next(ls);
424 : }
425 : }
426 7599 : save_and_next(ls); /* skip delimiter */
427 15198 : seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
428 7599 : luaZ_bufflen(ls->buff) - 2);
429 7599 : }
430 :
431 :
432 104605 : static int llex (LexState *ls, SemInfo *seminfo) {
433 104605 : luaZ_resetbuffer(ls->buff);
434 : for (;;) {
435 272913 : switch (ls->current) {
436 23974 : case '\n': case '\r': { /* line breaks */
437 23974 : inclinenumber(ls);
438 23974 : break;
439 : }
440 141699 : case ' ': case '\f': case '\t': case '\v': { /* spaces */
441 141699 : next(ls);
442 141699 : break;
443 : }
444 3025 : case '-': { /* '-' or '--' (comment) */
445 3025 : next(ls);
446 3025 : if (ls->current != '-') return '-';
447 : /* else is a comment */
448 2636 : next(ls);
449 2636 : if (ls->current == '[') { /* long comment? */
450 487 : size_t sep = skip_sep(ls);
451 487 : luaZ_resetbuffer(ls->buff); /* 'skip_sep' may dirty the buffer */
452 487 : if (sep >= 2) {
453 487 : read_long_string(ls, NULL, sep); /* skip long comment */
454 486 : luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */
455 486 : break;
456 : }
457 : }
458 : /* else short comment */
459 43425 : while (!currIsNewline(ls) && ls->current != EOZ)
460 41276 : next(ls); /* skip until end of line (or end of file) */
461 2149 : break;
462 : }
463 906 : case '[': { /* long string or simply '[' */
464 906 : size_t sep = skip_sep(ls);
465 906 : if (sep >= 2) {
466 158 : read_long_string(ls, seminfo, sep);
467 157 : return TK_STRING;
468 : }
469 748 : else if (sep == 0) /* '[=...' missing second bracket */
470 1 : lexerror(ls, "invalid long string delimiter", TK_STRING);
471 747 : return '[';
472 : }
473 5013 : case '=': {
474 5013 : next(ls);
475 5013 : if (check_next1(ls, '=')) return TK_EQ;
476 4588 : else return '=';
477 : }
478 147 : case '<': {
479 147 : next(ls);
480 147 : if (check_next1(ls, '=')) return TK_LE;
481 65 : else if (check_next1(ls, '<')) return TK_SHL;
482 45 : else return '<';
483 : }
484 253 : case '>': {
485 253 : next(ls);
486 253 : if (check_next1(ls, '=')) return TK_GE;
487 56 : else if (check_next1(ls, '>')) return TK_SHR;
488 36 : else return '>';
489 : }
490 68 : case '/': {
491 68 : next(ls);
492 68 : if (check_next1(ls, '/')) return TK_IDIV;
493 39 : else return '/';
494 : }
495 265 : case '~': {
496 265 : next(ls);
497 265 : if (check_next1(ls, '=')) return TK_NE;
498 32 : else return '~';
499 : }
500 569 : case ':': {
501 569 : next(ls);
502 569 : if (check_next1(ls, ':')) return TK_DBCOLON;
503 553 : else return ':';
504 : }
505 7610 : case '"': case '\'': { /* short literal strings */
506 7610 : read_string(ls, ls->current, seminfo);
507 7599 : return TK_STRING;
508 : }
509 4433 : case '.': { /* '.', '..', '...', or number */
510 4433 : save_and_next(ls);
511 4433 : if (check_next1(ls, '.')) {
512 2138 : if (check_next1(ls, '.'))
513 39 : return TK_DOTS; /* '...' */
514 2099 : else return TK_CONCAT; /* '..' */
515 : }
516 2295 : else if (!lisdigit(ls->current)) return '.';
517 1 : else return read_numeral(ls, seminfo);
518 : }
519 3709 : case '0': case '1': case '2': case '3': case '4':
520 : case '5': case '6': case '7': case '8': case '9': {
521 3709 : return read_numeral(ls, seminfo);
522 : }
523 475 : case EOZ: {
524 475 : return TK_EOS;
525 : }
526 80767 : default: {
527 80767 : if (lislalpha(ls->current)) { /* identifier or reserved word? */
528 : TString *ts;
529 : do {
530 219561 : save_and_next(ls);
531 219561 : } while (lislalnum(ls->current));
532 46778 : ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
533 46778 : luaZ_bufflen(ls->buff));
534 46778 : seminfo->ts = ts;
535 46778 : if (isreserved(ts)) /* reserved word? */
536 15525 : return ts->extra - 1 + FIRST_RESERVED;
537 : else {
538 31253 : return TK_NAME;
539 : }
540 : }
541 : else { /* single-char tokens (+ - / ...) */
542 33989 : int c = ls->current;
543 33989 : next(ls);
544 33989 : return c;
545 : }
546 : }
547 : }
548 : }
549 : }
550 :
551 :
552 104605 : void luaX_next (LexState *ls) {
553 104605 : ls->lastline = ls->linenumber;
554 104605 : if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
555 516 : ls->t = ls->lookahead; /* use this one */
556 516 : ls->lookahead.token = TK_EOS; /* and discharge it */
557 : }
558 : else
559 104089 : ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
560 104590 : }
561 :
562 :
563 516 : int luaX_lookahead (LexState *ls) {
564 : lua_assert(ls->lookahead.token == TK_EOS);
565 516 : ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
566 516 : return ls->lookahead.token;
567 : }
568 :
|