Line data Source code
1 : /*
2 : ** $Id: llex.c,v 2.63.1.3 2015/02/09 17:56:34 roberto Exp $
3 : ** Lexical Analyzer
4 : ** See Copyright Notice in lua.h
5 : */
6 :
7 :
8 : #include <locale.h>
9 : #include <string.h>
10 :
11 : #define llex_c
12 : #define LUA_CORE
13 :
14 : #include "lua.h"
15 :
16 : #include "lctype.h"
17 : #include "ldo.h"
18 : #include "llex.h"
19 : #include "lobject.h"
20 : #include "lparser.h"
21 : #include "lstate.h"
22 : #include "lstring.h"
23 : #include "ltable.h"
24 : #include "lzio.h"
25 :
26 :
27 :
/*
** advance the lexer by one character: stores the result of 'zgetc' on the
** lexer's input stream in 'current' and yields that value.
** The parameter is parenthesized so any pointer expression can be passed;
** note that 'ls' is still evaluated more than once.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))



/*
** true when the current character is '\n' or '\r'
** ('ls' is evaluated twice, so it must be free of side effects)
*/
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
33 :
34 :
/*
** printable names of all tokens, indexed by (token - FIRST_RESERVED).
** ORDER RESERVED
*/
static const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do", "else", "elseif", "end", "false", "for",
    "function", "goto", "if", "in", "local", "nil", "not", "or",
    "repeat", "return", "then", "true", "until", "while",
    /* other terminal symbols */
    "..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
    "<number>", "<name>", "<string>"
};
44 :
45 :
46 : #define save_and_next(ls) (save(ls, ls->current), next(ls))
47 :
48 :
49 : static l_noret lexerror (LexState *ls, const char *msg, int token);
50 :
51 :
52 330250 : static void save (LexState *ls, int c) {
53 330250 : Mbuffer *b = ls->buff;
54 330250 : if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
55 : size_t newsize;
56 63 : if (luaZ_sizebuffer(b) >= MAX_SIZET/2)
57 0 : lexerror(ls, "lexical element too long", 0);
58 63 : newsize = luaZ_sizebuffer(b) * 2;
59 63 : luaZ_resizebuffer(ls->L, b, newsize);
60 : }
61 330250 : b->buffer[luaZ_bufflen(b)++] = cast(char, c);
62 330250 : }
63 :
64 :
65 104 : void luaX_init (lua_State *L) {
66 : int i;
67 2392 : for (i=0; i<NUM_RESERVED; i++) {
68 2288 : TString *ts = luaS_new(L, luaX_tokens[i]);
69 2288 : luaS_fix(ts); /* reserved words are never collected */
70 2288 : ts->tsv.extra = cast_byte(i+1); /* reserved word */
71 : }
72 104 : }
73 :
74 :
75 35 : const char *luaX_token2str (LexState *ls, int token) {
76 35 : if (token < FIRST_RESERVED) { /* single-byte symbols? */
77 : lua_assert(token == cast(unsigned char, token));
78 13 : return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :
79 0 : luaO_pushfstring(ls->L, "char(%d)", token);
80 : }
81 : else {
82 22 : const char *s = luaX_tokens[token - FIRST_RESERVED];
83 22 : if (token < TK_EOS) /* fixed format (symbols and reserved words)? */
84 11 : return luaO_pushfstring(ls->L, LUA_QS, s);
85 : else /* names, strings, and numerals */
86 11 : return s;
87 : }
88 : }
89 :
90 :
91 34 : static const char *txtToken (LexState *ls, int token) {
92 34 : switch (token) {
93 11 : case TK_NAME:
94 : case TK_STRING:
95 : case TK_NUMBER:
96 11 : save(ls, '\0');
97 11 : return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
98 23 : default:
99 23 : return luaX_token2str(ls, token);
100 : }
101 : }
102 :
103 :
104 38 : static l_noret lexerror (LexState *ls, const char *msg, int token) {
105 : char buff[LUA_IDSIZE];
106 38 : luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);
107 38 : msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
108 38 : if (token)
109 34 : luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
110 38 : luaD_throw(ls->L, LUA_ERRSYNTAX);
111 : }
112 :
113 :
114 26 : l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
115 26 : lexerror(ls, msg, ls->t.token);
116 : }
117 :
118 :
119 : /*
120 : ** creates a new string and anchors it in function's table so that
121 : ** it will not be collected until the end of the function's compilation
122 : ** (by that time it should be anchored in function's prototype)
123 : */
124 54104 : TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
125 54104 : lua_State *L = ls->L;
126 : TValue *o; /* entry for `str' */
127 54104 : TString *ts = luaS_newlstr(L, str, l); /* create new string */
128 54104 : setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */
129 54104 : o = luaH_set(L, ls->fs->h, L->top - 1);
130 54104 : if (ttisnil(o)) { /* not in use yet? (see 'addK') */
131 : /* boolean value does not need GC barrier;
132 : table has no metatable, so it does not need to invalidate cache */
133 27647 : setbvalue(o, 1); /* t[string] = true */
134 27647 : luaC_checkGC(L);
135 : }
136 : else { /* string already present */
137 26457 : ts = rawtsvalue(keyfromval(o)); /* re-use value previously stored */
138 : }
139 54104 : L->top--; /* remove string from stack */
140 54104 : return ts;
141 : }
142 :
143 :
144 : /*
145 : ** increment line number and skips newline sequence (any of
146 : ** \n, \r, \n\r, or \r\n)
147 : */
148 25120 : static void inclinenumber (LexState *ls) {
149 25120 : int old = ls->current;
150 : lua_assert(currIsNewline(ls));
151 25120 : next(ls); /* skip `\n' or `\r' */
152 25120 : if (currIsNewline(ls) && ls->current != old)
153 0 : next(ls); /* skip `\n\r' or `\r\n' */
154 25120 : if (++ls->linenumber >= MAX_INT)
155 0 : lexerror(ls, "chunk has too many lines", 0);
156 25120 : }
157 :
158 :
159 495 : void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
160 : int firstchar) {
161 495 : ls->decpoint = '.';
162 495 : ls->L = L;
163 495 : ls->current = firstchar;
164 495 : ls->lookahead.token = TK_EOS; /* no look-ahead token */
165 495 : ls->z = z;
166 495 : ls->fs = NULL;
167 495 : ls->linenumber = 1;
168 495 : ls->lastline = 1;
169 495 : ls->source = source;
170 495 : ls->envn = luaS_new(L, LUA_ENV); /* create env name */
171 495 : luaS_fix(ls->envn); /* never collect this name */
172 495 : luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
173 495 : }
174 :
175 :
176 :
177 : /*
178 : ** =======================================================
179 : ** LEXICAL ANALYZER
180 : ** =======================================================
181 : */
182 :
183 :
184 :
185 12314 : static int check_next (LexState *ls, const char *set) {
186 12314 : if (ls->current == '\0' || !strchr(set, ls->current))
187 10042 : return 0;
188 2272 : save_and_next(ls);
189 2272 : return 1;
190 : }
191 :
192 :
193 : /*
194 : ** change all characters 'from' in buffer to 'to'
195 : */
196 3379 : static void buffreplace (LexState *ls, char from, char to) {
197 3379 : size_t n = luaZ_bufflen(ls->buff);
198 3379 : char *p = luaZ_buffer(ls->buff);
199 11827 : while (n--)
200 8448 : if (p[n] == from) p[n] = to;
201 3379 : }
202 :
203 :
/*
** first character of the decimal-point string reported by 'localeconv'
** (may be predefined elsewhere to override this default)
*/
#ifndef getlocaledecpoint
#define getlocaledecpoint()	(localeconv()->decimal_point[0])
#endif


/*
** convert the contents of buffer 'b' (not counting its last character)
** to a number stored through 'e'; yields the result of 'luaO_str2d'
*/
#define buff2d(b,e) \
	luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)
210 :
211 : /*
212 : ** in case of format error, try to change decimal point separator to
213 : ** the one defined in the current locale and check again
214 : */
215 1 : static void trydecpoint (LexState *ls, SemInfo *seminfo) {
216 1 : char old = ls->decpoint;
217 1 : ls->decpoint = getlocaledecpoint();
218 1 : buffreplace(ls, old, ls->decpoint); /* try new decimal separator */
219 1 : if (!buff2d(ls->buff, &seminfo->r)) {
220 : /* format error with correct decimal point: no more options */
221 1 : buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */
222 1 : lexerror(ls, "malformed number", TK_NUMBER);
223 : }
224 0 : }
225 :
226 :
227 : /* LUA_NUMBER */
228 : /*
229 : ** this function is quite liberal in what it accepts, as 'luaO_str2d'
230 : ** will reject ill-formed numerals.
231 : */
232 3377 : static void read_numeral (LexState *ls, SemInfo *seminfo) {
233 3377 : const char *expo = "Ee";
234 3377 : int first = ls->current;
235 : lua_assert(lisdigit(ls->current));
236 3377 : save_and_next(ls);
237 3377 : if (first == '0' && check_next(ls, "Xx")) /* hexadecimal? */
238 82 : expo = "Pp";
239 : for (;;) {
240 4957 : if (check_next(ls, expo)) /* exponent part? */
241 10 : check_next(ls, "+-"); /* optional exponent sign */
242 4957 : if (lisxdigit(ls->current) || ls->current == '.')
243 1580 : save_and_next(ls);
244 : else break;
245 : }
246 3377 : save(ls, '\0');
247 3377 : buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */
248 3377 : if (!buff2d(ls->buff, &seminfo->r)) /* format error? */
249 1 : trydecpoint(ls, seminfo); /* try to update decimal point separator */
250 3376 : }
251 :
252 :
253 : /*
254 : ** skip a sequence '[=*[' or ']=*]' and return its number of '='s or
255 : ** -1 if sequence is malformed
256 : */
257 1935 : static int skip_sep (LexState *ls) {
258 1935 : int count = 0;
259 1935 : int s = ls->current;
260 : lua_assert(s == '[' || s == ']');
261 1935 : save_and_next(ls);
262 1948 : while (ls->current == '=') {
263 13 : save_and_next(ls);
264 13 : count++;
265 : }
266 1935 : return (ls->current == s) ? count : (-count) - 1;
267 : }
268 :
269 :
270 592 : static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
271 592 : save_and_next(ls); /* skip 2nd `[' */
272 592 : if (currIsNewline(ls)) /* string starts with a newline? */
273 223 : inclinenumber(ls); /* skip it */
274 : for (;;) {
275 44177 : switch (ls->current) {
276 2 : case EOZ:
277 2 : lexerror(ls, (seminfo) ? "unfinished long string" :
278 : "unfinished long comment", TK_EOS);
279 : break; /* to avoid warnings */
280 597 : case ']': {
281 597 : if (skip_sep(ls) == sep) {
282 590 : save_and_next(ls); /* skip 2nd `]' */
283 590 : goto endloop;
284 : }
285 7 : break;
286 : }
287 1915 : case '\n': case '\r': {
288 1915 : save(ls, '\n');
289 1915 : inclinenumber(ls);
290 1915 : if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */
291 1915 : break;
292 : }
293 41663 : default: {
294 41663 : if (seminfo) save_and_next(ls);
295 35755 : else next(ls);
296 : }
297 : }
298 590 : } endloop:
299 590 : if (seminfo)
300 154 : seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
301 154 : luaZ_bufflen(ls->buff) - 2*(2 + sep));
302 590 : }
303 :
304 :
305 3 : static void escerror (LexState *ls, int *c, int n, const char *msg) {
306 : int i;
307 3 : luaZ_resetbuffer(ls->buff); /* prepare error message */
308 3 : save(ls, '\\');
309 9 : for (i = 0; i < n && c[i] != EOZ; i++)
310 6 : save(ls, c[i]);
311 3 : lexerror(ls, msg, TK_STRING);
312 : }
313 :
314 :
315 46 : static int readhexaesc (LexState *ls) {
316 : int c[3], i; /* keep input for error message */
317 46 : int r = 0; /* result accumulator */
318 46 : c[0] = 'x'; /* for error message */
319 136 : for (i = 1; i < 3; i++) { /* read two hexadecimal digits */
320 91 : c[i] = next(ls);
321 91 : if (!lisxdigit(c[i]))
322 1 : escerror(ls, c, i + 1, "hexadecimal digit expected");
323 90 : r = (r << 4) + luaO_hexavalue(c[i]);
324 : }
325 45 : return r;
326 : }
327 :
328 :
329 63 : static int readdecesc (LexState *ls) {
330 : int c[3], i;
331 63 : int r = 0; /* result accumulator */
332 168 : for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */
333 105 : c[i] = ls->current;
334 105 : r = 10*r + c[i] - '0';
335 105 : next(ls);
336 : }
337 63 : if (r > UCHAR_MAX)
338 1 : escerror(ls, c, i, "decimal escape too large");
339 62 : return r;
340 : }
341 :
342 :
343 7165 : static void read_string (LexState *ls, int del, SemInfo *seminfo) {
344 7165 : save_and_next(ls); /* keep delimiter (for error messages) */
345 84192 : while (ls->current != del) {
346 77035 : switch (ls->current) {
347 3 : case EOZ:
348 3 : lexerror(ls, "unfinished string", TK_EOS);
349 : break; /* to avoid warnings */
350 2 : case '\n':
351 : case '\r':
352 2 : lexerror(ls, "unfinished string", TK_STRING);
353 : break; /* to avoid warnings */
354 433 : case '\\': { /* escape sequences */
355 : int c; /* final character to be saved */
356 433 : next(ls); /* do not save the `\' */
357 433 : switch (ls->current) {
358 2 : case 'a': c = '\a'; goto read_save;
359 8 : case 'b': c = '\b'; goto read_save;
360 12 : case 'f': c = '\f'; goto read_save;
361 59 : case 'n': c = '\n'; goto read_save;
362 17 : case 'r': c = '\r'; goto read_save;
363 182 : case 't': c = '\t'; goto read_save;
364 2 : case 'v': c = '\v'; goto read_save;
365 46 : case 'x': c = readhexaesc(ls); goto read_save;
366 1 : case '\n': case '\r':
367 1 : inclinenumber(ls); c = '\n'; goto only_save;
368 37 : case '\\': case '\"': case '\'':
369 37 : c = ls->current; goto read_save;
370 1 : case EOZ: goto no_save; /* will raise an error next loop */
371 2 : case 'z': { /* zap following span of spaces */
372 2 : next(ls); /* skip the 'z' */
373 8 : while (lisspace(ls->current)) {
374 6 : if (currIsNewline(ls)) inclinenumber(ls);
375 5 : else next(ls);
376 : }
377 2 : goto no_save;
378 : }
379 64 : default: {
380 64 : if (!lisdigit(ls->current))
381 1 : escerror(ls, &ls->current, 1, "invalid escape sequence");
382 : /* digital escape \ddd */
383 63 : c = readdecesc(ls);
384 62 : goto only_save;
385 : }
386 : }
387 364 : read_save: next(ls); /* read next character */
388 427 : only_save: save(ls, c); /* save 'c' */
389 430 : no_save: break;
390 : }
391 76597 : default:
392 76597 : save_and_next(ls);
393 : }
394 : }
395 7157 : save_and_next(ls); /* skip delimiter */
396 14314 : seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
397 7157 : luaZ_bufflen(ls->buff) - 2);
398 7157 : }
399 :
400 :
401 100863 : static int llex (LexState *ls, SemInfo *seminfo) {
402 100863 : luaZ_resetbuffer(ls->buff);
403 : for (;;) {
404 263746 : switch (ls->current) {
405 22980 : case '\n': case '\r': { /* line breaks */
406 22980 : inclinenumber(ls);
407 22980 : break;
408 : }
409 137464 : case ' ': case '\f': case '\t': case '\v': { /* spaces */
410 137464 : next(ls);
411 137464 : break;
412 : }
413 2800 : case '-': { /* '-' or '--' (comment) */
414 2800 : next(ls);
415 2800 : if (ls->current != '-') return '-';
416 : /* else is a comment */
417 2440 : next(ls);
418 2440 : if (ls->current == '[') { /* long comment? */
419 437 : int sep = skip_sep(ls);
420 437 : luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */
421 437 : if (sep >= 0) {
422 437 : read_long_string(ls, NULL, sep); /* skip long comment */
423 436 : luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */
424 436 : break;
425 : }
426 : }
427 : /* else short comment */
428 40384 : while (!currIsNewline(ls) && ls->current != EOZ)
429 38381 : next(ls); /* skip until end of line (or end of file) */
430 2003 : break;
431 : }
432 901 : case '[': { /* long string or simply '[' */
433 901 : int sep = skip_sep(ls);
434 901 : if (sep >= 0) {
435 155 : read_long_string(ls, seminfo, sep);
436 154 : return TK_STRING;
437 : }
438 746 : else if (sep == -1) return '[';
439 1 : else lexerror(ls, "invalid long string delimiter", TK_STRING);
440 : }
441 4936 : case '=': {
442 4936 : next(ls);
443 4936 : if (ls->current != '=') return '=';
444 425 : else { next(ls); return TK_EQ; }
445 : }
446 127 : case '<': {
447 127 : next(ls);
448 127 : if (ls->current != '=') return '<';
449 82 : else { next(ls); return TK_LE; }
450 : }
451 227 : case '>': {
452 227 : next(ls);
453 227 : if (ls->current != '=') return '>';
454 191 : else { next(ls); return TK_GE; }
455 : }
456 233 : case '~': {
457 233 : next(ls);
458 233 : if (ls->current != '=') return '~';
459 233 : else { next(ls); return TK_NE; }
460 : }
461 556 : case ':': {
462 556 : next(ls);
463 556 : if (ls->current != ':') return ':';
464 16 : else { next(ls); return TK_DBCOLON; }
465 : }
466 7165 : case '"': case '\'': { /* short literal strings */
467 7165 : read_string(ls, ls->current, seminfo);
468 7157 : return TK_STRING;
469 : }
470 4340 : case '.': { /* '.', '..', '...', or number */
471 4340 : save_and_next(ls);
472 4340 : if (check_next(ls, ".")) {
473 2138 : if (check_next(ls, "."))
474 39 : return TK_DOTS; /* '...' */
475 2099 : else return TK_CONCAT; /* '..' */
476 : }
477 2202 : else if (!lisdigit(ls->current)) return '.';
478 : /* else go through */
479 : }
480 : case '0': case '1': case '2': case '3': case '4':
481 : case '5': case '6': case '7': case '8': case '9': {
482 3377 : read_numeral(ls, seminfo);
483 3376 : return TK_NUMBER;
484 : }
485 465 : case EOZ: {
486 465 : return TK_EOS;
487 : }
488 78176 : default: {
489 78176 : if (lislalpha(ls->current)) { /* identifier or reserved word? */
490 : TString *ts;
491 : do {
492 212985 : save_and_next(ls);
493 212985 : } while (lislalnum(ls->current));
494 45676 : ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
495 45676 : luaZ_bufflen(ls->buff));
496 45676 : seminfo->ts = ts;
497 45676 : if (isreserved(ts)) /* reserved word? */
498 15004 : return ts->tsv.extra - 1 + FIRST_RESERVED;
499 : else {
500 30672 : return TK_NAME;
501 : }
502 : }
503 : else { /* single-char tokens (+ - / ...) */
504 32500 : int c = ls->current;
505 32500 : next(ls);
506 32500 : return c;
507 : }
508 : }
509 : }
510 : }
511 : }
512 :
513 :
514 100863 : void luaX_next (LexState *ls) {
515 100863 : ls->lastline = ls->linenumber;
516 100863 : if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
517 464 : ls->t = ls->lookahead; /* use this one */
518 464 : ls->lookahead.token = TK_EOS; /* and discharge it */
519 : }
520 : else
521 100399 : ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
522 100851 : }
523 :
524 :
525 464 : int luaX_lookahead (LexState *ls) {
526 : lua_assert(ls->lookahead.token == TK_EOS);
527 464 : ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
528 464 : return ls->lookahead.token;
529 : }
530 :
|