LCOV - code coverage report
Current view: top level - src - llex.c Coverage Total Hit
Test: Lua 5.2.4 Lines: 98.3 % 293 288
Test Date: 2024-04-28 10:23:12
Legend: Lines: hit not hit

            Line data    Source code
       1              : /*
       2              : ** $Id: llex.c,v 2.63.1.3 2015/02/09 17:56:34 roberto Exp $
       3              : ** Lexical Analyzer
       4              : ** See Copyright Notice in lua.h
       5              : */
       6              : 
       7              : 
       8              : #include <locale.h>
       9              : #include <string.h>
      10              : 
      11              : #define llex_c
      12              : #define LUA_CORE
      13              : 
      14              : #include "lua.h"
      15              : 
      16              : #include "lctype.h"
      17              : #include "ldo.h"
      18              : #include "llex.h"
      19              : #include "lobject.h"
      20              : #include "lparser.h"
      21              : #include "lstate.h"
      22              : #include "lstring.h"
      23              : #include "ltable.h"
      24              : #include "lzio.h"
      25              : 
      26              : 
      27              : 
      28              : #define next(ls) (ls->current = zgetc(ls->z))  /* fetch the next char from the input stream into ls->current; the expression yields that char */
      29              : 
      30              : 
      31              : 
      32              : #define currIsNewline(ls)       (ls->current == '\n' || ls->current == '\r')  /* true when the current char is '\n' or '\r' */
      33              : 
      34              : 
      35              : /* ORDER RESERVED: token names; luaX_init interns the first NUM_RESERVED entries and luaX_token2str indexes this table with (token - FIRST_RESERVED) */
      36              : static const char *const luaX_tokens [] = {
      37              :     "and", "break", "do", "else", "elseif",
      38              :     "end", "false", "for", "function", "goto", "if",
      39              :     "in", "local", "nil", "not", "or", "repeat",
      40              :     "return", "then", "true", "until", "while",
      41              :     "..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
      42              :     "<number>", "<name>", "<string>"
      43              : };
      44              : 
      45              : 
      46              : #define save_and_next(ls) (save(ls, ls->current), next(ls))  /* append the current char to the token buffer, then advance; yields the new current char */
      47              : 
      48              : 
      49              : static l_noret lexerror (LexState *ls, const char *msg, int token);  /* forward declaration: 'save' below calls it before its definition */
      50              : 
      51              : 
      52       330250 : static void save (LexState *ls, int c) {  /* append 'c' to the token buffer ls->buff, growing the buffer when it is full */
      53       330250 :   Mbuffer *b = ls->buff;
      54       330250 :   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {  /* no room for one more char? */
      55              :     size_t newsize;
      56           63 :     if (luaZ_sizebuffer(b) >= MAX_SIZET/2)  /* guards the doubling below against overflow */
      57            0 :       lexerror(ls, "lexical element too long", 0);  /* token 0: message gets no "near ..." suffix */
      58           63 :     newsize = luaZ_sizebuffer(b) * 2;  /* grow by doubling */
      59           63 :     luaZ_resizebuffer(ls->L, b, newsize);
      60              :   }
      61       330250 :   b->buffer[luaZ_bufflen(b)++] = cast(char, c);  /* store 'c' (cast to char) and bump the buffer length */
      62       330250 : }
      63              : 
      64              : 
      65          104 : void luaX_init (lua_State *L) {  /* create the strings for all reserved words and tag each one with its index */
      66              :   int i;
      67         2392 :   for (i=0; i<NUM_RESERVED; i++) {  /* only the first NUM_RESERVED entries of luaX_tokens */
      68         2288 :     TString *ts = luaS_new(L, luaX_tokens[i]);
      69         2288 :     luaS_fix(ts);  /* reserved words are never collected */
      70         2288 :     ts->tsv.extra = cast_byte(i+1);  /* reserved word: 1-based index; llex maps it back with (extra - 1 + FIRST_RESERVED) */
      71              :   }
      72          104 : }
      73              : 
      74              : 
      75           35 : const char *luaX_token2str (LexState *ls, int token) {  /* return a textual form of 'token'; txtToken uses it when building error messages */
      76           35 :   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
      77              :     lua_assert(token == cast(unsigned char, token));
      78           13 :     return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :  /* printable: the char itself, wrapped by LUA_QL */
      79            0 :                               luaO_pushfstring(ls->L, "char(%d)", token);  /* not printable: show its numeric code */
      80              :   }
      81              :   else {
      82           22 :     const char *s = luaX_tokens[token - FIRST_RESERVED];  /* the name table starts at FIRST_RESERVED */
      83           22 :     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
      84           11 :       return luaO_pushfstring(ls->L, LUA_QS, s);
      85              :     else  /* names, strings, and numerals */
      86           11 :       return s;  /* table entry returned as is, without LUA_QS formatting */
      87              :   }
      88              : }
      89              : 
      90              : 
      91           34 : static const char *txtToken (LexState *ls, int token) {  /* text that lexerror prints after "near" for 'token' */
      92           34 :   switch (token) {
      93           11 :     case TK_NAME:
      94              :     case TK_STRING:
      95              :     case TK_NUMBER:
      96           11 :       save(ls, '\0');  /* terminate the buffer so it can be read as a C string */
      97           11 :       return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));  /* show the text accumulated in the token buffer */
      98           23 :     default:
      99           23 :       return luaX_token2str(ls, token);  /* every other token has a fixed textual form */
     100              :   }
     101              : }
     102              : 
     103              : 
     104           38 : static l_noret lexerror (LexState *ls, const char *msg, int token) {  /* build "<chunkid>:<line>: <msg>[ near <token>]" and throw LUA_ERRSYNTAX; never returns */
     105              :   char buff[LUA_IDSIZE];
     106           38 :   luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);  /* fills 'buff' from the source name, limited to LUA_IDSIZE */
     107           38 :   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
     108           38 :   if (token)  /* token == 0 means "no token to report" (see 'save' and 'inclinenumber') */
     109           34 :     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));  /* result is not stored in 'msg'; presumably left on the Lua stack for luaD_throw -- confirm in lobject.c/ldo.c */
     110           38 :   luaD_throw(ls->L, LUA_ERRSYNTAX);
     111              : }
     112              : 
     113              : 
     114           26 : l_noret luaX_syntaxerror (LexState *ls, const char *msg) {  /* raise a syntax error with 'msg', reported near the current token; never returns */
     115           26 :   lexerror(ls, msg, ls->t.token);
     116              : }
     117              : 
     118              : 
     119              : /*
     120              : ** creates a new string and anchors it in function's table so that
     121              : ** it will not be collected until the end of the function's compilation
     122              : ** (by that time it should be anchored in function's prototype); when the table (ls->fs->h) already has this key, the stored key string is returned instead
     123              : */
     124        54104 : TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
     125        54104 :   lua_State *L = ls->L;
     126              :   TValue *o;  /* entry for `str' */
     127        54104 :   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
     128        54104 :   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
     129        54104 :   o = luaH_set(L, ls->fs->h, L->top - 1);  /* table entry keyed by the string just anchored */
     130        54104 :   if (ttisnil(o)) {  /* not in use yet? (see 'addK') */
     131              :     /* boolean value does not need GC barrier;
     132              :        table has no metatable, so it does not need to invalidate cache */
     133        27647 :     setbvalue(o, 1);  /* t[string] = true */
     134        27647 :     luaC_checkGC(L);
     135              :   }
     136              :   else {  /* string already present */
     137        26457 :     ts = rawtsvalue(keyfromval(o));  /* re-use value previously stored */
     138              :   }
     139        54104 :   L->top--;  /* remove string from stack */
     140        54104 :   return ts;
     141              : }
     142              : 
     143              : 
     144              : /*
     145              : ** increments the line number and skips a newline sequence (any of
     146              : ** \n, \r, \n\r, or \r\n); raises a lexical error when the line count reaches MAX_INT
     147              : */
     148        25120 : static void inclinenumber (LexState *ls) {
     149        25120 :   int old = ls->current;  /* first char of the newline sequence */
     150              :   lua_assert(currIsNewline(ls));
     151        25120 :   next(ls);  /* skip `\n' or `\r' */
     152        25120 :   if (currIsNewline(ls) && ls->current != old)  /* only a *different* newline char belongs to the same line break; "\n\n" is two breaks */
     153            0 :     next(ls);  /* skip `\n\r' or `\r\n' */
     154        25120 :   if (++ls->linenumber >= MAX_INT)
     155            0 :     lexerror(ls, "chunk has too many lines", 0);  /* token 0: no "near ..." suffix */
     156        25120 : }
     157              : 
     158              : 
     159          495 : void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
     160              :                     int firstchar) {  /* initialize 'ls' to read from stream 'z'; 'firstchar' becomes the current char */
     161          495 :   ls->decpoint = '.';  /* initial decimal point; trydecpoint may replace it with the locale's */
     162          495 :   ls->L = L;
     163          495 :   ls->current = firstchar;
     164          495 :   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
     165          495 :   ls->z = z;
     166          495 :   ls->fs = NULL;  /* no function state is set here */
     167          495 :   ls->linenumber = 1;
     168          495 :   ls->lastline = 1;
     169          495 :   ls->source = source;  /* source name, used by lexerror for the message prefix */
     170          495 :   ls->envn = luaS_new(L, LUA_ENV);  /* create env name */
     171          495 :   luaS_fix(ls->envn);  /* never collect this name */
     172          495 :   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
     173          495 : }
     174              : 
     175              : 
     176              : 
     177              : /*
     178              : ** =======================================================
     179              : ** LEXICAL ANALYZER
     180              : ** =======================================================
     181              : */
     182              : 
     183              : 
     184              : 
     185        12314 : static int check_next (LexState *ls, const char *set) {  /* if the current char occurs in 'set', save it, advance and return 1; otherwise return 0 and consume nothing */
     186        12314 :   if (ls->current == '\0' || !strchr(set, ls->current))  /* '\0' is tested first because strchr also matches the terminator of 'set' */
     187        10042 :     return 0;
     188         2272 :   save_and_next(ls);
     189         2272 :   return 1;
     190              : }
     191              : 
     192              : 
     193              : /*
     194              : ** change all characters 'from' in buffer to 'to' (in place, over the whole current buffer length)
     195              : */
     196         3379 : static void buffreplace (LexState *ls, char from, char to) {
     197         3379 :   size_t n = luaZ_bufflen(ls->buff);
     198         3379 :   char *p = luaZ_buffer(ls->buff);
     199        11827 :   while (n--)  /* walk from the last char down to index 0 */
     200         8448 :     if (p[n] == from) p[n] = to;
     201         3379 : }
     202              : 
     203              : 
     204              : #if !defined(getlocaledecpoint)  /* may be defined elsewhere to override this default */
     205              : #define getlocaledecpoint()     (localeconv()->decimal_point[0])  /* first char of the current locale's decimal-point string */
     206              : #endif
     207              : 
     208              : 
     209              : #define buff2d(b,e)     luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)  /* convert buffer text to a number in *e; length excludes the final '\0' that read_numeral saves */
     210              : 
     211              : /*
     212              : ** in case of format error, try to change decimal point separator to
     213              : ** the one defined in the current locale and check again; on success the value is in seminfo->r, on failure a "malformed number" error is raised
     214              : */
     215            1 : static void trydecpoint (LexState *ls, SemInfo *seminfo) {
     216            1 :   char old = ls->decpoint;
     217            1 :   ls->decpoint = getlocaledecpoint();  /* the new separator stays in 'ls' for later numerals */
     218            1 :   buffreplace(ls, old, ls->decpoint);  /* try new decimal separator */
     219            1 :   if (!buff2d(ls->buff, &seminfo->r)) {
     220              :     /* format error with correct decimal point: no more options */
     221            1 :     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
     222            1 :     lexerror(ls, "malformed number", TK_NUMBER);  /* does not return */
     223              :   }
     224            0 : }
     225              : 
     226              : 
     227              : /* LUA_NUMBER */
     228              : /*
     229              : ** this function is quite liberal in what it accepts, as 'luaO_str2d'
     230              : ** will reject ill-formed numerals; the converted value is stored in seminfo->r
     231              : */
     232         3377 : static void read_numeral (LexState *ls, SemInfo *seminfo) {
     233         3377 :   const char *expo = "Ee";  /* exponent markers for decimal numerals */
     234         3377 :   int first = ls->current;
     235              :   lua_assert(lisdigit(ls->current));
     236         3377 :   save_and_next(ls);
     237         3377 :   if (first == '0' && check_next(ls, "Xx"))  /* hexadecimal? */
     238           82 :     expo = "Pp";  /* hexadecimal numerals use a 'p' exponent marker */
     239              :   for (;;) {
     240         4957 :     if (check_next(ls, expo))  /* exponent part? */
     241           10 :       check_next(ls, "+-");  /* optional exponent sign */
     242         4957 :     if (lisxdigit(ls->current) || ls->current == '.')  /* hex digits are accepted even in decimal numerals; buff2d rejects bad text later */
     243         1580 :       save_and_next(ls);
     244              :     else  break;
     245              :   }
     246         3377 :   save(ls, '\0');  /* terminate the buffer; buff2d excludes this char from the length */
     247         3377 :   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
     248         3377 :   if (!buff2d(ls->buff, &seminfo->r))  /* format error? */
     249            1 :     trydecpoint(ls, seminfo); /* try to update decimal point separator */
     250         3376 : }
     251              : 
     252              : 
     253              : /*
     254              : ** skip a sequence '[=*' or ']=*' (saving it in the buffer) and, if the same bracket follows (it is not consumed), return the number of '='s;
     255              : ** otherwise return -(number of '='s) - 1, which is -1 only when no '=' was read (llex relies on that to recognize a plain '[')
     256              : */
     257         1935 : static int skip_sep (LexState *ls) {
     258         1935 :   int count = 0;
     259         1935 :   int s = ls->current;  /* the bracket that opens this sequence */
     260              :   lua_assert(s == '[' || s == ']');
     261         1935 :   save_and_next(ls);
     262         1948 :   while (ls->current == '=') {
     263           13 :     save_and_next(ls);
     264           13 :     count++;
     265              :   }
     266         1935 :   return (ls->current == s) ? count : (-count) - 1;  /* always negative when the second bracket is missing */
     267              : }
     268              : 
     269              : 
     270          592 : static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {  /* read a long string with 'sep' '='s into seminfo->ts, or skip a long comment when seminfo is NULL */
     271          592 :   save_and_next(ls);  /* skip 2nd `[' */
     272          592 :   if (currIsNewline(ls))  /* string starts with a newline? */
     273          223 :     inclinenumber(ls);  /* skip it */
     274              :   for (;;) {
     275        44177 :     switch (ls->current) {
     276            2 :       case EOZ:
     277            2 :         lexerror(ls, (seminfo) ? "unfinished long string" :
     278              :                                  "unfinished long comment", TK_EOS);
     279              :         break;  /* to avoid warnings */
     280          597 :       case ']': {
     281          597 :         if (skip_sep(ls) == sep) {  /* closing bracket with the same number of '='s? */
     282          590 :           save_and_next(ls);  /* skip 2nd `]' */
     283          590 :           goto endloop;
     284              :         }
     285            7 :         break;  /* not the closing delimiter; the chars skip_sep saved stay in the buffer */
     286              :       }
     287         1915 :       case '\n': case '\r': {
     288         1915 :         save(ls, '\n');  /* every newline sequence is stored as a single '\n' */
     289         1915 :         inclinenumber(ls);
     290         1915 :         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
     291         1915 :         break;
     292              :       }
     293        41663 :       default: {
     294        41663 :         if (seminfo) save_and_next(ls);
     295        35755 :         else next(ls);  /* comment text is not kept */
     296              :       }
     297              :     }
     298          590 :   } endloop:
     299          590 :   if (seminfo)
     300          154 :     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),  /* drop the opening delimiter: 2 brackets + 'sep' '='s */
     301          154 :                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));  /* length without both delimiters */
     302          590 : }
     303              : 
     304              : 
     305            3 : static void escerror (LexState *ls, int *c, int n, const char *msg) {  /* raise 'msg' for a bad escape; the buffer is replaced by '\' plus up to 'n' chars from 'c' so the error shows the escape */
     306              :   int i;
     307            3 :   luaZ_resetbuffer(ls->buff);  /* prepare error message */
     308            3 :   save(ls, '\\');
     309            9 :   for (i = 0; i < n && c[i] != EOZ; i++)  /* stop early at end of input */
     310            6 :     save(ls, c[i]);
     311            3 :   lexerror(ls, msg, TK_STRING);  /* TK_STRING makes txtToken print the buffer contents */
     312              : }
     313              : 
     314              : 
     315           46 : static int readhexaesc (LexState *ls) {  /* read the two hex digits of a '\xXX' escape and return their value; called with the current char at 'x' */
     316              :   int c[3], i;  /* keep input for error message */
     317           46 :   int r = 0;  /* result accumulator */
     318           46 :   c[0] = 'x';  /* for error message */
     319          136 :   for (i = 1; i < 3; i++) {  /* read two hexadecimal digits */
     320           91 :     c[i] = next(ls);
     321           91 :     if (!lisxdigit(c[i]))
     322            1 :       escerror(ls, c, i + 1, "hexadecimal digit expected");  /* reports 'x' plus the chars read so far */
     323           90 :     r = (r << 4) + luaO_hexavalue(c[i]);
     324              :   }
     325           45 :   return r;  /* the current char is still the last digit; the caller advances past it (read_save) */
     326              : }
     327              : 
     328              : 
     329           63 : static int readdecesc (LexState *ls) {  /* read a decimal escape '\ddd' starting at the current char and return its value */
     330              :   int c[3], i;  /* digits read, kept for the error message */
     331           63 :   int r = 0;  /* result accumulator */
     332          168 :   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
     333          105 :     c[i] = ls->current;
     334          105 :     r = 10*r + c[i] - '0';
     335          105 :     next(ls);
     336              :   }
     337           63 :   if (r > UCHAR_MAX)
     338            1 :     escerror(ls, c, i, "decimal escape too large");
     339           62 :   return r;  /* the digits are already consumed, so the caller must not advance again (only_save) */
     340              : }
     341              : 
     342              : 
     343         7165 : static void read_string (LexState *ls, int del, SemInfo *seminfo) {  /* read a short string delimited by 'del', decoding escapes; the result goes to seminfo->ts */
     344         7165 :   save_and_next(ls);  /* keep delimiter (for error messages) */
     345        84192 :   while (ls->current != del) {
     346        77035 :     switch (ls->current) {
     347            3 :       case EOZ:
     348            3 :         lexerror(ls, "unfinished string", TK_EOS);
     349              :         break;  /* to avoid warnings */
     350            2 :       case '\n':
     351              :       case '\r':
     352            2 :         lexerror(ls, "unfinished string", TK_STRING);  /* a raw newline is not allowed inside a short string */
     353              :         break;  /* to avoid warnings */
     354          433 :       case '\\': {  /* escape sequences */
     355              :         int c;  /* final character to be saved */
     356          433 :         next(ls);  /* do not save the `\' */
     357          433 :         switch (ls->current) {
     358            2 :           case 'a': c = '\a'; goto read_save;
     359            8 :           case 'b': c = '\b'; goto read_save;
     360           12 :           case 'f': c = '\f'; goto read_save;
     361           59 :           case 'n': c = '\n'; goto read_save;
     362           17 :           case 'r': c = '\r'; goto read_save;
     363          182 :           case 't': c = '\t'; goto read_save;
     364            2 :           case 'v': c = '\v'; goto read_save;
     365           46 :           case 'x': c = readhexaesc(ls); goto read_save;
     366            1 :           case '\n': case '\r':
     367            1 :             inclinenumber(ls); c = '\n'; goto only_save;  /* backslash-newline: stored as '\n'; inclinenumber already consumed it */
     368           37 :           case '\\': case '\"': case '\'':
     369           37 :             c = ls->current; goto read_save;
     370            1 :           case EOZ: goto no_save;  /* will raise an error next loop */
     371            2 :           case 'z': {  /* zap following span of spaces */
     372            2 :             next(ls);  /* skip the 'z' */
     373            8 :             while (lisspace(ls->current)) {
     374            6 :               if (currIsNewline(ls)) inclinenumber(ls);  /* keep the line count right while skipping */
     375            5 :               else next(ls);
     376              :             }
     377            2 :             goto no_save;
     378              :           }
     379           64 :           default: {
     380           64 :             if (!lisdigit(ls->current))
     381            1 :               escerror(ls, &ls->current, 1, "invalid escape sequence");
     382              :             /* digital escape \ddd */
     383           63 :             c = readdecesc(ls);
     384           62 :             goto only_save;  /* readdecesc already consumed the digits */
     385              :           }
     386              :         }
     387          364 :        read_save: next(ls);  /* read next character */
     388          427 :        only_save: save(ls, c);  /* save 'c' */
     389          430 :        no_save: break;
     390              :       }
     391        76597 :       default:
     392        76597 :         save_and_next(ls);
     393              :     }
     394              :   }
     395         7157 :   save_and_next(ls);  /* skip delimiter */
     396        14314 :   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,  /* skip the opening delimiter kept in the buffer */
     397         7157 :                                    luaZ_bufflen(ls->buff) - 2);  /* length without both delimiters */
     398         7157 : }
     399              : 
     400              : 
     401       100863 : static int llex (LexState *ls, SemInfo *seminfo) {  /* scan and return the next token, skipping whitespace and comments; names, strings and numbers also fill 'seminfo' */
     402       100863 :   luaZ_resetbuffer(ls->buff);
     403              :   for (;;) {
     404       263746 :     switch (ls->current) {
     405        22980 :       case '\n': case '\r': {  /* line breaks */
     406        22980 :         inclinenumber(ls);
     407        22980 :         break;
     408              :       }
     409       137464 :       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
     410       137464 :         next(ls);
     411       137464 :         break;
     412              :       }
     413         2800 :       case '-': {  /* '-' or '--' (comment) */
     414         2800 :         next(ls);
     415         2800 :         if (ls->current != '-') return '-';
     416              :         /* else is a comment */
     417         2440 :         next(ls);
     418         2440 :         if (ls->current == '[') {  /* long comment? */
     419          437 :           int sep = skip_sep(ls);
     420          437 :           luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
     421          437 :           if (sep >= 0) {
     422          437 :             read_long_string(ls, NULL, sep);  /* skip long comment */
     423          436 :             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
     424          436 :             break;
     425              :           }
     426              :         }
     427              :         /* else short comment (also reached when '--[' is not followed by a well-formed long bracket) */
     428        40384 :         while (!currIsNewline(ls) && ls->current != EOZ)
     429        38381 :           next(ls);  /* skip until end of line (or end of file) */
     430         2003 :         break;
     431              :       }
     432          901 :       case '[': {  /* long string or simply '[' */
     433          901 :         int sep = skip_sep(ls);
     434          901 :         if (sep >= 0) {
     435          155 :           read_long_string(ls, seminfo, sep);
     436          154 :           return TK_STRING;
     437              :         }
     438          746 :         else if (sep == -1) return '[';  /* -1: no '=' was read and no second '[' follows */
     439            1 :         else lexerror(ls, "invalid long string delimiter", TK_STRING);  /* lexerror is l_noret, so control never falls into case '=' */
     440              :       }
     441         4936 :       case '=': {
     442         4936 :         next(ls);
     443         4936 :         if (ls->current != '=') return '=';
     444          425 :         else { next(ls); return TK_EQ; }
     445              :       }
     446          127 :       case '<': {
     447          127 :         next(ls);
     448          127 :         if (ls->current != '=') return '<';
     449           82 :         else { next(ls); return TK_LE; }
     450              :       }
     451          227 :       case '>': {
     452          227 :         next(ls);
     453          227 :         if (ls->current != '=') return '>';
     454          191 :         else { next(ls); return TK_GE; }
     455              :       }
     456          233 :       case '~': {
     457          233 :         next(ls);
     458          233 :         if (ls->current != '=') return '~';
     459          233 :         else { next(ls); return TK_NE; }
     460              :       }
     461          556 :       case ':': {
     462          556 :         next(ls);
     463          556 :         if (ls->current != ':') return ':';
     464           16 :         else { next(ls); return TK_DBCOLON; }
     465              :       }
     466         7165 :       case '"': case '\'': {  /* short literal strings */
     467         7165 :         read_string(ls, ls->current, seminfo);  /* the opening quote is also the closing delimiter */
     468         7157 :         return TK_STRING;
     469              :       }
     470         4340 :       case '.': {  /* '.', '..', '...', or number */
     471         4340 :         save_and_next(ls);
     472         4340 :         if (check_next(ls, ".")) {
     473         2138 :           if (check_next(ls, "."))
     474           39 :             return TK_DOTS;   /* '...' */
     475         2099 :           else return TK_CONCAT;   /* '..' */
     476              :         }
     477         2202 :         else if (!lisdigit(ls->current)) return '.';
     478              :         /* else go through: a '.' followed by a digit starts a numeral */
     479              :       }
     480              :       case '0': case '1': case '2': case '3': case '4':
     481              :       case '5': case '6': case '7': case '8': case '9': {
     482         3377 :         read_numeral(ls, seminfo);
     483         3376 :         return TK_NUMBER;
     484              :       }
     485          465 :       case EOZ: {
     486          465 :         return TK_EOS;
     487              :       }
     488        78176 :       default: {
     489        78176 :         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
     490              :           TString *ts;
     491              :           do {
     492       212985 :             save_and_next(ls);
     493       212985 :           } while (lislalnum(ls->current));
     494        45676 :           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
     495        45676 :                                   luaZ_bufflen(ls->buff));
     496        45676 :           seminfo->ts = ts;
     497        45676 :           if (isreserved(ts))  /* reserved word? */
     498        15004 :             return ts->tsv.extra - 1 + FIRST_RESERVED;  /* 'extra' holds the 1-based index set by luaX_init */
     499              :           else {
     500        30672 :             return TK_NAME;
     501              :           }
     502              :         }
     503              :         else {  /* single-char tokens (+ - / ...) */
     504        32500 :           int c = ls->current;
     505        32500 :           next(ls);
     506        32500 :           return c;  /* the char itself is the token */
     507              :         }
     508              :       }
     509              :     }
     510              :   }
     511              : }
     512              : 
     513              : 
     514       100863 : void luaX_next (LexState *ls) {  /* advance to the next token: store it in ls->t, taking the look-ahead token when one is pending */
     515       100863 :   ls->lastline = ls->linenumber;  /* record the line number before the next token is read */
     516       100863 :   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
     517          464 :     ls->t = ls->lookahead;  /* use this one */
     518          464 :     ls->lookahead.token = TK_EOS;  /* and discharge it */
     519              :   }
     520              :   else
     521       100399 :     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
     522       100851 : }
     523              : 
     524              : 
     525          464 : int luaX_lookahead (LexState *ls) {  /* scan one token ahead into ls->lookahead without changing ls->t, and return it */
     526              :   lua_assert(ls->lookahead.token == TK_EOS);  /* only one pending look-ahead token is supported */
     527          464 :   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
     528          464 :   return ls->lookahead.token;
     529              : }
     530              : 
        

Generated by: LCOV version 2.0-1