// Check string for an identifier or keyword. If found, set *srcpp to first invalid character, set *lenp to word length (if // lenp not NULL), and return symbol; otherwise, return s_nil. enum e_sym getident(char **srcpp,ushort *lenp) { int i; ushort len; enum e_sym sym; char *srcp,*srcp0 = *srcpp; if(!isident1(*srcp0)) return s_nil; // Valid identifier found; find terminator. srcp = srcp0 + strspn(srcp0,identchars); // Query type? if(*srcp == TKC_QUERY) { *srcpp = srcp + 1; if(lenp != NULL) *lenp = *srcpp - srcp0; return s_identq; } // Check if keyword. sym = s_ident; char idbuf[(len = srcp - srcp0) + 1]; stplcpy(idbuf,srcp0,len + 1); if((i = binary(idbuf,kwname,NKEYWORDS)) >= 0) { sym = kwtab[i].s; if(lenp != NULL) *lenp = len; } // Return results. *srcpp = srcp; return sym; }
void vcc_Lexer(struct tokenlist *tl, struct source *sp) { const char *p, *q; unsigned u; tl->src = sp; for (p = sp->b; p < sp->e; ) { /* Skip any whitespace */ if (isspace(*p)) { p++; continue; } /* Skip '#.*\n' comments */ if (*p == '#') { while (p < sp->e && *p != '\n') p++; continue; } /* Skip C-style comments */ if (*p == '/' && p[1] == '*') { for (q = p + 2; q < sp->e; q++) { if (*q == '/' && q[1] == '*') { vsb_printf(tl->sb, "/* ... */ comment contains /*\n"); vcc_AddToken(tl, EOI, p, p + 2); vcc_ErrWhere(tl, tl->t); vcc_AddToken(tl, EOI, q, q + 2); vcc_ErrWhere(tl, tl->t); return; } if (*q == '*' && q[1] == '/') { p = q + 2; break; } } if (q < sp->e) continue; vcc_AddToken(tl, EOI, p, p + 2); vsb_printf(tl->sb, "Unterminated /* ... */ comment, starting at\n"); vcc_ErrWhere(tl, tl->t); return; } /* Skip C++-style comments */ if (*p == '/' && p[1] == '/') { while (p < sp->e && *p != '\n') p++; continue; } /* Recognize inline C-code */ if (*p == 'C' && p[1] == '{') { for (q = p + 2; q < sp->e; q++) { if (*q == '}' && q[1] == 'C') { vcc_AddToken(tl, CSRC, p, q + 2); break; } } if (q < sp->e) { p = q + 2; continue; } vcc_AddToken(tl, EOI, p, p + 2); vsb_printf(tl->sb, "Unterminated inline C source, starting at\n"); vcc_ErrWhere(tl, tl->t); return; } /* Recognize long-strings */ if (*p == '{' && p[1] == '"') { for (q = p + 2; q < sp->e; q++) { if (*q == '"' && q[1] == '}') { vcc_AddToken(tl, CSTR, p, q + 2); break; } } if (q < sp->e) { p = q + 2; u = tl->t->e - tl->t->b; u -= 4; /* {" ... "} */ tl->t->dec = TlAlloc(tl, u + 1 ); AN(tl->t->dec); memcpy(tl->t->dec, tl->t->b + 2, u); tl->t->dec[u] = '\0'; continue; } vcc_AddToken(tl, EOI, p, p + 2); vsb_printf(tl->sb, "Unterminated long-string, starting at\n"); vcc_ErrWhere(tl, tl->t); return; } /* Match for the fixed tokens (see token.tcl) */ u = vcl_fixed_token(p, &q); if (u != 0) { vcc_AddToken(tl, u, p, q); p = q; continue; } /* Match strings, with \\ and \" escapes */ if (*p == '"') { for (q = p + 1; q < sp->e; q++) { if (*q == '"') { q++; break; } if (*q == '\r' || *q == '\n') { vcc_AddToken(tl, EOI, p, q); vsb_printf(tl->sb, "Unterminated string at\n"); vcc_ErrWhere(tl, tl->t); return; } } vcc_AddToken(tl, CSTR, p, q); if (vcc_decstr(tl)) return; p = q; continue; } /* Match Identifiers */ if (isident1(*p)) { for (q = p; q < sp->e; q++) if (!isident(*q)) break; if (isvar(*q)) { for (; q < sp->e; q++) if (!isvar(*q)) break; vcc_AddToken(tl, VAR, p, q); } else { vcc_AddToken(tl, ID, p, q); } p = q; continue; } /* Match numbers { [0-9]+ } */ if (isdigit(*p)) { for (q = p; q < sp->e; q++) if (!isdigit(*q)) break; vcc_AddToken(tl, CNUM, p, q); p = q; continue; } vcc_AddToken(tl, EOI, p, p + 1); vsb_printf(tl->sb, "Syntax error at\n"); vcc_ErrWhere(tl, tl->t); return; } }