Example #1
0
// Check string for an identifier or keyword.  If found, set *srcpp to first invalid character, set *lenp to word length (if
// lenp not NULL), and return symbol; otherwise, return s_nil.
enum e_sym getident(char **srcpp,ushort *lenp) {
	int i;
	ushort len;
	enum e_sym sym;
	char *srcp,*srcp0 = *srcpp;

	if(!isident1(*srcp0))
		return s_nil;

	// Valid identifier found; find terminator.
	srcp = srcp0 + strspn(srcp0,identchars);

	// Query type?
	if(*srcp == TKC_QUERY) {
		*srcpp = srcp + 1;
		if(lenp != NULL)
			*lenp = *srcpp - srcp0;
		return s_identq;
		}

	// Check if keyword.
	sym = s_ident;
	char idbuf[(len = srcp - srcp0) + 1];
	stplcpy(idbuf,srcp0,len + 1);
	if((i = binary(idbuf,kwname,NKEYWORDS)) >= 0) {
		sym = kwtab[i].s;
		if(lenp != NULL)
			*lenp = len;
		}

	// Return results.
	*srcpp = srcp;
	return sym;
	}
Example #2
0
void
vcc_Lexer(struct tokenlist *tl, struct source *sp)
{
	const char *p, *q;
	unsigned u;

	tl->src = sp;
	for (p = sp->b; p < sp->e; ) {

		/* Skip any whitespace */
		if (isspace(*p)) {
			p++;
			continue;
		}

		/* Skip '#.*\n' comments */
		if (*p == '#') {
			while (p < sp->e && *p != '\n')
				p++;
			continue;
		}

		/* Skip C-style comments */
		if (*p == '/' && p[1] == '*') {
			for (q = p + 2; q < sp->e; q++) {
				if (*q == '/' && q[1] == '*') {
					vsb_printf(tl->sb,
					    "/* ... */ comment contains /*\n");
					vcc_AddToken(tl, EOI, p, p + 2);
					vcc_ErrWhere(tl, tl->t);
					vcc_AddToken(tl, EOI, q, q + 2);
					vcc_ErrWhere(tl, tl->t);
					return;
				}
				if (*q == '*' && q[1] == '/') {
					p = q + 2;
					break;
				}
			}
			if (q < sp->e)
				continue;
			vcc_AddToken(tl, EOI, p, p + 2);
			vsb_printf(tl->sb,
			    "Unterminated /* ... */ comment, starting at\n");
			vcc_ErrWhere(tl, tl->t);
			return;
		}

		/* Skip C++-style comments */
		if (*p == '/' && p[1] == '/') {
			while (p < sp->e && *p != '\n')
				p++;
			continue;
		}

		/* Recognize inline C-code */
		if (*p == 'C' && p[1] == '{') {
			for (q = p + 2; q < sp->e; q++) {
				if (*q == '}' && q[1] == 'C') {
					vcc_AddToken(tl, CSRC, p, q + 2);
					break;
				}
			}
			if (q < sp->e) {
				p = q + 2;
				continue;
			}
			vcc_AddToken(tl, EOI, p, p + 2);
			vsb_printf(tl->sb,
			    "Unterminated inline C source, starting at\n");
			vcc_ErrWhere(tl, tl->t);
			return;
		}

		/* Recognize long-strings */
		if (*p == '{' && p[1] == '"') {
			for (q = p + 2; q < sp->e; q++) {
				if (*q == '"' && q[1] == '}') {
					vcc_AddToken(tl, CSTR, p, q + 2);
					break;
				}
			}
			if (q < sp->e) {
				p = q + 2;
				u = tl->t->e - tl->t->b;
				u -= 4;		/* {" ... "} */
				tl->t->dec = TlAlloc(tl, u + 1 );
				AN(tl->t->dec);
				memcpy(tl->t->dec, tl->t->b + 2, u);
				tl->t->dec[u] = '\0';
				continue;
			}
			vcc_AddToken(tl, EOI, p, p + 2);
			vsb_printf(tl->sb,
			    "Unterminated long-string, starting at\n");
			vcc_ErrWhere(tl, tl->t);
			return;
		}

		/* Match for the fixed tokens (see token.tcl) */
		u = vcl_fixed_token(p, &q);
		if (u != 0) {
			vcc_AddToken(tl, u, p, q);
			p = q;
			continue;
		}

		/* Match strings, with \\ and \" escapes */
		if (*p == '"') {
			for (q = p + 1; q < sp->e; q++) {
				if (*q == '"') {
					q++;
					break;
				}
				if (*q == '\r' || *q == '\n') {
					vcc_AddToken(tl, EOI, p, q);
					vsb_printf(tl->sb,
					    "Unterminated string at\n");
					vcc_ErrWhere(tl, tl->t);
					return;
				}
			}
			vcc_AddToken(tl, CSTR, p, q);
			if (vcc_decstr(tl))
				return;
			p = q;
			continue;
		}

		/* Match Identifiers */
		if (isident1(*p)) {
			for (q = p; q < sp->e; q++)
				if (!isident(*q))
					break;
			if (isvar(*q)) {
				for (; q < sp->e; q++)
					if (!isvar(*q))
						break;
				vcc_AddToken(tl, VAR, p, q);
			} else {
				vcc_AddToken(tl, ID, p, q);
			}
			p = q;
			continue;
		}

		/* Match numbers { [0-9]+ } */
		if (isdigit(*p)) {
			for (q = p; q < sp->e; q++)
				if (!isdigit(*q))
					break;
			vcc_AddToken(tl, CNUM, p, q);
			p = q;
			continue;
		}
		vcc_AddToken(tl, EOI, p, p + 1);
		vsb_printf(tl->sb, "Syntax error at\n");
		vcc_ErrWhere(tl, tl->t);
		return;
	}
}