// Build a string-literal token from the lexeme currently held in build_.
//
// The spelling of the lexeme (in the basic character set) is translated
// into the execution character set: ordinary characters are copied as-is,
// and the character following each backslash is mapped through
// translate_escape(). The scan starts just after the first character of
// the spelling and stops at the first unescaped '"', so the lexeme is
// expected to be a complete, quote-delimited literal.
Token
Lexer::on_string()
{
  String str = build_.take();

  String rep;
  rep.reserve(str.size());

  // Skip the opening quote, then walk up to (not including) the closing one.
  for (char const* p = str.c_str() + 1; *p != '\"'; ++p) {
    if (*p == '\\')
      rep.push_back(translate_escape(*++p)); // step onto the escaped character
    else
      rep.push_back(*p);
  }

  // Record both the original spelling and its translated representation.
  Symbol* sym = syms_.put<String_sym>(str, string_tok, rep);
  return Token(loc_, string_tok, sym);
}
// Build a character-literal token from the lexeme currently held in build_.
//
// The spelling of the lexeme (in the basic character set) is translated
// into the execution character set. The character after the opening quote
// is used directly unless it is a backslash, in which case the character
// following the backslash is mapped through translate_escape().
//
// The previous version had the backslash test inverted: escaped literals
// were stored as a bare backslash and plain literals handed the closing
// quote to translate_escape(). The test now has the same sense as the one
// in on_string().
//
// TODO: This belongs in a separate facility in order to better enable
// translation between the basic and execution character sets.
Token
Lexer::on_character()
{
  String str = build_.take();

  // Skip the opening quote.
  char const* p = str.c_str() + 1;

  int rep;
  if (*p != '\\')
    rep = *p;                       // ordinary character
  else
    rep = translate_escape(*++p);   // step onto the escaped character

  Symbol* sym = syms_.put<Character_sym>(str, character_tok, rep);
  return Token(loc_, character_tok, sym);
}
UBYTE *tokenize_word(register UBYTE *line,/* (in) -> current line position */ register UBYTE *word,/* (in) -> output token buffer */ UBYTE *qstring, /* (in) -> quoted string o/p buffer*/ SHORT *plen, /* (out) # of bytes put in word buf */ SHORT *ttype, /* (out) token type */ Boolean quote /* (in) preserve quotes on string? */ ) { UBYTE *wrkptr; UBYTE *sword = word; int toklen; SHORT toktype; register unsigned int c; UBYTE c1; UBYTE c2; /*---------------------------------------------------------------------------- * Skip leading whitespace, get the first character, return NULL if no char. *--------------------------------------------------------------------------*/ while (isspace(*line) ) ++line; if ('\0' == (c = *line)) { line = NULL; goto OUT; } /*---------------------------------------------------------------------------- * Handle keywords, types, symbols *--------------------------------------------------------------------------*/ if (iscsymf(c)) { line = po_chop_csym(line, word, MAX_SYM_LEN-1, &wrkptr); word = wrkptr; toktype = TOK_UNDEF; } #ifdef DEADWOOD else if (iscsymf(c)) { toktype = TOK_UNDEF; *word++ = c; ++line; toklen = MAX_SYM_LEN; for (;;) { c = *line; if (iscsym(c)) { ++line; if (toklen) { *word++ = c; --toklen; } } else break; } } #endif /* DEADWOOD */ /*---------------------------------------------------------------------------- * Handle numeric constants *--------------------------------------------------------------------------*/ else if (isdigit(c)) { toklen = get_digits(line,word,&toktype); line += toklen; word += toklen; } /*---------------------------------------------------------------------------- * Handle C operators and string/char constants... * This processes non-alphanumeric characters. Most will be passed through * as single character tokens. Some, like ==, !=, >= and <= are easier to * handle here than in parser (which only has a one-token look-ahead). * Also, quoted strings and chars are now handled in this switch statement. 
*--------------------------------------------------------------------------*/ else { c1 = line[1]; /* lookahead characters */ c2 = line[2]; switch (c) { case '"': /* note: a shortcut in this loop */ /* assumes that a quoted string */ if (qstring != NULL) /* could never be longer than */ sword = word = qstring; /* SZTOKE-3 chars. */ toktype = TOK_QUO; if (quote) *word++ = '"'; ++line; toklen = SZTOKE-3; /* room for nullterm and two quotes */ while(--toklen) { c = *line++; if (c == 0) break; else if (c == '\\') { if (quote) { *word++ = c; *word++ = *line++; } else { wrkptr = line; *word++ = translate_escape(&wrkptr); line = wrkptr; } } else if (c == '\"') { break; } else { *word++ = c; } } if (quote) *word++ = '"'; break; case '\'': if (quote) *word++ = '\''; toktype = TOK_SQUO; ++line; for (;;) { c = *line++; if (c == 0) break; else if (c == '\\') { if (quote) { *word++ = c; *word++ = *line++; } else { wrkptr = line; *word++ = translate_escape(&wrkptr); line = wrkptr; } } else if (c == '\'') { break; } else { *word++ = c; } } if (quote) *word++ = '\''; break; case '.': if (c1 == '.' 
&& c2 == '.') { toktype = TOK_UNDEF; goto THREECHAR; } else goto SIMPLE; case '%': if (c1 == '=') /* mod-equals */ { toktype = TOK_MOD_EQUALS; goto TWOCHAR; } else goto SIMPLE; case '/': if (c1 == '=') /* div-equals */ { toktype = TOK_DIV_EQUALS; goto TWOCHAR; } else goto SIMPLE; case '*': if (c1 == '=') { toktype = TOK_MUL_EQUALS; goto TWOCHAR; } else goto SIMPLE; case '+': if (c1 == '+') { toktype = TOK_PLUS_PLUS; goto TWOCHAR; } else if (c1 == '=') { toktype = TOK_PLUS_EQUALS; goto TWOCHAR; } else goto SIMPLE; case '-': if (c1 == '-') { toktype = TOK_MINUS_MINUS; goto TWOCHAR; } else if (c1 == '=') { toktype = TOK_MINUS_EQUALS; goto TWOCHAR; } else if (c1 == '>') { toktype = TOK_ARROW; goto TWOCHAR; } else goto SIMPLE; case '=': if (c1 == '=') /* double equals */ { toktype = TOK_EQ; goto TWOCHAR; } else goto SIMPLE; case '!': if (c1 == '=') /* != */ { toktype = TOK_NE; goto TWOCHAR; } else goto SIMPLE; case '<': if (c1 == '=') /* <= */ { toktype = TOK_LE; goto TWOCHAR; } else if (c1 == '<') /* << */ { if (c2 == '=') { toktype = TOK_LSHIFT_EQUALS; goto THREECHAR; } else { toktype = TOK_LSHIFT; goto TWOCHAR; } } else goto SIMPLE; case '>': if (c1 == '=') /* >= */ { toktype = TOK_GE; goto TWOCHAR; } else if (c1 == '>') /* >> */ { if (c2 == '=') { toktype = TOK_RSHIFT_EQUALS; goto THREECHAR; } else { toktype = TOK_RSHIFT; goto TWOCHAR; } } else goto SIMPLE; case '&': if (c1 == '&') /* logical and - && */ { toktype = TOK_LAND; goto TWOCHAR; } else if (c1 == '=') { toktype = TOK_AND_EQUALS; goto TWOCHAR; } else goto SIMPLE; case '|': if (c1 == '|') /* logical or - || */ { toktype = TOK_LOR; goto TWOCHAR; } else if (c1 == '=') { toktype = TOK_OR_EQUALS; goto TWOCHAR; } else goto SIMPLE; case '^': if (c1 == '=') { toktype = TOK_XOR_EQUALS; goto TWOCHAR; } else goto SIMPLE; default: SIMPLE: toktype = *word++ = c; ++line; break; } } OUT: *ttype = toktype; *word = 0; if (plen != NULL) *plen = word - sword; return line; TWOCHAR: *word++ = c; *word++ = c1; line += 2; goto 
OUT; THREECHAR: *word++ = c; *word++ = c1; *word++ = c2; line += 3; goto OUT; }
/*
 * tcl_value
 *
 * Rewrite a value string in place and return a pointer to the result.
 *
 * A NULL input yields NULL.  A value of at least two characters that starts
 * with '{' and ends with '}' is treated as an array: the outer braces are
 * dropped (so the returned pointer is value + 1), every ',' outside a
 * double-quoted literal becomes a space, and every backslash inside a
 * literal is handed to translate_escape(p, 1).  Any other value is treated
 * as a scalar and every backslash is handed to translate_escape(p, 0).
 *
 * In both cases the pointer returned by translate_escape() becomes the new
 * scan position, and scanning stops as soon as that position holds a NUL.
 */
static char *
tcl_value(char *value)
{
	int			in_literal;
	int			tail;
	char	   *scan;

	if (value == NULL)
		return (char *) NULL;

#ifdef TCL_ARRAYS_DEBUG
	printf("pq_value = '%s'\n", value);
#endif

	tail = strlen(value) - 1;

	if (tail >= 1 && value[0] == '{' && value[tail] == '}')
	{
		/* Array: strip the closing brace first, then the opening one. */
		value[tail] = '\0';
		value++;

		in_literal = 0;
		scan = value;
		while (*scan)
		{
			if (*scan == '"')
			{
				/* a double quote opens or closes a literal */
				in_literal = !in_literal;
			}
			else if (!in_literal && *scan == ',')
			{
				/* element separator at list level */
				*scan = ' ';
			}
			else if (in_literal && *scan == '\\')
			{
				/* escape sequence inside a literal, translate it */
				scan = translate_escape(scan, 1);
			}

			if (!*scan)
				break;
			scan++;
		}
	}
	else
	{
		/* Scalar: only escape sequences need attention. */
		scan = value;
		while (*scan)
		{
			if (*scan == '\\')
				scan = translate_escape(scan, 0);

			if (!*scan)
				break;
			scan++;
		}
	}

#ifdef TCL_ARRAYS_DEBUG
	printf("tcl_value = '%s'\n\n", value);
#endif

	return value;
}