Error Tokenizer::tokenize (const std::string & input) { bool inEmpty = true; int tokenStart = 0; for (int i = 0; i < (int) input.length(); i++) { char c = input[i]; if (inEmpty){ if (isEmpty (c)) continue; tokenStart = i; inEmpty = false; } if (isEmpty (c)) { // last token is at its end pushToken (input.substr (tokenStart, i - tokenStart), tokenStart); inEmpty = true; // add null token (will get removed at end) Token nullToken (" ", i); nullToken.type = Token::TT_NULL; mResult.push_back (nullToken); } if (isSingleChar (c)) { if (i - tokenStart > 0) { pushToken (input.substr (tokenStart, i - tokenStart), tokenStart); } pushToken (input.substr (i, 1), i); inEmpty = true; // ignores following empties } } // last character if (!inEmpty){ pushToken (input.substr (tokenStart), tokenStart); } fixupNegations (); removeNull (); return NoError; }
void Lexer::recognizeString() { consumeCharacter(); while (this->currentChar != '"') { consumeCharacter(); } consumeCharacter(); pushToken(TokenType::STRING); }
void Lexer::recognizeCharacter() { consumeCharacter(); while (this->currentChar != '\'') { consumeCharacter(); } consumeCharacter(); pushToken(TokenType::CHARACTER); }
void Lexer::recognizeNumber() { while (this->currentChar >= '0' && this->currentChar <= '9') { consumeCharacter(); } if (this->currentChar == '.') { consumeCharacter(); } while (this->currentChar >= '0' && this->currentChar <= '9') { consumeCharacter(); } pushToken(TokenType::NUMBER); }
void Lexer::recognizeIdentifier() { consumeCharacter(); while ((this->currentChar == '_' || this->currentChar == '-') || (this->currentChar >= 'a' && this->currentChar <= 'z') || (this->currentChar >= 'A' && this->currentChar <= 'Z') || (this->currentChar >= '0' && this->currentChar <= '9')) { consumeCharacter(); } pushToken(TokenType::IDENTIFIER); }
/**
 * Consumes an identifier and pushes it as a TOKEN_IDENTIFIER.
 *
 * The current character is consumed unconditionally. After that the
 * identifier continues over letters/digits, and over an underscore only when
 * the character following the underscore is a letter or digit (so a trailing
 * or doubled underscore ends the identifier).
 */
void recognizeIdentifierToken(Lexer *self) {
	consumeCharacter(self);

	for (;;) {
		/* run of letters and digits */
		while (isLetterOrDigit(self->currentChar)) {
			consumeCharacter(self);
		}

		/* an underscore is only part of the identifier when more follows */
		if (!(isUnderscore(self->currentChar) && isLetterOrDigit(peekAhead(self, 1)))) {
			break;
		}
		consumeCharacter(self);
	}

	pushToken(self, TOKEN_IDENTIFIER);
}
/*
 * Pushes the characters s[start] .. s[end-1] as one token.
 *
 * The range is copied into a temporary NUL-terminated buffer, handed to
 * pushToken() and released again afterwards. If the buffer cannot be
 * allocated, memoryFailure() is called and the process exits with
 * EXIT_FAILURE.
 *
 * The caller must ensure start <= end; the length is computed as an
 * unsigned difference.
 */
void pushSubString(struct token_info* info, int* tn, int ln, char type, const char* s, size_t start, size_t end) {
    const size_t count = end - start;
    char* copy = malloc(count + 1);

    if(copy == NULL) {
        memoryFailure();
        exit(EXIT_FAILURE);
    }

    memcpy(copy, s + start, count);
    copy[count] = '\0';

    pushToken(info, tn, ln, type, copy);
    free(copy);
}
/*
 * Pushes the single character `c` as one token of the given type.
 *
 * pushToken() expects a NUL-terminated string, so the character is wrapped
 * in a two-byte buffer. The buffer lives on the stack: it is only needed for
 * the duration of the call (the sibling pushSubString() frees its buffer
 * right after the call too), so no heap allocation and no out-of-memory
 * exit path are required.
 */
void pushChar(struct token_info* info, int* tn, int ln, char type, const char c) {
    char text[2];

    text[0] = c;
    text[1] = '\0';

    pushToken(info, tn, ln, type, text);
}
/**
 * Consumes an operator of one or two characters and pushes it as a
 * TOKEN_OPERATOR.
 *
 * A ':' that is directly followed by '=' is deliberately kept as a
 * one-character operator, so ":=" is emitted as two separate tokens.
 * In every other case a second operator character is merged into the token.
 */
void recognizeOperatorToken(Lexer *self) {
	/* must be decided before the first character is consumed */
	int colonBeforeAssign = (self->currentChar == ':' && peekAhead(self, 1) == '=');

	consumeCharacter(self);

	/* two-character operators */
	if (!colonBeforeAssign && isOperator(self->currentChar)) {
		consumeCharacter(self);
	}

	pushToken(self, TOKEN_OPERATOR);
}
/**
 * Consumes a double-quoted string literal and pushes it as a TOKEN_STRING.
 *
 * The position right after the opening quote is remembered so that an
 * "Unterminated string literal" error points at the start of the literal
 * rather than at the end of the input.
 */
void recognizeStringToken(Lexer *self) {
	int startColumn;
	int startLine;

	expectCharacter(self, '"');

	startColumn = self->charNumber;
	startLine = self->lineNumber;

	/* swallow everything up to the closing quote */
	for (;;) {
		if (isString(self->currentChar)) {
			break;
		}
		consumeCharacter(self);
		if (isEndOfInput(self->currentChar)) {
			errorMessageWithPosition(self->fileName, startLine, startColumn, "Unterminated string literal");
		}
	}

	expectCharacter(self, '"');
	pushToken(self, TOKEN_STRING);
}
/**
 * Consumes a single-quoted character literal and pushes it as a
 * TOKEN_CHARACTER.
 *
 * An empty literal ('') is reported as "Empty character literal". The
 * literal ends at the first quote that is not directly preceded by a
 * backslash. The position right after the opening quote is remembered for
 * the "Unterminated character literal" error.
 */
void recognizeCharacterToken(Lexer *self) {
	int startColumn;
	int startLine;

	expectCharacter(self, '\'');

	startColumn = self->charNumber;
	startLine = self->lineNumber;

	if (self->currentChar == '\'') {
		errorMessageWithPosition(self->fileName, self->lineNumber, self->charNumber, "Empty character literal");
	}

	/* keep going while not on a quote, or on a quote escaped by a backslash */
	while (self->currentChar != '\'' || peekAhead(self, -1) == '\\') {
		consumeCharacter(self);
		if (isEndOfInput(self->currentChar)) {
			errorMessageWithPosition(self->fileName, startLine, startColumn, "Unterminated character literal");
		}
	}

	expectCharacter(self, '\'');
	pushToken(self, TOKEN_CHARACTER);
}
/*
 * Scans an identifier starting at s[*pos] and pushes it as a
 * TOKENTYPE_IDENTIFIER token.
 *
 * Leading '/' characters belong to the identifier. After them the scan runs
 * until the first punctuation character (as decided by isPunctuation) or the
 * end of the input; a backslash causes the character after it to be skipped
 * without being tested. If the identifier equals "url" according to
 * casecmp(), *urlMode is set to 1.
 *
 * On return *pos is the index of the last character of the identifier, so
 * that the caller's own increment moves on to the character after it.
 * If the temporary buffer cannot be allocated, memoryFailure() is called and
 * the process exits with EXIT_FAILURE.
 */
void parseIdentifier(struct token_info* info, int* tn, int ln, const char* s, size_t slen, size_t* pos, char* urlMode, struct char_char* p) {
    const size_t begin = *pos;
    size_t cur = *pos;
    size_t count;
    char* text;

    /* leading slashes are part of the identifier */
    while(s[cur] == '/') {
        cur++;
    }

    while(cur < slen) {
        if(s[cur] == '\\') {
            cur++;              /* skip the escaped character untested */
        } else if(isPunctuation(p, s[cur])) {
            break;
        }
        cur++;
    }
    *pos = cur;

    count = cur - begin;
    text = malloc(count + 1);
    if(text == NULL) {
        memoryFailure();
        exit(EXIT_FAILURE);
    }
    memcpy(text, s + begin, count);
    text[count] = '\0';

    if(casecmp(text, "url") == 0) {
        *urlMode = 1;
    }

    pushToken(info, tn, ln, TOKENTYPE_IDENTIFIER, text);
    free(text);

    /* step back onto the last identifier character */
    (*pos)--;
}
/*
 * Makes `scope` the new current context. Does nothing when `scope` is NULL.
 *
 * The new context is named "<current context name>.<scope name>" when the
 * current context has a kind other than K_UNDEFINED, and just
 * "<scope name>" otherwise. The name is assembled in a temporary vString
 * before pushToken() is called and copied into the new current context
 * afterwards.
 */
static void createContext (tokenInfo *const scope)
{
	vString *qualifiedName;

	if (! scope)
		return;

	qualifiedName = vStringNew ();
	verbose ("Creating new context %s\n", vStringValue (scope->name));

	/* Prefix with the enclosing context, if there is one */
	if (currentContext->kind != K_UNDEFINED)
	{
		vStringCopy (qualifiedName, currentContext->name);
		vStringCatS (qualifiedName, ".");
	}
	vStringCat (qualifiedName, scope->name);

	/* Switch to the new context and give it its qualified name */
	currentContext = pushToken (currentContext, scope);
	vStringCopy (currentContext->name, qualifiedName);

	vStringDelete (qualifiedName);
}
/**
 * Consumes exactly one character and pushes it as a TOKEN_ERRORNEOUS token.
 */
void recognizeErroneousToken(Lexer *self)
{
	consumeCharacter(self);
	pushToken(self, TOKEN_ERRORNEOUS);
}
/**
 * Consumes exactly one character and pushes it as a TOKEN_SEPARATOR token.
 */
void recognizeSeparatorToken(Lexer *self)
{
	consumeCharacter(self);
	pushToken(self, TOKEN_SEPARATOR);
}
/**
 * Consumes a numeric literal and pushes it as a TOKEN_NUMBER.
 *
 * The first character is consumed unconditionally, followed by an optional
 * '_'. The character found after that selects the form:
 *   '.'        fractional digits, optional 'f' or 'd' suffix
 *   'x' / 'X'  hexadecimal digits
 *   'b'        binary digits
 *   'o'        octal digits
 *   otherwise  decimal digits, which may contain '_' separators and one
 *              fractional part with an optional 'f' or 'd' suffix
 *
 * Exactly one token is pushed, whichever form was taken.
 */
void recognizeNumberToken(Lexer *self) {
	consumeCharacter(self);

	/* ignore digit underscores */
	if (self->currentChar == '_') {
		consumeCharacter(self);
	}

	switch (self->currentChar) {
	case '.':
		consumeCharacter(self); /* consume dot */
		while (isDigit(self->currentChar)) {
			consumeCharacter(self);
		}
		if (self->currentChar == 'f' || self->currentChar == 'd') {
			consumeCharacter(self);
		}
		break;

	case 'x':
	case 'X':
		consumeCharacter(self);
		while (isHexChar(self->currentChar)) {
			consumeCharacter(self);
		}
		break;

	case 'b':
		consumeCharacter(self);
		while (isBinChar(self->currentChar)) {
			consumeCharacter(self);
		}
		break;

	case 'o':
		consumeCharacter(self);
		while (isOctChar(self->currentChar)) {
			consumeCharacter(self);
		}
		break;

	default:
		/* plain decimal, looking one character ahead for '.' and '_' */
		while (isDigit(self->currentChar)) {
			if (peekAhead(self, 1) == '.') {
				consumeCharacter(self);
				while (isDigit(self->currentChar)) {
					consumeCharacter(self);
				}
				if (self->currentChar == 'f' || self->currentChar == 'd') {
					consumeCharacter(self);
				}
			}
			else if (peekAhead(self, 1) == '_') {
				/* ignore digit underscores */
				consumeCharacter(self);
			}
			consumeCharacter(self);
		}
		break;
	}

	pushToken(self, TOKEN_NUMBER);
}
/**
 * Consumes exactly one character and pushes it as a SEPARATOR token.
 */
void Lexer::recognizeSeparator()
{
    consumeCharacter();
    pushToken(TokenType::SEPARATOR);
}
/*
 * Parses a class header and creates tags for the class and its parameters.
 *
 * Reads, in order: the class identifier into `token`; an optional "#( ... )"
 * parameter list; and an optional "extends <name>" clause whose name is
 * stored in token->inheritance. A tag is then created for `token` and for
 * every parameter token that was collected.
 *
 * Input is read character by character through vGetc()/skipWhite().
 */
static void processClass (tokenInfo *const token)
{
	/* Note: At the moment, only identifies the name and not its contents
	 * (the original note said "typedef name", presumably carried over from
	 * a sibling function -- TODO confirm) */
	int c;
	tokenInfo *extra;
	tokenInfo *parameters = NULL;

	/* Get identifiers */
	c = skipWhite (vGetc ());
	if (isIdentifierCharacter (c))
	{
		readIdentifier (token, c);
		c = skipWhite (vGetc ());
	}

	/* Find class parameters list */
	if (c == '#')
	{
		c = skipWhite (vGetc ());
		if (c == '(')
		{
			parameters = newToken ();
			do
			{
				c = skipWhite (vGetc ());
				readIdentifier (parameters, c);
				updateKind (parameters);
				verbose ("Found class parameter %s\n", vStringValue (parameters->name));
				/* An identifier with no known kind is taken to be the
				 * parameter name; anything else is read over */
				if (parameters->kind == K_UNDEFINED)
				{
					parameters->kind = K_CONSTANT;
					/* Start a fresh token for the next parameter */
					parameters = pushToken (parameters, newToken ());
					/* Skip the rest of this parameter (e.g. a default value) */
					c = vGetc();
					while (c != ',' && c != ')' && c != EOF)
					{
						c = vGetc();
					}
				}
			} while (c != ')' && c != EOF);
			c = vGetc ();
			/* Drop the token that was pushed after the last parameter and
			 * never filled in */
			parameters = popToken (parameters);
		}
		c = skipWhite (vGetc ());
	}

	/* Search for inheritance information */
	if (isIdentifierCharacter (c))
	{
		extra = newToken ();

		readIdentifier (extra, c);
		c = skipWhite (vGetc ());
		if (strcmp (vStringValue (extra->name), "extends") == 0)
		{
			/* The identifier after "extends" is the parent class */
			readIdentifier (extra, c);
			vStringCopy (token->inheritance, extra->name);
			verbose ("Inheritance %s\n", vStringValue (token->inheritance));
		}
		deleteToken (extra);
	}

	/* Use last identifier to create tag */
	createTag (token);

	/* Add parameter list */
	while (parameters)
	{
		createTag (parameters);
		parameters = popToken (parameters);
	}
}
/** checks whether the current token is a section identifier, and if yes, switches to the corresponding section
 *
 *  Recognized keywords (case-insensitive): MIN, MINIMUM, MINIMIZE, MAX, MAXIMUM, MAXIMIZE and END.
 *  A token that is directly followed by ':' is a line name and never a section keyword.
 *
 *  @return TRUE if the token is a section keyword (lpinput->section, and for objective keywords also
 *          lpinput->objsense, have been updated), FALSE otherwise
 */
static
SCIP_Bool isNewSection(
   SCIP*                 scip,               /**< SCIP data structure */
   LPINPUT*              lpinput             /**< LP reading data */
   )
{
   SCIP_Bool iscolon;
   size_t len;

   assert(lpinput != NULL);

   /* remember first token by swapping the token buffer */
   swapTokenBuffer(lpinput);

   /* look at next token: if this is a ':', the first token is a name and no section keyword */
   iscolon = FALSE;
   if( getNextToken(scip, lpinput) )
   {
      iscolon = (*lpinput->token == ':');
      pushToken(lpinput);
   }

   /* reinstall the previous token by swapping back the token buffer */
   swapTokenBuffer(lpinput);

   /* check for ':' */
   if( iscolon )
      return FALSE;

   len = strlen(lpinput->token);
   assert(len < LP_MAX_LINELEN);

   /* the section keywords are at least 2 characters up to 8 or exactly 15 characters long */
   if( len > 1 && (len < 9 || len == 15) )
   {
      char token[16];
      int c = 0;

      /* build an upper-case copy of the token; the length check above guarantees that it fits */
      while( lpinput->token[c] != '\0' )
      {
         /* toupper() is only defined for EOF and values representable as unsigned char, so a plain char holding a
          * byte >= 0x80 must be converted first
          */
         token[c] = (char)toupper((unsigned char)lpinput->token[c]); /*lint !e734*/
         ++c;
         assert(c < 16);
      }
      token[c] = '\0';

      if( (len == 3 && strcmp(token, "MIN") == 0)
         || (len == 7 && strcmp(token, "MINIMUM") == 0)
         || (len == 8 && strcmp(token, "MINIMIZE") == 0) )
      {
         SCIPdebugMessage("(line %d) new section: OBJECTIVE\n", lpinput->linenumber);
         lpinput->section = LP_OBJECTIVE;
         lpinput->objsense = SCIP_OBJSENSE_MINIMIZE;
         return TRUE;
      }

      if( (len == 3 && strcmp(token, "MAX") == 0)
         || (len == 7 && strcmp(token, "MAXIMUM") == 0)
         || (len == 8 && strcmp(token, "MAXIMIZE") == 0) )
      {
         SCIPdebugMessage("(line %d) new section: OBJECTIVE\n", lpinput->linenumber);
         lpinput->section = LP_OBJECTIVE;
         lpinput->objsense = SCIP_OBJSENSE_MAXIMIZE;
         return TRUE;
      }

      if( len == 3 && strcmp(token, "END") == 0 )
      {
         SCIPdebugMessage("(line %d) new section: END\n", lpinput->linenumber);
         lpinput->section = LP_END;
         return TRUE;
      }
   }

   return FALSE;
}
/** reads an objective or constraint with name and coefficients
 *
 *  Reads an optional "<name> :" prefix and then a sequence of terms "[sign] [value] <variable>". Reading stops at a
 *  sense token (which is pushed back for the caller), at a section keyword (reported through newsection), or when
 *  no more tokens are available. Terms whose coefficient is zero are not stored.
 *
 *  Syntax errors are reported through syntaxError() and the function still returns SCIP_OKAY; only a quadratic part
 *  ('[' or '^') makes it return SCIP_READERROR. The vars and coefs arrays are not freed on those paths, so the
 *  caller has to free them in every case in which they were allocated.
 */
static
SCIP_RETCODE readCoefficients(
   SCIP*                 scip,               /**< SCIP data structure */
   LPINPUT*              lpinput,            /**< LP reading data */
   SCIP_Bool             isobjective,        /**< indicates whether we are currently reading the coefficients of the objective */
   char*                 name,               /**< pointer to store the name of the line; must be at least of size
                                              *   LP_MAX_LINELEN */
   SCIP_VAR***           vars,               /**< pointer to store the array with variables (must be freed by caller) */
   SCIP_Real**           coefs,              /**< pointer to store the array with coefficients (must be freed by caller) */
   int*                  ncoefs,             /**< pointer to store the number of coefficients */
   SCIP_Bool*            newsection          /**< pointer to store whether a new section was encountered */
   )
{
   SCIP_Bool havesign;
   SCIP_Bool havevalue;
   SCIP_Real coef;
   int coefsign;
   int coefssize;

   assert(lpinput != NULL);
   assert(name != NULL);
   assert(vars != NULL);
   assert(coefs != NULL);
   assert(ncoefs != NULL);
   assert(newsection != NULL);

   /* initialize all outputs, so that they are well-defined on every early return */
   *vars = NULL;
   *coefs = NULL;
   *name = '\0';
   *ncoefs = 0;
   *newsection = FALSE;

   /* read the first token, which may be the name of the line */
   if( getNextToken(scip, lpinput) )
   {
      /* check if we reached a new section */
      if( isNewSection(scip, lpinput) )
      {
         *newsection = TRUE;
         return SCIP_OKAY;
      }

      /* remember the token in the token buffer */
      swapTokenBuffer(lpinput);

      /* get the next token and check, whether it is a colon */
      if( getNextToken(scip, lpinput) )
      {
         if( strcmp(lpinput->token, ":") == 0 )
         {
            /* the second token was a colon: the first token is the line name */
            (void)SCIPmemccpy(name, lpinput->tokenbuf, '\0', LP_MAX_LINELEN);

            /* make sure the name is terminated even if the token filled the whole buffer */
            name[LP_MAX_LINELEN - 1] = '\0';
            SCIPdebugMessage("(line %d) read constraint name: '%s'\n", lpinput->linenumber, name);
         }
         else
         {
            /* the second token was no colon: push the tokens back onto the token stack and parse them as coefficients */
            pushToken(lpinput);
            pushBufferToken(lpinput);
         }
      }
      else
      {
         /* there was only one token left: push it back onto the token stack and parse it as coefficient */
         pushBufferToken(lpinput);
      }
   }

   /* initialize buffers for storing the coefficients */
   coefssize = LP_INIT_COEFSSIZE;
   SCIP_CALL( SCIPallocMemoryArray(scip, vars, coefssize) );
   SCIP_CALL( SCIPallocMemoryArray(scip, coefs, coefssize) );

   /* read the coefficients */
   coefsign = +1;
   coef = 1.0;
   havesign = FALSE;
   havevalue = FALSE;
   *ncoefs = 0;
   while( getNextToken(scip, lpinput) )
   {
      SCIP_VAR* var;

      /* check if we read a sign */
      if( isSign(lpinput, &coefsign) )
      {
         SCIPdebugMessage("(line %d) read coefficient sign: %+d\n", lpinput->linenumber, coefsign);
         havesign = TRUE;
         continue;
      }

      /* check if we read a value */
      if( isValue(scip, lpinput, &coef) )
      {
         SCIPdebugMessage("(line %d) read coefficient value: %g with sign %+d\n", lpinput->linenumber, coef, coefsign);
         if( havevalue )
         {
            syntaxError(scip, lpinput, "two consecutive values.");
            return SCIP_OKAY;
         }
         havevalue = TRUE;
         continue;
      }

      /* check if we reached an equation sense */
      if( isSense(lpinput, NULL) )
      {
         if( isobjective )
         {
            syntaxError(scip, lpinput, "no sense allowed in objective");
            return SCIP_OKAY;
         }

         /* put the sense back onto the token stack */
         pushToken(lpinput);
         break;
      }

      /* check if we reached a new section, that will be only allowed when having no current sign and value and if we
       * are not in the quadratic part
       */
      if( (isobjective || (!havevalue && !havesign)) && isNewSection(scip, lpinput) )
      {
         /* a pending sign or value can only be left over here when reading the objective */
         if( havesign && !havevalue )
         {
            SCIPwarningMessage(scip, "skipped single sign %c without value or variable in objective\n", coefsign == 1 ? '+' : '-');
         }
         else if( isobjective && havevalue && !SCIPisZero(scip, coef) )
         {
            SCIPwarningMessage(scip, "constant term %+g in objective is skipped\n", coef * coefsign);
         }

         *newsection = TRUE;
         return SCIP_OKAY;
      }

      /* check if we start a quadratic part */
      if( *lpinput->token == '[' )
      {
         syntaxError(scip, lpinput, "diff reader does not support quadratic objective function.");
         return SCIP_READERROR;
      }

      /* all but the first coefficient need a sign */
      if( *ncoefs > 0 && !havesign )
      {
         syntaxError(scip, lpinput, "expected sign ('+' or '-') or sense ('<' or '>').");
         return SCIP_OKAY;
      }

      /* check if the last variable should be squared */
      if( *lpinput->token == '^' )
      {
         syntaxError(scip, lpinput, "diff reader does not support quadratic objective function.");
         return SCIP_READERROR;
      }
      else
      {
         /* the token is a variable name: get the corresponding variable */
         SCIP_CALL( getVariable(scip, lpinput->token, &var) );
      }

      /* insert the linear coefficient */
      SCIPdebugMessage("(line %d) read linear coefficient: %+g<%s>\n", lpinput->linenumber, coefsign * coef, SCIPvarGetName(var));
      if( !SCIPisZero(scip, coef) )
      {
         /* resize the vars and coefs array if needed */
         if( *ncoefs >= coefssize )
         {
            coefssize *= 2;
            coefssize = MAX(coefssize, (*ncoefs)+1);
            SCIP_CALL( SCIPreallocMemoryArray(scip, vars, coefssize) );
            SCIP_CALL( SCIPreallocMemoryArray(scip, coefs, coefssize) );
         }
         assert(*ncoefs < coefssize);

         /* add coefficient */
         (*vars)[*ncoefs] = var;
         (*coefs)[*ncoefs] = coefsign * coef;
         (*ncoefs)++;
      }

      /* reset the flags and coefficient value for the next coefficient */
      coefsign = +1;
      coef = 1.0;
      havesign = FALSE;
      havevalue = FALSE;
   }

   return SCIP_OKAY;
}
XPathParser::Token* XPathParser::tokenize(const char* xpath, TokenFactory* tokenFactory) { /* * First, Define some convenient Macros. * These macros operate on the current Token, on the last Token, and on the token Stack * * allocToken : allocates a new token, puts it in current * nextToken : puts a new token in current, update last <- current * newSymbolToken : creates a new token, initiated to the current symbol character * pushQName : push the qname string buffer as a new QName token * pushToken : push the current token at the top of the stack * pushSingleToken : push the current token at the top of the stack, but just for one QName token * popToken : pop the top of the stack to current token. */ #define allocToken() do { current = tokenFactory->allocToken(); } while(0) #define nextToken() \ do { allocToken(); \ Log_XPathParser_Tokenize ( "New Token at %p\n", current ); \ if ( ! headToken ) { headToken = current; last = current; }\ else if ( last ) { last->next = current; last = current; } \ else if ( tokenStack.size() ) \ { AssertBug ( ! 
tokenStack.front()->subExpression, "Front token has already a subExpression !\n" ); \ tokenStack.front()->subExpression = current; last = current; } \ else { Bug ( "Don't know how to link this token !\n" ); } } while (0) #define newSymbolToken() \ Log_XPathParser_Tokenize ( "New Symbol Token '%c'\n", *c ); \ nextToken(); \ current->type = Token::Symbol; \ current->symbol = *c; #define pushQName() \ if ( qname.size() ) \ { \ if ( last && last->isQName() && ( qname == "::" || stringEndsWith(last->token,String("::") ) ) ) \ { \ last->token += qname; qname = ""; \ } \ else \ { \ nextToken(); \ current->token = qname; \ qname = ""; \ Log_XPathParser_Tokenize ( "New QName : '%s'\n", current->token.c_str() ); \ if ( pushedSingleToken ) \ { popToken (); } \ } \ } #define pushToken() \ pushedSingleToken = false; \ Log_XPathParser_Tokenize ( "Pushing token '%p'\n", current ); \ tokenStack.push_front ( current ); \ current = last = NULL; #define pushSingleToken() \ pushToken (); \ pushedSingleToken = true; #define popToken() \ AssertBug ( tokenStack.size(), "Empty Stack, can't pop !\n" ); \ current = tokenStack.front ( ); \ Log_XPathParser_Tokenize ( "Popped token '%p'\n", current ); \ tokenStack.pop_front (); \ last = current; \ pushedSingleToken = false; Log_XPathParser_Tokenize ( "**** Tokenize : '%s' *****\n", xpath ); Token* current = NULL, *last = NULL, *headToken = NULL; char lastChar = '\0'; std::list<Token*> tokenStack; String qname, text; bool pushedSingleToken = false; for (const char* c = xpath; *c; c++) { Log_XPathParser_Tokenize ( "At Char '%c'\n", *c ); if (lastChar == '!' && *c != '=') { Bug ( "Invalid character following '!' 
: '%c'\n", *c ); } switch (*c) { case '\'': case '"': { char start = *c; c++; text = ""; for (; *c && (*c != start); c++) { text += *c; } if (!*c) { throwXPathException ( "Unbalanced quote caracter '%c'\n", start ); } nextToken (); current->type = Token::Text; current->token = text; break; } case '{': case '(': case '[': pushQName (); if (*c == '[' && last && last->symbol == '*') { last->type = Token::QName; last->token = "*"; } newSymbolToken (); pushToken (); break; case ']': case '}': case ')': { pushQName (); popToken (); char mustBe; if (*c == ']') mustBe = '['; else if (*c == ')') mustBe = '('; else if (*c == '}') mustBe = '{'; else { mustBe = '\0'; Bug ( "Invalid section matcher from character '%c'\n", *c ); } if (!current->isSymbol()) { throwXPathException ( "Originating token is not a symbol !\n" ); } if (current->symbol != mustBe) { throwXPathException ( "Unmatched closing symbols : openned with '%c', close with '%c'\n", *c, current->symbol ); } if (*c == ')') { Token* lastArg = current->subExpression, *lastArg0 = NULL; while (lastArg) { if (lastArg->isSymbol() && lastArg->symbol == ',') lastArg0 = lastArg; lastArg = lastArg->next; } if (lastArg0) { Token* father = current; allocToken (); current->type = Token::Symbol; current->symbol = ','; current->subExpression = lastArg0->next; lastArg0->next = current; current = last = father; } } } break; case '$': case '@': /* * Variable and attribute short form * These short forms expect a QName defined after. 
*/ pushQName (); newSymbolToken (); pushSingleToken (); break; case ',': { /* * Here we must re-arrange the tokens * so that the ',' symbol appears with the contents underneath */ pushQName (); AssertBug ( tokenStack.size(), "Empty token stack !\n" ); Token* father = tokenStack.front(); AssertBug ( father->isSymbol(), "father token is not a symbol !\n" ); AssertBug ( father->symbol == '(', "father token is not the '(' symbol : %c\n", father->symbol ); if (!father->subExpression) throwXPathException ( "Empty XPath function argument after '%c'", father->symbol ); AssertBug ( father->subExpression, "father token has no subExpression !\n" ); Token* lastArg = father->subExpression, *lastArg0 = NULL; while (lastArg) { if (lastArg->isSymbol() && lastArg->symbol == ',') lastArg0 = lastArg; lastArg = lastArg->next; } allocToken (); current->type = Token::Symbol; current->symbol = ','; if (lastArg0) { current->subExpression = lastArg0->next; lastArg0->next = current; } else { current->subExpression = father->subExpression; father->subExpression = current; } last = current; break; } case '*': { /* * Meaning of the '*' character is ambiguous : * It may be a wildcard for node testing (as in '@*', 'axis::*', 'namespace:*', ..) * Or it may be the multiplication symbol. */ bool isQName = false; if (qname.c_str() && *qname.c_str()) { Log_XPathParser_Tokenize ( "QName : %s\n", qname.c_str() ); if (__endsWith(qname.c_str(), ':')) isQName = true; } else if (last && last->isQName()) { Log_XPathParser_Tokenize ( "Last is qname : %s\n", last->token.c_str() ); if (__endsWith(last->token.c_str(), ':') || last->token == "or") //< Fixup for docbook.xsl */self::ng:* or */self::db:* isQName = true; } else if (!last || isCharIn(last->symbol, "/~|,+*-")) { isQName = true; } if (qname == "mod" || qname == "div") { /* * We must find the token before this one. 
*/ if (last) { isQName = true; } else { isQName = false; } pushQName (); Log_XPathParser_Tokenize ( "Multiply with qname='%s', last=%p(%c/%s), qname=%d\n", qname.c_str(), last, last ? last->symbol : '?', last ? last->token.c_str() : "", isQName ); } Log_XPathParser_Tokenize ( "While at char '*' : last=%p(%d,q=%d/%c/%s), qname='%s' isQName=%d\n", last, last ? last->type : -1, last ? last->isQName() : -1, last ? last->symbol : '?', last ? last->token.c_str() : "", qname.c_str(), isQName ); if (isQName) { qname += *c; } else { pushQName (); newSymbolToken (); } } break; case '-': /* * Meaning of the '-' character is ambiguous : * It may be a character inside of a QName (as in 'xsl:for-each') * Or it may be the substraction operator ('op1 - op2') * Or it may be the unary negative operator ('-op1') */ if (isNumeric(qname.c_str()) || lastChar == '\0' || isCharIn( lastChar, "\n\r ([)]=<>+-*")) { pushQName (); bool isUnary = false; if (!last || (last->isSymbol() && isCharIn(last->symbol, "*+-=><|[/~,"))) { Log_XPathParser_Tokenize ( "Negate : lastChar='%c', last=%p, last->symbol=%c, c[1]=%c\n", lastChar, last, last ? last->symbol : ' ', c[1] ); isUnary = true; } if (last && (last->token == "mod" || last->token == "div")) { // We must find the token before this one. Token* before = NULL; if (tokenStack.size()) { before = tokenStack.front()->subExpression; if (before == last) before = NULL; } else before = headToken; while (before) { if (before->next == last) break; before = before->next; } if (before) isUnary = true; Log_XPathParser_Tokenize ( "Minus with last='%s', before=%p(%c/%s), unary=%d\n", last->token.c_str(), before, before ? before->symbol : '?', before ? before->token.c_str() : "", isUnary ); } newSymbolToken ( ); if (isUnary) current->symbol = 'N'; // Negate ! 
} else if (c[1] == '>') { pushQName (); newSymbolToken (); } else { qname += *c; } break; case '/': /* * The '/' character only appears in single step separator ('step/step') or initial root ('/step') * Or in descendant short form (as initial '//step' or inside 'step//step'). */ if (lastChar == '/') { AssertBug ( last->isSymbol() && last->symbol == lastChar, "Dropped the last in a lastChar\n" ); last->token = last->symbol; last->token += *c; last->symbol = '~'; break; } case '=': if (*c == '=' && (lastChar == '<' || lastChar == '>' || lastChar == '!')) { AssertBug ( last->isSymbol() && last->symbol == lastChar, "Dropped the last in a lastChar\n" ); last->token = last->symbol; last->token += *c; break; } case '<': case '>': if (lastChar == '-') { if (last) { Log_XPathParser_Tokenize ( "ElementFunctionCall : last=%c/'%s'\n", last->symbol, last->token.c_str() ); last->symbol = '#'; } else { Bug ( "No last !\n" ); } Log_XPathParser_Tokenize ( "Operator '->' found !\n" ); break; } case '+': case '!': case '|': pushQName (); newSymbolToken ( ); break; case '\t': case ' ': case '\n': case '\r': pushQName (); break; default: qname += *c; break; } lastChar = *c; } pushQName (); if (tokenStack.size()) { throwXPathException ( "Unbalanced parentheses : still %ld tokens on stack.\n", (unsigned long) tokenStack.size() ); } return headToken; #undef allocToken #undef nextToken #undef newSymbolToken #undef pushQName #undef pushToken #undef pushSingleToken #undef popToken }
/*
 * Splits the NUL-terminated text `s` into tokens, which are appended to
 * `info` by the push and parse helpers.
 *
 * The character at the current position selects the handler: comments,
 * the "/deep/" combinator, quoted strings, runs of spaces, punctuation
 * (as classified by isPunctuation with table `p`), decimal numbers, and
 * identifiers for everything else.
 *
 * State kept while scanning:
 *   urlMode    set by parseIdentifier() through its urlMode argument and
 *              cleared at ')'; while set, "//" and "/deep/" are not treated
 *              specially
 *   blockMode  nesting depth of '{' ... '}'; inside a block "//" starts an
 *              identifier instead of a single-line comment
 *   ln         line counter, incremented for every '\n' and '\r'
 */
void _getTokens(const char* s, struct char_char* p, struct token_info* info) {
    char urlMode = 0, blockMode = 0;
    int tn = 0, ln = 0;
    size_t pos = 0, slen = strlen(s);

    for(; pos < slen; pos++) {
        char c = s[pos];
        char cn = s[pos+1];   /* at worst the terminating NUL */

        if(c == '/' && cn == '*') {
            parseMLComment(info, &tn, ln, s, slen, &pos);
        } else if(!urlMode && strncmp(&s[pos], "/deep/", 6) == 0) {
            /* strncmp stops at the terminating NUL, so this never reads
             * past the end of s, even within five characters of the end */
            pushToken(info, &tn, ln, TOKENTYPE_DEEP, "/deep/");
            pos += 5;
        } else if(!urlMode && c == '/' && cn == '/') {
            if(blockMode > 0) {
                parseIdentifier(info, &tn, ln, s, slen, &pos, &urlMode, p);
            } else {
                parseSLComment(info, &tn, ln, s, slen, &pos);
            }
        } else if(c == '"' || c == '\'') {
            parseString(info, &tn, ln, s, slen, &pos, c);
        } else if(c == ' ') {
            parseSpaces(info, &tn, ln, s, slen, &pos);
        } else if(isPunctuation(p, c)) {
            /* the value returned by isPunctuation() is used as token type */
            pushChar(info, &tn, ln, isPunctuation(p, c), c);
            if(c == '\n' || c == '\r') {
                ln++;
            }
            if(c == ')') {
                urlMode = 0;
            }
            if(c == '{') {
                blockMode++;
            }
            if(c == '}') {
                blockMode--;
            }
        } else if(isDecimalDigit(c)) {
            parseDecimalNumber(info, &tn, ln, s, slen, &pos);
        } else {
            parseIdentifier(info, &tn, ln, s, slen, &pos, &urlMode, p);
        }
    }
}
/** checks whether the current token is a section identifier, and if yes, switches to the corresponding section
 *
 *  Recognized keywords (case-insensitive): PRESOLVED, NBLOCKS, BLOCK, MASTERCONSS and END. A token that is directly
 *  followed by ':' is a name and never a section keyword. For BLOCK the following token is read as the block number
 *  and stored zero-based in blkinput->blocknr.
 *
 *  @return TRUE if the token is a section keyword and blkinput->section has been updated, FALSE otherwise
 */
static
SCIP_Bool isNewSection(
   SCIP*                 scip,               /**< SCIP data structure */
   BLKINPUT*             blkinput            /**< BLK reading data */
   )
{
   SCIP_Bool nextiscolon = FALSE;

   assert(blkinput != NULL);

   /* peek at the following token: park the current token in the token buffer, read the next one, push it back
    * and restore the current token
    */
   swapTokenBuffer(blkinput);
   if( getNextToken(blkinput) )
   {
      nextiscolon = (strcmp(blkinput->token, ":") == 0);
      pushToken(blkinput);
   }
   swapTokenBuffer(blkinput);

   /* a token followed by ':' is a name, not a section keyword */
   if( nextiscolon )
      return FALSE;

   if( strcasecmp(blkinput->token, "PRESOLVED") == 0 )
   {
      SCIPdebugMessage("(line %d) new section: PRESOLVED\n", blkinput->linenumber);
      blkinput->section = BLK_PRESOLVED;
      return TRUE;
   }
   else if( strcasecmp(blkinput->token, "NBLOCKS") == 0 )
   {
      SCIPdebugMessage("(line %d) new section: NBLOCKS\n", blkinput->linenumber);
      blkinput->section = BLK_NBLOCKS;
      return TRUE;
   }
   else if( strcasecmp(blkinput->token, "BLOCK") == 0 )
   {
      int blocknr;

      blkinput->section = BLK_BLOCK;

      /* the keyword has to be followed by the block number */
      if( getNextToken(blkinput) && isInt(scip, blkinput, &blocknr) )
      {
         assert(blocknr >= 0);
         assert(blocknr <= blkinput->nblocks);

         /* block numbers are stored zero-based */
         blkinput->blocknr = blocknr-1;
      }
      else
      {
         syntaxError(scip, blkinput, "no block number after block keyword!\n");
      }

      SCIPdebugMessage("new section: BLOCK %d\n", blkinput->blocknr);
      return TRUE;
   }
   else if( strcasecmp(blkinput->token, "MASTERCONSS") == 0 )
   {
      blkinput->section = BLK_MASTERCONSS;
      SCIPdebugMessage("new section: MASTERCONSS\n");
      return TRUE;
   }
   else if( strcasecmp(blkinput->token, "END") == 0 )
   {
      SCIPdebugMessage("(line %d) new section: END\n", blkinput->linenumber);
      blkinput->section = BLK_END;
      return TRUE;
   }

   return FALSE;
}
/**
 * Consumes exactly one character and pushes it as an OPERATOR token.
 */
void Lexer::recognizeOperator()
{
    consumeCharacter();
    pushToken(TokenType::OPERATOR);
}