Esempio n. 1
0
/**
 * Splits `input` into tokens and hands each one to pushToken().
 *
 * Characters for which isEmpty() is true end the currently open token; a
 * placeholder token of type Token::TT_NULL is appended to mResult at that
 * position. Characters for which isSingleChar() is true end the open token
 * (if it has any length) and are pushed as a one-character token of their own.
 * After the scan, fixupNegations() and removeNull() are run on the result.
 *
 * @param input text to tokenize
 * @return always NoError
 */
Error Tokenizer::tokenize (const std::string & input) {
	const int inputLength = static_cast<int> (input.length ());
	bool skipping = true; // true while no token is currently open
	int start = 0;        // index at which the open token begins
	for (int pos = 0; pos < inputLength; ++pos) {
		const char ch = input[pos];
		if (skipping) {
			if (isEmpty (ch)) continue;
			// first character of a new token
			start = pos;
			skipping = false;
		}
		if (isEmpty (ch)) {
			// the open token ends right before this character
			pushToken (input.substr (start, pos - start), start);
			skipping = true;
			// placeholder token; removeNull () is called once the scan is done
			Token placeholder (" ", pos);
			placeholder.type = Token::TT_NULL;
			mResult.push_back (placeholder);
		}
		if (isSingleChar (ch)) {
			if (pos > start) {
				pushToken (input.substr (start, pos - start), start);
			}
			pushToken (input.substr (pos, 1), pos);
			skipping = true; // empties that follow are skipped
		}
	}
	// flush a token that runs up to the end of the input
	if (!skipping) {
		pushToken (input.substr (start), start);
	}
	fixupNegations ();
	removeNull ();
	return NoError;
}
Esempio n. 2
0
void Lexer::recognizeString() {
    consumeCharacter();

    while (this->currentChar != '"') {
        consumeCharacter();
    }

    consumeCharacter();
    pushToken(TokenType::STRING);
}
Esempio n. 3
0
void Lexer::recognizeCharacter() {
    consumeCharacter();

    while (this->currentChar != '\'') {
        consumeCharacter();
    }

    consumeCharacter();
    pushToken(TokenType::CHARACTER);
}
Esempio n. 4
0
void Lexer::recognizeNumber() {
    while (this->currentChar >= '0' && this->currentChar <= '9') {
        consumeCharacter();
    }
    if (this->currentChar == '.') {
        consumeCharacter();
    }
    while (this->currentChar >= '0' && this->currentChar <= '9') {
        consumeCharacter();
    }
    pushToken(TokenType::NUMBER);
}
Esempio n. 5
0
void Lexer::recognizeIdentifier() {
    consumeCharacter();

    while ((this->currentChar == '_' || this->currentChar == '-') || 
          (this->currentChar >= 'a' && this->currentChar <= 'z') ||
          (this->currentChar >= 'A' && this->currentChar <= 'Z') ||
          (this->currentChar >= '0' && this->currentChar <= '9')) {
        consumeCharacter();
    }

    pushToken(TokenType::IDENTIFIER);
}
Esempio n. 6
0
File: lexer.c Progetto: 8l/ark-c
/*
 * Consumes an identifier and pushes it as TOKEN_IDENTIFIER.
 *
 * The first character is consumed unconditionally. After that the identifier
 * is a sequence of letter/digit runs joined by single underscores; an
 * underscore is only consumed when the character after it is a letter or digit.
 */
void recognizeIdentifierToken(Lexer *self) {
	consumeCharacter(self);

	for (;;) {
		/* consume one run of letters and digits */
		while (isLetterOrDigit(self->currentChar)) {
			consumeCharacter(self);
		}

		/* continue only across an underscore that is followed by a letter/digit */
		if (!isUnderscore(self->currentChar) || !isLetterOrDigit(peekAhead(self, 1))) {
			break;
		}
		consumeCharacter(self);
	}

	pushToken(self, TOKEN_IDENTIFIER);
}
Esempio n. 7
0
/*
 * Pushes the characters s[start .. end) as a token of the given type.
 *
 * A NUL-terminated temporary copy of the range is made, passed to pushToken()
 * and freed again afterwards. If the allocation fails, memoryFailure() is
 * called and the process exits with EXIT_FAILURE.
 */
void pushSubString(struct token_info* info, int* tn, int ln, char type, const char* s, size_t start, size_t end)
{
    const size_t count = end - start;
    char* copy = malloc(count + 1);

    if(!copy)
    {
        memoryFailure();
        exit(EXIT_FAILURE);
    }

    memcpy(copy, s + start, count);
    copy[count] = '\0';

    pushToken(info, tn, ln, type, copy);
    free(copy);
}
Esempio n. 8
0
/*
 * Pushes the single character `c` as a token of the given type.
 *
 * pushToken() takes a NUL-terminated string, so the character is wrapped in a
 * two-byte buffer. The buffer lives on the stack: the previous implementation
 * freed its heap copy immediately after pushToken() returned, so pushToken()
 * cannot be keeping the pointer, and a 2-byte heap allocation (with its
 * out-of-memory exit path) was unnecessary.
 */
void pushChar(struct token_info* info, int* tn, int ln, char type, const char c)
{
    char text[2];

    text[0] = c;
    text[1] = '\0';

    pushToken(info, tn, ln, type, text);
}
Esempio n. 9
0
File: lexer.c Progetto: 8l/ark-c
/*
 * Consumes an operator of one or two characters and pushes TOKEN_OPERATOR.
 *
 * A ':' directly followed by '=' is deliberately kept as a single-character
 * operator, so that ":=" is emitted as two separate operator tokens.
 */
void recognizeOperatorToken(Lexer *self) {
	/* decide before consuming: is this the ':' of a ":=" pair? */
	const int startsAssignment = (self->currentChar == ':' && peekAhead(self, 1) == '=');

	consumeCharacter(self);

	/* second character of a double operator */
	if (!startsAssignment && isOperator(self->currentChar)) {
		consumeCharacter(self);
	}

	pushToken(self, TOKEN_OPERATOR);
}
Esempio n. 10
0
File: lexer.c Progetto: 8l/ark-c
/*
 * Consumes a double-quoted string literal and pushes TOKEN_STRING.
 *
 * The position right after the opening quote is remembered so that an
 * "Unterminated string literal" error points at the start of the literal
 * rather than at the end of the input.
 */
void recognizeStringToken(Lexer *self) {
	expectCharacter(self, '"');

	const int startColumn = self->charNumber;
	const int startLine = self->lineNumber;

	/* consume everything up to the character isString() accepts */
	for (;;) {
		if (isString(self->currentChar)) {
			break;
		}
		consumeCharacter(self);
		if (isEndOfInput(self->currentChar)) {
			errorMessageWithPosition(self->fileName, startLine, startColumn, "Unterminated string literal");
		}
	}

	expectCharacter(self, '"');

	pushToken(self, TOKEN_STRING);
}
Esempio n. 11
0
File: lexer.c Progetto: 8l/ark-c
/*
 * Consumes a single-quoted character literal and pushes TOKEN_CHARACTER.
 *
 * An immediately closing quote is reported as "Empty character literal".
 * The literal ends at the first quote whose preceding character is not a
 * backslash; hitting the end of input first is reported as
 * "Unterminated character literal", using the position right after the
 * opening quote.
 */
void recognizeCharacterToken(Lexer *self) {
	expectCharacter(self, '\'');

	const int startColumn = self->charNumber;
	const int startLine = self->lineNumber;

	if (self->currentChar == '\'') {
		errorMessageWithPosition(self->fileName, startLine, startColumn, "Empty character literal");
	}

	/* keep going while not on a quote, or on a quote preceded by a backslash */
	while (self->currentChar != '\'' || peekAhead(self, -1) == '\\') {
		consumeCharacter(self);
		if (isEndOfInput(self->currentChar)) {
			errorMessageWithPosition(self->fileName, startLine, startColumn, "Unterminated character literal");
		}
	}

	expectCharacter(self, '\'');

	pushToken(self, TOKEN_CHARACTER);
}
Esempio n. 12
0
/*
 * Reads an identifier starting at s[*pos] and pushes it as a
 * TOKENTYPE_IDENTIFIER token.
 *
 * Leading '/' characters are taken as part of the identifier. After them the
 * identifier runs until the first character for which isPunctuation() is
 * non-zero; a backslash makes the character after it part of the identifier
 * without testing it. If the identifier equals "url" according to casecmp(),
 * *urlMode is set to 1.
 *
 * On return *pos is the index of the identifier's last character, so a caller
 * that increments the position after each token continues right behind it.
 *
 * info, tn, ln : forwarded unchanged to pushToken()
 * s, slen      : input text and its length
 * pos          : in/out scan position
 * urlMode      : out flag, set when the identifier is "url"
 * p            : punctuation table handed to isPunctuation()
 */
void parseIdentifier(struct token_info* info, int* tn, int ln, const char* s, size_t slen, size_t* pos, char* urlMode, struct char_char* p)
{
    size_t start = *pos, len = 0;
    char* ident = NULL;

    while(s[*pos] == '/')
    {
        (*pos)++;
    }

    for(; *pos < slen; (*pos)++)
    {
        if(s[*pos] == '\\')
        {
            /* skip the escaped character without testing it */
            (*pos)++;
        }
        else if(isPunctuation(p, s[*pos]))
        {
            break;
        }
    }

    /* A backslash as the very last character makes the loop step to slen+1.
       Clamp the position so the copy below never reads past the end of s. */
    if(*pos > slen)
    {
        *pos = slen;
    }

    len = *pos-start;
    ident = malloc(sizeof(char)*(len+1));
    if(ident == NULL)
    {
        memoryFailure();
        exit(EXIT_FAILURE);
    }

    memcpy(ident, &s[start], len);
    ident[len]='\0';

    if(casecmp(ident, "url") == 0)
    {
        *urlMode = 1;
    }

    pushToken(info, tn, ln, TOKENTYPE_IDENTIFIER, ident);
    free(ident);

    /* step back onto the identifier's last character */
    (*pos)--;
}
Esempio n. 13
0
/*
 * Makes `scope` the new current context.
 *
 * The new context's name is the scope's own name, prefixed with
 * "<current context name>." unless the current context's kind is K_UNDEFINED.
 * Does nothing when `scope` is NULL.
 */
static void createContext (tokenInfo *const scope)
{
	vString *fullName;

	if (! scope)
		return;

	fullName = vStringNew ();
	verbose ("Creating new context %s\n", vStringValue (scope->name));

	/* Build the qualified name before the current context is replaced */
	if (currentContext->kind != K_UNDEFINED)
	{
		vStringCopy (fullName, currentContext->name);
		vStringCatS (fullName, ".");
	}
	vStringCat (fullName, scope->name);

	/* The token returned by pushToken becomes the current context and
	 * receives the qualified name */
	currentContext = pushToken (currentContext, scope);
	vStringCopy (currentContext->name, fullName);
	vStringDelete (fullName);
}
Esempio n. 14
0
File: lexer.c Progetto: 8l/ark-c
/*
 * Handles a character the lexer could not classify: calls consumeCharacter()
 * once and pushes the result as a TOKEN_ERRORNEOUS token (the constant's
 * spelling is the project's own).
 */
void recognizeErroneousToken(Lexer *self) {
	consumeCharacter(self);
	pushToken(self, TOKEN_ERRORNEOUS);
}
Esempio n. 15
0
File: lexer.c Progetto: 8l/ark-c
/*
 * Emits a separator token: calls consumeCharacter() once and pushes the
 * result as TOKEN_SEPARATOR.
 */
void recognizeSeparatorToken(Lexer *self) {
	consumeCharacter(self);
	pushToken(self, TOKEN_SEPARATOR);
}
Esempio n. 16
0
File: lexer.c Progetto: 8l/ark-c
/*
 * Consumes a numeric literal and pushes it as TOKEN_NUMBER.
 *
 * The first character is consumed unconditionally, followed by one optional
 * '_'. The character found next selects the form of the rest of the literal:
 *   '.'        fraction digits, optional 'f' or 'd' suffix
 *   'x' / 'X'  hexadecimal digits
 *   'b'        binary digits
 *   'o'        octal digits
 *   other      decimal digits, with '.' and '_' handled by looking one
 *              character ahead
 * Every form ends with exactly one pushToken() call.
 */
void recognizeNumberToken(Lexer *self) {
	consumeCharacter(self);

	/* ignore digit underscores */
	if (self->currentChar == '_') {
		consumeCharacter(self);
	}

	switch (self->currentChar) {
	case '.':
		consumeCharacter(self); /* the dot itself */
		while (isDigit(self->currentChar)) {
			consumeCharacter(self);
		}
		if (self->currentChar == 'f' || self->currentChar == 'd') {
			consumeCharacter(self);
		}
		break;

	case 'x':
	case 'X':
		consumeCharacter(self);
		while (isHexChar(self->currentChar)) {
			consumeCharacter(self);
		}
		break;

	case 'b':
		consumeCharacter(self);
		while (isBinChar(self->currentChar)) {
			consumeCharacter(self);
		}
		break;

	case 'o':
		consumeCharacter(self);
		while (isOctChar(self->currentChar)) {
			consumeCharacter(self);
		}
		break;

	default:
		while (isDigit(self->currentChar)) {
			if (peekAhead(self, 1) == '.') {
				consumeCharacter(self);
				while (isDigit(self->currentChar)) {
					consumeCharacter(self);
				}
				if (self->currentChar == 'f' || self->currentChar == 'd') {
					consumeCharacter(self);
				}
			}
			else if (peekAhead(self, 1) == '_') {
				/* ignore digit underscores */
				consumeCharacter(self);
			}
			consumeCharacter(self);
		}
		break;
	}

	pushToken(self, TOKEN_NUMBER);
}
Esempio n. 17
0
// Emits a separator token: calls consumeCharacter() once and pushes the
// result as TokenType::SEPARATOR.
void Lexer::recognizeSeparator() {
    consumeCharacter();
    pushToken(TokenType::SEPARATOR);
}
Esempio n. 18
0
/*
 * Parses a class declaration and emits tags for it.
 *
 * Reads the class name into `token`, then an optional "#( ... )" parameter
 * list, then an optional "extends <name>" clause whose name is copied into
 * token->inheritance. Finally a tag is created for the class and one for each
 * token remaining on the `parameters` chain.
 */
static void processClass (tokenInfo *const token)
{
	/*Note: At the moment, only identifies typedef name and not its contents */
	int c;
	tokenInfo *extra;
	tokenInfo *parameters = NULL;

	/* Get identifiers */
	c = skipWhite (vGetc ());
	if (isIdentifierCharacter (c))
	{
		readIdentifier (token, c);
		c = skipWhite (vGetc ());
	}

	/* Find class parameters list */
	if (c == '#')
	{
		c = skipWhite (vGetc ());
		if (c == '(')
		{
			parameters = newToken ();
			do
			{
				c = skipWhite (vGetc ());
				readIdentifier (parameters, c);
				updateKind (parameters);
				verbose ("Found class parameter %s\n", vStringValue (parameters->name));
				/* An identifier whose kind is still K_UNDEFINED after
				 * updateKind is recorded as a constant */
				if (parameters->kind == K_UNDEFINED)
				{
					parameters->kind = K_CONSTANT;
					/* Chain a fresh token for the next parameter
					 * (presumably pushToken links it in front of the
					 * current one - confirm against its definition) */
					parameters = pushToken (parameters, newToken ());
					/* Skip the rest of this parameter up to ',' or ')' */
					c = vGetc();
					while (c != ',' && c != ')' && c != EOF)
					{
						c = vGetc();
					}
				}
			} while (c != ')' && c != EOF);
			/* This character is read but its value is overwritten below
			 * before being used */
			c = vGetc ();
			/* Drop the token on top of the chain */
			parameters = popToken (parameters);
		}
		c = skipWhite (vGetc ());
	}

	/* Search for inheritance information */
	if (isIdentifierCharacter (c))
	{
		extra = newToken ();

		readIdentifier (extra, c);
		c = skipWhite (vGetc ());
		if (strcmp (vStringValue (extra->name), "extends") == 0)
		{
			/* The identifier after "extends" is read into the same
			 * temporary token and copied out as the inheritance name */
			readIdentifier (extra, c);
			vStringCopy (token->inheritance, extra->name);
			verbose ("Inheritance %s\n", vStringValue (token->inheritance));
		}
		deleteToken (extra);
	}

	/* Use last identifier to create tag */
	createTag (token);

	/* Add parameter list */
	while (parameters)
	{
		createTag (parameters);
		parameters = popToken (parameters);
	}
}
Esempio n. 19
0
/** checks whether the current token is a section identifier, and if yes, switches to the corresponding section
 *
 *  A token that is directly followed by a ':' is a name and never a section keyword. Otherwise the token is
 *  compared case-insensitively against MIN/MINIMUM/MINIMIZE, MAX/MAXIMUM/MAXIMIZE and END. On a match,
 *  lpinput->section (and, for objective keywords, lpinput->objsense) is updated and TRUE is returned.
 */
static
SCIP_Bool isNewSection(
   SCIP*                 scip,               /**< SCIP data structure */
   LPINPUT*              lpinput             /**< LP reading data */
   )
{
   SCIP_Bool iscolon;
   size_t len;

   assert(lpinput != NULL);

   /* remember first token by swapping the token buffer */
   swapTokenBuffer(lpinput);

   /* look at next token: if this is a ':', the first token is a name and no section keyword */
   iscolon = FALSE;
   if( getNextToken(scip, lpinput) )
   {
      iscolon = (*lpinput->token == ':');
      pushToken(lpinput);
   }

   /* reinstall the previous token by swapping back the token buffer */
   swapTokenBuffer(lpinput);

   /* check for ':' */
   if( iscolon )
      return FALSE;

   len = strlen(lpinput->token);
   assert(len < LP_MAX_LINELEN);

   /* the section keywords are at least 2 characters up to 8 or exactly 15 characters long */
   if( len > 1 && (len < 9 || len == 15) )
   {
      char token[16];
      int c = 0;

      /* build an upper-case copy; the length check above guarantees that it fits into token[] */
      while( lpinput->token[c] != '\0' )
      {
         /* toupper() is only defined for values representable as unsigned char (or EOF), so a plain char that
          * may be negative has to be converted first
          */
         token[c] = toupper((unsigned char)lpinput->token[c]); /*lint !e734*/
         ++c;
         assert(c < 16);
      }
      token[c] = '\0';

      if( (len == 3 && strcmp(token, "MIN") == 0)
         || (len == 7 && strcmp(token, "MINIMUM") == 0)
         || (len == 8 && strcmp(token, "MINIMIZE") == 0) )
      {
         SCIPdebugMessage("(line %d) new section: OBJECTIVE\n", lpinput->linenumber);
         lpinput->section = LP_OBJECTIVE;
         lpinput->objsense = SCIP_OBJSENSE_MINIMIZE;
         return TRUE;
      }

      if( (len == 3 && strcmp(token, "MAX") == 0)
         || (len == 7 && strcmp(token, "MAXIMUM") == 0)
         || (len == 8 && strcmp(token, "MAXIMIZE") == 0) )
      {
         SCIPdebugMessage("(line %d) new section: OBJECTIVE\n", lpinput->linenumber);
         lpinput->section = LP_OBJECTIVE;
         lpinput->objsense = SCIP_OBJSENSE_MAXIMIZE;
         return TRUE;
      }

      if( len == 3 && strcmp(token, "END") == 0 )
      {
         SCIPdebugMessage("(line %d) new section: END\n", lpinput->linenumber);
         lpinput->section = LP_END;
         return TRUE;
      }
   }

   return FALSE;
}
Esempio n. 20
0
/** reads an objective or constraint with name and coefficients
 *
 *  The first token is taken as the line name if it is followed by a ':'. After that, tokens are read as
 *  [sign] [value] variable triples until a sense token, a new section or the end of the token stream is reached.
 *  Coefficients that are zero according to SCIPisZero() are not stored.
 *
 *  Syntax problems are reported through syntaxError() and the function then returns SCIP_OKAY; a quadratic part
 *  ('[' or '^') is reported and returns SCIP_READERROR.
 */
static
SCIP_RETCODE readCoefficients(
   SCIP*                 scip,               /**< SCIP data structure */
   LPINPUT*              lpinput,            /**< LP reading data */
   SCIP_Bool             isobjective,        /**< indicates whether we are currently reading the coefficients of the objective */
   char*                 name,               /**< pointer to store the name of the line; must be at least of size
                                              *   LP_MAX_LINELEN */
   SCIP_VAR***           vars,               /**< pointer to store the array with variables (must be freed by caller) */
   SCIP_Real**           coefs,              /**< pointer to store the array with coefficients (must be freed by caller) */
   int*                  ncoefs,             /**< pointer to store the number of coefficients */
   SCIP_Bool*            newsection          /**< pointer to store whether a new section was encountered */
   )
{
   SCIP_Bool havesign;
   SCIP_Bool havevalue;
   SCIP_Real coef;
   int coefsign;
   int coefssize;

   assert(lpinput != NULL);
   assert(name != NULL);
   assert(vars != NULL);
   assert(coefs != NULL);
   assert(ncoefs != NULL);
   assert(newsection != NULL);

   /* initialize the outputs, so that they are defined on every return path */
   *vars = NULL;
   *coefs = NULL;
   *name = '\0';
   *ncoefs = 0;
   *newsection = FALSE;

   /* read the first token, which may be the name of the line */
   if( getNextToken(scip, lpinput) )
   {
      /* check if we reached a new section */
      if( isNewSection(scip, lpinput) )
      {
         *newsection = TRUE;
         return SCIP_OKAY;
      }

      /* remember the token in the token buffer */
      swapTokenBuffer(lpinput);

      /* get the next token and check, whether it is a colon */
      if( getNextToken(scip, lpinput) )
      {
         if( strcmp(lpinput->token, ":") == 0 )
         {
            /* the second token was a colon: the first token is the line name */
            (void)SCIPmemccpy(name, lpinput->tokenbuf, '\0', LP_MAX_LINELEN);

            /* make sure the name is terminated even if the token filled the whole buffer */
            name[LP_MAX_LINELEN - 1] = '\0';
            SCIPdebugMessage("(line %d) read constraint name: '%s'\n", lpinput->linenumber, name);
         }
         else
         {
            /* the second token was no colon: push the tokens back onto the token stack and parse them as coefficients */
            pushToken(lpinput);
            pushBufferToken(lpinput);
         }
      }
      else
      {
         /* there was only one token left: push it back onto the token stack and parse it as coefficient */
         pushBufferToken(lpinput);
      }
   }

   /* initialize buffers for storing the coefficients */
   coefssize = LP_INIT_COEFSSIZE;
   SCIP_CALL( SCIPallocMemoryArray(scip, vars, coefssize) );
   SCIP_CALL( SCIPallocMemoryArray(scip, coefs, coefssize) );

   /* read the coefficients */
   coefsign = +1;
   coef = 1.0;
   havesign = FALSE;
   havevalue = FALSE;
   *ncoefs = 0;
   while( getNextToken(scip, lpinput) )
   {
      SCIP_VAR* var;

      /* check if we read a sign */
      if( isSign(lpinput, &coefsign) )
      {
         SCIPdebugMessage("(line %d) read coefficient sign: %+d\n", lpinput->linenumber, coefsign);
         havesign = TRUE;
         continue;
      }

      /* check if we read a value */
      if( isValue(scip, lpinput, &coef) )
      {
         SCIPdebugMessage("(line %d) read coefficient value: %g with sign %+d\n", lpinput->linenumber, coef, coefsign);
         if( havevalue )
         {
            syntaxError(scip, lpinput, "two consecutive values.");
            return SCIP_OKAY;
         }
         havevalue = TRUE;
         continue;
      }

      /* check if we reached an equation sense */
      if( isSense(lpinput, NULL) )
      {
         if( isobjective )
         {
            syntaxError(scip, lpinput, "no sense allowed in objective");
            return SCIP_OKAY;
         }

         /* put the sense back onto the token stack */
         pushToken(lpinput);
         break;
      }

      /* check if we reached a new section, that will be only allowed when having no current sign and value and if we
       * are not in the quadratic part
       */
      if( (isobjective || (!havevalue && !havesign)) && isNewSection(scip, lpinput) )
      {
         /* a pending sign or value can only be present here when reading the objective (see the condition above) */
         if( havesign && !havevalue )
         {
            SCIPwarningMessage(scip, "skipped single sign %c without value or variable in objective\n", coefsign == 1 ? '+' : '-');
         }
         else if( isobjective && havevalue && !SCIPisZero(scip, coef) )
         {
            SCIPwarningMessage(scip, "constant term %+g in objective is skipped\n", coef * coefsign);
         }

         *newsection = TRUE;
         return SCIP_OKAY;
      }

      /* check if we start a quadratic part */
      if( *lpinput->token ==  '[' )
      {
         syntaxError(scip, lpinput, "diff reader does not support quadratic objective function.");
         return SCIP_READERROR;
      }

      /* all but the first coefficient need a sign */
      if( *ncoefs > 0 && !havesign )
      {
         syntaxError(scip, lpinput, "expected sign ('+' or '-') or sense ('<' or '>').");
         return SCIP_OKAY;
      }

      /* check if the last variable should be squared */
      if( *lpinput->token == '^' )
      {
         syntaxError(scip, lpinput, "diff reader does not support quadratic objective function.");
         return SCIP_READERROR;
      }
      else
      {
         /* the token is a variable name: get the corresponding variable */
         SCIP_CALL( getVariable(scip, lpinput->token, &var) );
      }

      /* insert the linear coefficient */
      SCIPdebugMessage("(line %d) read linear coefficient: %+g<%s>\n", lpinput->linenumber, coefsign * coef, SCIPvarGetName(var));
      if( !SCIPisZero(scip, coef) )
      {
         /* resize the vars and coefs array if needed */
         if( *ncoefs >= coefssize )
         {
            /* double the capacity, but never end up below the size needed for one more entry */
            coefssize *= 2;
            coefssize = MAX(coefssize, (*ncoefs)+1);
            SCIP_CALL( SCIPreallocMemoryArray(scip, vars, coefssize) );
            SCIP_CALL( SCIPreallocMemoryArray(scip, coefs, coefssize) );
         }
         assert(*ncoefs < coefssize);

         /* add coefficient */
         (*vars)[*ncoefs] = var;
         (*coefs)[*ncoefs] = coefsign * coef;
         (*ncoefs)++;
      }

      /* reset the flags and coefficient value for the next coefficient */
      coefsign = +1;
      coef = 1.0;
      havesign = FALSE;
      havevalue = FALSE;
   }

   return SCIP_OKAY;
}
Esempio n. 21
0
  /**
   * Tokenizes an XPath expression into a tree of Token objects.
   *
   * Tokens on the same nesting level are chained through Token::next. Each
   * opening '(', '[' or '{' becomes a Symbol token that is pushed on a local
   * stack; the tokens up to the matching closing character are linked below
   * it through Token::subExpression. Function arguments are regrouped under
   * ',' Symbol tokens. Consecutive name characters are collected in `qname`
   * and flushed as a single token by the pushQName macro.
   *
   * Malformed input (unbalanced quotes or brackets, empty function argument)
   * is reported through throwXPathException.
   *
   * @param xpath        NUL-terminated expression text
   * @param tokenFactory factory used to allocate every Token
   * @return the first token of the top-level chain, or NULL if no token was
   *         produced
   */
  XPathParser::Token*
  XPathParser::tokenize(const char* xpath, TokenFactory* tokenFactory)
  {
    /*
     * First, Define some convenient Macros.
     * These macros operate on the current Token, on the last Token, and on the token Stack
     *
     * allocToken : allocates a new token, puts it in current
     * nextToken  : puts a new token in current, update last <- current
     * newSymbolToken : creates a new token, initiated to the current symbol character
     * pushQName : push the qname string buffer as a new QName token
     * pushToken : push the current token at the top of the stack
     * pushSingleToken : push the current token at the top of the stack, but just for one QName token
     * popToken : pop the top of the stack to current token.
     */

#define allocToken() do { current = tokenFactory->allocToken(); } while(0)

#define nextToken() \
    do { allocToken(); \
    Log_XPathParser_Tokenize ( "New Token at %p\n", current ); \
    if ( ! headToken ) { headToken = current; last = current; }\
    else if ( last ) { last->next = current; last = current; } \
    else if ( tokenStack.size() ) \
    { AssertBug ( ! tokenStack.front()->subExpression, "Front token has already a subExpression !\n" ); \
      tokenStack.front()->subExpression = current; last = current; } \
    else { Bug ( "Don't know how to link this token !\n" ); } } while (0)

#define newSymbolToken() \
    Log_XPathParser_Tokenize ( "New Symbol Token '%c'\n", *c ); \
    nextToken();  \
    current->type = Token::Symbol; \
    current->symbol = *c;

#define pushQName() \
    if ( qname.size() ) \
    { 	\
      if ( last && last->isQName() && ( qname == "::" || stringEndsWith(last->token,String("::") ) ) ) \
      { \
        last->token += qname; qname = ""; \
      } \
      else \
      { \
        nextToken(); \
        current->token = qname; \
        qname = ""; \
        Log_XPathParser_Tokenize ( "New QName : '%s'\n", current->token.c_str() ); \
        if ( pushedSingleToken ) \
        { popToken (); } \
      } \
    }

#define pushToken() \
    pushedSingleToken = false; \
    Log_XPathParser_Tokenize ( "Pushing token '%p'\n", current ); \
    tokenStack.push_front ( current ); \
    current = last = NULL;

#define pushSingleToken() \
    pushToken (); \
    pushedSingleToken = true;

#define popToken() \
    AssertBug ( tokenStack.size(), "Empty Stack, can't pop !\n" ); \
    current = tokenStack.front ( ); \
    Log_XPathParser_Tokenize ( "Popped token '%p'\n", current ); \
    tokenStack.pop_front (); \
    last = current; \
    pushedSingleToken = false;

    Log_XPathParser_Tokenize ( "**** Tokenize : '%s' *****\n", xpath );

    Token* current = NULL, *last = NULL, *headToken = NULL;
    char lastChar = '\0';
    std::list<Token*> tokenStack;
    String qname, text;
    bool pushedSingleToken = false;

    for (const char* c = xpath; *c; c++)
      {
        Log_XPathParser_Tokenize ( "At Char '%c'\n", *c );
        if (lastChar == '!' && *c != '=')
          {
            Bug ( "Invalid character following '!' : '%c'\n", *c );
          }
        switch (*c)
          {
        case '\'':
        case '"':
          {
            /* Quoted text: copy everything up to the matching quote */
            char start = *c;
            c++;
            text = "";
            for (; *c && (*c != start); c++)
              {
                text += *c;
              }
            if (!*c)
              {
                throwXPathException ( "Unbalanced quote caracter '%c'\n", start );
              }
            nextToken ();
            current->type = Token::Text;
            current->token = text;
            break;
          }
        case '{':
        case '(':
        case '[':
          pushQName ();
          if (*c == '[' && last && last->symbol == '*')
            {
              /* A '*' directly before a predicate is a wildcard name test */
              last->type = Token::QName;
              last->token = "*";
            }
          newSymbolToken ();
          pushToken ();
          break;
        case ']':
        case '}':
        case ')':
          {
            pushQName ();
            popToken ();
            char mustBe;
            if (*c == ']')
              mustBe = '[';
            else if (*c == ')')
              mustBe = '(';
            else if (*c == '}')
              mustBe = '{';
            else
              {
                mustBe = '\0';
                Bug ( "Invalid section matcher from character '%c'\n", *c );
              }

            if (!current->isSymbol())
              {
                throwXPathException ( "Originating token is not a symbol !\n" );
              }
            if (current->symbol != mustBe)
              {
                /* current->symbol is the opening character, *c the closing one */
                throwXPathException ( "Unmatched closing symbols : openned with '%c', close with '%c'\n",
                    current->symbol, *c );
              }
            if (*c == ')')
              {
                /* Wrap the last function argument under its own ',' token,
                 * like the arguments before it */
                Token* lastArg = current->subExpression, *lastArg0 = NULL;
                while (lastArg)
                  {
                    if (lastArg->isSymbol() && lastArg->symbol == ',')
                      lastArg0 = lastArg;
                    lastArg = lastArg->next;
                  }
                if (lastArg0)
                  {
                    Token* father = current;
                    allocToken ();
                    current->type = Token::Symbol;
                    current->symbol = ',';
                    current->subExpression = lastArg0->next;
                    lastArg0->next = current;
                    current = last = father;
                  }
              }
          }
          break;
        case '$':
        case '@':
          /*
           * Variable and attribute short form
           * These short forms expect a QName defined after.
           */
          pushQName ();
          newSymbolToken ();
          pushSingleToken ();
          break;
        case ',':
          {
            /*
             * Here we must re-arrange the tokens
             * so that the ',' symbol appears with the contents underneath
             */
            pushQName ();
            AssertBug ( tokenStack.size(), "Empty token stack !\n" );
            Token* father = tokenStack.front();
            AssertBug ( father->isSymbol(), "father token is not a symbol !\n" );
            AssertBug ( father->symbol == '(', "father token is not the '(' symbol : %c\n", father->symbol );

            if (!father->subExpression)
              throwXPathException ( "Empty XPath function argument after '%c'", father->symbol );

            AssertBug ( father->subExpression, "father token has no subExpression !\n" );
            Token* lastArg = father->subExpression, *lastArg0 = NULL;
            while (lastArg)
              {
                if (lastArg->isSymbol() && lastArg->symbol == ',')
                  lastArg0 = lastArg;
                lastArg = lastArg->next;
              }
            allocToken ();
            current->type = Token::Symbol;
            current->symbol = ',';
            if (lastArg0)
              {
                current->subExpression = lastArg0->next;
                lastArg0->next = current;
              }
            else
              {
                current->subExpression = father->subExpression;
                father->subExpression = current;
              }
            last = current;
            break;
          }
        case '*':
          {
            /*
             * Meaning of the '*' character is ambiguous :
             * It may be a wildcard for node testing (as in '@*', 'axis::*', 'namespace:*', ..)
             * Or it may be the multiplication symbol.
             */
            bool isQName = false;
            if (qname.c_str() && *qname.c_str())
              {
                Log_XPathParser_Tokenize ( "QName : %s\n", qname.c_str() );
                if (__endsWith(qname.c_str(), ':'))
                  isQName = true;
              }
            else if (last && last->isQName())
              {
                Log_XPathParser_Tokenize ( "Last is qname : %s\n", last->token.c_str() );
                if (__endsWith(last->token.c_str(), ':') || last->token == "or") //< Fixup for docbook.xsl */self::ng:* or */self::db:*
                  isQName = true;
              }
            else if (!last || isCharIn(last->symbol, "/~|,+*-"))
              {
                isQName = true;
              }
            if (qname == "mod" || qname == "div")
              {
                /*
                 *  We must find the token before this one.
                 */
                if (last)
                  {
                    isQName = true;
                  }
                else
                  {
                    isQName = false;
                  }
                pushQName ();
                Log_XPathParser_Tokenize ( "Multiply with qname='%s', last=%p(%c/%s), qname=%d\n",
                    qname.c_str(), last, last ? last->symbol : '?',
                    last ? last->token.c_str() : "", isQName );
              }

            Log_XPathParser_Tokenize ( "While at char '*' : last=%p(%d,q=%d/%c/%s), qname='%s' isQName=%d\n",
                last, last ? last->type : -1, last ? last->isQName() : -1,
                last ? last->symbol : '?', last ? last->token.c_str() : "",
                qname.c_str(), isQName );

            if (isQName)
              {
                qname += *c;
              }
            else
              {
                pushQName ();
                newSymbolToken ();
              }
          }
          break;

        case '-':
          /*
           * Meaning of the '-' character is ambiguous :
           * It may be a character inside of a QName (as in 'xsl:for-each')
           * Or it may be the substraction operator ('op1 - op2')
           * Or it may be the unary negative operator ('-op1')
           */
          if (isNumeric(qname.c_str()) || lastChar == '\0' || isCharIn(
              lastChar, "\n\r ([)]=<>+-*"))
            {
              pushQName ();
              bool isUnary = false;
              if (!last || (last->isSymbol() && isCharIn(last->symbol,
                  "*+-=><|[/~,")))
                {
                  Log_XPathParser_Tokenize ( "Negate : lastChar='%c', last=%p, last->symbol=%c, c[1]=%c\n",
                      lastChar, last, last ? last->symbol : ' ', c[1] );
                  isUnary = true;
                }
              if (last && (last->token == "mod" || last->token == "div"))
                {
                  // We must find the token before this one.
                  Token* before = NULL;
                  if (tokenStack.size())
                    {
                      before = tokenStack.front()->subExpression;
                      if (before == last)
                        before = NULL;
                    }
                  else
                    before = headToken;
                  while (before)
                    {
                      if (before->next == last)
                        break;
                      before = before->next;
                    }
                  if (before)
                    isUnary = true;
                  Log_XPathParser_Tokenize ( "Minus with last='%s', before=%p(%c/%s), unary=%d\n",
                      last->token.c_str(), before, before ? before->symbol : '?',
                      before ? before->token.c_str() : "", isUnary );
                }
              newSymbolToken ( );
              if (isUnary)
                current->symbol = 'N'; // Negate !
            }
          else if (c[1] == '>')
            {
              /* First half of the '->' operator; completed in the '>' case below */
              pushQName ();
              newSymbolToken ();
            }
          else
            {
              qname += *c;
            }
          break;

        case '/':
          /*
           * The '/' character only appears in single step separator ('step/step') or initial root ('/step')
           * Or in descendant short form (as initial '//step' or inside 'step//step').
           */
          if (lastChar == '/')
            {
              AssertBug ( last->isSymbol() && last->symbol == lastChar, "Dropped the last in a lastChar\n" );
              last->token = last->symbol;
              last->token += *c;
              last->symbol = '~';
              break;
            }
          /* fall through : a single '/' ends up as a plain symbol token */
        case '=':
          if (*c == '=' && (lastChar == '<' || lastChar == '>' || lastChar
              == '!'))
            {
              /* Second character of '<=', '>=' or '!=' : extend the previous symbol */
              AssertBug ( last->isSymbol() && last->symbol == lastChar, "Dropped the last in a lastChar\n" );
              last->token = last->symbol;
              last->token += *c;
              break;
            }
          /* fall through */
        case '<':
        case '>':
          if (lastChar == '-')
            {
              if (last)
                {
                  Log_XPathParser_Tokenize ( "ElementFunctionCall : last=%c/'%s'\n", last->symbol, last->token.c_str() );
                  last->symbol = '#';
                }
              else
                {
                  Bug ( "No last !\n" );
                }
              Log_XPathParser_Tokenize ( "Operator '->' found !\n" );
              break;
            }
          /* fall through */
        case '+':
        case '!':
        case '|':
          pushQName ();
          newSymbolToken ( );
          break;
        case '\t':
        case ' ':
        case '\n':
        case '\r':
          pushQName ();
          break;

        default:
          qname += *c;
          break;
          }
        lastChar = *c;
      }
    pushQName ();
    if (tokenStack.size())
      {
        throwXPathException ( "Unbalanced parentheses : still %lu tokens on stack.\n", (unsigned long) tokenStack.size() );
      }
    return headToken;
#undef allocToken
#undef nextToken
#undef newSymbolToken
#undef pushQName
#undef pushToken
#undef pushSingleToken
#undef popToken
  }
Esempio n. 22
0
/*
 * Walks the NUL-terminated string `s` character by character and dispatches
 * each position to the helper matching what starts there: multi-line comment,
 * the literal "/deep/" combinator, single-line comment, quoted string, run of
 * spaces, punctuation, decimal number, or identifier.
 *
 *   s    - input text; its length is taken once with strlen().
 *   p    - table handed to isPunctuation() and parseIdentifier().
 *   info - handed unchanged to every helper, together with the address of the
 *          running token counter `tn`.
 *
 * Helpers that take `&pos` may move the scan position themselves; the loop
 * then continues from `pos + 1`.
 */
void _getTokens(const char* s, struct char_char* p, struct token_info* info)
{
    char urlMode = 0, blockMode = 0;
    int tn = 0, ln = 0;
    size_t pos = 0, slen = strlen(s);

    for(; pos < slen; pos++)
    {
        char c = s[pos];
        /* pos < slen, so pos+1 is at worst the NUL terminator: always readable */
        char cn = s[pos+1];

        if(c == '/' && cn == '*')
        {
            parseMLComment(info, &tn, ln, s, slen, &pos);
        }
        /*
         * strncmp() stops at the terminator, so a '/' close to the end of the
         * input can no longer cause a read past the buffer (the previous code
         * inspected s[pos+5] before checking the bytes in between).
         */
        else if (!urlMode && strncmp(s + pos, "/deep/", 6) == 0) {
            pushToken(info, &tn, ln, TOKENTYPE_DEEP, "/deep/");
            pos += 5; /* the loop increment skips the sixth character */
        }
        else if(!urlMode && c == '/' && cn == '/')
        {
            /* inside a { ... } block "//" is handed to the identifier parser,
               outside of one it starts a single-line comment */
            if(blockMode > 0)
            {
                parseIdentifier(info, &tn, ln, s, slen, &pos, &urlMode, p);
            }
            else
            {
                parseSLComment(info, &tn, ln, s, slen, &pos);
            }
        }
        else if(c == '"' || c == '\'')
        {
            /* the opening quote character is passed along as the last argument */
            parseString(info, &tn, ln, s, slen, &pos, c);
        }
        else if(c == ' ')
        {
            parseSpaces(info, &tn, ln, s, slen, &pos);
        }
        else if(isPunctuation(p, c))
        {
            /* the value returned by isPunctuation() is forwarded to pushChar() */
            pushChar(info, &tn, ln, isPunctuation(p, c), c);

            /* the line counter only advances here, i.e. for line breaks that
               isPunctuation() reports as punctuation */
            if(c == '\n' || c == '\r')
            {
                ln++;
            }

            /* a closing parenthesis clears urlMode */
            if(c == ')')
            {
                urlMode = 0;
            }

            /* track brace nesting depth */
            if(c == '{')
            {
                blockMode++;
            }

            if(c == '}')
            {
                blockMode--;
            }
        }
        else if(isDecimalDigit(c))
        {
            parseDecimalNumber(info, &tn, ln, s, slen, &pos);
        }
        else
        {
            /* anything else is handed to the identifier parser, which also
               receives the address of urlMode */
            parseIdentifier(info, &tn, ln, s, slen, &pos, &urlMode, p);
        }
    }
}
Esempio n. 23
0
/** tests whether the current token is a section keyword (PRESOLVED, NBLOCKS, BLOCK, MASTERCONSS, END;
 *  compared case-insensitively) and, if so, switches blkinput->section to that section
 *
 *  A token that is directly followed by ':' is never treated as a keyword.
 *  For BLOCK, the next token is read as the block number and stored decremented by one in blkinput->blocknr.
 *
 *  @return TRUE if a section keyword was recognized, FALSE otherwise
 */
static
SCIP_Bool isNewSection(
    SCIP*                 scip,               /**< SCIP data structure */
    BLKINPUT*             blkinput            /**< BLK reading data */
)
{
    SCIP_Bool followedbycolon = FALSE;
    const char* keyword;

    assert(blkinput != NULL);

    /* one-token lookahead: swap the token buffer so that the current token is kept while the next one is read */
    swapTokenBuffer(blkinput);

    if( getNextToken(blkinput) )
    {
        /* a ':' right after the first token means that token is a name, not a section keyword */
        followedbycolon = (strcmp(blkinput->token, ":") == 0);

        /* the lookahead token is passed to pushToken() -- presumably so that it gets read again later */
        pushToken(blkinput);
    }

    /* swap back: the first token is the current token again */
    swapTokenBuffer(blkinput);

    if( followedbycolon )
        return FALSE;

    /* all keyword comparisons below look at the restored first token */
    keyword = blkinput->token;

    if( strcasecmp(keyword, "PRESOLVED") == 0 )
    {
        SCIPdebugMessage("(line %d) new section: PRESOLVED\n", blkinput->linenumber);
        blkinput->section = BLK_PRESOLVED;
        return TRUE;
    }

    if( strcasecmp(keyword, "NBLOCKS") == 0 )
    {
        SCIPdebugMessage("(line %d) new section: NBLOCKS\n", blkinput->linenumber);
        blkinput->section = BLK_NBLOCKS;
        return TRUE;
    }

    if( strcasecmp(keyword, "BLOCK") == 0 )
    {
        int parsednr;

        blkinput->section = BLK_BLOCK;

        /* the keyword has to be followed by an integer token; a missing token and a non-integer token
         * are reported with the same message
         */
        if( getNextToken(blkinput) && isInt(scip, blkinput, &parsednr) )
        {
            assert(parsednr >= 0);
            assert(parsednr <= blkinput->nblocks);

            /* the stored block number is one less than the number read from the file */
            blkinput->blocknr = parsednr - 1;
        }
        else
            syntaxError(scip, blkinput, "no block number after block keyword!\n");

        SCIPdebugMessage("new section: BLOCK %d\n", blkinput->blocknr);

        return TRUE;
    }

    if( strcasecmp(keyword, "MASTERCONSS") == 0 )
    {
        blkinput->section = BLK_MASTERCONSS;

        SCIPdebugMessage("new section: MASTERCONSS\n");

        return TRUE;
    }

    if( strcasecmp(keyword, "END") == 0 )
    {
        SCIPdebugMessage("(line %d) new section: END\n", blkinput->linenumber);
        blkinput->section = BLK_END;
        return TRUE;
    }

    /* no section keyword matched */
    return FALSE;
}
Esempio n. 24
0
void Lexer::recognizeOperator() {
    consumeCharacter();
    pushToken(TokenType::OPERATOR);
}