Example #1
0
//Looks at the next token in the stream and returns it if it is, in fact, a string 
//Peeks at the next token and consumes/returns it only when it is a string
//(or character) literal; otherwise leaves the stream untouched and returns NULL
TOKEN string_literal(void)
{
    TOKEN next = peek_token();

    if(next == NULL)
    {
        return NULL;
    }

    switch(get_token_type(next))
    {
        case STRING_LITERAL:
        case CHARACTER_LITERAL:
            return get_token();
        default:
            return NULL;
    }
}
Example #2
0
//Looks at the next token in the stream and returns it if it is, in fact, a constant token
//FIXME: this doesn't actually follow the C-grammar at the moment. We will need to fix this to handle enums later
TOKEN constant(void)
{
    TOKEN tok = NULL;
    tok = peek_token();
    if((tok != NULL) &&
            ((get_token_type(tok) == NUMBER_TOKEN) ||
             (get_token_type(tok) == CHARACTER_LITERAL))) //FIXME: character literals are broken in the lexer right now and I don't feel like
                        //fixing the lexer to recognize them at the moment.
    {
        tok = get_token();
    }
    else
    {
        tok = NULL;
    }
    return tok;
}
Example #3
0
//Looks at the next token in the stream and returns it if it is, in fact, a character
//Peeks at the next token and consumes/returns it only when it is a character
//literal; otherwise returns NULL and leaves the stream untouched
TOKEN character_constant(void)
{
    TOKEN next = peek_token();

    if((next == NULL) || (get_token_type(next) != CHARACTER_LITERAL))
    {
        return NULL;
    }

    return get_token();
}
Example #4
0
//Peeks at the next token and consumes/returns it only when it is an
//identifier; otherwise returns NULL and leaves the stream untouched
TOKEN identifier(void)
{
    TOKEN next = peek_token();

    if((next != NULL) && (get_token_type(next) == IDENTIFIER_TOKEN))
    {
        return get_token();
    }

    return NULL;
}
Example #5
0
/**
 * Formats a Python sequence of Token objects into a single string.
 *
 * Accepts any iterable of Token objects, accumulates the wrapped core tokens,
 * and returns the formatted text as a Python unicode object.  Returns NULL
 * (with a Python exception set) on any failure.
 */
PyObject* Preprocessor_format_tokens(Preprocessor *self, PyObject *args)
{
    PyObject *tokens;
    if (!PyArg_ParseTuple(args, "O:format_tokens", &tokens))
        return NULL;

    ScopedPyObject iter(PyObject_GetIter(tokens));
    if (!iter)
        return NULL;

    try
    {
        // Drain the iterator, collecting the wrapped core tokens.
        std::vector<cmonster::core::Token> collected;
        while (true)
        {
            ScopedPyObject element(PyIter_Next(iter));
            if (!element)
            {
                // NULL means either exhaustion or an error; tell them apart.
                if (PyErr_Occurred())
                    return NULL;
                break;
            }
            if (!PyObject_TypeCheck(element, get_token_type()))
            {
                PyErr_SetString(PyExc_TypeError, "Expected sequence of tokens");
                return NULL;
            }
            collected.push_back(get_token((Token*)(PyObject*)element));
        }

        // Render the token sequence and hand it back as a unicode object.
        std::ostringstream stream;
        if (!collected.empty())
            self->preprocessor->format(stream, collected);
        std::string text = stream.str();
        return PyUnicode_FromStringAndSize(text.data(), text.size());
    }
    catch (...)
    {
        set_python_exception();
        return NULL;
    }
}
Example #6
0
//Verifies that the next token in the stream matches the given type/subtype
//pair.  On a match the token is consumed; on a mismatch the supplied
//errorAction (or a default diagnostic when errorAction is NULL) runs and the
//process exits.
//NOTE(review): peek_token() may return NULL here and is passed straight to
//get_token_type()/get_token_subtype() -- confirm those handle NULL.
void expect(TokenType tType, unsigned int whichToken, void (*errorAction)(void))
{
    TOKEN next = peek_token();
    int matches = (get_token_type(next) == tType)
                    && (get_token_subtype(next) == whichToken);

    if(!matches)
    {
        if(errorAction != NULL)
        {
            errorAction();
        }
        else
        {
            printf("ERROR: an unexpected token was encountered on line %5lu of the source file\n", get_source_code_line_number());
        }
        //if(recovery options are set)
        //{
        // do recovery stuff
        // }
        // else
        // barf
        exit(EXIT_FAILURE);
    }
    get_token();
}
Example #7
0
// Reads the next token from parsestate->position into 'token', handling
// double-quoted spans (whitespace inside quotes does not end a token, and a
// quoted special token such as '">"' is classified TOK_NORMAL).  Stores at
// most TOKENSIZE - 1 characters; longer tokens yield TOK_ERROR.  Updates
// parsestate->position / last_position so parse_ungettoken() works.
void
parse_gettoken(parsestate_t *parsestate, token_t *token)
{
	int i;
	char *str = parsestate->position;	// current string
	int quote_state;			// inside/outside a quoted span
	int any_quotes;				// whether any quote was seen in this token

	// Skip initial whitespace in 'str'.
	while (isspace(*str))
		str++;

	// Report TOK_END at the end of the command string.
	if (*str == '\0') {
		// Save initial position so parse_ungettoken() will work
		parsestate->last_position = parsestate->position;
		token->buffer[0] = '\0';	// empty token
		token->type = TOK_END;
		return;
	}

	// Copy the next token into 'token', terminating it with a null
	// character.  Quote characters toggle quote_state and are not copied.

	i = 0;
	quote_state = QUOTE_OUT;
	any_quotes = QUOTE_OUT;

	while (*str != '\0') {
		if (i >= TOKENSIZE - 1)
			// Token too long; this is an error
			goto error;
		while (*str == '"') {
			quote_state = - quote_state;
			any_quotes = QUOTE_IN;
			str++;
		}

		// BUGFIX: when a token ends with a closing quote at the end of
		// the string, the old code fell through, copied the '\0'
		// terminator into the buffer, and advanced 'str' past the end
		// of the string (the next loop iteration then read out of
		// bounds).  Stop cleanly at the terminator instead.
		if (*str == '\0')
			break;

		if ((quote_state == QUOTE_OUT) && isspace(*str))
			break;
		else
			token->buffer[i++] = *str++;
	}
	token->buffer[i] = '\0';	// end the token string

	// Save initial position so parse_ungettoken() will work
	parsestate->last_position = parsestate->position;
	// Move current position in place for the next token
	parsestate->position = str;

	// Classify the token.  Anything that contained quotes is TOK_NORMAL
	// (so quoted operators are treated as plain words); everything else is
	// classified by get_token_type().
	if (any_quotes == QUOTE_IN)
		token->type = TOK_NORMAL;
	else
		token->type = get_token_type(token->buffer);
	return;

 error:
	token->buffer[0] = '\0';
	token->type = TOK_ERROR;
}
Example #8
0
  // Scans 'inputStream' from a persistent cursor and returns the next token,
  // splicing one or two new nodes into the doubly-linked token list after
  // 'prev': t1 holds the word accumulated in 'str', t2 holds the delimiter or
  // operator character that terminated it.
  // NOTE(review): 'pos' and 'parenthesisCounter' are function-static, so this
  // tokenizer can only process a single stream per process -- confirm callers
  // never restart tokenization.
  // NOTE(review): t2->type is read below before being written on the plain
  // delimiter paths -- verify checked_malloc zero-initializes, otherwise this
  // reads uninitialized memory.
  token_t get_next_token(char* inputStream, struct token *prev)
{
  // Cursor into inputStream; survives across calls.
  static int pos = -1;
  int token_size = 0;               // characters accumulated in 'str' so far
  int max_token_size = 20;          // current capacity of 'str'
  char *str = checked_malloc(max_token_size*sizeof(char));
  str[token_size] = '\0';
  token_t t1 = (token_t)checked_malloc(sizeof(struct token));
  token_t t2 = (token_t)checked_malloc(sizeof(struct token));
  // Running '(' vs ')' balance across the whole stream.
  static int parenthesisCounter = 0;

  for(;;)
  {
    pos++;
    // End of stream: validate trailing context, then emit what we have.
    if(inputStream[pos] == EOF)
    {
      // An operator may not be the last meaningful thing before EOF.
      if (prev->type != EMPTY && (prev->prev->type == AND || prev->prev->type == OR 
        || prev->prev->type == OPEN_ANGLE || prev->prev->type == CLOSE_ANGLE) && strcmp(str, "") == 0)
        error(1, 0, "%d: Incorrect syntax near EOF", lineNumber);
      if (parenthesisCounter != 0)
        error(1, 0, "%d: Incorrect syntax of parenthesis", lineNumber);
      goto return_token;
    }
    // Grow the word buffer in 20-byte steps as needed.
    if(token_size == max_token_size)
    {
      max_token_size += 20;
      str = checked_realloc(str, max_token_size*sizeof(char));
    }

    //while ((inputStream[pos] == ' ' || inputStream[pos] == '\t') && prev->type == WHITESPACE)
      //pos++;

    // Backquote is not supported by this shell.
    if (inputStream[pos] == '`')
      error(1, 0, "%d: Incorrect syntax near token \'`\'", lineNumber);

    // Single-character delimiters and operators end the current word.
    if(inputStream[pos] == ' ' || inputStream[pos] == '\n' || inputStream[pos] == '\t' || inputStream[pos] == '('
      || inputStream[pos] == ')' || inputStream[pos] == '<' || inputStream[pos] == '>' || inputStream[pos] == ';')
    {
      if (inputStream[pos] == '\n')
        lineNumber++;
      // Redirection must follow a word: reject '<'/'>' with nothing before it.
      if ((inputStream[pos] == '>' || inputStream[pos] == '<') && ((strcmp(str, "") == 0 && prev->type == EMPTY) || pos == 0))
        error(1, 0, "%d: Incorrect syntax near I/O redirection", lineNumber);
      // '>>>' is never valid.
      if (inputStream[pos] == '>' && inputStream[pos-1] == '>' && inputStream[pos+1] == '>')
        error(1, 0, "%d: Incorrect syntax near I/O redirection", lineNumber);
      // '>' needs a target before the stream ends.
      if (inputStream[pos] == '>' && inputStream[pos+1] == EOF)
        error(1, 0, "%d: Incorrect syntax near I/O redirection", lineNumber);
      if (inputStream[pos] == ';')
      {
        // ';;' or ';' with no command before it is a syntax error.
        if (prev->type == SEMI_COLON)
          error(1, 0, "%d: Incorrect syntax near token \';\'", lineNumber);
        if ((pos == 0 || prev->type == NEWLINE) && (strcmp(str, "") == 0 && prev->type == EMPTY))
          error(1, 0, "%d: Incorrect syntax near token \';\'", lineNumber);
      }
      if (inputStream[pos] == ';')
      {
        // Walk back through any run of whitespace tokens: ';' directly after
        // a newline (ignoring whitespace) is also an error.
        if (prev->type == WHITESPACE) {
        token_t temp = prev;
        while (temp->type == WHITESPACE && temp->type != EMPTY)
        {
          if (temp->prev->type == NEWLINE)
            error(1, 0, "%d: Incorrect syntax near token \';\'", lineNumber);
          temp = temp->prev;
        }
      }
      }
      if (inputStream[pos] == '(') {
        parenthesisCounter++;
      }
      if (inputStream[pos] == ')') {
        parenthesisCounter--;
        if (parenthesisCounter < 0)
          error(1, 0, "%d: Incorrect closing parenthesis", lineNumber);
      }
      
      goto return_token;
    }
    // '&' must appear as '&&' (AND) and must follow a command.
    if (inputStream[pos] == '&')
    {
      if (prev->type == AND)
        error(1, 0, "%d: Incorrect syntax near token \'&\'", lineNumber);
      if ((pos == 0 || prev->type == NEWLINE) && (strcmp(str, "") == 0 && prev->type == EMPTY))
        error(1, 0, "%d: Incorrect syntax near token \'&\'", lineNumber);
      // As for ';' above: skip whitespace tokens when checking what precedes.
      if (prev->type == WHITESPACE) {
        token_t temp = prev;
        while (temp->type == WHITESPACE && temp->type != EMPTY)
        {
          if (temp->prev->type == NEWLINE)
            error(1, 0, "%d: Incorrect syntax near token \'&\'", lineNumber);
          temp = temp->prev;
        }
      }
      if (inputStream[pos+1] == '&')
      {
        pos++; t2->type = AND;
      }
      else
        // A lone '&' is not supported.
        t2->type = INVALID;
      goto return_token;
    }
    // '|' is either '||' (OR) or a single pipe; same preceding-context checks.
    if (inputStream[pos] == '|')
    {
      if (prev->type == OR)
        error(1, 0, "%d: Incorrect syntax near token \'|\'", lineNumber);
      if ((pos == 0 || prev->type == NEWLINE) && (strcmp(str, "") == 0 && prev->type == EMPTY))
        error(1, 0, "%d: Incorrect syntax near token \'|\'", lineNumber);
      if (prev->type == WHITESPACE) {
        token_t temp = prev;
        while (temp->type == WHITESPACE && temp->type != EMPTY)
        {
          if (temp->prev->type == NEWLINE)
            error(1, 0, "%d: Incorrect syntax near token \'|\'", lineNumber);
          temp = temp->prev;
        }
      }
      if (inputStream[pos+1] == '|')
      {
        pos++; t2->type = OR;
      }
      goto return_token;
    }
    // '#' starts a comment: consume everything up to the newline.
    if (inputStream[pos] == '#')
    {
      // if (token_size != 0)
      //   fprintf(stderr, "error in comment token");

      while (inputStream[pos] != '\n')
      {
        pos++;
      }
      lineNumber++;
      goto return_token;
    }
    // Ordinary character: append it to the word being built.
    str[token_size] = inputStream[pos];
    token_size++;
  }

  return_token:
  // t2 wraps the single delimiter character that ended the scan.
  t2->str = checked_malloc(2*sizeof(char));
  t2->str[0] = inputStream[pos];
  t2->str[1] = '\0';
  t2->prev = t1;

  // AND/OR/INVALID were decided above; everything else is classified by text.
  if (t2 -> type != AND && t2->type != OR && t2->type != INVALID)
    t2->type = get_token_type(t2->str);

  // Make room for the terminating NUL if the buffer is exactly full.
  if(token_size == max_token_size)
  {
    max_token_size += 1;
    str = checked_realloc(str, max_token_size*sizeof(char));
  }
  str[token_size] = '\0';
  t1->str = str;
  t1->prev = prev;
  prev->next = t1;
  t1->next = t2;
  t1->type = get_token_type(t1->str);
  // No word characters were accumulated: drop t1 and splice t2 directly
  // after 'prev'.
  if (t1 -> type == EMPTY)
  {
    token_t toDelete = t1;
    token_t toReturn = t2;
    t1->prev->next=t2;
    t2->prev = t1->prev;
    // NOTE(review): toDelete->str ('str') is not freed here -- looks like a
    // small memory leak; verify.
    free(toDelete);
    return toReturn;
  }
  else
  return t2;
}
Example #9
0
/**
 * Expands a function-style macro by invoking the wrapped Python callable.
 *
 * Converts 'arguments' into a tuple of Token objects, publishes the
 * "preprocessor" and "location" globals, calls the callable, then converts
 * the result (None, a string to be tokenized, or a sequence of Tokens) into
 * a vector of core tokens.  Throws python_exception / std::runtime_error on
 * failure.
 */
std::vector<cmonster::core::Token>
FunctionMacro::operator()(
    clang::SourceLocation const& expansion_location,
    std::vector<cmonster::core::Token> const& arguments) const
{
    // Create the arguments tuple.
    ScopedPyObject args_tuple = PyTuple_New(arguments.size());
    if (!args_tuple)
        throw std::runtime_error("Failed to create argument tuple");
    for (Py_ssize_t i = 0; i < static_cast<Py_ssize_t>(arguments.size()); ++i)
    {
        Token *token = create_token(m_preprocessor, arguments[i]);
        if (!token)
            throw python_exception();
        // PyTuple_SetItem steals the reference to 'token'.
        PyTuple_SetItem(args_tuple, i, reinterpret_cast<PyObject*>(token));
    }

    // Set the "preprocessor" and "location" global variables.
    //
    // XXX How do we create a closure via the C API? It would be better if we
    // could bind a function to the preprocessor it was created with, when we
    // define the function.
    PyObject *globals = PyEval_GetGlobals();
    if (globals)
    {
        // PyDict_SetItem does NOT steal references: it increments the
        // refcounts of key and value itself.  The previous code leaked both
        // key objects, one extra reference to m_preprocessor (via a redundant
        // Py_INCREF), and the SourceLocation object on every expansion; the
        // ScopedPyObject wrappers below release our owning references.
        {
            ScopedPyObject key(PyUnicode_FromString("preprocessor"));
            if (!key)
                throw python_exception();
            if (PyDict_SetItem(globals, key, (PyObject*)m_preprocessor) == -1)
                throw python_exception();
        }

        cmonster::core::Preprocessor &pp = get_preprocessor(m_preprocessor);
        ScopedPyObject location((PyObject*)create_source_location(
            expansion_location, pp.getClangPreprocessor().getSourceManager()));
        if (!location)
            throw python_exception();
        ScopedPyObject key(PyUnicode_FromString("location"));
        if (!key)
            throw python_exception();
        if (PyDict_SetItem(globals, key, location) == -1)
            throw python_exception();
    }

    // Call the function.
    ScopedPyObject py_result = PyObject_Call(m_callable, args_tuple, NULL);
    if (!py_result)
        throw python_exception();

    // Transform the result.
    std::vector<cmonster::core::Token> result;
    if (py_result == Py_None)
        return result;

    // Is it a string? If so, tokenize it.
    if (PyUnicode_Check(py_result))
    {
        ScopedPyObject utf8(PyUnicode_AsUTF8String(py_result));
        if (!utf8)
            throw python_exception();
        char *u8_chars;
        Py_ssize_t u8_size;
        if (PyBytes_AsStringAndSize(utf8, &u8_chars, &u8_size) == -1)
            throw python_exception();
        cmonster::core::Preprocessor &pp = get_preprocessor(m_preprocessor);
        result = pp.tokenize(u8_chars, u8_size);
        return result;
    }

    // If it's not a string, it should be a sequence of Token objects.
    if (!PySequence_Check(py_result))
    {
        throw python_exception(PyExc_TypeError,
            "macro functions must return a sequence of tokens");
    }

    const Py_ssize_t seqlen = PySequence_Size(py_result);
    if (seqlen == -1)
        throw python_exception();
    for (Py_ssize_t i = 0; i < seqlen; ++i)
    {
        ScopedPyObject token_ = PySequence_GetItem(py_result, i);
        // Guard against item-access failure before the type check
        // (PyObject_TypeCheck on NULL would crash).
        if (!token_)
            throw python_exception();
        if (!PyObject_TypeCheck(token_, get_token_type()))
        {
            // Invalid return value.
            throw python_exception(PyExc_TypeError,
                "macro functions must return a sequence of tokens");
        }
        Token *token = (Token*)(PyObject*)token_;
        result.push_back(get_token(token));
    }
    return result;
}