//Looks at the next token in the stream and returns it if it is, in fact, a string TOKEN string_literal(void) { TOKEN tok = NULL; tok = peek_token(); if((tok != NULL) && ((get_token_type(tok) == STRING_LITERAL) || (get_token_type(tok) == CHARACTER_LITERAL))) { tok = get_token(); } else { tok = NULL; } return tok; }
//Looks at the next token in the stream and returns it if it is, in fact, a constant token //FIXME: this doesn't actually follow the C-grammar at the moment. We will need to fix this to handle enums later TOKEN constant(void) { TOKEN tok = NULL; tok = peek_token(); if((tok != NULL) && ((get_token_type(tok) == NUMBER_TOKEN) || (get_token_type(tok) == CHARACTER_LITERAL))) //FIXME: character literals are broken in the lexer right now and I don't feel like //fixing the lexer to recognize them at the moment. { tok = get_token(); } else { tok = NULL; } return tok; }
//Looks at the next token in the stream and returns it if it is, in fact, a character TOKEN character_constant(void) { TOKEN tok = NULL; tok = peek_token(); if((tok != NULL) && (get_token_type(tok) == CHARACTER_LITERAL)) { tok = get_token(); } else { tok = NULL; } return tok; }
// Peeks at the incoming token and consumes it only when it is an
// identifier. Returns the consumed token, or NULL (stream untouched)
// otherwise.
TOKEN identifier(void)
{
    TOKEN next = peek_token();

    if ((next == NULL) || (get_token_type(next) != IDENTIFIER_TOKEN))
    {
        return NULL;
    }

    // Matched -- actually pull the token off of the stream.
    return get_token();
}
// Python method: format_tokens(tokens) -> str.
//
// Consumes any iterable of Token objects, collects them into a vector, and
// returns the preprocessor's formatted rendering of the sequence as a
// Python unicode string. Returns NULL with a Python exception set on any
// failure (bad arguments, non-Token item, iteration error, or a C++
// exception translated by set_python_exception).
PyObject* Preprocessor_format_tokens(Preprocessor *self, PyObject *args)
{
    PyObject *tokens;
    if (!PyArg_ParseTuple(args, "O:format_tokens", &tokens))
        return NULL;
    // ScopedPyObject releases the iterator reference on every exit path.
    ScopedPyObject iter(PyObject_GetIter(tokens));
    if (!iter)
        return NULL;
    try
    {
        // Iterate through the tokens, accumulating in a vector.
        std::vector<cmonster::core::Token> token_vector;
        for (;;)
        {
            ScopedPyObject item(PyIter_Next(iter));
            if (!item)
            {
                // PyIter_Next returns NULL both on exhaustion and on error;
                // PyErr_Occurred() distinguishes the two cases.
                if (PyErr_Occurred())
                    return NULL;
                else
                    break;
            }
            if (!PyObject_TypeCheck(item, get_token_type()))
            {
                PyErr_SetString(PyExc_TypeError, "Expected sequence of tokens");
                return NULL;
            }
            // get_token extracts the wrapped core token (copied into the
            // vector), so the Python object may be released afterwards.
            token_vector.push_back(get_token((Token*)(PyObject*)item));
        }
        // Format the sequence of tokens.
        std::ostringstream ss;
        if (!token_vector.empty())
            self->preprocessor->format(ss, token_vector);
        std::string formatted = ss.str();
        // FromStringAndSize (not FromString) preserves embedded NULs.
        return PyUnicode_FromStringAndSize(formatted.data(), formatted.size());
    }
    catch (...)
    {
        // Translate any C++ exception into a Python exception.
        set_python_exception();
        return NULL;
    }
}
// Verifies that the next token in the stream has the expected type and
// subtype, then consumes it.
//
// tType       - the TokenType the next token must have
// whichToken  - the expected subtype (get_token_subtype) of that token
// errorAction - optional callback invoked on mismatch; when NULL a default
//               diagnostic is printed instead
//
// On mismatch (or end of stream) the error action runs and the process
// terminates with EXIT_FAILURE; on success the token is consumed.
void expect(TokenType tType, unsigned int whichToken, void (*errorAction)(void))
{
    TOKEN peek = peek_token();

    // Guard against end-of-stream: the sibling helpers (string_literal,
    // constant, identifier, ...) all NULL-check peek_token() before calling
    // get_token_type(); do the same here instead of dereferencing NULL.
    if ((NULL == peek)
        || (tType != get_token_type(peek))
        || (whichToken != get_token_subtype(peek)))
    {
        if (NULL == errorAction)
        {
            // Diagnostics belong on stderr, not stdout.
            fprintf(stderr,
                "ERROR: an unexpected token was encountered on line %5lu of the source file\n",
                get_source_code_line_number());
        }
        else
        {
            errorAction();
        }
        //if(recovery options are set)
        //{
        //  do recovery stuff
        // }
        // else
        // barf
        exit(EXIT_FAILURE);
    }

    // Expected token found -- consume it.
    get_token();
}
// Reads the next whitespace-delimited token from parsestate->position into
// 'token', handling double-quoted sections (whitespace inside quotes does
// not end the token; the quote characters themselves are dropped).
// Sets token->type to TOK_END at end of input, TOK_ERROR when the token
// exceeds TOKENSIZE - 1 characters, TOK_NORMAL for any quoted token, and
// otherwise whatever get_token_type() classifies the text as.
// parsestate->last_position is saved so parse_ungettoken() can rewind.
void parse_gettoken(parsestate_t *parsestate, token_t *token)
{
    int i;
    char *str = parsestate->position;   // current string
    int quote_state;   // Hint!  inside/outside a quoted section
    int any_quotes;    // Hint!  whether any quote was seen in this token

    // EXERCISE: Skip initial whitespace in 'str'.
    /* Your code here. */
    while (isspace(*str))
        str++;

    // Report TOK_END at the end of the command string.
    if (*str == '\0') {
        // Save initial position so parse_ungettoken() will work
        parsestate->last_position = parsestate->position;
        token->buffer[0] = '\0';   // empty token
        token->type = TOK_END;
        return;
    }

    // EXERCISE: Store the next token into 'token', and terminate the token
    // with a null character.  Handle quotes properly.  Store at most
    // TOKENSIZE - 1 characters into 'token' (plus a terminating null
    // character); longer tokens should cause an error.
    // The current code treats the entire rest of the string as a token.
    // Change this code to handle quotes and terminate tokens at spaces.
    i = 0;
    quote_state = QUOTE_OUT;
    any_quotes = QUOTE_OUT;
    //while (*str != '\0' && !isspace(*str)) {
    while (*str != '\0') {
        if (i >= TOKENSIZE - 1)
            // Token too long; this is an error
            goto error;
        // Each '"' flips quote_state; any_quotes latches that a quote was
        // seen. NOTE(review): the toggle relies on QUOTE_IN == -QUOTE_OUT
        // and both being nonzero -- confirm the macro definitions.
        while (*str == '"') {
            quote_state = - quote_state;
            any_quotes = QUOTE_IN;
            str++;
        }
        // Whitespace ends the token only outside of quotes.
        if ((quote_state == QUOTE_OUT) && isspace(*str))
            break;
        else
            token->buffer[i++] = *str++;
    }
    token->buffer[i] = '\0';   // end the token string

    // Save initial position so parse_ungettoken() will work
    parsestate->last_position = parsestate->position;
    // Move current position in place for the next token
    parsestate->position = str;

    // EXERCISE: Examine the token and store its type in token->type.
    // Quoted special tokens, such as '">"', have type TOK_NORMAL.
    // done by get_token_type func
    /* Your code here. */
    //token->type = TOK_NORMAL;
    if (any_quotes == QUOTE_IN)
        token->type = TOK_NORMAL;
    else
        token->type = get_token_type(token->buffer);
    return;

error:
    // Overlong token: report an empty TOK_ERROR token.
    token->buffer[0] = '\0';
    token->type = TOK_ERROR;
}
// Scans the next token from inputStream, appending the new token node(s)
// onto the linked list ending at 'prev', and returns the tail of the list.
//
// Two nodes are built per call: t1 holds the accumulated word text (the
// characters scanned before a delimiter), and t2 holds the single-character
// delimiter that ended the scan ("&&"/"||" collapse into one t2). If the
// accumulated word is empty (t1 classifies as EMPTY) t1 is unlinked and
// freed so only the delimiter node survives. Syntax errors abort the
// program via error(1, 0, ...), reporting the global lineNumber.
//
// NOTE(review): 'pos' and 'parenthesisCounter' are static, so this lexer is
// single-stream only and not restartable. Comparing inputStream[pos] to EOF
// assumes the buffer was terminated with an EOF char -- confirm with the
// caller that fills inputStream. inputStream[pos-1] is read when pos could
// be 0 (out-of-bounds), and the '#' comment loop does not check for EOF, so
// a comment on the last line without a trailing newline would overrun.
token_t get_next_token(char* inputStream, struct token *prev)
{
    static int pos = -1;                 // cursor into inputStream, persists across calls
    int token_size = 0;
    int max_token_size = 20;
    char *str = checked_malloc(max_token_size*sizeof(char));
    str[token_size] = '\0';
    token_t t1 = (token_t)checked_malloc(sizeof(struct token));
    token_t t2 = (token_t)checked_malloc(sizeof(struct token));
    static int parenthesisCounter = 0;   // open-paren depth, persists across calls
    for(;;)
    {
        pos++;
        if(inputStream[pos] == EOF)
        {
            // EOF directly after a binary operator or redirection is a
            // syntax error; so is an unbalanced parenthesis count.
            if (prev->type != EMPTY
                && (prev->prev->type == AND || prev->prev->type == OR
                    || prev->prev->type == OPEN_ANGLE || prev->prev->type == CLOSE_ANGLE)
                && strcmp(str, "") == 0)
                error(1, 0, "%d: Incorrect syntax near EOF", lineNumber);
            if (parenthesisCounter != 0)
                error(1, 0, "%d: Incorrect syntax of parenthesis", lineNumber);
            goto return_token;
        }
        // Grow the word buffer before appending another character.
        if(token_size == max_token_size)
        {
            max_token_size += 20;
            str = checked_realloc(str, max_token_size*sizeof(char));
        }
        //while ((inputStream[pos] == ' ' || inputStream[pos] == '\t') && prev->type == WHITESPACE)
        //pos++;
        if (inputStream[pos] == '`')
            error(1, 0, "%d: Incorrect syntax near token \'`\'", lineNumber);
        // Single-character delimiters: whitespace, newline, parens,
        // redirections and semicolons all end the current word.
        if(inputStream[pos] == ' ' || inputStream[pos] == '\n'
            || inputStream[pos] == '\t' || inputStream[pos] == '('
            || inputStream[pos] == ')' || inputStream[pos] == '<'
            || inputStream[pos] == '>' || inputStream[pos] == ';')
        {
            if (inputStream[pos] == '\n')
                lineNumber++;
            // Redirections need a word (or something) on their left.
            if ((inputStream[pos] == '>' || inputStream[pos] == '<')
                && ((strcmp(str, "") == 0 && prev->type == EMPTY) || pos == 0))
                error(1, 0, "%d: Incorrect syntax near I/O redirection", lineNumber);
            // ">>>" is never valid.
            if (inputStream[pos] == '>' && inputStream[pos-1] == '>'
                && inputStream[pos+1] == '>')
                error(1, 0, "%d: Incorrect syntax near I/O redirection", lineNumber);
            // '>' must have a target before EOF.
            if (inputStream[pos] == '>' && inputStream[pos+1] == EOF)
                error(1, 0, "%d: Incorrect syntax near I/O redirection", lineNumber);
            if (inputStream[pos] == ';')
            {
                // No doubled ';' and no ';' at the start of a command.
                if (prev->type == SEMI_COLON)
                    error(1, 0, "%d: Incorrect syntax near token \';\'", lineNumber);
                if ((pos == 0 || prev->type == NEWLINE)
                    && (strcmp(str, "") == 0 && prev->type == EMPTY))
                    error(1, 0, "%d: Incorrect syntax near token \';\'", lineNumber);
            }
            if (inputStream[pos] == ';')
            {
                // Walk back through whitespace tokens: a ';' that follows
                // only whitespace after a newline starts an empty command.
                if (prev->type == WHITESPACE)
                {
                    token_t temp = prev;
                    while (temp->type == WHITESPACE && temp->type != EMPTY)
                    {
                        if (temp->prev->type == NEWLINE)
                            error(1, 0, "%d: Incorrect syntax near token \';\'", lineNumber);
                        temp = temp->prev;
                    }
                }
            }
            if (inputStream[pos] == '(')
            {
                parenthesisCounter++;
            }
            if (inputStream[pos] == ')')
            {
                parenthesisCounter--;
                if (parenthesisCounter < 0)
                    error(1, 0, "%d: Incorrect closing parenthesis", lineNumber);
            }
            goto return_token;
        }
        if (inputStream[pos] == '&')
        {
            if (prev->type == AND)
                error(1, 0, "%d: Incorrect syntax near token \'&\'", lineNumber);
            if ((pos == 0 || prev->type == NEWLINE)
                && (strcmp(str, "") == 0 && prev->type == EMPTY))
                error(1, 0, "%d: Incorrect syntax near token \'&\'", lineNumber);
            // Same whitespace-walkback check as for ';' above.
            if (prev->type == WHITESPACE)
            {
                token_t temp = prev;
                while (temp->type == WHITESPACE && temp->type != EMPTY)
                {
                    if (temp->prev->type == NEWLINE)
                        error(1, 0, "%d: Incorrect syntax near token \'&\'", lineNumber);
                    temp = temp->prev;
                }
            }
            // "&&" collapses into a single AND token; a lone '&' is INVALID.
            if (inputStream[pos+1] == '&')
            {
                pos++;
                t2->type = AND;
            }
            else
                t2->type = INVALID;
            goto return_token;
        }
        if (inputStream[pos] == '|')
        {
            if (prev->type == OR)
                error(1, 0, "%d: Incorrect syntax near token \'|\'", lineNumber);
            if ((pos == 0 || prev->type == NEWLINE)
                && (strcmp(str, "") == 0 && prev->type == EMPTY))
                error(1, 0, "%d: Incorrect syntax near token \'|\'", lineNumber);
            if (prev->type == WHITESPACE)
            {
                token_t temp = prev;
                while (temp->type == WHITESPACE && temp->type != EMPTY)
                {
                    if (temp->prev->type == NEWLINE)
                        error(1, 0, "%d: Incorrect syntax near token \'|\'", lineNumber);
                    temp = temp->prev;
                }
            }
            // "||" collapses into OR; note a lone '|' (pipe) keeps t2's
            // type unset here and is classified later by get_token_type.
            if (inputStream[pos+1] == '|')
            {
                pos++;
                t2->type = OR;
            }
            goto return_token;
        }
        if (inputStream[pos] == '#')
        {
            // if (token_size != 0)
            // fprintf(stderr, "error in comment token");
            // Skip the comment body up to (and leaving pos at) the newline.
            while (inputStream[pos] != '\n')
            {
                pos++;
            }
            lineNumber++;
            goto return_token;
        }
        // Ordinary character: append to the current word.
        str[token_size] = inputStream[pos];
        token_size++;
    }
return_token:
    // t2 holds the delimiter character that ended the scan.
    t2->str = checked_malloc(2*sizeof(char));
    t2->str[0] = inputStream[pos];
    t2->str[1] = '\0';
    t2->prev = t1;
    // AND/OR/INVALID were decided above; everything else is classified
    // from the delimiter text itself.
    if (t2 -> type != AND && t2->type != OR && t2->type != INVALID)
        t2->type = get_token_type(t2->str);
    // Ensure room for the terminating NUL on the word buffer.
    if(token_size == max_token_size)
    {
        max_token_size += 1;
        str = checked_realloc(str, max_token_size*sizeof(char));
    }
    str[token_size] = '\0';
    // Link word (t1) and delimiter (t2) onto the list after prev.
    t1->str = str;
    t1->prev = prev;
    prev->next = t1;
    t1->next = t2;
    t1->type = get_token_type(t1->str);
    if (t1 -> type == EMPTY)
    {
        // No word accumulated: splice t1 out and return just the delimiter.
        // NOTE(review): t1->str is not freed here -- confirm whether this
        // small leak is intentional.
        token_t toDelete = t1;
        token_t toReturn = t2;
        t1->prev->next=t2;
        t2->prev = t1->prev;
        free(toDelete);
        return toReturn;
    }
    else
        return t2;
}
/// Expands a Python-defined function macro.
///
/// Converts the argument tokens into Python Token objects, publishes the
/// "preprocessor" and "location" globals for the callable to use, invokes
/// the callable, and converts its result back into core tokens:
///   - None            -> empty vector
///   - str             -> tokenized by the preprocessor
///   - sequence[Token] -> tokens extracted element-by-element
///
/// @param expansion_location where the macro is being expanded.
/// @param arguments the macro's argument tokens.
/// @return the replacement token sequence.
/// @throws python_exception on any Python-level failure;
///         std::runtime_error if the argument tuple cannot be created.
std::vector<cmonster::core::Token> FunctionMacro::operator()(
    clang::SourceLocation const& expansion_location,
    std::vector<cmonster::core::Token> const& arguments) const
{
    // Create the arguments tuple.
    ScopedPyObject args_tuple = PyTuple_New(arguments.size());
    if (!args_tuple)
        throw std::runtime_error("Failed to create argument tuple");
    for (Py_ssize_t i = 0; i < static_cast<Py_ssize_t>(arguments.size()); ++i)
    {
        Token *token = create_token(m_preprocessor, arguments[i]);
        // PyTuple_SetItem steals the reference to 'token', so no release
        // is needed here.
        PyTuple_SetItem(args_tuple, i, reinterpret_cast<PyObject*>(token));
    }

    // Set the "preprocessor" and "location" global variables.
    //
    // XXX How do we create a closure via the C API? It would be better if we
    // could bind a function to the preprocessor it was created with, when we
    // define the function.
    PyObject *globals = PyEval_GetGlobals();
    if (globals)
    {
        // PyDict_SetItem takes its OWN references to both key and value, so
        // the temporaries created here must be released (ScopedPyObject does
        // that). The previous code leaked both key strings, the location
        // object, and one manually-INCREF'd reference to m_preprocessor on
        // every expansion, and ignored PyDict_SetItem failures.
        ScopedPyObject pp_key(PyUnicode_FromString("preprocessor"));
        if (!pp_key)
            throw python_exception();
        if (PyDict_SetItem(globals, pp_key, (PyObject*)m_preprocessor) == -1)
            throw python_exception();

        cmonster::core::Preprocessor &pp = get_preprocessor(m_preprocessor);
        ScopedPyObject location((PyObject*)create_source_location(
            expansion_location,
            pp.getClangPreprocessor().getSourceManager()));
        if (!location)
            throw python_exception();
        ScopedPyObject loc_key(PyUnicode_FromString("location"));
        if (!loc_key)
            throw python_exception();
        if (PyDict_SetItem(globals, loc_key, location) == -1)
            throw python_exception();
    }

    // Call the function.
    ScopedPyObject py_result = PyObject_Call(m_callable, args_tuple, NULL);
    if (!py_result)
        throw python_exception();

    // Transform the result.
    std::vector<cmonster::core::Token> result;
    if (py_result == Py_None)
        return result;

    // Is it a string? If so, tokenize it.
    if (PyUnicode_Check(py_result))
    {
        ScopedPyObject utf8(PyUnicode_AsUTF8String(py_result));
        if (!utf8)
            throw python_exception();
        char *u8_chars;
        Py_ssize_t u8_size;
        if (PyBytes_AsStringAndSize(utf8, &u8_chars, &u8_size) == -1)
            throw python_exception();
        cmonster::core::Preprocessor &pp = get_preprocessor(m_preprocessor);
        return pp.tokenize(u8_chars, u8_size);
    }

    // If it's not a string, it should be a sequence of Token objects.
    if (!PySequence_Check(py_result))
    {
        throw python_exception(PyExc_TypeError,
            "macro functions must return a sequence of tokens");
    }
    const Py_ssize_t seqlen = PySequence_Size(py_result);
    if (seqlen == -1)
        throw python_exception();
    result.reserve(seqlen);
    for (Py_ssize_t i = 0; i < seqlen; ++i)
    {
        // PySequence_GetItem returns a new reference; ScopedPyObject
        // releases it after the core token has been copied out.
        ScopedPyObject token_ = PySequence_GetItem(py_result, i);
        if (!PyObject_TypeCheck(token_, get_token_type()))
        {
            // Invalid return value.
            throw python_exception(PyExc_TypeError,
                "macro functions must return a sequence of tokens");
        }
        result.push_back(get_token((Token*)(PyObject*)token_));
    }
    return result;
}