/*
 * Evaluates an include form.
 *
 * The argument list is evaluated with evalExprList(); every STRING argument
 * is opened as a file, parsed and evaluated in env.  Non-string arguments
 * and files that cannot be opened are reported on stderr.
 *
 * Returns the value produced for the last STRING argument (a NIL lexeme if
 * that file could not be opened), or NULL if no STRING argument was seen.
 *
 * NOTE(review): the FILE handle is not closed in this function -- confirm
 * whether parse() takes ownership of it.
 */
Lexeme *evalInclude(Lexeme *tree, Lexeme *env) {
    Lexeme *outcome = NULL;
    Lexeme *cursor;

    for (cursor = evalExprList(tree, env);
         cursor != NULL && strcmp(cursor->type, NIL) != 0;
         cursor = cdr(cursor)) {
        Lexeme *arg = car(cursor);

        if (strcmp(arg->type, STRING) != 0) {
            /* Only string arguments can name a file. */
            fprintf(stderr,"Cannot open %s\n",displayLexeme(*arg));
        }
        else {
            FILE *source = fopen(arg->sval, "r");
            if (source != NULL) {
                outcome = eval(parse(source), env);
            }
            else {
                fprintf(stderr,"Cannot open %s, invalid filename\n",arg->sval);
                outcome = lexeme(NIL);
            }
        }
    }

    return outcome;
}
// primary: primitive // | idExpr // | parenExpr // | OB optExpressionList CB // | anonFunc Lexeme *primary(Parser *p) { if(primitivePending(p)) { return primitive(p); } else if(idExprPending(p)) { return idExpr(p); } else if(parenExprPending(p)) { return parenExpr(p); } else if(uOpPending(p)) { Lexeme *a = uOp(p); a->right = primary(p); return a; } else if(check(p,OB)) { Lexeme *a = lexeme(ARRAY); match(p,OB); if(expressionListPending(p)) { a->right = expressionList(p); } match(p,CB); return a; } else { return anonFunc(p); } }
// expression: primary // | primary operator expression // | VAR ID optInit Lexeme *expression(Parser *p) { Lexeme *a, *b, *c; if(primaryPending(p)) { a = primary(p); if(operatorPending(p)) { b = operator(p); c = expression(p); b->left = a; b->right = c; return b; } return a; } else { a = match(p,VAR); a->left = match(p,ID); a->right = NULL; if(initPending(p)) { a->right = init(p); } else { a->right = lexeme(NIL); } return a; } }
// Parses a quoted string literal starting at input[i], where input[i] is the
// opening quote character. On success the decoded string is appended to
// output.lexemes and i is left just past the closing quote.
//
// Supported escapes: \r, \n and a two-digit hexadecimal byte (\41).
// Errors are reported through lexer_error(): a backslash or truncated hex
// escape at the end of the input, an unknown escape, a raw newline inside
// the string, or a missing terminator.
//
// NOTE(review): the hex escape is converted with string_to_number<char>;
// confirm that this helper parses a number rather than extracting a single
// character.
void lexer::parse_string(line_of_code & output)
{
	std::string contents;
	char const quote = input[i];
	i++;

	for(; i < end; i++)
	{
		char const current = input[i];

		if(current == '\\')
		{
			if(end - i < 2)
				lexer_error("Backslash at the end of the input");

			i++;
			char const escaped = input[i];

			if(escaped == 'r')
				contents.push_back('\r');
			else if(escaped == 'n')
				contents.push_back('\n');
			else if(ail::is_hex_digit(escaped))
			{
				if(end - i < 2)
					lexer_error("Incomplete hex number escape sequence at the end of the input");
				if(!ail::is_hex_digit(input[i + 1]))
					lexer_error("Invalid hex number escape sequence");

				std::string const hex_digits = input.substr(i, 2);
				// Step onto the second digit; the loop increment moves past it.
				i++;
				char const decoded = ail::string_to_number<char>(hex_digits, std::ios_base::hex);
				contents.push_back(decoded);
			}
			else
				lexer_error("Invalid escape sequence: " + ail::hex_string_8(static_cast<uchar>(escaped)));
		}
		else if(current == '\n')
			lexer_error("Detected a newline in a string");
		else if((current == '\'' || current == '"') && current == quote)
		{
			// Matching closing quote: emit the lexeme and consume the quote.
			output.lexemes.push_back(lexeme(contents));
			i++;
			return;
		}
		else
			contents.push_back(current);
	}

	lexer_error("String lacks terminator");
}
bool assembly_lexer(std::string const & file_name, unsigned int & line, std::string::const_iterator & begin, std::string::const_iterator const & end, lexeme & output) { std::string input; for(; begin != end; ++begin) { char current_char = *begin; char type = type_lookup_table[current_char]; switch(type) { case char_type_illegal: { lexer_exception(file_name, line, "Illegal character"); } case char_type_name: { std::string::const_iterator name_begin = begin; for(++begin; (begin != end) && (is_name_char(*begin) == true); ++begin); output = lexeme(lexeme_name, std::string(name_begin, begin), line); return true; } case char_type_digit: { std::string::const_iterator number_begin = begin; for(++begin; (begin != end) && (nil::string::is_digit(*begin) == true) ; ++begin); output = lexeme(lexeme_number, std::string(number_begin, begin), line); return true; } case char_type_zero: { std::string::const_iterator number_begin = begin; ++begin; if(begin != end) { char second_character = *begin; if( (second_character == 'x') || (second_character == 'X') ) { for(++begin; (begin != end) && (nil::string::is_digit(*begin) == true) ; ++begin); } else if(is_binary_digit(second_character) == true) { for(++begin; (begin != end) && (is_binary_digit(*begin) == true) ; ++begin); } } output = lexeme(lexeme_number, std::string(number_begin, begin), line); return true; } case char_type_string: { std::string string; for(++begin; begin != end;) { char current_char = *begin; switch(current_char) { case '"': { ++begin; output = lexeme(lexeme_string, string, line); return true; } case '\\': { try { parse_backslash(begin, end, string); } catch(std::exception & exception) { lexer_exception(file_name, line, exception.what()); } break; } case '\n': { lexer_exception(file_name, line, "Newline in string"); } default: { string += current_char; ++begin; break; } } } lexer_exception(file_name, line, "Incomplete string at the end of file"); } case char_type_operator: { ++begin; output = lexeme(lexeme_operator, 
std::string(1, current_char), line); return true; } case char_type_operator_extended: { ++begin; if(begin == end) { output = lexeme(lexeme_operator, std::string(1, current_char), line); return false; } else { char second_char = *begin; bool is_extended = false; switch(current_char) { case '&': { if(second_char == '&') { is_extended = true; } break; } case '|': { if(second_char == '|') { is_extended = true; } break; } case '=': { if(second_char == '=') { is_extended = true; } break; } case '!': { if(second_char == '=') { is_extended = true; } break; } case '<': { if(second_char == '=') { is_extended = true; } break; } case '>': { if(second_char == '=') { is_extended = true; } break; } } if(is_extended == true) { ++begin; output = lexeme(lexeme_operator, std::string(1, current_char) + second_char, line); } else { output = lexeme(lexeme_operator, std::string(1, current_char), line); } return true; } } case char_type_newline: { ++begin; ++line; output = lexeme(lexeme_newline, line); return true; } case char_type_comment: { for(++begin; begin != end; ++begin) { if(*begin == '\n') { ++begin; ++line; output = lexeme(lexeme_newline, line); return true; } } return false; } } } return false; }
/*
 * Tokenises this->sourceText into this->lexemes with a character-driven
 * state machine and returns this->success.
 *
 * The function first calls clearComment(), then walks sourceText one
 * character at a time, dispatching on this->curState:
 *
 *   stEmpty       - between tokens; a non-space character starts a token and
 *                   selects the next state (identifier, constant, ':' assign,
 *                   '<' / '>' shift, operation symbol, or error).
 *   stIdentifier  - accumulates letters/digits; on any other character the
 *                   token is emitted as tyKeyWord or tyIdentifier.  New
 *                   identifiers are entered into identifiersTable with
 *                   present = false.
 *   stOpSymbol    - emits the accumulated token as tyOperationSymbol.
 *   stAssign      - expects '=' after ':'; anything else leads to stError.
 *   stLeftShift / stRightShift
 *                 - expect a second '<' / '>'; otherwise the single
 *                   character is emitted as an operation symbol.
 *   stConst       - accumulates octal digits; the emitted tyConst lexeme
 *                   carries a heap-allocated int (new int) holding
 *                   toOctal(atoi("0" + token)).
 *   stError       - sets success to false, records the token as tyUnknown in
 *                   both lexemes and errorsTable, and returns to stEmpty
 *                   without consuming a character.
 *
 * Where an operation symbol is detected, "--nextSymbol" is paired with the
 * trailing "++nextSymbol" so that the iterator stays on the same character;
 * the stOpSymbol state then emits the token and consumes that character.
 *
 * After the loop curLexemesPos is reset to lexemes.begin().
 *
 * NOTE(review): success is only ever set to false here and curState is not
 * initialised here -- presumably both are set up by the caller; confirm.
 * NOTE(review): tokens are emitted only when a following character is
 * processed, so a token still being accumulated when sourceText ends
 * appears never to be emitted -- confirm the input always ends in
 * whitespace.
 * NOTE(review): in stLeftShift / stRightShift the character that is not a
 * second '<' / '>' is consumed without being added to any token -- confirm
 * this is intended.
 */
bool LexicalAnalyser::scan() { this->clearComment(); string token; string::const_iterator nextSymbol = this->sourceText.begin(); while (nextSymbol != this->sourceText.end()) { switch (this->curState) { case this->stEmpty: { if (!this->isSpace(*nextSymbol)) { token.push_back(*nextSymbol); } if (this->isLetter(*nextSymbol)) { this->curState = this->stIdentifier; } else if (this->isOctalNumber(*nextSymbol)) { this->curState = this->stConst; } else if (this->isNotOctalNumber(*nextSymbol)) { this->curState = this->stError; } else if (*nextSymbol == ':') { this->curState = this->stAssign; } else if (*nextSymbol == '<') { this->curState = this->stLeftShift; } else if (*nextSymbol == '>') { this->curState = this->stRightShift; } else if (this->isOperationSymbol(*nextSymbol)) { this->curState = this->stOpSymbol; --nextSymbol; } ++nextSymbol; } break; case this->stIdentifier: { if (this->isSpace(*nextSymbol)) { if (this->isKeyWord(token)) { Lexeme lexeme(token, Lexeme::tyKeyWord); this->lexemes.push_back(lexeme); } else { Lexeme lexeme(token, Lexeme::tyIdentifier); this->lexemes.push_back(lexeme); Identifier id; id.present = false; this->identifiersTable[token] = id; } token.clear(); this->curState = this->stEmpty; } else if (this->isLetter(*nextSymbol) || this->isNumber(*nextSymbol)) { token.push_back(*nextSymbol); this->curState = this->stIdentifier; } else { if (this->isKeyWord(token)) { Lexeme lexeme(token, Lexeme::tyKeyWord); this->lexemes.push_back(lexeme); } else { Lexeme lexeme(token, Lexeme::tyIdentifier); this->lexemes.push_back(lexeme); Identifier id; id.present = false; this->identifiersTable[token] = id; } token.clear(); token.push_back(*nextSymbol); if (*nextSymbol == ':') { this->curState = this->stAssign; } else if (*nextSymbol == '<') { this->curState = this->stLeftShift; } else if (*nextSymbol == '>') { this->curState = this->stRightShift; } else if (this->isOperationSymbol(*nextSymbol)) { this->curState = this->stOpSymbol; --nextSymbol; } } ++nextSymbol; } 
break; case this->stOpSymbol: { Lexeme lexeme(token, Lexeme::tyOperationSymbol); this->lexemes.push_back(lexeme); token.clear(); this->curState = this->stEmpty; ++nextSymbol; } break; case this->stAssign: { token.push_back(*nextSymbol); if (*nextSymbol == '=') { this->curState = this->stOpSymbol; } else { this->curState = this->stError; } ++nextSymbol; } break; case this->stLeftShift: { if (*nextSymbol == '<') { token.push_back(*nextSymbol); this->curState = this->stOpSymbol; } else { this->curState = this->stError; Lexeme lexeme(token, Lexeme::tyOperationSymbol); this->lexemes.push_back(lexeme); token.clear(); this->curState = this->stEmpty; } ++nextSymbol; } break; case this->stRightShift: { if (*nextSymbol == '>') { token.push_back(*nextSymbol); this->curState = this->stOpSymbol; } else { this->curState = this->stError; Lexeme lexeme(token, Lexeme::tyOperationSymbol); this->lexemes.push_back(lexeme); token.clear(); this->curState = this->stEmpty; } ++nextSymbol; } break; case this->stConst: { if (this->isSpace(*nextSymbol)) { Lexeme lexeme(token, Lexeme::tyConst); int* value = new int; string val = "0"; val.append(token); *value = this->toOctal(atoi(val.c_str())); lexeme.value = value; this->lexemes.push_back(lexeme); token.clear(); this->curState = this->stEmpty; } else if (this->isOctalNumber(*nextSymbol)) { token.push_back(*nextSymbol); } else if (this->isLetter(*nextSymbol) || this->isNumber(*nextSymbol)) { token.push_back(*nextSymbol); this->curState = this->stError; } else { Lexeme lexeme(token, Lexeme::tyConst); int* value = new int; string val = "0"; val.append(token); *value = this->toOctal(atoi(val.c_str())); // *value = atoi(token.c_str()); lexeme.value = value; this->lexemes.push_back(lexeme); token.clear(); token.push_back(*nextSymbol); if (*nextSymbol == ':') { this->curState = this->stAssign; } else if (*nextSymbol == '<') { this->curState = this->stLeftShift; } else if (*nextSymbol == '>') { this->curState = this->stRightShift; } else if 
(this->isOperationSymbol(*nextSymbol)) { Lexeme lexeme(token, Lexeme::tyOperationSymbol); int* value = new int; string val = "0"; val.append(token); *value = this->toOctal(atoi(val.c_str())); // *value = atoi(token.c_str()); lexeme.value = value; this->lexemes.push_back(lexeme); token.clear(); this->curState = this->stEmpty; } } ++nextSymbol; } break; case this->stError: { this->success = false; Lexeme lexeme(token, Lexeme::tyUnknown); this->lexemes.push_back(lexeme); this->errorsTable.push_back(lexeme); token.clear(); this->curState = this->stEmpty; } break; } } this->curLexemesPos = this->lexemes.begin(); return this->success; }
/*
 * Evaluator for a nil node.  Neither the tree nor the environment is
 * consulted; the result is whatever lexeme(NIL) produces.
 */
Lexeme *evalNil(Lexeme *tree, Lexeme *env) {
    Lexeme *nil = lexeme(NIL);

    (void)tree;
    (void)env;

    return nil;
}
/*! This is called by parseSourceFile() to do the actual parsing and tree building. It only processes qdoc comments. It skips everything else. */ bool PureDocParser::processQdocComments() { QSet<QString> topicCommandsAllowed = topicCommands(); QSet<QString> otherMetacommandsAllowed = otherMetaCommands(); QSet<QString> metacommandsAllowed = topicCommandsAllowed + otherMetacommandsAllowed; while (tok != Tok_Eoi) { if (tok == Tok_Doc) { /* lexeme() returns an entire qdoc comment. */ QString comment = lexeme(); Location start_loc(location()); readToken(); Doc::trimCStyleComment(start_loc,comment); Location end_loc(location()); /* Doc parses the comment. */ Doc doc(start_loc,end_loc,comment,metacommandsAllowed); QString topic; ArgList args; QSet<QString> topicCommandsUsed = topicCommandsAllowed & doc.metaCommandsUsed(); /* There should be one topic command in the set, or none. If the set is empty, then the comment should be a function description. */ if (topicCommandsUsed.count() > 0) { topic = *topicCommandsUsed.begin(); args = doc.metaCommandArgs(topic); } NodeList nodes; QList<Doc> docs; if (topic.isEmpty()) { doc.location().warning(tr("This qdoc comment contains no topic command " "(e.g., '\\%1', '\\%2').") .arg(COMMAND_MODULE).arg(COMMAND_PAGE)); } else { /* There is a topic command. Process it. 
*/ if ((topic == COMMAND_QMLPROPERTY) || (topic == COMMAND_QMLATTACHEDPROPERTY)) { Doc nodeDoc = doc; Node* node = processTopicCommandGroup(nodeDoc,topic,args); if (node != 0) { nodes.append(node); docs.append(nodeDoc); } } else { ArgList::ConstIterator a = args.begin(); while (a != args.end()) { Doc nodeDoc = doc; Node* node = processTopicCommand(nodeDoc,topic,*a); if (node != 0) { nodes.append(node); docs.append(nodeDoc); } ++a; } } } Node* treeRoot = QDocDatabase::qdocDB()->treeRoot(); NodeList::Iterator n = nodes.begin(); QList<Doc>::Iterator d = docs.begin(); while (n != nodes.end()) { processOtherMetaCommands(*d, *n); (*n)->setDoc(*d); checkModuleInclusion(*n); if ((*n)->isInnerNode() && ((InnerNode *)*n)->includes().isEmpty()) { InnerNode *m = static_cast<InnerNode *>(*n); while (m->parent() && m->parent() != treeRoot) m = m->parent(); if (m == *n) ((InnerNode *)*n)->addInclude((*n)->name()); else ((InnerNode *)*n)->setIncludes(m->includes()); } ++d; ++n; } } else { readToken(); } } return true; }
bool LexDbLemmatizer::lemmatize(const std::string& token, AnnotationItemManager& annotationItemManager, LemmatizerOutputIterator& outputIterator) { bool foundLemma = false; pqxx::work transaction(connection_); pqxx::result tuples = transaction.exec( "SELECT canon,name,sinflection,M.morphology" " FROM forms F, lexemes L, morphologies M" " WHERE F.lexeme_sn = L.lexeme_sn AND M.morphology_no = F.morphology_no" " AND (discard is null OR discard = 0)" " AND form = '" + pqxx::to_string(token) + "'" " ORDER BY canon, name, priority"); std::string currentLemma; std::string currentLexeme; static pqxx::tuple::size_type CANON_INDEX = 0; static pqxx::tuple::size_type NAME_INDEX = 1; static pqxx::tuple::size_type SINFLECTION_INDEX = 2; static pqxx::tuple::size_type MORPHOLOGY_INDEX = 3; for (pqxx::result::const_iterator iter = tuples.begin(); iter != tuples.end(); ++iter) { const pqxx::tuple& tuple = *iter; if (tuple[CANON_INDEX].c_str() != currentLemma) { currentLemma = tuple[CANON_INDEX].c_str(); outputIterator.addLemma(currentLemma); foundLemma = true; } if (tuple[NAME_INDEX].c_str() != currentLexeme) { currentLexeme = tuple[NAME_INDEX].c_str(); std::string partOfSpeech; std::string flags; parseSinflection_(tuple[SINFLECTION_INDEX].c_str(), partOfSpeech, flags); AnnotationItem lexeme( partOfSpeech, StringFrag(currentLexeme)); annotationItemManager.setValue(lexeme, "flags", flags); outputIterator.addLexeme(lexeme); foundLemma = true; } { std::string partOfSpeech; std::string flags; parseSinflection_(tuple[SINFLECTION_INDEX].c_str(), partOfSpeech, flags); AnnotationItem form( partOfSpeech, StringFrag(currentLexeme)); annotationItemManager.setValue(form, "flags", flags); annotationItemManager.setValue(form, "morpho", tuple[MORPHOLOGY_INDEX].c_str()); outputIterator.addForm(form); foundLemma = true; } } return foundLemma; }
/*
 * The statements up to the first "return value; }" are the tail of a
 * definition whose header lies before this point: they take the table at
 * the head of env, prepend variable to its left list and value (or a NIL
 * lexeme when value is NULL) to its right list, and return value.
 *
 * extend(variables, values, env)
 *     Builds cons(ENV, cons(VALUES, variables, values), env), then calls
 *     insert() twice on it with two ID lexemes: "this", paired with the new
 *     environment itself, and "__context", paired with env.  Returns the
 *     new environment.
 *     (presumably insert takes (variable, value, env) -- its header is not
 *     visible here; confirm.)
 *
 * create()
 *     Returns extend(lexeme(NIL), lexeme(NIL), NULL).
 *
 * sameVariable(var1, var2)
 *     Returns non-zero when the sval strings of the two lexemes compare
 *     equal with strcmp(), zero otherwise.
 */
Lexeme *table = car(env); Lexeme *vars = car(table); Lexeme *vals = cdr(table); table->left = cons(JOIN,variable,vars); if(value != NULL) { table->right = cons(JOIN,value,vals); } else { table->right = cons(JOIN,lexeme(NIL),vals); } return value; } Lexeme *extend(Lexeme *variables, Lexeme *values, Lexeme *env) { Lexeme *new = cons(ENV,cons(VALUES,variables,values),env); Lexeme *this = lexeme(ID); Lexeme *__context = lexeme(ID); __context->sval = "__context"; this->sval = "this"; insert(this,new,new); insert(__context,env,new); return new; } Lexeme *create() { return extend(lexeme(NIL),lexeme(NIL),NULL); } int sameVariable(Lexeme *var1, Lexeme *var2) { return !strcmp(var1->sval,var2->sval); }
// Parses a single statement and appends the result to list.
//
// Returns false when the current lexeme cannot start a statement (end of
// input, a closing bracet, or anything that is not an expression); returns
// true once a statement has been consumed.
bool Parser::parse_statement(StatementList *list)
{
	lexer.identify_keywords();

	// Set by the statement kinds that must be followed by a terminator.
	bool needs_terminator = false;

	switch(lexeme())
	{
		case Lexeme::END:
		case Lexeme::BRACET_CLOSE:
			return false;

		case Lexeme::SEMICOLON:
			step();
			break;

		case Lexeme::BRACET_OPEN:
			list->append(parse_block<true, false>(Scope::EMPTY));
			break;

		case Lexeme::KW_IF:
			parse_if(list);
			break;

		case Lexeme::KW_WHILE:
			parse_while(list);
			break;

		case Lexeme::KW_DO:
			parse_do(list);
			needs_terminator = true;
			break;

		case Lexeme::KW_RETURN:
			parse_return(list);
			needs_terminator = true;
			break;

		case Lexeme::KW_BREAK:
			parse_break(list);
			needs_terminator = true;
			break;

		case Lexeme::KW_CONTINUE:
			parse_continue(list);
			needs_terminator = true;
			break;

		case Lexeme::KW_CONST:
			step();
			parse_local(true, parse_expression(), list);
			needs_terminator = true;
			break;

		default:
		{
			if(!is_expression(lexeme()))
				return false;

			ExpressionNode *expression = parse_expression();

			// An expression naming a type and followed by an identifier is a
			// local declaration; anything else is an expression statement.
			if(lexeme() == Lexeme::IDENT && expression->is_type_name(document, false))
				parse_local(false, expression, list);
			else
				list->append(expression);

			needs_terminator = true;
			break;
		}
	}

	if(needs_terminator)
		parse_terminator();

	return true;
}
// Writes this object's lexeme to the given stream.
virtual void
print (std::ostream& o)
{
  std::ostream& out = o;
  out << lexeme ();
}
virtual void print (std::ostream& o) { //@@ todo o << lexeme (); }
// Returns true when the lexeme is exactly "TRUE", false otherwise.
bool
value () const
{
  const bool is_true = (lexeme () == "TRUE");
  return is_true;
}
// Tries to parse a numeric literal starting at input[i].
//
// Recognised forms:
//   0x<hex digits>      unsigned integer
//   <digits>            signed integer
//   <digits>.<digits>   floating point value
//
// Returns false (leaving i unchanged) when input[i] is not a digit.
// Otherwise the parsed lexeme is appended to output.lexemes, i is advanced
// past the literal and true is returned. Malformed literals are reported
// through number_parsing_error() / lexer_error().
bool lexer::parse_number(line_of_code & output)
{
	std::size_t start = i;
	char byte = input[i];

	if(!ail::is_digit(byte))
		return false;

	i++;

	// After the increment i refers to the character that follows the first
	// digit, so that is where the 'x' of a hex literal has to be looked for.
	if(byte == '0' && i < end && input[i] == 'x')
	{
		// Skip the 'x'.
		i++;

		if(i == end)
			number_parsing_error("Incomplete hex number at the end of the input");

		std::size_t hex_start = i;
		for(; i < end && ail::is_hex_digit(input[i]); i++);

		std::size_t hex_length = i - hex_start;
		if(hex_length == 0)
			lexer_error("Incomplete hex number");

		// Extract exactly the digits that were scanned.
		std::string hex_string = input.substr(hex_start, hex_length);
		types::unsigned_integer value = ail::string_to_number<types::unsigned_integer>(hex_string, std::ios_base::hex);
		output.lexemes.push_back(lexeme(value));
		return true;
	}

	char const dot = '.';

	bool got_dot = false;
	char last_byte = byte;
	for(; i < end; i++)
	{
		byte = input[i];
		if(byte == dot)
		{
			if(got_dot)
				number_parsing_error("Encountered a floating point value containing multiple dots");
			else
				got_dot = true;
		}
		else if(!ail::is_digit(byte))
			break;

		last_byte = byte;
	}

	if(last_byte == dot)
		number_parsing_error("Encountered a floating point value ending with a dot");

	std::string number_string = input.substr(start, i - start);
	lexeme current_lexeme;
	if(got_dot)
		current_lexeme = lexeme(ail::string_to_number<types::floating_point_value>(number_string));
	else
		current_lexeme = lexeme(ail::string_to_number<types::signed_integer>(number_string));
	output.lexemes.push_back(current_lexeme);

	return true;
}