// Merges HTML open/close tags that were split across two source lines back
// into a single RawText token (e.g. "<div class='x'" + "...>" -> one token).
void Document::_mergeMultilineHtmlTags() {
    // Matches a line that ends mid-tag: "<", optional "/", tag name, then an
    // attribute list. Group 5 captures the quote character (" or ') and is
    // back-referenced so the attribute value closes with the same quote.
    static const boost::regex cHtmlTokenStart("<((/?)([a-zA-Z0-9]+)(?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\5))*? */? *))$");
    // Matches the continuation line: remaining attributes followed by the
    // closing ">". Group 3 is the quote-character backreference here.
    static const boost::regex cHtmlTokenEnd("^ *((?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\3))*? */? *))>");

    TokenGroup processed;

    token::Container *tokens=dynamic_cast<token::Container*>(mTokenContainer.get());
    assert(tokens!=0);

    for (TokenGroup::const_iterator i=tokens->subTokens().begin(),
        ie=tokens->subTokens().end(); i!=ie; ++i)
    {
        if ((*i)->text() && boost::regex_match(*(*i)->text(), cHtmlTokenStart)) {
            // Peek at the next token to see if it completes the tag.
            TokenGroup::const_iterator i2=i;
            ++i2;
            if (i2!=tokens->subTokens().end() && (*i2)->text()
                && boost::regex_match(*(*i2)->text(), cHtmlTokenEnd))
            {
                // Join the two halves with a space. The extra ++i here, plus
                // the loop's own increment, skips the consumed second token.
                processed.push_back(TokenPtr(new markdown::token::RawText(*(*i)->text()+' '+*(*i2)->text())));
                ++i;
                continue;
            }
        }
        processed.push_back(*i);
    }

    tokens->swapSubtokens(processed);
}
// Completes a scan once at least one automaton already accepts: keeps feeding
// characters until every machine has rejected (or input ends), then builds a
// token from the first — highest-priority — accepting machine.
TokenPtr Scanner::scan_finite() {
    TokenPtr token(new Token());
    token->set_line(this->line_number);
    // The column counter is one past the lexeme start, so back up by one.
    token->set_column(this->col_number - 1);

    // Run the machine chain until nothing accepts and nothing is still
    // alive, stopping early at the end-of-input sentinel.
    for (;;) {
        const bool machines_active =
            !this->none_accept() || this->not_dead()->size() > 0;
        if (!machines_active || this->get_char() == '\0') {
            break;
        }
        this->run_chain();
    }

    // Drop any input consumed past the accepted lexeme.
    this->shave_chain();

    // The front of the accepting list is the highest-priority machine;
    // map its name to a token type.
    FSMachineListPtr winners = this->accepting();
    FSMachinePtr winner = move(*winners->begin());
    token->set_token(get_token_by_name(winner->get_name()));

    // Lexemes are stored lower-cased.
    string lexeme = this->contents();
    lexeme = to_lower(lexeme);
    token->set_lexeme(lexeme);

    // Reset the buffer and hand the token to the consumer.
    this->clear_buffer();
    this->consume(token);
    return token;
}
// Emits the loop-footer code that runs after the loop body: the condition
// test for repeat/until, the back-branch for while, and the increment or
// decrement plus back-branch for for-loops.
void LoopBlock::generate_post() {
    if (this->type == RPTUNTLLOOP) {
        write_raw(this->cond_label + ":\n");
        // Evaluate the until-condition; it must be boolean.
        VarType result = this->generate_expr(this->get_symbol_list());
        if (result != BOOLEAN) {
            report_error_lc("Semantic Error", "Conditional expression doesn't evaluate to boolean value.",
                            (*this->get_symbol_list()->begin())->get_row(),
                            (*this->get_symbol_list()->begin())->get_col());
        }
        // False -> back to the body; otherwise fall through to the exit.
        write_raw("\nBRFS " + this->body_label);
        write_raw("BR " + this->exit_label + "\n");
        write_raw(this->exit_label + ":\n");
    } else if (this->type == WHILELOOP) {
        // Jump back to the condition test emitted in generate_pre().
        write_raw("BR " + this->cond_label);
        write_raw(this->exit_label + ":\n");
    } else if (this->type == FORLOOP) {
        // get the incrementer token ([3] is the TO/DOWNTO keyword)
        TokenPtr incrementer = (*this->get_unprocessed())[3];
        if (incrementer->get_token() == MP_TO) {
            // generate an incrementer: token [0] (the loop variable) is
            // caught twice — presumably once as the assignment target and
            // once as the operand of "var + 1"; confirm AssignmentBlock's
            // expected token order.
            AssignmentBlockPtr inc = AssignmentBlockPtr(new AssignmentBlock(false));
            inc->set_analyzer(this->get_analyzer());
            inc->catch_token((*this->get_unprocessed())[0]);
            inc->catch_token((*this->get_unprocessed())[0]);
            inc->catch_token(TokenPtr(new Token(MP_PLUS, "+", -1, -1)));
            inc->catch_token(TokenPtr(new Token(MP_INT_LITERAL, "1", -1, -1)));
            inc->preprocess();
            inc->generate_pre();
            inc->generate_post();
        } else if (incrementer->get_token() == MP_DOWNTO) {
            // generate a decrementer: same shape, with "var - 1".
            AssignmentBlockPtr dec = AssignmentBlockPtr(new AssignmentBlock(false));
            dec->set_analyzer(this->get_analyzer());
            dec->catch_token((*this->get_unprocessed())[0]);
            dec->catch_token((*this->get_unprocessed())[0]);
            dec->catch_token(TokenPtr(new Token(MP_MINUS, "-", -1, -1)));
            dec->catch_token(TokenPtr(new Token(MP_INT_LITERAL, "1", -1, -1)));
            dec->preprocess();
            dec->generate_pre();
            dec->generate_post();
        }
        // Jump back to the condition test, then place the exit label.
        write_raw("BR " + this->cond_label + "\n");
        write_raw(this->exit_label + ":\n");
    }
}
bool Document::read(std::istream& in) { if (mProcessed) return false; token::Container *tokens=dynamic_cast<token::Container*>(mTokenContainer.get()); assert(tokens!=0); std::string line; TokenGroup tgt; while (_getline(in, line)) { if (isBlankLine(line)) { tgt.push_back(TokenPtr(new token::BlankLine(line))); } else { tgt.push_back(TokenPtr(new token::RawText(line))); } } tokens->appendSubtokens(tgt); return true; }
void Formatter::init(const char* fmt) { format_ = fmt; boost::regex e("\\$\\{([a-z|A-Z]+)\\}"); boost::match_results<std::string::const_iterator> results; std::string::const_iterator start, end; start = format_.begin(); end = format_.end(); bool matched_once = false; std::string last_suffix; tokens_.clear(); while (boost::regex_search(start, end, results, e)) { #if 0 for (size_t i = 0; i < results.size(); ++i) { std::cout << i << "|" << results.prefix() << "|" << results[i] << "|" << results.suffix() << std::endl; } #endif std::string token = results[1]; last_suffix = results.suffix(); tokens_.push_back(TokenPtr(new FixedToken(results.prefix()))); tokens_.push_back(createTokenFromType(token)); start = results[0].second; matched_once = true; } if (matched_once) { tokens_.push_back(TokenPtr(new FixedToken(last_suffix))); } else { tokens_.push_back(TokenPtr(new FixedToken(format_))); } }
// Emits the loop-header code that runs before the loop body: just the body
// label for repeat/until (its test lives in generate_post()), the condition
// test for while, and the initial assignment plus the ordinal (bound)
// expression test for for-loops.
void LoopBlock::generate_pre() {
    if (this->type == RPTUNTLLOOP) {
        write_raw(this->body_label + ":\n");
    } else if (this->type == WHILELOOP) {
        write_raw(this->cond_label + ":\n");
        // Evaluate the loop condition; it must be boolean.
        VarType result = this->generate_expr(this->get_symbol_list());
        if (result != BOOLEAN) {
            report_error_lc("Semantic Error", "Conditional expression doesn't evaluate to boolean value.",
                            (*this->get_symbol_list()->begin())->get_row(),
                            (*this->get_symbol_list()->begin())->get_col());
        }
        // True -> body; otherwise branch to the exit label.
        write_raw("\nBRTS " + this->body_label);
        write_raw("BR " + this->exit_label);
        write_raw(this->body_label + ":\n");
    } else if (this->type == FORLOOP) {
        // parse the assignment
        // process the assignment: tokens [0..2] are "var := start".
        AssignmentBlockPtr assignment = AssignmentBlockPtr(new AssignmentBlock(false));
        assignment->set_analyzer(this->get_analyzer());
        for (auto i = 0; i < 3; i++) {
            assignment->catch_token((*this->get_unprocessed())[i]);
        }
        // generate its code
        assignment->preprocess();
        assignment->generate_pre();
        assignment->generate_post();
        // generate the condition label
        write_raw(this->cond_label + ":\n");
        // process the ordinal expression: tokens [4..end] are the bound.
        AssignmentBlockPtr ordinal_expr = AssignmentBlockPtr(new AssignmentBlock(true));
        ordinal_expr->set_analyzer(this->get_analyzer());
        for (unsigned int i = 4; i < this->get_unprocessed()->size(); i++) {
            ordinal_expr->catch_token((*this->get_unprocessed())[i]);
        }
        // get the comparison components for the ordinal expr; token [3] is
        // the direction keyword (TO or DOWNTO).
        TokenPtr incrementer = (*this->get_unprocessed())[3];
        // NOTE(review): both branches append MP_EQUALS — if TO and DOWNTO
        // are meant to compare differently (e.g. <= vs >=), this looks like
        // a copy/paste issue; confirm against the expression generator.
        if (incrementer->get_token() == MP_TO) {
            ordinal_expr->catch_token(TokenPtr(new Token(MP_EQUALS, "=", -1, -1)));
        } else if (incrementer->get_token() == MP_DOWNTO) {
            ordinal_expr->catch_token(TokenPtr(new Token(MP_EQUALS, "=", -1, -1)));
        }
        // Append the loop variable, offset one step opposite the direction:
        // the assembled token stream reads "bound = var - 1" for TO and
        // "bound = var + 1" for DOWNTO.
        ordinal_expr->catch_token((*this->get_unprocessed())[0]);
        if (incrementer->get_token() == MP_TO) {
            ordinal_expr->catch_token(TokenPtr(new Token (MP_MINUS, "-", -1, -1)));
            ordinal_expr->catch_token(TokenPtr(new Token (MP_INT_LITERAL, "1", -1, -1)));
        } else if (incrementer->get_token() == MP_DOWNTO) {
            ordinal_expr->catch_token(TokenPtr(new Token (MP_PLUS, "+", -1, -1)));
            ordinal_expr->catch_token(TokenPtr(new Token (MP_INT_LITERAL, "1", -1, -1)));
        }
        // generate its code
        ordinal_expr->preprocess();
        ordinal_expr->generate_pre();
        // NOTE(review): BRFS branches to the body on false — opposite
        // polarity to the WHILE case's BRTS above; confirm intended.
        write_raw("\nBRFS " + this->body_label);
        write_raw("BR " + this->exit_label + "\n");
        write_raw(this->body_label + ":\n");
    }
}
// Maps a ${placeholder} name from the format string to the token object that
// renders it. Unknown names fall back to a FixedMapToken resolved against
// extra_fixed_tokens_.
TokenPtr Formatter::createTokenFromType(const std::string& type) {
    if (type == "severity")   return TokenPtr(new SeverityToken());
    if (type == "message")    return TokenPtr(new MessageToken());
    if (type == "time")       return TokenPtr(new TimeToken());
    if (type == "thread")     return TokenPtr(new ThreadToken());
    if (type == "file")       return TokenPtr(new FileToken());
    if (type == "line")       return TokenPtr(new LineToken());
    if (type == "function")   return TokenPtr(new FunctionToken());
    if (type == "streamname") return TokenPtr(new StreamNameToken());

    // Not a built-in placeholder: defer to the user-supplied map.
    return TokenPtr(new FixedMapToken(type, extra_fixed_tokens_));
}
// Scans a single token. Returns an EOF token at end of input; otherwise
// primes the automata with the first character and delegates to
// scan_finite() or scan_infinite() depending on whether anything accepted.
TokenPtr Scanner::scan_one() {
    this->skip_whitespace();

    // End of input: synthesize an EOF token at the current position.
    if (this->get_char() == '\0') {
        TokenPtr eof_token(new Token());
        eof_token->set_token(MP_EOF);
        eof_token->set_lexeme("EOF");
        eof_token->set_line(this->line_number);
        eof_token->set_column(this->col_number);
        return eof_token;
    }

    // Buffer the current character, advance the input pointer, and run
    // every automaton over the buffer once.
    this->cache();
    this->forward();
    this->run_buffer();

    // Capture the accept state before resetting the machines, then pick the
    // scan strategy: nothing accepted -> search forward (scan_infinite),
    // otherwise extend the match (scan_finite).
    const bool nothing_accepted = this->none_accept();
    this->reset_all_auto();
    return nothing_accepted ? this->scan_infinite() : this->scan_finite();
}
// Gathers consecutive text tokens in (and recursively below) the given
// container into paragraphs. A line with a trailing space forces a hard
// break: the paragraph so far is flushed and a <br/> tag is emitted.
void Document::_processParagraphLines(TokenPtr inTokenContainer) {
    token::Container *tokens=dynamic_cast<token::Container*>(inTokenContainer.get());
    assert(tokens!=0);

    // Some containers suppress paragraph wrapping for their contents.
    bool noPara=tokens->inhibitParagraphs();

    // Recurse into child containers first.
    for (TokenGroup::const_iterator ii=tokens->subTokens().begin(),
        iie=tokens->subTokens().end(); ii!=iie; ++ii)
            if ((*ii)->isContainer()) _processParagraphLines(*ii);

    TokenGroup processed;
    std::string paragraphText;
    TokenGroup paragraphTokens;
    for (TokenGroup::const_iterator ii=tokens->subTokens().begin(),
        iie=tokens->subTokens().end(); ii!=iie; ++ii)
    {
        if ((*ii)->text() && (*ii)->canContainMarkup() && !(*ii)->inhibitParagraphs()) {
            // Captures a line's text minus a single trailing space.
            static const boost::regex cExpression("^(.*) $");
            // Joined lines are separated by a single space.
            if (!paragraphText.empty()) paragraphText+=" ";
            boost::smatch m;
            if (boost::regex_match(*(*ii)->text(), m, cExpression)) {
                // Trailing space: flush the paragraph and emit a hard break.
                paragraphText += m[1];
                flushParagraph(paragraphText, paragraphTokens, processed, noPara);
                processed.push_back(TokenPtr(new markdown::token::HtmlTag("br/")));
            } else paragraphText += *(*ii)->text();
        } else {
            // Non-paragraph token: flush what we have, then pass it through.
            flushParagraph(paragraphText, paragraphTokens, processed, noPara);
            processed.push_back(*ii);
        }
    }

    // Make sure the last paragraph is properly flushed too.
    flushParagraph(paragraphText, paragraphTokens, processed, noPara);

    tokens->swapSubtokens(processed);
}
// Convenience factory: wraps a freshly allocated Token in a TokenPtr.
TokenPtr create_token(TokType token, string lexeme, unsigned int line, unsigned int column) {
    TokenPtr result(new Token(token, lexeme, line, column));
    return result;
}
// Scans forward when the first character was not immediately accepted by any
// automaton: consumes input until some machine accepts a lexeme. If nothing
// ever accepts, the input pointer is unwound and an error token is produced
// and reported — a run-on string, a run-on comment, or a generic
// unrecognized-token error, classified by the first buffered character.
TokenPtr Scanner::scan_infinite() {
    // a token pointer to see what we can get
    TokenPtr new_token = TokenPtr(new Token());
    unsigned long start_line = this->line_number;
    unsigned long start_column = this->col_number;
    new_token->set_line(start_line);
    // The column counter is one past the lexeme start; start_column stays
    // decremented and is reused in the error report below.
    new_token->set_column(--start_column);

    // Characters consumed beyond the start point (used to unwind on error).
    unsigned int moves = 0;

    // scan until the null character
    while (this->get_char() != '\0') {
        // run the buffer against all automata
        this->run_buffer();
        if (some_accept()) {
            // some machine(s) accept: extend the match until all reject
            while (!none_accept() || this->not_dead()->size() > 0) {
                this->run_chain();
            }
            // drop input consumed past the accepted lexeme
            this->shave_chain();
            // if shaving invalidated every acceptance, take the error path
            if (this->none_accept()) {
                break;
            }
            // some machine still accepts (the condition above guarantees
            // it); take the first, highest-priority accepting machine
            FSMachinePtr accepting = move(*this->accepting()->begin());
            string token_name = accepting->get_name();
            // map the machine name to a token type
            TokType this_tok = get_token_by_name(token_name);
            new_token->set_token(this_tok);
            string contents = this->contents();
            contents = to_lower(contents);
            new_token->set_lexeme(contents);
            // clear buffer, hand the token to the consumer, and return it
            this->clear_buffer();
            this->consume(new_token);
            return new_token;
        } else {
            this->reset_all_auto();
            // cache the current character under the file pointer and move
            // the file pointer forward one; the next iteration re-runs the
            // whole buffer against the automata
            this->cache();
            this->forward();
        }
        moves++;
    }

    // error condition: unwind the file pointer back to the first item
    for (unsigned int i = moves; i > 0; i--) {
        this->shave_all();
    }
    // classify the failure by the first buffered character
    if (*this->scan_buf->begin() == '\'') {
        new_token->set_token(MP_RUN_STRING);
        new_token->set_error("There is a run on string here");
        this->goto_next('\n');
    } else if (*this->scan_buf->begin() == '{') {
        new_token->set_token(MP_RUN_COMMENT);
        new_token->set_error("There is a run on comment here");
        this->goto_next('\n');
    } else {
        new_token->set_token(MP_ERROR);
        // BUG FIX: message previously misspelled "unreconized".
        new_token->set_error("There is an unrecognized token here");
    }

    // create the error token's lexeme and report the error at the position
    // where the failed scan started
    string contents = this->contents();
    contents = to_lower(contents);
    new_token->set_lexeme(contents);
    report_error_lc("Scan Error", new_token->get_error() + contents,
                    start_line, start_column);
    // clear buffer and return the error token via the consumer
    this->clear_buffer();
    this->consume(new_token);
    return new_token;
}