Beispiel #1
0
void Document::_mergeMultilineHtmlTags() {
	static const boost::regex cHtmlTokenStart("<((/?)([a-zA-Z0-9]+)(?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\5))*? */? *))$");
	static const boost::regex cHtmlTokenEnd("^ *((?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\3))*? */? *))>");

	TokenGroup processed;

	token::Container *tokens=dynamic_cast<token::Container*>(mTokenContainer.get());
	assert(tokens!=0);

	for (TokenGroup::const_iterator i=tokens->subTokens().begin(),
		ie=tokens->subTokens().end(); i!=ie; ++i)
	{
		if ((*i)->text() && boost::regex_match(*(*i)->text(), cHtmlTokenStart)) {
			TokenGroup::const_iterator i2=i;
			++i2;
			if (i2!=tokens->subTokens().end() && (*i2)->text() &&
				boost::regex_match(*(*i2)->text(), cHtmlTokenEnd))
			{
				processed.push_back(TokenPtr(new markdown::token::RawText(*(*i)->text()+' '+*(*i2)->text())));
				++i;
				continue;
			}
		}
		processed.push_back(*i);
	}
	tokens->swapSubtokens(processed);
}
// Completes a token in "finite" mode, which scan_one selects when at least
// one machine accepted the first buffered character.
//
// The token is stamped with the current line and the current column minus
// one. Characters are fed through run_chain() while some machine accepts or
// is not yet dead, stopping early if the input yields the null character.
// The chain is then shaved, the first machine returned by accepting()
// supplies the token type, and the lower-cased buffer contents become the
// lexeme. The buffer is cleared and the token is handed to consume() before
// it is returned.
TokenPtr Scanner::scan_finite() {
	TokenPtr result(new Token());
	result->set_line(this->line_number);
	unsigned long column = this->col_number;
	column -= 1;
	result->set_column(column);

	// keep consuming until nothing accepts and nothing is alive,
	// or until the null character shows up
	while ((!this->none_accept() || this->not_dead()->size() > 0)
		   && this->get_char() != '\0') {
		this->run_chain();
	}

	// drop the surplus characters at the end of the buffer
	this->shave_chain();

	// the first accepting machine decides the token type
	FSMachineListPtr candidates = this->accepting();
	FSMachinePtr winner = move(*candidates->begin());
	string machine_name = winner->get_name();
	result->set_token(get_token_by_name(machine_name));

	// lexemes are stored lower-cased
	string lexeme = this->contents();
	lexeme = to_lower(lexeme);
	result->set_lexeme(lexeme);

	this->clear_buffer();
	this->consume(result);
	return result;
}
void LoopBlock::generate_post() {
	if (this->type == RPTUNTLLOOP) {
		write_raw(this->cond_label + ":\n");
		VarType result = this->generate_expr(this->get_symbol_list());
		if (result != BOOLEAN) {
			report_error_lc("Semantic Error",
							"Conditional expression doesn't evaluate to boolean value.",
							(*this->get_symbol_list()->begin())->get_row(),
							(*this->get_symbol_list()->begin())->get_col());
		}
		write_raw("\nBRFS " + this->body_label);
		write_raw("BR " + this->exit_label + "\n");
		write_raw(this->exit_label + ":\n");
	} else if (this->type == WHILELOOP) {
		write_raw("BR " + this->cond_label);
		write_raw(this->exit_label + ":\n");
	} else if (this->type == FORLOOP) {
		// get the incrementer token
		TokenPtr incrementer = (*this->get_unprocessed())[3];
		if (incrementer->get_token() == MP_TO) {
			// generate an incrementer
			AssignmentBlockPtr inc = AssignmentBlockPtr(new AssignmentBlock(false));
			inc->set_analyzer(this->get_analyzer());
			inc->catch_token((*this->get_unprocessed())[0]);
			inc->catch_token((*this->get_unprocessed())[0]);
			inc->catch_token(TokenPtr(new Token(MP_PLUS, "+", -1, -1)));
			inc->catch_token(TokenPtr(new Token(MP_INT_LITERAL, "1", -1, -1)));
			inc->preprocess();
			inc->generate_pre();
			inc->generate_post();
		} else if (incrementer->get_token() == MP_DOWNTO) {
			// generate a decrementer
			AssignmentBlockPtr dec = AssignmentBlockPtr(new AssignmentBlock(false));
			dec->set_analyzer(this->get_analyzer());
			dec->catch_token((*this->get_unprocessed())[0]);
			dec->catch_token((*this->get_unprocessed())[0]);
			dec->catch_token(TokenPtr(new Token(MP_MINUS, "-", -1, -1)));
			dec->catch_token(TokenPtr(new Token(MP_INT_LITERAL, "1", -1, -1)));
			dec->preprocess();
			dec->generate_pre();
			dec->generate_post();
		}
		write_raw("BR " + this->cond_label + "\n");
		write_raw(this->exit_label + ":\n");
	}
}
Beispiel #4
0
bool Document::read(std::istream& in) {
	if (mProcessed) return false;

	token::Container *tokens=dynamic_cast<token::Container*>(mTokenContainer.get());
	assert(tokens!=0);

	std::string line;
	TokenGroup tgt;
	while (_getline(in, line)) {
		if (isBlankLine(line)) {
			tgt.push_back(TokenPtr(new token::BlankLine(line)));
		} else {
			tgt.push_back(TokenPtr(new token::RawText(line)));
		}
	}
	tokens->appendSubtokens(tgt);

	return true;
}
Beispiel #5
0
            void Formatter::init(const char* fmt)
                {
                    format_ = fmt;

                    boost::regex e("\\$\\{([a-z|A-Z]+)\\}");
                    boost::match_results<std::string::const_iterator> results;
                    std::string::const_iterator start, end;
                    start = format_.begin();
                    end = format_.end();
                    bool matched_once = false;
                    std::string last_suffix;
                    tokens_.clear();
                    while (boost::regex_search(start, end, results, e))
                    {
#if 0
                        for (size_t i = 0; i < results.size(); ++i)
                        {
                            std::cout << i << "|" << results.prefix() << "|" <<  results[i] << "|" << results.suffix() << std::endl;
                        }
#endif

                        std::string token = results[1];
                        last_suffix = results.suffix();
                        tokens_.push_back(TokenPtr(new FixedToken(results.prefix())));
                        tokens_.push_back(createTokenFromType(token));

                        start = results[0].second;
                        matched_once = true;
                    }

                    if (matched_once)
                    {
                        tokens_.push_back(TokenPtr(new FixedToken(last_suffix)));
                    }
                    else
                    {
                        tokens_.push_back(TokenPtr(new FixedToken(format_)));
                    }
                }
void LoopBlock::generate_pre() {
	if (this->type == RPTUNTLLOOP) {
		write_raw(this->body_label + ":\n");
	} else if (this->type == WHILELOOP) {
		write_raw(this->cond_label + ":\n");
		VarType result = this->generate_expr(this->get_symbol_list());
		if (result != BOOLEAN) {
			report_error_lc("Semantic Error",
							"Conditional expression doesn't evaluate to boolean value.",
							(*this->get_symbol_list()->begin())->get_row(),
							(*this->get_symbol_list()->begin())->get_col());
		}
		write_raw("\nBRTS " + this->body_label);
		write_raw("BR " + this->exit_label);
		write_raw(this->body_label + ":\n");
	} else if (this->type == FORLOOP) {
		// parse the assignment
		// process the assignment
		AssignmentBlockPtr assignment = AssignmentBlockPtr(new AssignmentBlock(false));
		assignment->set_analyzer(this->get_analyzer());
		for (auto i = 0; i < 3; i++) {
			assignment->catch_token((*this->get_unprocessed())[i]);
		}
		// generate its code
		assignment->preprocess();
		assignment->generate_pre();
		assignment->generate_post();
		// generate the condition label
		write_raw(this->cond_label + ":\n");
		// process the ordinal expression
		AssignmentBlockPtr ordinal_expr = AssignmentBlockPtr(new AssignmentBlock(true));
		ordinal_expr->set_analyzer(this->get_analyzer());
		for (unsigned int i = 4; i < this->get_unprocessed()->size(); i++) {
			ordinal_expr->catch_token((*this->get_unprocessed())[i]);
		}
		// get the comparison components for the ordinal expr
		TokenPtr incrementer = (*this->get_unprocessed())[3];
		if (incrementer->get_token() == MP_TO) {
			ordinal_expr->catch_token(TokenPtr(new Token(MP_EQUALS, "=", -1, -1)));
		} else if (incrementer->get_token() == MP_DOWNTO) {
			ordinal_expr->catch_token(TokenPtr(new Token(MP_EQUALS, "=", -1, -1)));
		}
		ordinal_expr->catch_token((*this->get_unprocessed())[0]);
		if (incrementer->get_token() == MP_TO) {
			ordinal_expr->catch_token(TokenPtr(new Token (MP_MINUS, "-", -1, -1)));
			ordinal_expr->catch_token(TokenPtr(new Token (MP_INT_LITERAL, "1", -1, -1)));
		} else if (incrementer->get_token() == MP_DOWNTO) {
			ordinal_expr->catch_token(TokenPtr(new Token (MP_PLUS, "+", -1, -1)));
			ordinal_expr->catch_token(TokenPtr(new Token (MP_INT_LITERAL, "1", -1, -1)));
		}
		// generate its code
		ordinal_expr->preprocess();
		ordinal_expr->generate_pre();
		write_raw("\nBRFS " + this->body_label);
		write_raw("BR " + this->exit_label + "\n");
		write_raw(this->body_label + ":\n");
	}
}
Beispiel #7
0
            /// Maps a placeholder name to a newly allocated token object.
            ///
            /// The names "severity", "message", "time", "thread", "file",
            /// "line", "function" and "streamname" each select their dedicated
            /// token class. Any other name yields a FixedMapToken built from
            /// the name and extra_fixed_tokens_.
            ///
            /// @param type  placeholder name (compared case-sensitively)
            /// @return the token created for that name
            TokenPtr Formatter::createTokenFromType(const std::string& type)
                {
                    if (type == "severity")
                        return TokenPtr(new SeverityToken());
                    if (type == "message")
                        return TokenPtr(new MessageToken());
                    if (type == "time")
                        return TokenPtr(new TimeToken());
                    if (type == "thread")
                        return TokenPtr(new ThreadToken());
                    if (type == "file")
                        return TokenPtr(new FileToken());
                    if (type == "line")
                        return TokenPtr(new LineToken());
                    if (type == "function")
                        return TokenPtr(new FunctionToken());
                    if (type == "streamname")
                        return TokenPtr(new StreamNameToken());

                    // Unknown names are resolved through the extra fixed tokens.
                    return TokenPtr(new FixedMapToken(type, extra_fixed_tokens_));
                }
// Scans and returns the next token from the input.
//
// Leading whitespace is skipped first. If the input is then at the null
// character, an MP_EOF token with lexeme "EOF" and the current line/column
// is returned. Otherwise one character is cached, the input is advanced and
// the buffer is run; depending on whether any machine accepts at that point
// the automata are reset and scanning continues in scan_infinite() (none
// accept) or scan_finite() (some accept).
TokenPtr Scanner::scan_one() {
	this->skip_whitespace();

	// end of input: hand back a dedicated EOF token
	if (this->get_char() == '\0') {
		TokenPtr eof_token(new Token());
		eof_token->set_token(MP_EOF);
		eof_token->set_lexeme("EOF");
		eof_token->set_line(this->line_number);
		eof_token->set_column(this->col_number);
		return eof_token;
	}

	// buffer one character, advance the input and run the buffer
	this->cache();
	this->forward();
	this->run_buffer();

	// capture the acceptance state before the automata are reset
	const bool nothing_accepts = this->none_accept();
	this->reset_all_auto();

	if (nothing_accepts) {
		// keep reading until something accepts or the input ends
		return this->scan_infinite();
	}
	// keep reading until nothing accepts any longer
	return this->scan_finite();
}
Beispiel #9
0
/// Groups consecutive text lines of a container into paragraphs.
///
/// Nested containers are processed first (recursively). Then the container's
/// own sub-tokens are walked: the text of tokens that have text, can contain
/// markup and do not inhibit paragraphs is accumulated, joined by single
/// spaces. A line whose text ends in two spaces contributes its text without
/// those two spaces, flushes the paragraph and is followed by a "br/"
/// HtmlTag token. Any other token flushes the pending paragraph and is
/// carried over unchanged. The rebuilt list replaces the container's
/// sub-tokens.
///
/// @param inTokenContainer  token that must be a token::Container
void Document::_processParagraphLines(TokenPtr inTokenContainer) {
	token::Container *container=dynamic_cast<token::Container*>(inTokenContainer.get());
	assert(container!=0);

	bool noPara=container->inhibitParagraphs();

	// Depth first: handle nested containers before this level.
	TokenGroup::const_iterator child=container->subTokens().begin();
	const TokenGroup::const_iterator childEnd=container->subTokens().end();
	while (child!=childEnd) {
		if ((*child)->isContainer()) _processParagraphLines(*child);
		++child;
	}

	TokenGroup processed;
	std::string paragraphText;
	TokenGroup paragraphTokens;
	for (TokenGroup::const_iterator it=container->subTokens().begin(),
		itEnd=container->subTokens().end(); it!=itEnd; ++it)
	{
		const bool isParagraphLine=(*it)->text() && (*it)->canContainMarkup() &&
			!(*it)->inhibitParagraphs();
		if (!isParagraphLine) {
			// A non-paragraph token ends whatever paragraph was pending.
			flushParagraph(paragraphText, paragraphTokens, processed, noPara);
			processed.push_back(*it);
			continue;
		}

		static const boost::regex cExpression("^(.*)  $");
		if (!paragraphText.empty()) paragraphText+=" ";

		boost::smatch m;
		if (!boost::regex_match(*(*it)->text(), m, cExpression)) {
			paragraphText += *(*it)->text();
			continue;
		}

		// Two trailing spaces: close the paragraph and add a line break.
		paragraphText += m[1];
		flushParagraph(paragraphText, paragraphTokens, processed, noPara);
		processed.push_back(TokenPtr(new markdown::token::HtmlTag("br/")));
	}

	// Flush the paragraph still pending at the end of the list.
	flushParagraph(paragraphText, paragraphTokens, processed, noPara);

	container->swapSubtokens(processed);
}
// Allocates a Token from its type, lexeme and source position and returns
// it wrapped in a TokenPtr.
TokenPtr create_token(TokType token, string lexeme,
					  unsigned int line, unsigned int column) {
	TokenPtr created(new Token(token, lexeme, line, column));
	return created;
}
// Completes a token in "infinite" mode, which scan_one selects when no
// machine accepted the first buffered character.
//
// The token is stamped with the current line and the current column minus
// one. Characters are cached one at a time, re-running the buffer each time,
// until some machine accepts or the input yields the null character.
//
// Success: once a machine accepts, scanning continues until nothing accepts
// and nothing is alive, the chain is shaved, and the first machine returned
// by accepting() names the token type. The lower-cased buffer contents
// become the lexeme, the buffer is cleared and the token is passed to
// consume() and returned.
//
// Failure: shave_all() is called once per extra character read, and the
// first buffered character selects the error kind: a single quote gives
// MP_RUN_STRING, an opening brace gives MP_RUN_COMMENT (both followed by
// goto_next('\n')), anything else, including an empty buffer, gives
// MP_ERROR. The error is reported through report_error_lc and the error
// token is passed to consume() and returned.
TokenPtr Scanner::scan_infinite() {
	// a token pointer to see what we can get
	TokenPtr new_token = TokenPtr(new Token());
	unsigned long start_line = this->line_number;
	unsigned long start_column = this->col_number;
	new_token->set_line(start_line);
	new_token->set_column(--start_column);
	// number of extra characters cached while searching
	unsigned int moves = 0;
	// scan until the null character
	while (this->get_char() != '\0') {
		// run the buffer
		this->run_buffer();
		// check if a machine accepted
		if (some_accept()) {
			// keep scanning until none accept and none are alive
			while(!none_accept() || this->not_dead()->size() > 0) {
				this->run_chain();
			}
			// shave chain
			this->shave_chain();
			// nothing accepts after shaving: treat as an error
			if (this->none_accept()) {
				break;
			}
			// at least one machine accepts here; take the first one
			FSMachinePtr accepting = move(*this->accepting()->begin());
			string token_name = accepting->get_name();
			// create a token
			TokType this_tok = get_token_by_name(token_name);
			new_token->set_token(this_tok);
			string contents = this->contents();
			contents = to_lower(contents);
			new_token->set_lexeme(contents);
			// clear buffer
			this->clear_buffer();
			// return new token
			this->consume(new_token);
			return new_token;
		} else {
			this->reset_all_auto();
			// cache the current character under the file pointer
			this->cache();
			// move the file pointer forward 1
			this->forward();
		}
		moves++;
	}
	// error condition
	// unwind the file pointer back to the first item
	for (unsigned int i = moves; i > 0; i--) {
		this->shave_all();
	}
	// only look at the first buffered character when there is one;
	// dereferencing begin() of an empty buffer is undefined behaviour
	const bool buffer_has_data = this->scan_buf->begin() != this->scan_buf->end();
	if (buffer_has_data && *this->scan_buf->begin() == '\'') {
		new_token->set_token(MP_RUN_STRING);
		new_token->set_error("There is a run on string here");
		this->goto_next('\n');
	} else if (buffer_has_data && *this->scan_buf->begin() == '{') {
		new_token->set_token(MP_RUN_COMMENT);
		new_token->set_error("There is a run on comment here");
		this->goto_next('\n');
	} else {
		new_token->set_token(MP_ERROR);
		new_token->set_error("There is an unrecognized token here");
	}
	// create the error token
	string contents = this->contents();
	contents = to_lower(contents);
	new_token->set_lexeme(contents);
	// report the error at the position where this token started
	report_error_lc("Scan Error", new_token->get_error() + contents,
					start_line, start_column);
	// clear buffer
	this->clear_buffer();
	// return error token
	this->consume(new_token);
	return new_token;
}