Пример #1
0
/*
 * Handles a '#' character found by the scanner.
 *
 * Any pending buffer content is first flushed as a token via scanPrevSymbol().
 * If a regex is being scanned, or the last token is a RegExp / RegReplaceTo,
 * the '#' is returned as a RegDelim token. Otherwise everything up to (but not
 * including) the next '\n' is consumed as a comment: in verbose mode a Comment
 * token is added to the token manager, followed by whatever scanWhiteSpace()
 * yields, and the current line number is advanced.
 *
 * Returns the RegDelim token in the delimiter case, NULL in the comment case.
 */
Token *Scanner::scanSingleLineComment(LexContext *ctx)
{
	ScriptManager *smgr = ctx->smgr;
	TokenManager *tmgr = ctx->tmgr;

	// Flush whatever was collected before the '#'.
	if (ctx->existsBuffer()) {
		tmgr->add(scanPrevSymbol(ctx, '#'));
	}

	Token *last_tk = tmgr->lastToken();
	TokenType::Type last_type = TokenType::Undefined;
	if (last_tk) {
		last_type = last_tk->info.type;
	}

	const bool is_regex_delim = isRegexStarted ||
		last_type == TokenType::RegExp ||
		last_type == TokenType::RegReplaceTo;

	if (is_regex_delim) {
		// '#' is used as a regex delimiter here, not as a comment starter.
		ctx->writeBuffer('#');
		Token *delim = tmgr->new_Token(ctx->buffer(), ctx->finfo);
		delim->info = tmgr->getTokenInfo(TokenType::RegDelim);
		ctx->clearBuffer();
		return delim;
	}

	// Consume the comment body; it is only recorded in verbose mode.
	while (smgr->currentChar() != '\n' && !smgr->end()) {
		if (verbose) {
			ctx->writeBuffer(smgr->currentChar());
		}
		smgr->next();
	}

	if (verbose) {
		Token *comment = tmgr->new_Token(ctx->buffer(), ctx->finfo);
		comment->info = tmgr->getTokenInfo(TokenType::Comment);
		ctx->clearBuffer();
		tmgr->add(comment);
	}

	tmgr->add(scanWhiteSpace(ctx));
	ctx->finfo.start_line_num++;
	return NULL;
}
Пример #2
0
/*
 * Scans the symbol at the current position and returns its token.
 *
 * Pending buffer content is flushed first through scanPrevSymbol(). Outside of
 * a regex, three-character and then two-character operators are tried before
 * falling back to the single-character symbol; inside a regex only the
 * single-character scan is used.
 */
Token *Scanner::scanSymbol(LexContext *ctx)
{
	ScriptManager *smgr = ctx->smgr;

	// Read the current character and both look-ahead characters up front,
	// before the pending buffer is flushed.
	const char cur = smgr->currentChar();
	const char ahead1 = smgr->nextChar();
	const char ahead2 = smgr->afterNextChar();

	if (ctx->existsBuffer()) {
		ctx->tmgr->add(scanPrevSymbol(ctx, cur));
	}

	if (!isRegexStarted) {
		// Longest operator wins.
		if (Token *triple = scanTripleCharacterOperator(ctx, cur, ahead1, ahead2)) {
			return triple;
		}
		if (Token *dbl = scanDoubleCharacterOperator(ctx, cur, ahead1)) {
			return dbl;
		}
	}
	return scanCurSymbol(ctx, cur);
}
Пример #3
0
/*
 * Collects whitespace at the current position into a WhiteSpace token.
 *
 * - After a Comment or Pod token a synthetic "\n" is buffered (those tokens do
 *   not carry their trailing newline) and the line number of that previous
 *   token is reused.
 * - Otherwise a run of ' ' / '\t' is gathered into one token, or a single
 *   '\n' is taken when it is the first character seen. For any other
 *   character the script position is stepped back and scanning stops.
 *
 * Returns the new WhiteSpace token in verbose mode; in non-verbose mode the
 * buffer is discarded and NULL is returned.
 */
Token *Scanner::scanWhiteSpace(LexContext *ctx)
{
	TokenManager *tmgr = ctx->tmgr;
	Token *prev_tk = tmgr->lastToken();
	TokenType::Type prev_type = (prev_tk) ? prev_tk->info.type : TokenType::Undefined;

	if (prev_type == TokenType::Comment || prev_type == TokenType::Pod) {
		// Add WhiteSpace token (data: '\n') for Comment or Pod token
		// Because the newline character is not on the trailing of those tokens
		ctx->writeBuffer('\n');
		ctx->finfo.start_line_num = prev_tk->finfo.start_line_num;
	} else {
		bool does_ws_continue = false;
		ScriptManager *smgr = ctx->smgr;
		for (; !smgr->end(); smgr->next()) {
			char ch = smgr->currentChar();
			if (ch == ' ' || ch == '\t') {
				// For normal whitespace.
				// It collects into one token when a whitespace continues.
				ctx->writeBuffer(ch);
				does_ws_continue = true;
				continue;
			} else if (!does_ws_continue && ch == '\n') {
				// For newline character.
				// It should be on the same line to before token.
				ctx->writeBuffer(ch);
				// prev_tk is NULL when no token has been produced yet (e.g. a
				// newline before the first token); in that case there is no
				// line number to inherit, so keep the current one instead of
				// dereferencing a null pointer.
				if (verbose && prev_tk && prev_type != TokenType::HereDocumentEnd) {
					ctx->finfo.start_line_num = prev_tk->finfo.start_line_num;
				}
				break;
			}
			// Not whitespace handled here: step back and stop scanning.
			smgr->back();
			break;
		}
	}

	if (!verbose) {
		// Whitespace tokens are only emitted in verbose mode.
		ctx->clearBuffer();
		return NULL;
	}

	Token *token = tmgr->new_Token(ctx->buffer(), ctx->finfo);
	token->info = tmgr->getTokenInfo(TokenType::WhiteSpace);
	ctx->clearBuffer();
	return token;
}
Пример #4
0
/*
 * Per-character hook that handles the regions scanned "raw" rather than
 * tokenized normally: POD blocks, __END__ / __DATA__ trailers, format bodies,
 * regex bodies, prototypes and here-documents.
 *
 * Depending on the active state flags (commentFlag, skipFlag, isFormatStarted,
 * isRegexStarted, isPrototypeStarted, hereDocumentFlag) the current character
 * is either appended to ctx's buffer (return true) or recognized as the end of
 * the region, in which case the buffered text is emitted as one or more tokens
 * and the state is reset (return false, except after a regex middle delimiter,
 * which returns true).
 *
 * ctx->progress is set in several branches; presumably it tells the caller how
 * many characters to advance past the recognized terminator — confirm at the
 * call site.
 */
bool Scanner::isSkip(LexContext *ctx)
{
	using namespace TokenType;
	// Default answer: keep skipping while inside a POD block.
	bool ret = commentFlag;

	ScriptManager *smgr = ctx->smgr;
	TokenManager *tmgr = ctx->tmgr;
	char *script = smgr->raw_script;
	size_t idx = smgr->idx;
	char prev_ch = smgr->previousChar();
	char cur_ch = smgr->currentChar();

	// A '=' right after a newline and followed by an alphanumeric character
	// is treated as a POD command.
	if (prev_ch == '\n' && cur_ch == '=' &&
		isalnum(smgr->nextChar())) {
		// NOTE(review): compare(1, 3, "cut") looks like "3 chars starting one
		// past the current position" — confirm against ScriptManager::compare.
		if (smgr->compare(1, 3, "cut")) {
			// "=cut" ends the POD block.
			DBG_PL("commentFlag => OFF");
			ctx->progress = 4;
			commentFlag = false;
			ret = false;
			if (verbose) {
				// In verbose mode the buffered text plus "=cut" is emitted as a
				// single Pod token, followed by a WhiteSpace token.
				ctx->finfo.start_line_num++;
				ctx->writeBuffer("=cut");
				Token *tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
				tk->info = tmgr->getTokenInfo(TokenType::Pod);
				ctx->clearBuffer();
				tmgr->add(tk);
				tmgr->add(scanWhiteSpace(ctx));
			}
			// Incremented in both modes (so twice in total when verbose).
			ctx->finfo.start_line_num++;
		} else {
			// Any other POD command starts (or continues) a POD block.
			DBG_PL("commentFlag => ON");
			commentFlag = true;
			ret = true;
		}
	}
	if (commentFlag) {
		// Inside POD: the text is only recorded in verbose mode.
		if (verbose) ctx->writeBuffer(cur_ch);
		return ret;
	}
	// __END__ / __DATA__ at the start of a line (outside a here-document):
	// progress is set to the remaining script length minus one.
	if (prev_ch == '\n' && cur_ch == '_' && !hereDocumentFlag &&
			   smgr->compare(0, 7, "__END__")) {
		int progress_to_end = ctx->script_size - idx - 1;
		ctx->progress = progress_to_end;
		ret = false;
	} else if (prev_ch == '\n' && cur_ch == '_' && !hereDocumentFlag &&
			   smgr->compare(0, 8, "__DATA__")) {
		int progress_to_end = ctx->script_size - idx - 1;
		ctx->progress = progress_to_end;
		ret = false;
	}
	// None of the raw-scanning states below is active.
	if (!skipFlag) return ret;

	if (isFormatStarted) {
		if (prev_ch == '\n' && cur_ch == '.') {
			// A '.' at the start of a line terminates the format body:
			// emit the buffered body as Format, then "." as FormatEnd.
			Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
			tk->info = tmgr->getTokenInfo(Format);
			ctx->clearBuffer();
			tmgr->add(tk);

			tk = ctx->tmgr->new_Token((char *)".", ctx->finfo);
			tk->info = tmgr->getTokenInfo(TokenType::FormatEnd);
			tmgr->add(tk);

			ctx->progress = 1;
			isFormatStarted = false;
			skipFlag = false;
			ret = false;
		} else {
			// Still inside the format body.
			ctx->writeBuffer(script[idx]);
			ret = true;
		}
    } else if (isRegexStarted) {
		char before_prev_ch = smgr->beforePreviousChar();
		// Track bracket nesting unless the current character is escaped by a
		// single backslash (a backslash that is itself preceded by a backslash
		// does not count as an escape).
		if (prev_ch != '\\' || (prev_ch == '\\' && before_prev_ch == '\\')) {
			switch (cur_ch) {
			case '{': brace_count_inner_regex++;
				break;
			case '}': brace_count_inner_regex--;
				break;
			case '[': bracket_count_inner_regex++;
				break;
			case ']': bracket_count_inner_regex--;
				break;
			// Despite its name, cury_brace_count_inner_regex counts parentheses.
			case '(': cury_brace_count_inner_regex++;
				break;
			case ')': cury_brace_count_inner_regex--;
				break;
			default:
				break;
			}
		}
		if (prev_ch == '\\' && before_prev_ch != '\\') {
			// Escaped character: always part of the regex body.
			ctx->writeBuffer(cur_ch);
			ret = true;
		} else if (cur_ch != regex_delim && cur_ch != regex_middle_delim) {
			// Ordinary character: part of the regex body.
			ctx->writeBuffer(cur_ch);
			ret = true;
		} else if (cur_ch == regex_middle_delim) {
			// A closing-bracket middle delimiter only counts once the matching
			// nesting counter is back to zero; otherwise it is body text.
			if ((regex_middle_delim == '}' && brace_count_inner_regex != 0) ||
				(regex_middle_delim == ')' && cury_brace_count_inner_regex != 0) ||
				(regex_middle_delim == ']' && bracket_count_inner_regex != 0)) {
				ctx->writeBuffer(cur_ch);
				ret = true;
			} else {
				Token *tk = NULL;
				// Unless the middle delimiter is an opening bracket, the text
				// buffered so far is emitted as RegReplaceFrom.
				if (regex_middle_delim != '{' &&
					regex_middle_delim != '(' &&
					regex_middle_delim != '<' &&
					regex_middle_delim != '[') {
					tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
					tk->info = tmgr->getTokenInfo(RegReplaceFrom);
					ctx->clearBuffer();
					tmgr->add(tk);
				}
				// Emit the middle delimiter itself.
				ctx->writeBuffer(regex_middle_delim);
				tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
				tk->info = tmgr->getTokenInfo(RegMiddleDelim);
				ctx->clearBuffer();
				tmgr->add(tk);

				// After a closing bracket, expect the matching opening bracket
				// as the next middle delimiter; any other delimiter is cleared.
				switch (regex_middle_delim) {
				case '}':
					regex_middle_delim = '{';
					break;
				case ')':
					regex_middle_delim = '(';
					break;
				case '>':
					regex_middle_delim = '<';
					break;
				case ']':
					regex_middle_delim = '[';
					break;
				default:
					regex_middle_delim = '\0';
					break;
				}
				ret = true;
			}
		} else {
			// cur_ch == regex_delim: candidate for the final delimiter.
			if ((regex_delim == '}' && brace_count_inner_regex != 0) ||
				(regex_delim == ')' && cury_brace_count_inner_regex != 0) ||
				(regex_delim == ']' && bracket_count_inner_regex != 0)) {
				// Still nested inside brackets: treat as body text.
				ctx->writeBuffer(cur_ch);
				ret = true;
			} else {
				// End of the regex: the buffered text is the replacement part
				// if the last token was a middle delimiter, else the pattern.
				// NOTE(review): prev_tk is dereferenced without a NULL check;
				// assumes a token always precedes the regex body — confirm.
				Token *prev_tk = ctx->tmgr->lastToken();
				Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
				tk->info = (prev_tk->info.type == RegMiddleDelim) ? tmgr->getTokenInfo(RegReplaceTo) : tmgr->getTokenInfo(RegExp);
				ctx->clearBuffer();
				tmgr->add(tk);

				// Leave regex mode and reset the nesting counters.
				ret = false;
				isRegexStarted = false;
				skipFlag = false;
				regex_delim = 0;
				brace_count_inner_regex = 0;
				cury_brace_count_inner_regex = 0;
				bracket_count_inner_regex = 0;
			}
		}
	} else if (isPrototypeStarted) {
		if (script[idx] == ')') {
			// ')' closes the prototype: emit the buffered text as Prototype.
			Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
			tk->info = tmgr->getTokenInfo(Prototype);
			ctx->clearBuffer();
			tmgr->add(tk);

			isPrototypeStarted = false;
			skipFlag = false;
			ret = false;
		} else {
			ctx->writeBuffer(script[idx]);
			ret = true;
		}
	} else if (hereDocumentFlag) {
		size_t len = here_document_tag.size();
		// The terminator is only looked for at the start of a line, and only
		// when the tag ends strictly before the end of the script.
		if (smgr->previousChar() == '\n' && idx + len < ctx->script_size) {
			size_t i;
			// Count how many leading characters match the tag.
			for (i = 0; i < len && script[idx + i] == here_document_tag.at(i); i++) {}
			if (i == len) {
				// Full tag matched: emit the body as HereDocument and the tag
				// as HereDocumentEnd, then leave here-document mode.
				ctx->progress = len;
				// The line number is bumped before the tokens are created in
				// verbose mode, and after them otherwise.
				if (verbose) ctx->finfo.start_line_num++;
				Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
				tk->info = tmgr->getTokenInfo(TokenType::HereDocument);
				ctx->clearBuffer();
				tmgr->add(tk);

				tk = ctx->tmgr->new_Token((char *)here_document_tag_tk->_data, ctx->finfo);
				tk->info = tmgr->getTokenInfo(TokenType::HereDocumentEnd);
				tmgr->add(tk);
				if (!verbose) ctx->finfo.start_line_num++;
				here_document_tag = "";
				hereDocumentFlag = false;
				skipFlag = false;
				ret = false;
			} else {
				// Line does not start with the tag: body text.
				ctx->writeBuffer(script[idx]);
				ret = true;
			}
		} else {
			ctx->writeBuffer(script[idx]);
			ret = true;
		}
	}
	return ret;
}
Пример #5
0
/*
 * Scans a quoted literal delimited by `quote` (one of ', " or `).
 *
 * Starting after the opening quote, characters are buffered until an
 * unescaped closing quote or the end of the script is reached; each newline
 * inside the literal advances the current line number. The buffered text
 * becomes a RawString / String / ExecString token.
 *
 * If the preceding token is "<<" (or "<<" followed by "\"), the literal is a
 * here-document tag: it is remembered in here_document_tag /
 * here_document_tag_tk and the token type is switched to the matching
 * HereDocument*Tag type. An empty tag is replaced by "\n".
 */
Token *Scanner::scanQuote(LexContext *ctx, char quote)
{
	TokenManager *tmgr = ctx->tmgr;
	ScriptManager *smgr = ctx->smgr;

	// Step past the opening quote, then gather the literal's contents.
	smgr->next();
	while (!smgr->end()) {
		char c = smgr->currentChar();
		if (c == '\n') {
			ctx->writeBuffer(c);
			ctx->finfo.start_line_num++;
		} else {
			if (c == quote) {
				char before = smgr->previousChar();
				char before2 = smgr->beforePreviousChar();
				// Escaped only when preceded by a backslash that is not
				// itself preceded by another backslash.
				bool escaped = (before == '\\') && (before2 != '\\');
				if (!escaped) break;
			}
			ctx->writeBuffer(c);
		}
		smgr->next();
	}

	// Look at the two tokens in front of the literal to detect "<<" / "<<\".
	Token *last_tk = tmgr->lastToken();
	int before_last_idx = tmgr->size() - 2;
	string last_data = "";
	if (last_tk) {
		last_data = string(last_tk->_data);
	}
	string before_last_data = "";
	if (before_last_idx >= 0) {
		before_last_data = string(tmgr->beforeLastToken()->_data);
	}

	char *literal = ctx->buffer();
	Token *ret = tmgr->new_Token(literal, ctx->finfo);
	if (quote == '\'') {
		ret->info = tmgr->getTokenInfo(TokenType::RawString);
	} else if (quote == '"') {
		ret->info = tmgr->getTokenInfo(TokenType::String);
	} else if (quote == '`') {
		ret->info = tmgr->getTokenInfo(TokenType::ExecString);
	}
	ctx->clearBuffer();

	bool is_here_document = false;
	if (last_data == "<<") {
		is_here_document = true;
	} else if (before_last_data == "<<" && last_data == "\\") {
		is_here_document = true;
	}
	if (!is_here_document) {
		return ret;
	}

	/* The literal is a here-document tag */
	here_document_tag = string(ret->_data);
	here_document_tag_tk = ret;
	if (here_document_tag.empty()) {
		// An empty tag is stored as a bare newline.
		here_document_tag = "\n";
		ret->_data = "\n";
	}
	if (quote == '\'') {
		ret->info = tmgr->getTokenInfo(TokenType::HereDocumentRawTag);
	} else if (quote == '"') {
		ret->info = tmgr->getTokenInfo(TokenType::HereDocumentTag);
	} else if (quote == '`') {
		ret->info = tmgr->getTokenInfo(TokenType::HereDocumentExecTag);
	}
	return ret;
}