/*
 * Handles a '#' character.
 *
 * While a regex is being scanned, or directly after a RegExp / RegReplaceTo
 * token, the '#' is returned as a RegDelim token. Otherwise the rest of the
 * line is treated as a comment: in verbose mode a Comment token is added to
 * the token manager, in non-verbose mode the text is discarded. The result of
 * scanWhiteSpace() is then added and the current line number is incremented.
 *
 * Returns the RegDelim token for the delimiter case, NULL for the comment case.
 */
Token *Scanner::scanSingleLineComment(LexContext *ctx)
{
    ScriptManager *script = ctx->smgr;
    TokenManager *tokens = ctx->tmgr;

    // Flush whatever is pending in the buffer before dealing with '#'.
    if (ctx->existsBuffer()) {
        tokens->add(scanPrevSymbol(ctx, '#'));
    }

    Token *last = tokens->lastToken();
    const TokenType::Type last_type = last ? last->info.type : TokenType::Undefined;
    const bool hash_is_delimiter = isRegexStarted
        || last_type == TokenType::RegExp
        || last_type == TokenType::RegReplaceTo;

    if (hash_is_delimiter) {
        ctx->writeBuffer('#');
        Token *delim = tokens->new_Token(ctx->buffer(), ctx->finfo);
        delim->info = tokens->getTokenInfo(TokenType::RegDelim);
        ctx->clearBuffer();
        return delim;
    }

    // Walk to the end of the line; only verbose mode keeps the comment text.
    while (script->currentChar() != '\n' && !script->end()) {
        if (verbose) {
            ctx->writeBuffer(script->currentChar());
        }
        script->next();
    }
    if (verbose) {
        Token *comment = tokens->new_Token(ctx->buffer(), ctx->finfo);
        comment->info = tokens->getTokenInfo(TokenType::Comment);
        ctx->clearBuffer();
        tokens->add(comment);
    }

    tokens->add(scanWhiteSpace(ctx));
    ctx->finfo.start_line_num++;
    return NULL;
}
/*
 * Scans the symbol at the current position.
 *
 * Any pending buffer content is flushed first via scanPrevSymbol(). Outside of
 * a regex the three-character and then the two-character operator scanners
 * are tried; if neither yields a token (or a regex is in progress) the
 * current character is scanned on its own with scanCurSymbol().
 */
Token *Scanner::scanSymbol(LexContext *ctx)
{
    ScriptManager *script = ctx->smgr;
    const char first = script->currentChar();
    const char second = script->nextChar();
    const char third = script->afterNextChar();

    if (ctx->existsBuffer()) {
        ctx->tmgr->add(scanPrevSymbol(ctx, first));
    }

    if (!isRegexStarted) {
        // Longest operator wins: try three characters, then two.
        if (Token *triple = scanTripleCharacterOperator(ctx, first, second, third)) {
            return triple;
        }
        if (Token *pair = scanDoubleCharacterOperator(ctx, first, second)) {
            return pair;
        }
    }
    return scanCurSymbol(ctx, first);
}
/*
 * Builds a WhiteSpace token starting at the current position.
 *
 * - If the last token is a Comment or Pod token, a "\n" is written to the
 *   buffer without reading the script, and the line number is taken from
 *   that token.
 * - Otherwise a run of spaces/tabs is collected, or a single '\n' if it is
 *   the first character seen. On any other character the script cursor is
 *   moved back by one and scanning stops.
 *
 * In non-verbose mode the buffer is discarded and NULL is returned;
 * in verbose mode a WhiteSpace token holding the buffer is returned.
 */
Token *Scanner::scanWhiteSpace(LexContext *ctx)
{
    TokenManager *tmgr = ctx->tmgr;
    Token *prev_tk = tmgr->lastToken();
    TokenType::Type prev_type = (prev_tk) ? prev_tk->info.type : TokenType::Undefined;

    if (prev_type == TokenType::Comment || prev_type == TokenType::Pod) {
        // Add WhiteSpace token (data: '\n') for Comment or Pod token,
        // because the newline character is not on the trailing of those tokens.
        // prev_tk is non-NULL here: prev_type would be Undefined otherwise.
        ctx->writeBuffer('\n');
        ctx->finfo.start_line_num = prev_tk->finfo.start_line_num;
    } else {
        bool does_ws_continue = false;
        ScriptManager *smgr = ctx->smgr;
        for (; !smgr->end(); smgr->next()) {
            char ch = smgr->currentChar();
            if (ch == ' ' || ch == '\t') {
                // For normal whitespace.
                // It collects into one token when a whitespace continues.
                ctx->writeBuffer(ch);
                does_ws_continue = true;
                continue;
            } else if (!does_ws_continue && ch == '\n') {
                // For newline character.
                // It should be on the same line as the token before it.
                ctx->writeBuffer(ch);
                // prev_tk is NULL when no token has been produced yet (e.g. a
                // leading newline); the line number is left untouched then
                // instead of dereferencing a null pointer.
                if (verbose && prev_tk && prev_type != TokenType::HereDocumentEnd) {
                    ctx->finfo.start_line_num = prev_tk->finfo.start_line_num;
                }
                break;
            }
            // Not part of this whitespace token: step back and stop.
            smgr->back();
            break;
        }
    }

    if (!verbose) {
        ctx->clearBuffer();
        return NULL;
    }
    Token *token = tmgr->new_Token(ctx->buffer(), ctx->finfo);
    token->info = tmgr->getTokenInfo(TokenType::WhiteSpace);
    ctx->clearBuffer();
    return token;
}
/*
 * Per-character handler for the regions that are not tokenized normally:
 * POD blocks, everything after __END__ / __DATA__, format bodies, regex
 * bodies, prototypes and here-documents.
 *
 * Returns true when the current character was handled here (inside POD,
 * appended to the buffer, or emitted as a regex middle delimiter), false
 * otherwise. ctx->progress is set when more than the current character was
 * recognised ("=cut", the rest of the script after __END__/__DATA__, the
 * format terminator, the here-document tag); presumably the caller advances
 * by that amount - confirm against the tokenize loop.
 */
bool Scanner::isSkip(LexContext *ctx)
{
    using namespace TokenType;
    bool ret = commentFlag;
    ScriptManager *smgr = ctx->smgr;
    TokenManager *tmgr = ctx->tmgr;
    char *script = smgr->raw_script;
    size_t idx = smgr->idx;
    char prev_ch = smgr->previousChar();
    char cur_ch = smgr->currentChar();

    // "=<alnum>" at the start of a line toggles POD mode.
    // The cast is required: passing a negative char (e.g. a non-ASCII byte
    // of the script) to isalnum() is undefined behaviour.
    if (prev_ch == '\n' && cur_ch == '=' &&
        isalnum(static_cast<unsigned char>(smgr->nextChar()))) {
        if (smgr->compare(1, 3, "cut")) {
            DBG_PL("commentFlag => OFF");
            ctx->progress = 4;
            commentFlag = false;
            ret = false;
            if (verbose) {
                ctx->finfo.start_line_num++;
                ctx->writeBuffer("=cut");
                Token *tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
                tk->info = tmgr->getTokenInfo(TokenType::Pod);
                ctx->clearBuffer();
                tmgr->add(tk);
                tmgr->add(scanWhiteSpace(ctx));
            }
            ctx->finfo.start_line_num++;
        } else {
            DBG_PL("commentFlag => ON");
            commentFlag = true;
            ret = true;
        }
    }
    if (commentFlag) {
        // Inside POD: the text is only buffered in verbose mode.
        if (verbose) ctx->writeBuffer(cur_ch);
        return ret;
    }

    // __END__ / __DATA__ at the start of a line (outside a here-document):
    // progress is set to the distance to the end of the script.
    if (prev_ch == '\n' && cur_ch == '_' && !hereDocumentFlag &&
        (smgr->compare(0, 7, "__END__") || smgr->compare(0, 8, "__DATA__"))) {
        int progress_to_end = ctx->script_size - idx - 1;
        ctx->progress = progress_to_end;
        ret = false;
    }

    if (!skipFlag) return ret;

    if (isFormatStarted) {
        if (prev_ch == '\n' && cur_ch == '.') {
            // A '.' at the start of a line ends the format body.
            Token *tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
            tk->info = tmgr->getTokenInfo(Format);
            ctx->clearBuffer();
            tmgr->add(tk);
            tk = tmgr->new_Token((char *)".", ctx->finfo);
            tk->info = tmgr->getTokenInfo(TokenType::FormatEnd);
            tmgr->add(tk);
            ctx->progress = 1;
            isFormatStarted = false;
            skipFlag = false;
            ret = false;
        } else {
            ctx->writeBuffer(script[idx]);
            ret = true;
        }
    } else if (isRegexStarted) {
        char before_prev_ch = smgr->beforePreviousChar();
        // The current character counts as escaped when it follows a single
        // backslash that is not itself preceded by a backslash.
        const bool escaped = (prev_ch == '\\' && before_prev_ch != '\\');

        if (!escaped) {
            // Track bracket nesting so that a closing-bracket delimiter is
            // only honoured at nesting depth zero.
            switch (cur_ch) {
            case '{': brace_count_inner_regex++; break;
            case '}': brace_count_inner_regex--; break;
            case '[': bracket_count_inner_regex++; break;
            case ']': bracket_count_inner_regex--; break;
            case '(': cury_brace_count_inner_regex++; break;
            case ')': cury_brace_count_inner_regex--; break;
            default: break;
            }
        }

        if (escaped) {
            ctx->writeBuffer(cur_ch);
            ret = true;
        } else if (cur_ch != regex_delim && cur_ch != regex_middle_delim) {
            ctx->writeBuffer(cur_ch);
            ret = true;
        } else if (cur_ch == regex_middle_delim) {
            if ((regex_middle_delim == '}' && brace_count_inner_regex != 0) ||
                (regex_middle_delim == ')' && cury_brace_count_inner_regex != 0) ||
                (regex_middle_delim == ']' && bracket_count_inner_regex != 0)) {
                // Closing bracket that belongs to a nested pair, not the delimiter.
                ctx->writeBuffer(cur_ch);
                ret = true;
            } else {
                Token *tk = NULL;
                // When the middle delimiter is an opening bracket, no
                // RegReplaceFrom token is emitted from the buffer here.
                if (regex_middle_delim != '{' && regex_middle_delim != '(' &&
                    regex_middle_delim != '<' && regex_middle_delim != '[') {
                    tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
                    tk->info = tmgr->getTokenInfo(RegReplaceFrom);
                    ctx->clearBuffer();
                    tmgr->add(tk);
                }
                ctx->writeBuffer(regex_middle_delim);
                tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
                tk->info = tmgr->getTokenInfo(RegMiddleDelim);
                ctx->clearBuffer();
                tmgr->add(tk);
                // After a closing bracket the matching opening bracket becomes
                // the next middle delimiter; otherwise there is none left.
                switch (regex_middle_delim) {
                case '}': regex_middle_delim = '{'; break;
                case ')': regex_middle_delim = '('; break;
                case '>': regex_middle_delim = '<'; break;
                case ']': regex_middle_delim = '['; break;
                default: regex_middle_delim = '\0'; break;
                }
                ret = true;
            }
        } else {
            // cur_ch == regex_delim
            if ((regex_delim == '}' && brace_count_inner_regex != 0) ||
                (regex_delim == ')' && cury_brace_count_inner_regex != 0) ||
                (regex_delim == ']' && bracket_count_inner_regex != 0)) {
                ctx->writeBuffer(cur_ch);
                ret = true;
            } else {
                // End of the regex body: the buffer is the replacement part
                // when it follows a middle delimiter, the pattern otherwise.
                // A missing previous token is treated as "pattern" rather
                // than dereferenced.
                Token *prev_tk = tmgr->lastToken();
                Token *tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
                tk->info = (prev_tk && prev_tk->info.type == RegMiddleDelim)
                    ? tmgr->getTokenInfo(RegReplaceTo)
                    : tmgr->getTokenInfo(RegExp);
                ctx->clearBuffer();
                tmgr->add(tk);
                ret = false;
                isRegexStarted = false;
                skipFlag = false;
                regex_delim = 0;
                brace_count_inner_regex = 0;
                cury_brace_count_inner_regex = 0;
                bracket_count_inner_regex = 0;
            }
        }
    } else if (isPrototypeStarted) {
        if (script[idx] == ')') {
            Token *tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
            tk->info = tmgr->getTokenInfo(Prototype);
            ctx->clearBuffer();
            tmgr->add(tk);
            isPrototypeStarted = false;
            skipFlag = false;
            ret = false;
        } else {
            ctx->writeBuffer(script[idx]);
            ret = true;
        }
    } else if (hereDocumentFlag) {
        size_t len = here_document_tag.size();
        if (smgr->previousChar() == '\n' && idx + len < ctx->script_size) {
            // Compare the start of this line against the here-document tag.
            size_t i;
            for (i = 0; i < len && script[idx + i] == here_document_tag.at(i); i++) {}
            if (i == len) {
                ctx->progress = len;
                if (verbose) ctx->finfo.start_line_num++;
                Token *tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
                tk->info = tmgr->getTokenInfo(TokenType::HereDocument);
                ctx->clearBuffer();
                tmgr->add(tk);
                tk = tmgr->new_Token((char *)here_document_tag_tk->_data, ctx->finfo);
                tk->info = tmgr->getTokenInfo(TokenType::HereDocumentEnd);
                tmgr->add(tk);
                if (!verbose) ctx->finfo.start_line_num++;
                here_document_tag = "";
                hereDocumentFlag = false;
                skipFlag = false;
                ret = false;
            } else {
                ctx->writeBuffer(script[idx]);
                ret = true;
            }
        } else {
            ctx->writeBuffer(script[idx]);
            ret = true;
        }
    }
    return ret;
}
/*
 * Scans a quoted string whose opening quote is at the current position.
 *
 * ctx   - lexing context; its buffer receives the string body (quotes excluded)
 * quote - the quote character: '\'' (RawString), '"' (String) or '`' (ExecString)
 *
 * The body runs up to the first unescaped occurrence of `quote`, or to the
 * end of the script. Each newline inside the string increments the current
 * line number.
 *
 * If the preceding token is "<<" (or "<<" followed by "\"), the string is a
 * here-document tag: it is remembered in here_document_tag /
 * here_document_tag_tk and typed as HereDocumentRawTag / HereDocumentTag /
 * HereDocumentExecTag instead. An empty tag is replaced with "\n".
 *
 * Returns the newly created token.
 */
Token *Scanner::scanQuote(LexContext *ctx, char quote)
{
    TokenManager *tmgr = ctx->tmgr;
    ScriptManager *smgr = ctx->smgr;

    // Escape state is tracked while scanning instead of looking at the two
    // preceding characters: a two-character look-back misreads a quote that
    // follows an odd run of three or more backslashes (e.g. "\\\"") as the
    // closing quote.
    bool escaped = false;
    for (smgr->next(); !smgr->end(); smgr->next()) {
        char ch = smgr->currentChar();
        if (ch == '\n') ctx->finfo.start_line_num++;
        if (escaped) {
            escaped = false;       // this character is consumed by the backslash
        } else if (ch == quote) {
            break;                 // unescaped closing quote: not part of the body
        } else if (ch == '\\') {
            escaped = true;
        }
        ctx->writeBuffer(ch);
    }

    Token *prev_tk = tmgr->lastToken();
    int idx = tmgr->size() - 2;
    string prev_data = (prev_tk) ? string(prev_tk->_data) : "";
    string before_prev_data = (idx >= 0) ? string(tmgr->beforeLastToken()->_data) : "";

    char *token = ctx->buffer();
    Token *ret = tmgr->new_Token(token, ctx->finfo);
    switch (quote) {
    case '\'':
        ret->info = tmgr->getTokenInfo(TokenType::RawString);
        break;
    case '"':
        ret->info = tmgr->getTokenInfo(TokenType::String);
        break;
    case '`':
        ret->info = tmgr->getTokenInfo(TokenType::ExecString);
        break;
    default:
        break;
    }
    ctx->clearBuffer();

    if (prev_data == "<<" || (before_prev_data == "<<" && prev_data == "\\")) {
        /* String is HereDocument */
        here_document_tag = string(ret->_data);
        here_document_tag_tk = ret;
        if (here_document_tag == "") {
            // An empty tag is stored as a single newline.
            here_document_tag = "\n";
            here_document_tag_tk->_data = "\n";
        }
        switch (quote) {
        case '\'':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentRawTag);
            break;
        case '"':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentTag);
            break;
        case '`':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentExecTag);
            break;
        default:
            break;
        }
    }
    return ret;
}