/*
 * Per-character hook for the scanner's raw-text modes.
 *
 * Looks at the character under the cursor (and up to two characters before
 * it) and, depending on the Scanner's mode flags, either appends it to the
 * context buffer or closes the current raw section by emitting tokens:
 *
 *   - commentFlag        : a line starting with '=' + alphanumeric turns the
 *                          mode on; a line starting with "=cut" turns it off.
 *   - __END__ / __DATA__ : at the start of a line (outside a here-document)
 *                          ctx->progress is set to the remaining script size.
 *   - isFormatStarted    : body is buffered until a line starting with '.'.
 *   - isRegexStarted     : body is buffered until the (middle/closing)
 *                          delimiter, honouring backslash escapes and nested
 *                          brackets.
 *   - isPrototypeStarted : body is buffered until ')'.
 *   - hereDocumentFlag   : body is buffered until a line consisting of the
 *                          here-document tag.
 *
 * The last four are only examined while skipFlag is set.
 *
 * @param ctx  lexing context; supplies the script cursor (smgr), the token
 *             sink (tmgr), the text buffer, file info and `progress`.
 * @return true when the character was taken as part of a raw section (or
 *         comment mode is active), false otherwise.
 *         NOTE(review): presumably "true" tells the caller to skip normal
 *         tokenization of this character - confirm against the call site.
 */
bool Scanner::isSkip(LexContext *ctx)
{
    using namespace TokenType;
    bool ret = commentFlag;
    ScriptManager *smgr = ctx->smgr;
    TokenManager *tmgr = ctx->tmgr;
    char *script = smgr->raw_script;
    size_t idx = smgr->idx;
    char prev_ch = smgr->previousChar();
    char cur_ch = smgr->currentChar();

    /* A line beginning with "=<alnum>" toggles comment mode. */
    if (prev_ch == '\n' && cur_ch == '=' && isalnum(smgr->nextChar())) {
        if (smgr->compare(1, 3, "cut")) {
            DBG_PL("commentFlag => OFF");
            ctx->progress = 4; /* presumably steps over the "=cut" marker */
            commentFlag = false;
            ret = false;
            if (verbose) {
                /* In verbose mode the buffered text is emitted as a Pod token. */
                ctx->finfo.start_line_num++;
                ctx->writeBuffer("=cut");
                Token *tk = tmgr->new_Token(ctx->buffer(), ctx->finfo);
                tk->info = tmgr->getTokenInfo(TokenType::Pod);
                ctx->clearBuffer();
                tmgr->add(tk);
                tmgr->add(scanWhiteSpace(ctx));
            }
            ctx->finfo.start_line_num++;
        } else {
            DBG_PL("commentFlag => ON");
            commentFlag = true;
            ret = true;
        }
    }
    if (commentFlag) {
        /* The comment text is only kept when verbose output is requested. */
        if (verbose) ctx->writeBuffer(cur_ch);
        return ret;
    }

    /* __END__ / __DATA__ at the start of a line: jump to the end of the script. */
    if (prev_ch == '\n' && cur_ch == '_' && !hereDocumentFlag &&
        (smgr->compare(0, 7, "__END__") || smgr->compare(0, 8, "__DATA__"))) {
        int progress_to_end = ctx->script_size - idx - 1;
        ctx->progress = progress_to_end;
        ret = false;
    }

    if (!skipFlag) return ret;

    if (isFormatStarted) {
        if (prev_ch == '\n' && cur_ch == '.') {
            /* A line starting with '.' closes the format body. */
            Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
            tk->info = tmgr->getTokenInfo(Format);
            ctx->clearBuffer();
            tmgr->add(tk);
            tk = ctx->tmgr->new_Token((char *)".", ctx->finfo);
            tk->info = tmgr->getTokenInfo(TokenType::FormatEnd);
            tmgr->add(tk);
            ctx->progress = 1;
            isFormatStarted = false;
            skipFlag = false;
            ret = false;
        } else {
            ctx->writeBuffer(script[idx]);
            ret = true;
        }
    } else if (isRegexStarted) {
        char before_prev_ch = smgr->beforePreviousChar();
        /* Escaped: preceded by a backslash that is not itself preceded by one. */
        const bool escaped = (prev_ch == '\\' && before_prev_ch != '\\');

        /* Track bracket nesting for every unescaped bracket character. */
        if (!escaped) {
            switch (cur_ch) {
            case '{': brace_count_inner_regex++; break;
            case '}': brace_count_inner_regex--; break;
            case '[': bracket_count_inner_regex++; break;
            case ']': bracket_count_inner_regex--; break;
            case '(': cury_brace_count_inner_regex++; break;
            case ')': cury_brace_count_inner_regex--; break;
            default: break;
            }
        }

        if (escaped) {
            ctx->writeBuffer(cur_ch);
            ret = true;
        } else if (cur_ch != regex_delim && cur_ch != regex_middle_delim) {
            ctx->writeBuffer(cur_ch);
            ret = true;
        } else if (cur_ch == regex_middle_delim) {
            /* A closing bracket only counts as the delimiter at nesting depth 0. */
            const bool middle_still_nested =
                (regex_middle_delim == '}' && brace_count_inner_regex != 0) ||
                (regex_middle_delim == ')' && cury_brace_count_inner_regex != 0) ||
                (regex_middle_delim == ']' && bracket_count_inner_regex != 0);
            if (middle_still_nested) {
                ctx->writeBuffer(cur_ch);
                ret = true;
            } else {
                Token *tk = NULL;
                /* When the middle delimiter is an opening bracket, no
                 * RegReplaceFrom token is emitted at this point. */
                if (regex_middle_delim != '{' && regex_middle_delim != '(' &&
                    regex_middle_delim != '<' && regex_middle_delim != '[') {
                    tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
                    tk->info = tmgr->getTokenInfo(RegReplaceFrom);
                    ctx->clearBuffer();
                    tmgr->add(tk);
                }
                ctx->writeBuffer(regex_middle_delim);
                tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
                tk->info = tmgr->getTokenInfo(RegMiddleDelim);
                ctx->clearBuffer();
                tmgr->add(tk);
                /* After a closing bracket, the next middle delimiter expected
                 * is the matching opening bracket; otherwise there is none. */
                switch (regex_middle_delim) {
                case '}': regex_middle_delim = '{'; break;
                case ')': regex_middle_delim = '('; break;
                case '>': regex_middle_delim = '<'; break;
                case ']': regex_middle_delim = '['; break;
                default: regex_middle_delim = '\0'; break;
                }
                ret = true;
            }
        } else {
            /* cur_ch == regex_delim */
            const bool closing_still_nested =
                (regex_delim == '}' && brace_count_inner_regex != 0) ||
                (regex_delim == ')' && cury_brace_count_inner_regex != 0) ||
                (regex_delim == ']' && bracket_count_inner_regex != 0);
            if (closing_still_nested) {
                ctx->writeBuffer(cur_ch);
                ret = true;
            } else {
                /* End of the regex body: it is the replacement part when the
                 * previously added token was the middle delimiter. */
                Token *prev_tk = ctx->tmgr->lastToken();
                Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
                tk->info = (prev_tk->info.type == RegMiddleDelim)
                               ? tmgr->getTokenInfo(RegReplaceTo)
                               : tmgr->getTokenInfo(RegExp);
                ctx->clearBuffer();
                tmgr->add(tk);
                ret = false;
                isRegexStarted = false;
                skipFlag = false;
                regex_delim = 0;
                brace_count_inner_regex = 0;
                cury_brace_count_inner_regex = 0;
                bracket_count_inner_regex = 0;
            }
        }
    } else if (isPrototypeStarted) {
        if (script[idx] == ')') {
            Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
            tk->info = tmgr->getTokenInfo(Prototype);
            ctx->clearBuffer();
            tmgr->add(tk);
            isPrototypeStarted = false;
            skipFlag = false;
            ret = false;
        } else {
            ctx->writeBuffer(script[idx]);
            ret = true;
        }
    } else if (hereDocumentFlag) {
        const size_t len = here_document_tag.size();
        bool terminated = false;
        if (smgr->previousChar() == '\n' && idx + len < ctx->script_size) {
            size_t i;
            for (i = 0; i < len && script[idx + i] == here_document_tag.at(i); i++) {}
            if (i == len) {
                /* The terminator must be the whole line: a body line that
                 * merely starts with the tag (tag "EOF", line "EOFoo") must
                 * not end the here-document. A tag that already ends in '\n'
                 * (the empty-tag case) carries its own line end, and a
                 * zero-length tag keeps its previous behaviour. */
                const char after = script[idx + len];
                terminated = (len == 0) ||
                             here_document_tag.at(len - 1) == '\n' ||
                             after == '\n' || after == '\r' || after == '\0';
            }
        }
        if (terminated) {
            ctx->progress = len;
            if (verbose) ctx->finfo.start_line_num++;
            Token *tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
            tk->info = tmgr->getTokenInfo(TokenType::HereDocument);
            ctx->clearBuffer();
            tmgr->add(tk);
            tk = ctx->tmgr->new_Token((char *)here_document_tag_tk->_data, ctx->finfo);
            tk->info = tmgr->getTokenInfo(TokenType::HereDocumentEnd);
            tmgr->add(tk);
            if (!verbose) ctx->finfo.start_line_num++;
            here_document_tag = "";
            hereDocumentFlag = false;
            skipFlag = false;
            ret = false;
        } else {
            ctx->writeBuffer(script[idx]);
            ret = true;
        }
    }
    return ret;
}
/*
 * Scans a quoted string whose opening quote is under the cursor and returns
 * a token holding the text between the quotes (backslashes are kept as
 * written).
 *
 * The cursor is advanced until the first unescaped `quote` or the end of the
 * script; if the end is reached first, the text collected so far is still
 * returned as a token. Each newline inside the string increments
 * ctx->finfo.start_line_num.
 *
 * When the preceding token is "<<" (or "<<" followed by "\"), the string is
 * a here-document tag: the Scanner remembers it in here_document_tag /
 * here_document_tag_tk and the token gets the matching HereDocument*Tag
 * type. An empty tag is stored as "\n".
 *
 * @param ctx    lexing context (script cursor, token manager, buffer).
 * @param quote  the quote character: '\'', '"' or '`'. For any other value
 *               the token's info is not assigned by this function.
 * @return the newly created token; it is not added to the token manager here.
 */
Token *Scanner::scanQuote(LexContext *ctx, char quote)
{
    TokenManager *tmgr = ctx->tmgr;
    ScriptManager *smgr = ctx->smgr;

    /* True while the previous character was an unescaped backslash. Keeping
     * this state (rather than peeking at the two preceding characters) makes
     * a quote after any odd run of backslashes, e.g. \\\", count as escaped. */
    bool escaped = false;
    for (smgr->next(); !smgr->end(); smgr->next()) {
        char ch = smgr->currentChar();
        if (ch == '\n') {
            ctx->writeBuffer(ch);
            ctx->finfo.start_line_num++;
            escaped = false; /* a newline consumes any pending backslash */
            continue;
        }
        if (escaped) {
            ctx->writeBuffer(ch);
            escaped = false;
            continue;
        }
        if (ch == quote) break;
        if (ch == '\\') escaped = true;
        ctx->writeBuffer(ch);
    }

    /* Capture the text of the one or two preceding tokens to detect "<<". */
    Token *prev_tk = ctx->tmgr->lastToken();
    int idx = ctx->tmgr->size() - 2;
    string prev_data = (prev_tk) ? string(prev_tk->_data) : "";
    string before_prev_data = (idx >= 0) ? string(ctx->tmgr->beforeLastToken()->_data) : "";

    char *token = ctx->buffer();
    Token *ret = ctx->tmgr->new_Token(token, ctx->finfo);
    switch (quote) {
    case '\'':
        ret->info = tmgr->getTokenInfo(TokenType::RawString);
        break;
    case '"':
        ret->info = tmgr->getTokenInfo(TokenType::String);
        break;
    case '`':
        ret->info = tmgr->getTokenInfo(TokenType::ExecString);
        break;
    default:
        break;
    }
    ctx->clearBuffer();

    if (prev_data == "<<" || (before_prev_data == "<<" && prev_data == "\\")) {
        /* String is HereDocument */
        here_document_tag = string(ret->_data);
        here_document_tag_tk = ret;
        if (here_document_tag == "") {
            /* An empty tag is represented by a bare newline. */
            here_document_tag = "\n";
            here_document_tag_tk->_data = "\n";
        }
        switch (quote) {
        case '\'':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentRawTag);
            break;
        case '"':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentTag);
            break;
        case '`':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentExecTag);
            break;
        default:
            break;
        }
    }
    return ret;
}