/*
 * Handles a '#' character met by the scanner.
 *
 * If the context buffer holds pending text, it is first turned into a token
 * via scanPrevSymbol() and added to the token manager.
 *
 * Two outcomes are then possible:
 *   - A regex is in progress (isRegexStarted), or the last token is of type
 *     RegExp or RegReplaceTo: the '#' is returned as a RegDelim token.
 *   - Otherwise the input is consumed from the current position up to (but
 *     not including) the next '\n' or the end of input. In verbose mode the
 *     consumed text is added to the token manager as a Comment token; in
 *     non-verbose mode it is discarded. The result of scanWhiteSpace() is
 *     then added and the context's line counter is incremented.
 *
 * Returns the RegDelim token in the first case and NULL in the second
 * (comment tokens are added to the token manager directly).
 */
Token *Scanner::scanSingleLineComment(LexContext *ctx)
{
    ScriptManager *script = ctx->smgr;
    TokenManager *tokens = ctx->tmgr;

    // Flush whatever was being accumulated before the '#'.
    if (ctx->existsBuffer()) {
        tokens->add(scanPrevSymbol(ctx, '#'));
    }

    Token *last = tokens->lastToken();
    TokenType::Type last_type = TokenType::Undefined;
    if (last) {
        last_type = last->info.type;
    }

    const bool hash_is_delimiter = isRegexStarted ||
                                   last_type == TokenType::RegExp ||
                                   last_type == TokenType::RegReplaceTo;
    if (hash_is_delimiter) {
        // Inside a regex context '#' acts as a delimiter, not a comment start.
        ctx->writeBuffer('#');
        Token *delim = tokens->new_Token(ctx->buffer(), ctx->finfo);
        delim->info = tokens->getTokenInfo(TokenType::RegDelim);
        ctx->clearBuffer();
        return delim;
    }

    if (verbose) {
        // Keep the comment text so it can be emitted as a token.
        while (script->currentChar() != '\n' && !script->end()) {
            ctx->writeBuffer(script->currentChar());
            script->next();
        }
        Token *comment = tokens->new_Token(ctx->buffer(), ctx->finfo);
        comment->info = tokens->getTokenInfo(TokenType::Comment);
        ctx->clearBuffer();
        tokens->add(comment);
    } else {
        // Comment text is not wanted: just skip to the end of the line.
        while (script->currentChar() != '\n' && !script->end()) {
            script->next();
        }
    }

    tokens->add(scanWhiteSpace(ctx));
    ctx->finfo.start_line_num++;
    return NULL;
}
/*
 * Scans whitespace at the current position and, in verbose mode, returns it
 * as a WhiteSpace token.
 *
 * Behaviour depends on the last token held by the token manager:
 *   - Comment or Pod: a single '\n' is written to the buffer without reading
 *     the input, and the context line number is set to that token's line.
 *   - Anything else (including no previous token): a run of ' ' / '\t' is
 *     collected into the buffer, or, if no such run has started, a single
 *     '\n' is collected. On any other character the reader is stepped back
 *     by one and scanning stops.
 *
 * Returns NULL (after clearing the buffer) when not in verbose mode,
 * otherwise a new token of type WhiteSpace built from the buffer.
 */
Token *Scanner::scanWhiteSpace(LexContext *ctx)
{
    TokenManager *tmgr = ctx->tmgr;
    Token *prev_tk = tmgr->lastToken();
    TokenType::Type prev_type = (prev_tk) ? prev_tk->info.type : TokenType::Undefined;
    if (prev_type == TokenType::Comment || prev_type == TokenType::Pod) {
        // Add WhiteSpace token (data: '\n') for Comment or Pod token,
        // because the newline character is not at the end of those tokens.
        // prev_tk is non-NULL here: prev_type can only be Comment/Pod when
        // it was read from an existing token.
        ctx->writeBuffer('\n');
        ctx->finfo.start_line_num = prev_tk->finfo.start_line_num;
    } else {
        bool does_ws_continue = false;
        ScriptManager *smgr = ctx->smgr;
        for (; !smgr->end(); smgr->next()) {
            char ch = smgr->currentChar();
            if (ch == ' ' || ch == '\t') {
                // For normal whitespace.
                // A continuous run is collected into one token.
                ctx->writeBuffer(ch);
                does_ws_continue = true;
                continue;
            } else if (!does_ws_continue && ch == '\n') {
                // For newline character.
                // It should be reported on the same line as the token before it.
                ctx->writeBuffer(ch);
                // prev_tk is NULL when no token has been produced yet (e.g.
                // the input starts with a newline); in that case there is no
                // line number to copy, so the current one is kept.
                if (verbose && prev_tk && prev_type != TokenType::HereDocumentEnd) {
                    ctx->finfo.start_line_num = prev_tk->finfo.start_line_num;
                }
                break;
            }
            // Neither a space/tab nor a leading newline: step back one
            // position and stop scanning.
            smgr->back();
            break;
        }
    }
    if (!verbose) {
        // Whitespace tokens are only produced in verbose mode.
        ctx->clearBuffer();
        return NULL;
    }
    Token *token = tmgr->new_Token(ctx->buffer(), ctx->finfo);
    token->info = tmgr->getTokenInfo(TokenType::WhiteSpace);
    ctx->clearBuffer();
    return token;
}
/*
 * Scans a quoted string delimited by `quote` and returns it as a token.
 *
 * The reader is advanced once before scanning starts, then characters are
 * written to the context buffer until an unescaped `quote` character or the
 * end of input is reached. Each '\n' inside the string increments the
 * context line number. The terminating quote is not written to the buffer.
 *
 * Token type by quote character:
 *   '\''  -> RawString
 *   '"'   -> String
 *   '`'   -> ExecString
 * For any other quote character the token info is left as set by new_Token().
 *
 * If the last token's data is "<<", or the last two tokens are "<<" followed
 * by "\", the string is treated as a here-document tag: it is remembered in
 * here_document_tag / here_document_tag_tk and its type is replaced with
 * HereDocumentRawTag / HereDocumentTag / HereDocumentExecTag. An empty tag
 * is stored as "\n".
 */
Token *Scanner::scanQuote(LexContext *ctx, char quote)
{
    TokenManager *tmgr = ctx->tmgr;
    ScriptManager *smgr = ctx->smgr;

    // True when the character about to be examined is preceded by an odd
    // number of consecutive backslashes, i.e. it is escaped. Tracking parity
    // (rather than looking only at the two preceding characters) makes
    // sequences such as \\\" keep the quote inside the string.
    bool escaped = false;
    for (smgr->next(); !smgr->end(); smgr->next()) {
        char ch = smgr->currentChar();
        if (ch == '\n') {
            ctx->writeBuffer(ch);
            ctx->finfo.start_line_num++;
            escaped = false;
            continue;
        }
        if (ch == quote && !escaped) break;
        ctx->writeBuffer(ch);
        // A backslash escapes the next character unless it is itself escaped.
        escaped = (ch == '\\' && !escaped);
    }

    // Capture the data of the preceding tokens before the new token is
    // created, so here-document detection can look at them afterwards.
    Token *prev_tk = tmgr->lastToken();
    int idx = tmgr->size() - 2;
    string prev_data = (prev_tk) ? string(prev_tk->_data) : "";
    string before_prev_data = (idx >= 0) ? string(tmgr->beforeLastToken()->_data) : "";

    char *token = ctx->buffer();
    Token *ret = tmgr->new_Token(token, ctx->finfo);
    switch (quote) {
    case '\'':
        ret->info = tmgr->getTokenInfo(TokenType::RawString);
        break;
    case '"':
        ret->info = tmgr->getTokenInfo(TokenType::String);
        break;
    case '`':
        ret->info = tmgr->getTokenInfo(TokenType::ExecString);
        break;
    default:
        break;
    }
    ctx->clearBuffer();

    if (prev_data == "<<" || (before_prev_data == "<<" && prev_data == "\\")) {
        /* String is HereDocument */
        here_document_tag = string(ret->_data);
        here_document_tag_tk = ret;
        if (here_document_tag == "") {
            // An empty tag is represented by a newline.
            here_document_tag = "\n";
            here_document_tag_tk->_data = "\n";
        }
        switch (quote) {
        case '\'':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentRawTag);
            break;
        case '"':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentTag);
            break;
        case '`':
            ret->info = tmgr->getTokenInfo(TokenType::HereDocumentExecTag);
            break;
        default:
            break;
        }
    }
    return ret;
}