// find the heredoc id, or else parse error looking for it // returns pointers to the matching end token (HEREDOC_ID) std::pair<pSourceCharIterator,pSourceCharIterator> find_heredoc_id(const std::string& HEREDOC_ID, const lexer::pLexer& lexer, lexer::rmatch& match, pSourceModule* pMod) { AST::pParseContext& context = pMod->context(); match.end--; look_for_id: if (lexer.sourceEnd() - match.end < HEREDOC_ID.length()) { // the remaining source text is shorter than the heredocid length, // which means we're never going to match it context.parseError("dangling HEREDOC", pSourceRange()); } pSourceCharIterator ms = match.end; pSourceCharIterator me = match.end+HEREDOC_ID.length(); std::string maybeID(ms, me); if (maybeID != HEREDOC_ID) { while ((*match.end != '\n') && (match.end != lexer.sourceEnd())) { match.end++; } match.end++; // skip newline goto look_for_id; } /* look_for_id: while ((*match.end != '\n') && (match.end != lexer.sourceEnd())) { match.end++; } if (lexer.sourceEnd() - match.end < HEREDOC_ID.length()) { // the remaining source text is shorter than the heredocid length, // which means we're never going to match it context.parseError("dangling HEREDOC"); // XXX does not reach this, parseError throws } match.end++; // skip newline pSourceCharIterator ms = match.end; pSourceCharIterator me = match.end+HEREDOC_ID.length(); std::string maybeID(ms, me); if (maybeID != HEREDOC_ID) goto look_for_id;*/ return std::pair<pSourceCharIterator, pSourceCharIterator>(ms, me); }
void pLexer::dumpTokens(void) { std::string tokID; std::stringstream val; std::string HEREDOC_ID; rmatch match(sourceBegin_, sourceEnd_); do { corvus_nextLangToken(match); if (match.id == 0) { // end of input break; } else if (match.id == match.npos()) { // if state is HTML, collect characters for INLINE HTML token if (match.state == 0) { while ((*match.end != '<') && (match.end != sourceEnd_)) { match.end++; } std::cout << match.str() << " " << getTokenDescription(T_INLINE_HTML) << std::endl; } // if state is HEREDOC, collect heredoc string, looking for heredoc id else if (match.state == 3) { // assert we have a heredoc ID assert(HEREDOC_ID.length() && "no heredoc id"); match.end--; // we need to reverse this once to check for the // case of a heredoc with no body, only a newline look_for_id: if (sourceEnd_ - match.end < HEREDOC_ID.length()) { // the remaining source text is shorter than the heredocid length, // which means we're never going to match it std::cout << "dangling HEREDOC looking for: \"" << HEREDOC_ID << "\"" << std::endl; break; } pSourceCharIterator ms = match.end; pSourceCharIterator me = match.end+HEREDOC_ID.length(); std::string maybeID(ms, me); if (maybeID != HEREDOC_ID) { while ((*match.end != '\n') && (match.end != sourceEnd_)) { match.end++; } match.end++; // skip newline goto look_for_id; } // if we get here, we matched the heredoc id std::cout << match.str() << " " << getTokenDescription(T_DQ_STRING) << std::endl; match.start = ms; match.end = me; std::cout << match.str() << " " << getTokenDescription(T_HEREDOC_END) << std::endl; match.state = 1; HEREDOC_ID.clear(); } else { // unmatched character in PHP state std::cout << "breaking on unmatched: " << match.str() << std::endl; break; } } else { // matched // skip plain newlines in html state val.str(""); if (match.id == T_HEREDOC_START) { // save the heredoc id so we can match the end pSourceCharIterator ms = match.start; while (*ms == '<' || *ms == ' ' || *ms == '\t' || *ms == '\'' || *ms == '"' ) ms++; if (*(match.end-2) == '"' || *(match.end-2) == '\'') HEREDOC_ID.assign(ms, match.end-2); // cut end quote and newline else HEREDOC_ID.assign(ms, match.end-1); // just cut newline std::cout << std::string(match.start, match.end-1) << " T_HEREDOC_START" << std::endl; continue; } if (match.id != T_WHITESPACE) val << match.str(); if ((match.state == 0) && (val.str() == "\n")) continue; tokID = getTokenDescription(match.id); if (tokID.size() == 0) tokID = val.str(); std::cout << val.str() << " " << tokID << std::endl; } } while (match.id != 0); }