コード例 #1
0
ファイル: pParser.cpp プロジェクト: weyrick/corvus
/**
 * Scan the source text of pMod token by token and drive the corvus parser
 * (corvusParse) with the result.
 *
 * Every scanner match is wrapped in a pSourceRef allocated from a pool that
 * lives for the duration of this call. Whitespace, comments, inline HTML and
 * the open/close tags are not forwarded to the parser. Two situations the
 * scanner reports as "no match" are handled by hand here: inline HTML
 * (scanner state 0) and HEREDOC bodies (scanner state 3).
 *
 * @param pMod   module supplying the source text (pMod->source()) and the
 *               parse context (pMod->context())
 * @param debug  when true, and NDEBUG is not defined, parser tracing is
 *               written to stderr with the prefix "trace: "
 */
void parseSourceFile(pSourceModule* pMod, bool debug=false) {

    boost::object_pool<pSourceRef> tokenPool;
    lexer::pLexer lexer(pMod->source());

    void* pParser = corvusParseAlloc(malloc);

#ifndef NDEBUG
    // DEBUG
    if (debug)
        corvusParseTrace(stderr, const_cast<char*>("trace: "));
#else
    (void)debug; // tracing is compiled out; avoid an unused-parameter warning
#endif

    // start at beginning of source file
    AST::pParseContext& context = pMod->context();
    context.incLineNum(); // line 1

    // curRange always points at a valid pooled token: it starts out as an
    // empty range at the beginning of the source, so the error path and the
    // setLastToken() call below never see an indeterminate pointer even when
    // the very first scan fails to match.
    pSourceRef* curRange = tokenPool.construct(pSourceRef(lexer.sourceBegin(), 0));
    context.setLastToken(curRange);
    context.setLastNewline(lexer.sourceBegin());

    pSourceCharIterator lastNL;

    // identifier of the HEREDOC currently being read (empty when none)
    std::string HEREDOC_ID;

    pSourceCharIterator sourceEnd(lexer.sourceEnd());
    lexer::rmatch match(lexer.sourceBegin(), lexer.sourceEnd());

    do {

        corvus_nextLangToken(match);

        // always make a range unless the scanner didn't match, in which case
        // we handle separately below
        if (match.id != match.npos()) {
            curRange = tokenPool.construct(pSourceRef(match.start, match.end-match.start));
            context.setTokenLine(curRange);
        }

        switch (match.id) {
            case 0:
            {
                // end of input (success)
                break;
            }
            case T_CLOSE_TAG:
            {
                // nothing is forwarded to the parser for a close tag
                // (swallowing one trailing newline here is currently disabled)
                break;
            }
            case T_OPEN_TAG:
            {
                // state change (no parse), but count newlines from OPEN tag
                countNewlines(context, match, lastNL);
                break;
            }
            case ~0: // npos
            {
                // if state is HTML, collect characters for INLINE HTML token
                if (match.state == 0) {
                    // we go until a single < is found, or end of input
                    // this potentially breaks up inline htmls
                    // at tags that don't turn out to be php open tags,
                    // but that way we let the lexer handle the matching
                    // and limit the special handler code here
                    //
                    // the end-of-input test must come first: the past-the-end
                    // iterator may not be dereferenced
                    while ((match.end != sourceEnd) && (*match.end != '<')) {
                        ++match.end;
                    }
                    curRange = tokenPool.construct(pSourceRef(match.start, match.end-match.start));
                    context.setTokenLine(curRange);
                    countNewlines(context, match, lastNL);
                }
                // if state is HEREDOC, collect heredoc string, looking for heredoc id
                else if (match.state == 3) {
                    // assert we have a heredoc ID
                    assert(HEREDOC_ID.length() && "no heredoc id");
                    // idr is the [first, second) range of the closing id;
                    // find_heredoc_id presumably also adjusts match so that it
                    // spans the heredoc body -- confirm against its definition
                    std::pair<pSourceCharIterator,pSourceCharIterator> idr = find_heredoc_id(HEREDOC_ID, lexer, match, pMod);
                    countNewlines(context, match, lastNL);

                    // first the body ...
                    curRange = tokenPool.construct(pSourceRef(match.start, match.end-match.start));
                    context.setTokenLine(curRange);
                    corvusParse(pParser, T_HEREDOC_STRING, curRange, pMod);

                    // ... then the terminating id
                    match.start = idr.first;
                    match.end = idr.second;
                    curRange = tokenPool.construct(pSourceRef(match.start, match.end-match.start));
                    context.setTokenLine(curRange);
                    corvusParse(pParser, T_HEREDOC_END, curRange, pMod);

                    // leave the HEREDOC scanner state
                    match.state = 1;
                    HEREDOC_ID.clear();
                }
                else {
                    // unmatched token: error, reported against the last
                    // token that was successfully produced
                    context.parseError(curRange, pSourceRange());
                }
                break;
            }
            case T_HEREDOC_START:
            {
                // save the heredoc id so we can match the end:
                // skip the leading "<<<", blanks and an optional opening quote
                pSourceCharIterator ms = match.start;
                while (*ms == '<' ||
                       *ms == ' ' ||
                       *ms == '\t' ||
                       *ms == '\'' ||
                       *ms == '"'
                       )
                    ++ms;
                // drop the last character of the match, plus a closing quote
                // if one directly precedes it
                if (*(match.end-2) == '"' || *(match.end-2) == '\'')
                    HEREDOC_ID.assign(ms, match.end-2);
                else
                    HEREDOC_ID.assign(ms, match.end-1);
                countNewlines(context, match, lastNL);
                corvusParse(pParser, T_HEREDOC_START, curRange, pMod);
                break;
            }
            case T_WHITESPACE:
            case T_INLINE_HTML:
            case T_DOC_COMMENT:
            case T_MULTILINE_COMMENT:
            case T_SINGLELINE_COMMENT:
            {
                // not forwarded to the parser; handle newlines only
                countNewlines(context, match, lastNL);
                break;
            }
            default:
            {
                // parse
                corvusParse(pParser, match.id, curRange, pMod);
                break;
            }
        }

        // next token
        context.setLastToken(curRange);

    }
    while (match.id != 0);

    // finish parse
    corvusParse(pParser, 0, 0, pMod); // note, this may generate a parse error still
    context.finishParse(); // so don't finish until here
    corvusParseFree(pParser, free);

}
コード例 #2
0
ファイル: pLexer.cpp プロジェクト: carriercomm/corvus
/**
 * Debugging aid: scan the lexer's source from sourceBegin_ to sourceEnd_ and
 * print one line per token to std::cout, in the form "<text> <description>".
 *
 * Inline HTML (scanner state 0) and HEREDOC bodies (scanner state 3) are not
 * matched by the scanner and are collected by hand. Scanning stops at end of
 * input, on an unmatched character in any other state, or when a HEREDOC is
 * never terminated.
 */
void pLexer::dumpTokens(void) {

    std::string tokID;
    std::stringstream val;
    // identifier of the HEREDOC currently being read (empty when none)
    std::string HEREDOC_ID;

    rmatch match(sourceBegin_, sourceEnd_);

    do {

        corvus_nextLangToken(match);
        if (match.id == 0) {
            // end of input
            break;
        }
        else if (match.id == match.npos()) {
            // if state is HTML, collect characters for INLINE HTML token
            if (match.state == 0) {
                // test for end of input first: the past-the-end iterator
                // may not be dereferenced
                while ((match.end != sourceEnd_) && (*match.end != '<')) {
                    ++match.end;
                }
                std::cout << match.str() << " " << getTokenDescription(T_INLINE_HTML) << std::endl;
            }
            // if state is HEREDOC, collect heredoc string, looking for heredoc id
            else if (match.state == 3) {
                // assert we have a heredoc ID
                assert(HEREDOC_ID.length() && "no heredoc id");
                match.end--; // we need to reverse this once to check for the
                             // case of a heredoc with no body, only a newline

                // Walk the source one line at a time, testing whether the
                // text at the current position is the heredoc id.
                bool foundID = false;
                pSourceCharIterator ms = match.end;
                pSourceCharIterator me = match.end;
                for (;;) {
                    // match.end never passes sourceEnd_, so the difference is
                    // non-negative and the cast is safe
                    if (static_cast<std::string::size_type>(sourceEnd_ - match.end) < HEREDOC_ID.length()) {
                        // the remaining source text is shorter than the heredocid length,
                        // which means we're never going to match it
                        break;
                    }
                    ms = match.end;
                    me = match.end + HEREDOC_ID.length();
                    if (std::string(ms, me) == HEREDOC_ID) {
                        foundID = true;
                        break;
                    }
                    // no match here: advance to the start of the next line
                    while ((match.end != sourceEnd_) && (*match.end != '\n')) {
                        ++match.end;
                    }
                    if (match.end != sourceEnd_)
                        ++match.end; // skip newline (but never step past the end)
                }

                if (!foundID) {
                    std::cout << "dangling HEREDOC looking for: \"" << HEREDOC_ID << "\"" << std::endl;
                    break; // stop dumping
                }

                // if we get here, we matched the heredoc id:
                // print the body, then the terminating id
                std::cout << match.str() << " " << getTokenDescription(T_DQ_STRING) << std::endl;
                match.start = ms;
                match.end = me;
                std::cout << match.str() << " " << getTokenDescription(T_HEREDOC_END) << std::endl;
                // leave the HEREDOC scanner state
                match.state = 1;
                HEREDOC_ID.clear();
            }
            else {
                // unmatched character in PHP state
                std::cout << "breaking on unmatched: " << match.str() << std::endl;
                break;
            }
        }
        else {
            // matched
            val.str("");
            if (match.id == T_HEREDOC_START) {
                // save the heredoc id so we can match the end:
                // skip the leading "<<<", blanks and an optional opening quote
                pSourceCharIterator ms = match.start;
                while (*ms == '<' ||
                       *ms == ' ' ||
                       *ms == '\t' ||
                       *ms == '\'' ||
                       *ms == '"'
                       )
                    ++ms;
                if (*(match.end-2) == '"' || *(match.end-2) == '\'')
                    HEREDOC_ID.assign(ms, match.end-2); // cut end quote and newline
                else
                    HEREDOC_ID.assign(ms, match.end-1); // just cut newline
                std::cout << std::string(match.start, match.end-1) << " T_HEREDOC_START" << std::endl;
                continue;
            }
            // whitespace tokens are printed with an empty text column
            if (match.id != T_WHITESPACE)
                val << match.str();
            // skip plain newlines in html state
            if ((match.state == 0) && (val.str() == "\n"))
                continue;
            // fall back to the token text when no description is available
            tokID = getTokenDescription(match.id);
            if (tokID.size() == 0)
                tokID = val.str();
            std::cout << val.str() << " " << tokID << std::endl;
        }

    }
    while (match.id != 0);


}