int NewickLex::ReadNodeChildren(string &str, int revstartpos, Node* curNode) { bool ok = true; int pos = revstartpos; //cout<<"POS="<<pos<<endl; while (ok && pos >= 0) { int closepos = str.find_last_of(')', pos); int openpos = str.find_last_of('(', pos); int commapos = str.find_last_of(',', pos); int maxpos = max(max(closepos, openpos), commapos); if (maxpos == string::npos) { ok = false; pos = -1; } else { string lbl = ""; lbl = Util::Trim(str.substr(maxpos + 1, pos - maxpos)); if (maxpos == closepos) { //cout<<"Close="<<maxpos<<" LBL="<<lbl<<endl; Node* newNode = curNode->InsertChild(0); ParseLabel(newNode, lbl); //newNode->SetLabel(lbl); pos = ReadNodeChildren(str, maxpos - 1, newNode); while (str[pos] != ',' && str[pos] != '(') pos--; if (str[pos] == ',') pos--; } else if (maxpos == commapos) { //cout<<"Comma="<<maxpos<<" LBL="<<lbl<<endl; int ptcomma = str.find_first_of(',', maxpos + 1); int ptopen = str.find_first_of('(', maxpos + 1); int ptclose = str.find_first_of(')', maxpos + 1); if ((ptcomma < ptopen && ptcomma < ptclose) || (ptopen == string::npos || ptopen > ptclose)) { Node* newNode = curNode->InsertChild(0); ParseLabel(newNode, lbl); //newNode->SetLabel(lbl); } pos = maxpos - 1; } else if (maxpos == openpos) { //cout<<"Open="<<maxpos<<" LBL="<<lbl<<endl; //EDIT ML AUG 2012 : an opening parenthese creates a node only if followed by a , // THIS WAS DONE DURING A PHASE OF TIREDNESS // IF SOMETHING IS BUGGY, IT'S PROBABLY AROUND HERE //NOTE ML AUG 2013 : it does seem to be holding up, even after extensive use int ptcomma = str.find_first_of(',', maxpos + 1); int ptopen = str.find_first_of('(', maxpos + 1); int ptclose = str.find_first_of(')', maxpos + 1); if (ptcomma != string::npos && (ptcomma < ptclose || ptclose == string::npos) && (ptcomma < ptopen || ptopen == string::npos)) { Node* newNode = curNode->InsertChild(0); ParseLabel(newNode, lbl); //newNode->SetLabel(lbl); } pos = maxpos - 1; ok = false; } } } return pos; }
bool cPasTokenizer::Parse(const char* strLine, bool bSkipWhiteSpaces, bool bSkipComments) { if (!GetTokenHandler()) return false; IncLine(); LogLine(strLine); if (m_bBlockComment) { strLine = AppendBlockComment(strLine, bSkipComments); } else if (GetLine() > 1) { if (!bSkipWhiteSpaces) PushToken(TOKEN_NEWLINE); } if (strLine == NULL) return false; while(char c = *strLine++) { tToken token; switch(c) { case ' ': case '\t': strLine = HandleWhiteSpace(strLine-1, bSkipWhiteSpaces); break; case '{': strLine = HandleBlockComment(strLine, bSkipComments); if (strLine == NULL) { return true; } break; case '+': PushToken(TOKEN_OPERATOR, PAS_OP_ADDITION); break; case '-': PushToken(TOKEN_OPERATOR, PAS_OP_SUBTRACTION); break; case '*': PushToken(TOKEN_OPERATOR, PAS_OP_MULTIPLICATION); break; case '/': switch(*strLine) { case '/': PushToken(TOKEN_LINECOMMENT, strLine+1); return true; default: PushToken(TOKEN_OPERATOR, PAS_OP_DIVISION); break; } break; case '=': PushToken(TOKEN_OPERATOR, PAS_OP_EQUAL); break; case '>': switch(*strLine) { case '=': PushToken(TOKEN_OPERATOR, PAS_OP_BIGGER_OR_EQUAL); strLine++; break; default: PushToken(TOKEN_OPERATOR, PAS_OP_BIGGER); break; }; break; case '<': switch(*strLine) { case '=': PushToken(TOKEN_OPERATOR, PAS_OP_SMALLER_OR_EQUAL); strLine++; break; case '>': PushToken(TOKEN_OPERATOR, PAS_OP_NOT_EQUAL); strLine++; break; default: PushToken(TOKEN_OPERATOR, PAS_OP_SMALLER); break; }; break; case ':': switch(*strLine) { case '=': PushToken(TOKEN_OPERATOR, PAS_OP_ASSIGNMENT); strLine++; break; default: PushToken(TOKEN_OPERATOR, PAS_OP_ISTYPE); break; }; break; case '^': PushToken(TOKEN_OPERATOR, PAS_OP_DEREFERENCE); break; case '@': PushToken(TOKEN_OPERATOR, PAS_OP_ADDRESS); break; case '&': PushToken(TOKEN_OPERATOR, PAS_OP_AMPERSAND); break; case ',': PushToken(TOKEN_OPERATOR, PAS_OP_LIST); break; case ';': PushToken(TOKEN_OPERATOR, PAS_OP_COMMAND_SEPARATOR); break; case '#': strLine = ParseLiteral(strLine-1, TOKEN_CHAR); if (strLine == NULL) return false; break; case '$': strLine = ParseLiteral(strLine-1, TOKEN_LITERAL); if (strLine == NULL) return false; break; case '.': switch(*strLine) { case '$': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': strLine = ParseLiteral(strLine-1, TOKEN_LITERAL); if (strLine == NULL) return false; break; case '.': PushToken(TOKEN_OPERATOR, PAS_OP_RANGE); strLine++; break; case ')': PushToken(TOKEN_OPERATOR, PAS_OP_INDEX_CLOSE); strLine++; break; default: PushToken(TOKEN_OPERATOR, PAS_OP_MEMBER_ACCESS); break; } break; case '\'': strLine = HandleString(strLine-1); if (strLine == NULL) return true; break; case '(': switch(*strLine) { case '.': PushToken(TOKEN_OPERATOR, PAS_OP_INDEX_OPEN); strLine++; break; case '*': strLine = HandleBlockComment(strLine, bSkipComments); if (strLine == NULL) { return true; } break; default: PushToken(TOKEN_OPERATOR, PAS_OP_BRACKET_OPEN); break; } break; case ')': PushToken(TOKEN_OPERATOR, PAS_OP_BRACKET_CLOSE); break; case '[': PushToken(TOKEN_OPERATOR, PAS_OP_INDEX_OPEN); break; case ']': PushToken(TOKEN_OPERATOR, PAS_OP_INDEX_CLOSE); break; default: if (isalpha(c) || c == '_') { strLine = ParseLabel(strLine-1); } else if (isdigit(c)) { strLine = ParseLiteral(strLine-1, TOKEN_LITERAL); } else { std::stringstream strLog; strLog << "unknown character " << c; GetTokenHandler()->HandleError(strLog.str().c_str(), GetLine()); } if (strLine == NULL) return false; break; } } return false; }
bool cPPTokenizer::Parse(const char* strLine, bool bSkipWhiteSpaces, bool bSkipComments) { LOG("strLine: %s", strLine); LOG("Stop: %s", m_bStop ? "true" : "false"); bool bSlashFound = false; char c; if (!GetTokenHandler()) return false; IncLine(); LogLine(strLine); if (m_bBlockComment) { strLine = AppendBlockComment(strLine); if (strLine == NULL) return true; } else if (m_bLineComment) { HandleLineComment(strLine); return true; } else if (m_bMultiLineString) { strLine = AppendString(strLine); if (strLine == NULL) return true; } else if (m_bMessage) { HandleMessage(strLine); return true; } else if (GetLine() > 1) { if (!bSkipWhiteSpaces) PushToken(TOKEN_NEWLINE); } if (m_bStop) { m_bStop = false; return false; } bool bLineStart = true; while(c = *strLine++) { tToken token; switch(c) { case ' ': case '\t': strLine = HandleWhiteSpace(strLine-1, bLineStart); break; case '#': if (m_bPreProcMode) { switch(*strLine) { case '#': PushToken(TOKEN_OPERATOR, PP_OP_CONCATENATION); strLine++; break; default: PushToken(TOKEN_OPERATOR, PP_OP_STRINGIFICATION); break; } } else { PushToken(TOKEN_OPERATOR, PP_OP_PREPROC); m_bPreProcMode = true; m_bExpectKeyword = true; } break; // ',', '(', ')' are needed to interpret parameter lists in macro calls case ',': PushToken(TOKEN_OPERATOR, PP_OP_COMMA); break; case '(': PushToken(TOKEN_OPERATOR, PP_OP_BRACKET_OPEN); break; case ')': PushToken(TOKEN_OPERATOR, PP_OP_BRACKET_CLOSE); break; case '+': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_ADDITION, "+"); break; case '-': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_SUBTRACTION, "-"); break; case '*': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_MULTIPLICATION, "*"); break; case '%': switch(*strLine) { case '>': PushToken(TOKEN_CHAR, '}'); strLine++; break; case ':': PushToken(TOKEN_CHAR, '#'); strLine++; break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_MODULUS, "%"); break; } break; case '~': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_BITWISE_NOT, "~"); break; case '^': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_BITWISE_XOR, "^"); break; case '!': switch(*strLine) { case '=': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_NOT_EQUAL, "!="); strLine++; break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_LOGICAL_NOT, "!"); break; } break; case '&': switch(*strLine) { case '&': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_LOGICAL_AND, "&&"); strLine++; break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_BITWISE_AND, "&"); break; } break; case '|': switch(*strLine) { case '|': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_LOGICAL_OR, "||"); strLine++; break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_BITWISE_OR, "|"); break; } break; case '=': switch(*strLine) { case '=': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_EQUAL, "=="); strLine++; break; default: PushToken(TOKEN_CHAR, '='); break; } break; case '.': if (strLine[0] == '.' && strLine[1] == '.') { strLine += 2; PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_ELLIPSIS, "..."); } else { switch(*strLine) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': strLine = ParseLiteral(strLine-1); if (strLine == NULL) return PushPreProcEnd(); break; default: PushToken(TOKEN_CHAR, '.'); break; } } break; case ':': switch(*strLine) { case '>': PushToken(TOKEN_CHAR, ']'); strLine++; break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_CONDITIONAL_ELSE, ":"); break; } break; case '/': switch(*strLine) { case '/': // line comments are removed HandleLineComment(strLine); return PushPreProcEnd(); case '*': strLine = HandleBlockComment(strLine-2); if (strLine == NULL) return PushPreProcEnd(); break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_DIVISION, "/"); break; } break; case '\'': strLine = HandleString(strLine-1, '\'', TOKEN_STRING); if (strLine == NULL) return PushPreProcEnd(); break; case '\"': strLine = HandleString(strLine-1, '\"', TOKEN_STRING); if (strLine == NULL) return PushPreProcEnd(); break; case '>': switch(*strLine) { case '>': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_SHR, ">>"); strLine++; break; case '=': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_BIGGER_OR_EQUAL, ">="); strLine++; break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_BIGGER, ">"); break; } break; case '<': if (m_bInclude) { strLine = HandleString(strLine-1, '>', TOKEN_STRING); m_bInclude = false; if (strLine == NULL) return PushPreProcEnd(); } else { switch(*strLine) { case '<': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_SHL, "<<"); strLine++; break; case '=': PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_SMALLER_OR_EQUAL, "<="); strLine++; break; case '%': PushToken(TOKEN_CHAR, '{'); strLine++; break; case ':': PushToken(TOKEN_CHAR, '['); strLine++; break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_SMALLER, "<"); break; } break; } break; case '\\': // check whether a line concatenation takes place if(*strLine == '\0') { return true; } break; case '?': switch(*strLine) { case '?': strLine++; switch(*strLine) { case '<': PushToken(TOKEN_CHAR, '{'); strLine++; break; case '>': PushToken(TOKEN_CHAR, '}'); strLine++; break; case '(': PushToken(TOKEN_CHAR, '['); strLine++; break; case ')': PushToken(TOKEN_CHAR, ']'); strLine++; break; case '\'': PushToken(TOKEN_CHAR, '^'); strLine++; break; case '!': PushToken(TOKEN_CHAR, '|'); strLine++; break; case '-': PushToken(TOKEN_CHAR, '~'); strLine++; break; case '=': PushToken(TOKEN_CHAR, '#'); strLine++; break; case '/': PushToken(TOKEN_CHAR, '\\'); strLine++; break; default: PushToken(TOKEN_TEXT, "??"); break; } break; default: PushTokenIfPreProcMode(TOKEN_OPERATOR, PP_OP_CONDITIONAL, "?"); break; } break; default: if (isalpha(c) || c == '_') { strLine = ParseLabel(strLine-1); if (m_bMessage) { HandleMessage(strLine); m_bInclude = false; return true; } } else if (isdigit(c)) { strLine = ParseLiteral(strLine-1); } else { PushToken(TOKEN_CHAR, c); } break; } bLineStart = false; } return PushPreProcEnd(); }