/**
 * Produces the next token from the input.
 *
 * Characters that identify a token on their own (quotes, parentheses,
 * brackets, braces, semicolon) are dispatched directly on the current
 * character. Anything else is classified through the is*() predicates, in a
 * fixed order. Line comments, block comments and spaces are consumed without
 * yielding a token, after which classification restarts on the new current
 * character.
 *
 * @return the token produced by the matching scan*() helper.
 * @throws whatever lexerException() builds, when the current character
 *         matches none of the known categories.
 */
Token *Lexer::nextToken()
{
    for (;;) {
        // NOTE(review): QChar::toAscii() is reported obsolete in newer Qt
        // releases; confirm the targeted Qt version before touching this.
        switch (_currentChar.toAscii()) {
        case '\'': return scanCharacter();
        case '"':  return scanText();
        case '(':  return scan(Token::LeftParenthesis);
        case ')':  return scan(Token::RightParenthesis);
        case '[':  return scan(Token::LeftBracket);
        case ']':  return scanRightBracket();
        case '{':  return scan(Token::LeftBrace);
        case '}':  return scan(Token::RightBrace);
        case ';':  return scan(Token::Semicolon);
        default:   break;
        }

        // Not a self-identifying character: classify it. The order of the
        // checks below is significant and mirrors the predicates one by one.
        if (isEof())
            return scan(Token::Eof);
        if (isLineComment()) {
            consumeLineComment();
            continue; // nothing produced, look at the next character
        }
        if (isBlockComment()) {
            consumeBlockComment();
            continue;
        }
        if (isNewline())
            return scanNewline();
        if (isSpace()) {
            consumeSpaces();
            continue;
        }
        if (isName())
            return scanName();
        if (isBackquotedName())
            return scanBackquotedName();
        if (isNumber())
            return scanNumber();
        if (isOperator())
            return scanOperator();

        throw lexerException(QString("invalid character: '%1'").arg(_currentChar));
    }
}
/*
 * End-of-element callback.
 *
 * data is the Scanner passed back by the XML parser; name is not used here.
 * The function first calls scanText(data) (presumably to flush pending
 * character data - confirm against scanText), closes the array cache of the
 * current CHILDREN list, then moves xsResult to its own `parent` property and
 * reloads CHILDREN from that parent's `children` property.
 */
void scanStopTag(void *data, const char *name)
{
    Scanner* scanner = data;
    xsMachine* the = scanner->the; /* the xs* macros expect a variable named `the` */

    scanText(data);

    /* The children list of the element being closed is complete. */
    xsArrayCacheEnd(xsVar(CHILDREN));

    /* Step back up: the parent becomes the current node again. */
    xsResult = xsGet(xsResult, xsID_parent);
    xsVar(CHILDREN) = xsGet(xsResult, xsID_children);
}
/**
 * Replaces the contents of pList with the values read from s.
 *
 * pList is emptied first. s is then consumed piece by piece through
 * scanText() with " " as its third argument; every piece is converted with
 * val() and appended to pList.
 *
 * @param pList list that receives the converted values (cleared on entry).
 * @param s     text to read the values from.
 */
void writeSeq(std::list<int> &pList, std::string s)
{
    pList.clear();

    // The cursor is only moved by scanText(), which is handed it on every
    // call; the loop ends once it reaches the end of s.
    for (int cursor = 0; cursor < s.length(); ) {
        std::string piece = scanText(s, cursor, " ");
        pList.push_back(val(piece));
    }
}
/**
 * Registers one or more grammar rules for lhs.
 *
 * When rhs contains no "|" a single BNF_Rule(lhs, rhs) is appended to
 * mRuleList with rhs left untouched. Otherwise rhs is walked with
 * scanText() using "|" as its third argument, and one rule is appended per
 * piece, each piece being passed through trim() first.
 *
 * @param lhs left-hand side shared by every rule created here.
 * @param rhs right-hand side, possibly holding several "|"-separated pieces.
 */
void Parser::addRule(std::string lhs, std::string rhs)
{
    // Single alternative: store the right-hand side exactly as given.
    if (rhs.find("|") == std::string::npos) {
        mRuleList.push_back(new BNF_Rule(lhs, rhs));
        return;
    }

    // Several alternatives: the cursor is advanced by scanText() itself.
    for (int cursor = 0; cursor < rhs.length(); ) {
        std::string alternative = trim(scanText(rhs, cursor, "|"));
        mRuleList.push_back(new BNF_Rule(lhs, alternative));
    }
}
void scanComment(void *data, const char *text) { Scanner* self = data; xsMachine* the = self->the; scanText(data); xsVar(VALUE) = xsString((xsStringValue)text); xsVar(LINE) = xsInteger(XML_GetCurrentLineNumber(self->expat)); xsVar(CHILD) = xsNewInstanceOf(xsVar(COMMENT_PROTOTYPE)); xsSet(xsVar(CHILD), xsID_path, xsVar(PATH)); xsSet(xsVar(CHILD), xsID_line, xsVar(LINE)); xsSet(xsVar(CHILD), xsID_parent, xsResult); xsSet(xsVar(CHILD), xsID_value, xsVar(VALUE)); xsArrayCacheItem(xsVar(CHILDREN), xsVar(CHILD)); }
void scanProcessingInstruction(void *data, const char *target, const char *text) { Scanner* self = data; xsMachine* the = self->the; scanText(data); scanName(the, target, 1); xsVar(VALUE) = xsString((xsStringValue)text); xsVar(LINE) = xsInteger(XML_GetCurrentLineNumber(self->expat)); xsVar(CHILD) = xsNewInstanceOf(xsVar(PI_PROTOTYPE)); xsSet(xsVar(CHILD), xsID_path, xsVar(PATH)); xsSet(xsVar(CHILD), xsID_line, xsVar(LINE)); xsSet(xsVar(CHILD), xsID_parent, xsResult); xsSet(xsVar(CHILD), xsID_name, xsVar(NAME)); xsSet(xsVar(CHILD), xsID_namespace, xsVar(NAMESPACE)); xsSet(xsVar(CHILD), xsID_prefix, xsVar(PREFIX)); xsSet(xsVar(CHILD), xsID_value, xsVar(VALUE)); xsArrayCacheItem(xsVar(CHILDREN), xsVar(CHILD)); }
std::string Parser::getAction( std::stack<std::string> &frameStack, std::stack<std::string> &pStack, std::list<int> &output, std::string expr, int &s, std::string &a, int &ip, int &ip2, std::map<std::string, bool> &visMap ) { std::string result; s = val(pStack.top()); // let "s" be stack top a = scanText(expr, ip2 = ip, " "); // let "a" be next token if ((result = (*mActionTable)[s][a]) == "_") // if error if (!frameStack.empty()) // if more alternatives to try { //========================================================= // pop branch info std::string name = popStack(frameStack); // get branch NAME int branchCnt = val(popStack(frameStack)); // get branch COUNT int branch = val(popStack(frameStack)); // get branch INDEX //========================================================= this->applyState( frameStack, pStack, output, s, a, ip, ip2, result ); // apply branch STATE //========================================================= // push branch info frameStack.push(cstr(branch)); // push branch INDEX frameStack.push(cstr(branchCnt)); // push branch COUNT frameStack.push(name); // push branch NAME //========================================================= } //========================================================= std::string name = cstr(s) + "_" + a + "_" + cstr(ip); // build branch NAME std::string state = name + "_" + right(readSeq(output), LOOP_SAMPLE_LEN); // build parser STATE //========================================================= if (result.find(" ") != -1) // if multiple actions possible { int branchCnt = 1; // init branch COUNT int branch = 0; // init branch INDEX if ( !frameStack.empty() && name == frameStack.top() ) // if revisiting branch { //========================================================= // pop branch info frameStack.pop(); // pop branch NAME branchCnt = val(popStack(frameStack)); // get branch COUNT branch = val(popStack(frameStack)); // get branch INDEX //========================================================= } 
//========================================================= state += "_" + cstr(branch); // build parser STATE //========================================================= if (branch < branchCnt) // if branch legal { if (!branch) // if first encounter with branch { this->pushState( frameStack, pStack, output, s, a, ip, ip2, result ); // push branch STATE branchCnt = countText(result, " ") + 1; // calc branch COUNT } //========================================================= result = getEntry(result, " ", branch); // choose this branch INDEX //========================================================= if (branch == branchCnt - 1) // if last encounter with branch this->popState(frameStack); // pop branch STATE else { //========================================================= // push branch info frameStack.push(cstr(branch + 1)); // push next branch INDEX frameStack.push(cstr(branchCnt)); // push branch COUNT frameStack.push(name); // push branch NAME //========================================================= } } else throw new Exception("parsing error"); } //========================================================= // check for infinite recursion if (visMap.find(state) != visMap.end()) throw new Exception("parsing error"); visMap[state] = true; // mark as visited //========================================================= return result; }
void Parser::load(std::string filename, std::string root) { this->reset(); //========================================================= // load input std::string fileData = deRepeat(replaceEx(loadText(filename), "\t", " "), ' ') + VBCRLF; fileData = replacePtn(fileData, "//", VBCRLF, VBCRLF); fileData = replacePtn(fileData, "/*", "*/", ""); //========================================================= bool table = (getPathPart(filename, PATH::EXTEN) == "par"); int mode = 0; int row = 0; //========================================================= int seed = 0; // init UID seed //========================================================= int i = 0; while (i < fileData.length()) { std::string token = trim(scanText(fileData, i, VBCRLF)); if (token == "") continue; if (token == "[cfg]") mode = 0; else if (token == "[par]") mode = 1; else { if (table) { // skip row prefix int j = 0; scanText(token, j, " "); token = right(token, token.length() - j); } switch (mode) { case 0: { std::string lhs = trim(getEntry(token, "->", 0)); std::string rhs = trim(getEntry(token, "->", 1)); int j = 0; this->addRule(lhs, "( " + rhs + " )", j, seed); break; } case 1: { std::string lhs = trim(getEntry(token, "|", 0)); std::string rhs = trim(getEntry(token, "|", 1)); if (!row) { mActionTable = new ParserTable(lhs); mGotoTable = new ParserTable(rhs); } else { mActionTable->addRow( new ParserRow(&mActionTable->getSymTableR(), lhs) ); mGotoTable->addRow( new ParserRow(&mGotoTable->getSymTableR(), rhs) ); } row++; } } } } //========================================================= // augment grammar if (BNF_Util::isTerm(&mRuleList, BNF_ROOT2)) mRuleList.push_back(new BNF_Rule(BNF_ROOT2, root)); //========================================================= // generate terminal and non-terminal list through rule list for (int j = 0; j < mRuleList.size(); j++) { BNF_Rule *rule = mRuleList[j]; if (rule->mLHS != BNF_ROOT2) // (exclude ROOT2) mNonTermList.insert(rule->mLHS); // LHS must be non-terminal for 
(int k = 0; k < rule->length(); k++) { std::string rhs = (*rule)[k]; if (rhs != BNF_EMPTY) // (epsilon fix; exclude EMPTY) mTermList.insert(rhs); // RHS can be terminal or non-terminal } } mTermList.insert(BNF_END); // first terminal is always END //========================================================= // remove non-terminals from terminal list std::set<std::string>::iterator p; for (p = mNonTermList.begin(); p != mNonTermList.end(); p++) if (mTermList.find(*p) != mTermList.end()) mTermList.erase(*p); //========================================================= }
void scanStartTag(void *data, const char *tag, const char **attributes) { Scanner* self = data; xsMachine* the = self->the; const char **attribute; char* name; char* value; char* colon; scanText(data); xsVar(LINE) = xsInteger(XML_GetCurrentLineNumber(self->expat)); xsVar(CHILD) = xsNewInstanceOf(xsVar(ELEMENT_PROTOTYPE)); xsSet(xsVar(CHILD), xsID_path, xsVar(PATH)); xsSet(xsVar(CHILD), xsID_line, xsVar(LINE)); xsSet(xsVar(CHILD), xsID_parent, xsResult); if (!self->root) { self->root = 1; xsSet(xsResult, xsID_element, xsVar(CHILD)); } xsArrayCacheItem(xsVar(CHILDREN), xsVar(CHILD)); xsResult = xsVar(CHILD); xsVar(CHILDREN) = xsNewInstanceOf(xsArrayPrototype); xsArrayCacheBegin(xsVar(CHILDREN)); attribute = attributes; while (*attribute) { name = (char*)*attribute; attribute++; value = (char*)*attribute; attribute++; if (c_strncmp(name, "xmlns", 5) == 0) { colon = name + 5; if (*colon == ':') { *colon = 0; xsVar(NAME) = xsString(colon + 1); *colon = ':'; xsVar(PREFIX) = xsVar(XML_PREFIX); } else { xsVar(NAME) = xsVar(XML_PREFIX); xsVar(PREFIX) = xsUndefined; } xsVar(NAMESPACE) = xsVar(XML_NAMESPACE); xsVar(VALUE) = xsString(value); xsVar(CHILD) = xsNewInstanceOf(xsVar(ATTRIBUTE_PROTOTYPE)); xsSet(xsVar(CHILD), xsID_parent, xsResult); xsSet(xsVar(CHILD), xsID_path, xsVar(PATH)); xsSet(xsVar(CHILD), xsID_line, xsVar(LINE)); xsSet(xsVar(CHILD), xsID_name, xsVar(NAME)); xsSet(xsVar(CHILD), xsID_namespace, xsVar(NAMESPACE)); xsSet(xsVar(CHILD), xsID_prefix, xsVar(PREFIX)); xsSet(xsVar(CHILD), xsID_value, xsVar(VALUE)); xsArrayCacheItem(xsVar(CHILDREN), xsVar(CHILD)); } } xsArrayCacheEnd(xsVar(CHILDREN)); xsSet(xsResult, xsID_xmlnsAttributes, xsVar(CHILDREN)); xsVar(CHILDREN) = xsNewInstanceOf(xsArrayPrototype); xsArrayCacheBegin(xsVar(CHILDREN)); attribute = attributes; while (*attribute) { name = (char*)*attribute; attribute++; value = (char*)*attribute; attribute++; if (c_strncmp(name, "xmlns", 5) != 0) { scanName(the, name, 0); xsVar(VALUE) = xsString(value); 
xsVar(CHILD) = xsNewInstanceOf(xsVar(ATTRIBUTE_PROTOTYPE)); xsSet(xsVar(CHILD), xsID_parent, xsResult); xsSet(xsVar(CHILD), xsID_path, xsVar(PATH)); xsSet(xsVar(CHILD), xsID_line, xsVar(LINE)); xsSet(xsVar(CHILD), xsID_name, xsVar(NAME)); xsSet(xsVar(CHILD), xsID_namespace, xsVar(NAMESPACE)); xsSet(xsVar(CHILD), xsID_prefix, xsVar(PREFIX)); xsSet(xsVar(CHILD), xsID_value, xsVar(VALUE)); xsArrayCacheItem(xsVar(CHILDREN), xsVar(CHILD)); } } xsArrayCacheEnd(xsVar(CHILDREN)); xsSet(xsResult, xsID__attributes, xsVar(CHILDREN)); scanName(the, tag, 1); xsSet(xsResult, xsID_name, xsVar(NAME)); xsSet(xsResult, xsID_namespace, xsVar(NAMESPACE)); xsSet(xsResult, xsID_prefix, xsVar(PREFIX)); xsVar(CHILDREN) = xsNewInstanceOf(xsArrayPrototype); xsArrayCacheBegin(xsVar(CHILDREN)); xsSet(xsResult, xsID_children, xsVar(CHILDREN)); }