int Scanner::consumeWhiteSpaceAndComments (const char *text) { regex_t* whiteSpace=makeRegex("^[\n\t\r ]+") ; regex_t* blockComment=makeRegex("^/\\*([^\\*]|\\*+[^\\*/])*\\*+/"); regex_t* lineComment=makeRegex("^//[^\n]*\n"); int numMatchedChars = 0 ; int totalNumMatchedChars = 0 ; int stillConsumingWhiteSpace ; do { stillConsumingWhiteSpace = 0 ; // exit loop if not reset by a match // Try to match white space numMatchedChars = matchRegex (whiteSpace, text) ; totalNumMatchedChars += numMatchedChars ; if (numMatchedChars > 0) { text = text + numMatchedChars ; stillConsumingWhiteSpace = 1 ; } // Try to match block comments numMatchedChars = matchRegex (blockComment, text) ; totalNumMatchedChars += numMatchedChars ; if (numMatchedChars > 0) { text = text + numMatchedChars ; stillConsumingWhiteSpace = 1 ; } // Try to match single-line comments numMatchedChars = matchRegex (lineComment, text) ; totalNumMatchedChars += numMatchedChars ; if (numMatchedChars > 0) { text = text + numMatchedChars ; stillConsumingWhiteSpace = 1 ; } } while ( stillConsumingWhiteSpace ) ; return totalNumMatchedChars ; }
bool MakeLeftRules() { RULELIST * Rules = klf.getRules(); //vtERs.clear(); for (EXPENDEDRULELIST::iterator it = vtERs.begin(); it != vtERs.end(); ++it) { structExpendedRule * r = &*it; delete r->match_pattern; delete r->vk; delete r->switches; delete r->regex; } vtERs.clear(); for (RULELIST::iterator it = Rules->begin(); it != Rules->end(); it++) { std::wstring * str_in = new std::wstring; VIRTUALKEYS * vks = new VIRTUALKEYS; SWITCHES * sws = new SWITCHES; if (makeRegex(str_in, vks, sws, it->strInRule, wcslen((wchar_t*)it->strInRule))) { structExpendedRule er; er.match_pattern = str_in; er.vk = vks; er.switches = sws; er.estimated_length = estimate_length(str_in->c_str()); extractCharClasses(str_in->c_str(), &er.cc); er.regex = new boost::wregex (str_in->c_str()); er.rule_it = it; vtERs.push_back(er); } } vtERs.sort(); vtERs.reverse(); return true; }
bool getOutputAndSend(InputRuleInfo * ir_info, WORD wVk, LPBYTE KeyStates) { boost::wcmatch matches; const wchar_t * srcString = InternalEditor.GetTextBackward(ir_info->it->estimated_length); if (!srcString) return false; boost::regex_match(srcString, matches, *ir_info->it->regex); int len = wcslen((wchar_t*)ir_info->it->rule_it->strOutRule); std::wstring outStr ; VIRTUALKEYS vks; SWITCHES sws; /*for (SWITCHES::iterator sws_it = ir_info->it->switches->begin(); sws_it != ir_info->it->switches->end(); sws_it++) { InternalEditor.invertSwitch(*sws_it); }*/ if (!makeRegex(&outStr, &vks, &sws, ir_info->it->rule_it->strOutRule, len, false, &matches, &ir_info->it->cc)) return false; sendSingleKey(VK_CONTROL, KEYEVENTF_KEYUP); sendSingleKey(VK_MENU, KEYEVENTF_KEYUP); //sendSingleKey(VK_SHIFT, KEYEVENTF_KEYUP); if (!outStr.length()) { if (ir_info->deleted == false) // Delete recent added temporary input InternalEditor.Delete(); sendBackspace(wcslen(srcString)); if (mksOldState.CTRL) sendSingleKey(VK_CONTROL, 0); if (mksOldState.ALT) sendSingleKey(VK_MENU, 0); //if (mksOldState.SHIFT) // sendSingleKey(VK_SHIFT, 0); return true; } std::wstring resultStr; //std::wstring * temp = new std::wstring(srcString); resultStr = boost::regex_replace((std::wstring)srcString, *ir_info->it->regex, outStr); //temp->clear(); if (!resultStr.size()){ //rise error //continue; return false; } if (ir_info->deleted == false) // Delete recently added temporary input InternalEditor.Delete(); int srcStringStringLen = wcslen(srcString); outStr.assign(resultStr); if (srcStringStringLen) { int matchLength = getMatchLength(outStr.c_str(), srcString); if (matchLength < 0){ sendBackspace(0 - matchLength); matchLength = getMatchLength(outStr.c_str(), srcString); sendBackspace(outStr.length() - matchLength); } else if (srcStringStringLen > matchLength){ sendBackspace(srcStringStringLen - matchLength); } outStr.erase(0, matchLength); } sendKeyStrokes(&outStr); int strOutLength = outStr.length(); if ( strOutLength == 1 && (outStr.at(0)>0x20 && outStr.at(0)<0x7F) ){} else { // KeyUp match VKs for (VIRTUALKEYS::iterator vk_it = ir_info->it->vk->begin(); vk_it != ir_info->it->vk->end(); vk_it++){ KeyStates[*vk_it] = 0; } // Do match again matchRules(0, 0, KeyStates, false); } if (mksOldState.CTRL) sendSingleKey(VK_CONTROL, 0); if (mksOldState.ALT) sendSingleKey(VK_MENU, 0); //if (mksOldState.SHIFT) // sendSingleKey(VK_SHIFT, 0); return true; }
void Scanner::buildDataBase(){ // malloc typeDatabase, this will be an array of pointers to regexes, there are 46 types of tokens, but we wont need all of them in regexes. this array will be easily extensible typeDatabase=(regex_t**)malloc(43*(sizeof(regex_t*))); typeDatabase[intKwd]=makeRegex("^Int");//intkwd typeDatabase[floatKwd]=makeRegex("^Float");//floatkwd typeDatabase[boolKwd]=makeRegex("^Bool");//boolkwd typeDatabase[trueKwd]=makeRegex("^True");//truekwd typeDatabase[falseKwd]=makeRegex("^False");//falsekwd typeDatabase[stringKwd]=makeRegex("^Str");//stringkwd typeDatabase[matrixKwd]=makeRegex("^Matrix");//matrixkwd typeDatabase[letKwd]=makeRegex("^let");//letkwd typeDatabase[inKwd]=makeRegex("^in");//inkwd typeDatabase[endKwd]=makeRegex("^end");//endkwd typeDatabase[ifKwd]=makeRegex("^if");//ifkwd typeDatabase[thenKwd]=makeRegex("^then");//thenkwd typeDatabase[elseKwd]=makeRegex("^else");//elsekwd typeDatabase[forKwd]=makeRegex("^for");//forkwd typeDatabase[whileKwd]=makeRegex("^while");//whilekwd typeDatabase[printKwd]=makeRegex("^print");//printkwd typeDatabase[intConst]=makeRegex("^[0-9]+");//intconst typeDatabase[floatConst]=makeRegex("(^[0-9]+\\.[0-9]+)"); //|(^[0-9]*\\.[0-9]+)");//floatconst typeDatabase[stringConst]=makeRegex("^\"[^\"]*\"");//stringconst typeDatabase[variableName]=makeRegex("^[a-zA-Z_]+[0-9_]*");//variableName typeDatabase[leftParen]=makeRegex("^\\(");//leftParen typeDatabase[rightParen]=makeRegex("^\\)");//rightParen typeDatabase[leftCurly]=makeRegex("^\\{");//leftCurly typeDatabase[rightCurly]=makeRegex("^\\}");//rightCurly typeDatabase[leftSquare]=makeRegex("^\\[");//leftsquare typeDatabase[rightSquare]=makeRegex("^\\]");//rightsquare typeDatabase[comma]=makeRegex("^,");//comma typeDatabase[semiColon]=makeRegex("^;");//semicolon typeDatabase[colon]=makeRegex("^:");//colon typeDatabase[assign]=makeRegex("^=");//assign typeDatabase[plusSign]=makeRegex("^\\+");//plusSign typeDatabase[star]=makeRegex("^\\*");//star typeDatabase[dash]=makeRegex("^\\-");//dash typeDatabase[forwardSlash]=makeRegex("^/");//forwardslash typeDatabase[lessThan]=makeRegex("^<");//lessThan typeDatabase[lessThanEqual]=makeRegex("^<=");//lessThanEqual typeDatabase[greaterThan]=makeRegex("^>");//greaterThan typeDatabase[greaterThanEqual]=makeRegex("^>=");//greaterThanEqual typeDatabase[equalsEquals]=makeRegex("^==");//equalsEquals typeDatabase[notEquals]=makeRegex("^!=");//notEquals typeDatabase[andOp]=makeRegex("^&&");//andOp typeDatabase[orOp]=makeRegex("^\\|\\|");//orOp typeDatabase[notOp]=makeRegex("^!");//notOp }
int main(int argc, char **argv) { char *text = readInput(argc, argv) ; // If reading in input failed, exit with return code of 1. if (text==NULL) { return 1 ; } // Create the compiled regular expressions. regex_t whiteSpace ; makeRegex (&whiteSpace, "^[\n\t\r ]+") ; regex_t blockComment ; makeRegex (&blockComment, "^/\\*([^\\*]|\\*+[^\\*/])*\\*+/"); regex_t lineComment ; makeRegex (&lineComment, "^//[^\n]*\n"); regex_t word ; makeRegex (&word, "^([a-zA-Z]+)") ; regex_t integerConst ; //makeRegex (&integerConst, "^[0-9]+") ; // modified to include the count of floating point numbers // somehow using / instead of [] complains, ^ is needed to show front makeRegex (&integerConst, "^[0-9]*[.]*[0-9]+"); //Add: Boot Regex is not ^[Boot]+ as that matches nonwords regex_t boot ; makeRegex (&boot, "^(Boot)") ; /* This enumerated type is used to keep track of what kind of construct was matched. */ enum MatchType { numMatch, wordMatch, bootMatch, noMatch } matchType ; int numMatchedChars = 0 ; // Consume leading white space and comments numMatchedChars = consumeWhiteSpaceAndComments (&whiteSpace, &blockComment, &lineComment, text) ; /* text is a character pointer that points to the current beginning of the array of characters in the input. Adding an integer value to it advances the pointer that many elements in the array. Thus, text is increased so that it points to the current location in the input. */ text = text + numMatchedChars ; int maxNumMatchedChars = 0 ; int numWords = 0, numNumericConsts = 0 ; int numBoot = 0; while ( text[0] != '\0' ) { maxNumMatchedChars = 0 ; matchType = noMatch ; /* maxNumMatchedChars is used to ensure that the regular expression that matched the longest string is the one that we use. The regexs for word and integerConst cannot match the same text, but if we extend this program to search for specific keywords, then the keyword regex and the word-regex may, in some cases, match the same input text. If two regexs match the same number of characters then the tie has to be broken. To break the tie, priority is given to the first one that was tried. Thus the comparison (numMatchedChars > maxNumMatchedChars) is strictly greater than. Not greater than or equal to. */ // Add: BootMatch, which has to be above the normal wordMatch // this is more important than word as it has to take precedence over the word regex numMatchedChars = matchRegex (&boot, text) ; if (numMatchedChars > maxNumMatchedChars) { maxNumMatchedChars = numMatchedChars ; matchType = bootMatch ; } // Try to match a word numMatchedChars = matchRegex (&word, text) ; if (numMatchedChars > maxNumMatchedChars) { maxNumMatchedChars = numMatchedChars ; matchType = wordMatch ; } // Try to match an integer constant numMatchedChars = matchRegex (&integerConst, text) ; if (numMatchedChars > maxNumMatchedChars) { maxNumMatchedChars = numMatchedChars ; matchType = numMatch ; } switch (matchType) { case bootMatch: ++numBoot; break; case wordMatch: ++numWords; break; case numMatch: ++numNumericConsts; break; case noMatch: ; } if (matchType == noMatch) { // If we didn't match anything, then just skip the first character. text = text + 1 ; } else { // Consume the characters that were matched. text = text + maxNumMatchedChars ; } // Consume white space and comments before trying again for // another word or integer. numMatchedChars = consumeWhiteSpaceAndComments (&whiteSpace, &blockComment, &lineComment, text) ; text = text + numMatchedChars ; } /* In this application the only information we collect is the number of words and number of integer constants. In a scanner we would need to accumulate the list of tokens. */ printf ("%d\n", numWords) ; printf ("%d\n", numNumericConsts) ; printf ("%d\n", numBoot) ; /* You will add another printf statement to print the number of "John" keywords. All of these numbers should be on separate lines. In assessing your work we will require that your output exactly match ours: no extra spaces and each number on a separate line. */ }