/*
 * Scans `text` and returns the head of a singly linked list of Tokens.
 *
 * At each position every token type in [intKwd, endOfFile) is tried via
 * matchRegex(typeDatabase[type], text) and the longest match wins. The
 * comparison is strict, so on a tie the type tried first (lowest index)
 * is kept. If nothing matches, a one-character lexicalError token is
 * emitted so scanning always makes progress.
 *
 * White space and comments are skipped before the first token and after
 * every token. The list always ends with a node whose terminal is
 * endOfFile and whose next pointer is NULL.
 *
 * Every node is allocated with `new` and none is released here.
 */
Token* Scanner::scan(const char *text) {
    Token* head = new Token();
    Token* tail = head;

    text += consumeWhiteSpaceAndComments(text);

    while (*text != '\0') {
        // Find the token type whose regex matches the longest prefix.
        int bestLength = 0;
        int bestType = -1;
        for (int candidate = intKwd; candidate < endOfFile; candidate++) {
            const int matched = matchRegex(typeDatabase[candidate], text);
            if (matched > bestLength) {
                bestLength = matched;
                bestType = candidate;
            }
        }

        // An unrecognised character still consumes exactly one char.
        const bool recognised = bestLength > 0;
        const int consumed = recognised ? bestLength : 1;

        tail->lexeme = std::string(text, consumed);
        if (recognised) {
            tail->terminal = static_cast<tokenType>(bestType);
        } else {
            tail->terminal = lexicalError;
        }

        // Append a fresh node; it becomes either the next token or the
        // endOfFile sentinel once the input is exhausted.
        tail->next = new Token();
        tail = tail->next;

        text += consumed;
        text += consumeWhiteSpaceAndComments(text);
    }

    // The last, still-empty node marks the end of the token stream.
    tail->terminal = endOfFile;
    tail->next = NULL;
    return head;
}
int main(int argc, char **argv) { char *text = readInput(argc, argv) ; // If reading in input failed, exit with return code of 1. if (text==NULL) { return 1 ; } // Create the compiled regular expressions. regex_t whiteSpace ; makeRegex (&whiteSpace, "^[\n\t\r ]+") ; regex_t blockComment ; makeRegex (&blockComment, "^/\\*([^\\*]|\\*+[^\\*/])*\\*+/"); regex_t lineComment ; makeRegex (&lineComment, "^//[^\n]*\n"); regex_t word ; makeRegex (&word, "^([a-zA-Z]+)") ; regex_t integerConst ; //makeRegex (&integerConst, "^[0-9]+") ; // modified to include the count of floating point numbers // somehow using / instead of [] complains, ^ is needed to show front makeRegex (&integerConst, "^[0-9]*[.]*[0-9]+"); //Add: Boot Regex is not ^[Boot]+ as that matches nonwords regex_t boot ; makeRegex (&boot, "^(Boot)") ; /* This enumerated type is used to keep track of what kind of construct was matched. */ enum MatchType { numMatch, wordMatch, bootMatch, noMatch } matchType ; int numMatchedChars = 0 ; // Consume leading white space and comments numMatchedChars = consumeWhiteSpaceAndComments (&whiteSpace, &blockComment, &lineComment, text) ; /* text is a character pointer that points to the current beginning of the array of characters in the input. Adding an integer value to it advances the pointer that many elements in the array. Thus, text is increased so that it points to the current location in the input. */ text = text + numMatchedChars ; int maxNumMatchedChars = 0 ; int numWords = 0, numNumericConsts = 0 ; int numBoot = 0; while ( text[0] != '\0' ) { maxNumMatchedChars = 0 ; matchType = noMatch ; /* maxNumMatchedChars is used to ensure that the regular expression that matched the longest string is the one that we use. The regexs for word and integerConst cannot match the same text, but if we extend this program to search for specific keywords, then the keyword regex and the word-regex may, in some cases, match the same input text. 
If two regexs match the same number of characters then the tie has to be broken. To break the tie, priority is given to the first one that was tried. Thus the comparison (numMatchedChars > maxNumMatchedChars) is strictly greater than. Not greater than or equal to. */ // Add: BootMatch, which has to be above the normal wordMatch // this is more important than word as it has to take precedence over the word regex numMatchedChars = matchRegex (&boot, text) ; if (numMatchedChars > maxNumMatchedChars) { maxNumMatchedChars = numMatchedChars ; matchType = bootMatch ; } // Try to match a word numMatchedChars = matchRegex (&word, text) ; if (numMatchedChars > maxNumMatchedChars) { maxNumMatchedChars = numMatchedChars ; matchType = wordMatch ; } // Try to match an integer constant numMatchedChars = matchRegex (&integerConst, text) ; if (numMatchedChars > maxNumMatchedChars) { maxNumMatchedChars = numMatchedChars ; matchType = numMatch ; } switch (matchType) { case bootMatch: ++numBoot; break; case wordMatch: ++numWords; break; case numMatch: ++numNumericConsts; break; case noMatch: ; } if (matchType == noMatch) { // If we didn't match anything, then just skip the first character. text = text + 1 ; } else { // Consume the characters that were matched. text = text + maxNumMatchedChars ; } // Consume white space and comments before trying again for // another word or integer. numMatchedChars = consumeWhiteSpaceAndComments (&whiteSpace, &blockComment, &lineComment, text) ; text = text + numMatchedChars ; } /* In this application the only information we collect is the number of words and number of integer constants. In a scanner we would need to accumulate the list of tokens. */ printf ("%d\n", numWords) ; printf ("%d\n", numNumericConsts) ; printf ("%d\n", numBoot) ; /* You will add another printf statement to print the number of "John" keywords. All of these numbers should be on separate lines. 
In assessing your work we will require that your output exactly match ours: no extra spaces and each number on a separate line. */ }