示例#1
0
/* Turn `text` into a singly linked list of Token nodes.
 *
 * At each position every token type in the half-open range
 * [intKwd, endOfFile) is tried via matchRegex against typeDatabase, and
 * the type that matches the most characters is chosen.  The comparison
 * is strict, so on a tie the lowest-numbered type wins.  When nothing
 * matches, the single offending character is emitted as a lexicalError
 * token.  White space and comments are skipped before the first token
 * and after every token.
 *
 * Returns the head of the list.  The last node always has terminal ==
 * endOfFile and next == NULL; its lexeme is not assigned here.  Every
 * node is allocated with `new` and none is freed in this function.
 */
Token* Scanner::scan (const char *text){
	// `tail` is always an unfilled node: either the next token to write
	// or, once the input runs out, the endOfFile sentinel.
	Token* head = new Token();
	Token* tail = head;

	text += consumeWhiteSpaceAndComments(text);

	while (*text != '\0') {
		// Longest-match search across all token types.
		int bestLength = 0;
		int bestType = -1;
		for (int candidate = intKwd; candidate < endOfFile; ++candidate) {
			const int matched = matchRegex(typeDatabase[candidate], text);
			if (matched > bestLength) {
				bestLength = matched;
				bestType = candidate;
			}
		}

		// An unrecognised character still consumes one char so that the
		// scan always makes progress.
		const bool recognised = bestLength > 0;
		const int consumed = recognised ? bestLength : 1;

		tail->lexeme = std::string(text, consumed);
		if (recognised) {
			tail->terminal = static_cast<tokenType>(bestType);
		} else {
			tail->terminal = lexicalError;
		}

		// Append a fresh unfilled node and step onto it.
		tail->next = new Token();
		tail = tail->next;

		text += consumed;
		text += consumeWhiteSpaceAndComments(text);
	}

	// Close the list with the end-of-file sentinel.
	tail->terminal = endOfFile;
	tail->next = NULL;
	return head;
}
示例#2
0
int main(int argc, char **argv) {

    char *text = readInput(argc, argv) ;

    // If reading in input failed, exit with return code of 1.
    if (text==NULL) {
        return 1 ;
    }

    // Create the compiled regular expressions.
    regex_t whiteSpace ;
    makeRegex (&whiteSpace, "^[\n\t\r ]+") ;

    regex_t blockComment ;
    makeRegex (&blockComment, "^/\\*([^\\*]|\\*+[^\\*/])*\\*+/");

    regex_t lineComment ;
    makeRegex (&lineComment, "^//[^\n]*\n");

    regex_t word ;
    makeRegex (&word, "^([a-zA-Z]+)") ;

    regex_t integerConst ;
    //makeRegex (&integerConst, "^[0-9]+") ;    
    // modified to include the count of floating point numbers
    // somehow using / instead of [] complains, ^ is needed to show front
    makeRegex (&integerConst, "^[0-9]*[.]*[0-9]+");
    
    //Add: Boot Regex is not ^[Boot]+ as that matches nonwords
    regex_t boot ;
    makeRegex (&boot, "^(Boot)") ;



    /* This enumerated type is used to keep track of what kind of
       construct was matched. 
     */
    enum MatchType { numMatch, wordMatch, bootMatch, noMatch } matchType ;

    int numMatchedChars = 0 ;

    // Consume leading white space and comments
    numMatchedChars = consumeWhiteSpaceAndComments (&whiteSpace, &blockComment, &lineComment, 
                                                    text) ;
    /* text is a character pointer that points to the current
       beginning of the array of characters in the input.  Adding an
       integer value to it advances the pointer that many elements in
       the array.  Thus, text is increased so that it points to the
       current location in the input. 
     */
    text = text + numMatchedChars ;

    int maxNumMatchedChars = 0 ;
    int numWords = 0, numNumericConsts = 0 ;
    int numBoot = 0;

    while ( text[0] != '\0' ) {
        maxNumMatchedChars = 0 ; matchType = noMatch ;

        /* maxNumMatchedChars is used to ensure that the regular
           expression that matched the longest string is the one that
           we use.  

           The regexs for word and integerConst cannot match the
           same text, but if we extend this program to search for
           specific keywords, then the keyword regex and the
           word-regex may, in some cases, match the same input text.

           If two regexs match the same number of characters
           then the tie has to be broken.  To break the tie, priority
           is given to the first one that was tried.  Thus the
           comparison
              (numMatchedChars > maxNumMatchedChars) 
           is strictly greater than.  Not greater than or  equal to.
        */

		// Add: BootMatch, which has to be above the normal wordMatch
		// this is more important than word as it has to take precedence over the word regex
		numMatchedChars = matchRegex (&boot, text) ;
        if (numMatchedChars > maxNumMatchedChars) {
            maxNumMatchedChars = numMatchedChars ;
            matchType = bootMatch ;
		}
		

        // Try to match a word
        numMatchedChars = matchRegex (&word, text) ;
        if (numMatchedChars > maxNumMatchedChars) {
            maxNumMatchedChars = numMatchedChars ;
            matchType = wordMatch ;
        }

        // Try to match an integer constant
        numMatchedChars = matchRegex (&integerConst, text) ;
        if (numMatchedChars > maxNumMatchedChars) {
            maxNumMatchedChars = numMatchedChars ;
            matchType = numMatch ;
        }
		
        switch (matchType) 
        {
		case bootMatch: ++numBoot; break; 
        case wordMatch: ++numWords; break;
        case numMatch: ++numNumericConsts; break;
        case noMatch: ;
        }

        if (matchType == noMatch) {
            // If we didn't match anything, then just skip the first character.
            text = text + 1 ;
        }
        else {
            // Consume the characters that were matched.
            text = text + maxNumMatchedChars ;
        }

        // Consume white space and comments before trying again for
        // another word or integer.
        numMatchedChars = consumeWhiteSpaceAndComments (&whiteSpace, &blockComment, &lineComment, 
                                                        text) ;
        text = text + numMatchedChars ;

    }

    /* In this application the only information we collect is the
       number of words and number of integer constants.  In a scanner
       we would need to accumulate the list of tokens. */
    printf ("%d\n", numWords) ;
    printf ("%d\n", numNumericConsts) ;
    printf ("%d\n", numBoot) ;

    /* You will add another printf statement to print the number of
       "John" keywords.  All of these numbers should be on separate
       lines.  In assessing your work we will require that your output
       exactly match ours: no extra spaces and each number on a
       separate line. */
}