Beispiel #1
0
void printTokens(TokenizerT *tk){
    int index=0;										// variable that stores the array index of a symbol from the symbol table
    while(tk->tokeninput[tk->ptrInput] != '\0'){		// until End of string
        char* temp = TKGetNextToken(tk);				// store the token obtained in temp
        if (temp != NULL) {
            
            if (tk->prevState == 'm') {
                printf("Invalid input\n");
                break;
            }
            
            //switch statement to print out type
            switch (tk->prevState) {
                case 'w':
                    printf("word ");
                    break;
                case '1':
                    printf("integer ");
                    break;
                case 'f':
                    printf("float ");
                    break;
                case 'e':
                    printf("float ");
                    break;
                case 'h':
                    printf("hex constant ");
                    break;
                case 'o':
                    printf("octal constant ");
                    break;
                case 'r':
                    index=iscombinedSymbol(temp);
                    printf("%s ",symbols[index][1]);
                    break;
                case 'q':
                    index=isEscape(temp);
                    printf("%s",esc[index][1]);
                    break;
            }
            
            printf("\"%s\"\n",temp); //print out token
        }
        else break;
        
    }
    
    //function here to figure out what type the token is
    
    //print type and then "token"
    
}
Beispiel #2
0
char *TKGetNextToken( TokenizerT * tk ) {
    
    int Mcheck= 0;
    if (tk->tokeninput == NULL) {
        return NULL;
    }
    
    //get first character of string into tk->tokenChecker, start counting the length of the input
    tk->tokenLengthCounter = 0;
    tk->tokenChecker = tk->tokeninput[tk->ptrInput];
    //see if token checker is NULL symbol
    while (tk->tokenChecker != '\0') {
        
        switch (tk->currentState) {
                /**/       case 'd':     //default case
                
                if (tk->tokenChecker=='0') {    //string starts with a 0, octal possibly
                    tk->currentState = '0';
                    break;
                }
                
                else if (isdigit(tk->tokenChecker)) {    //string starts with a digit
                    tk->currentState = '1';
                    break;
                }
                
                else if (isalpha(tk->tokenChecker)) {    //string starts with a letter
                    tk->currentState = 'w';
                    break;
                }
                
                else if (isspace(tk->tokenChecker)) {    //string starts with a whitespace
                    tk->currentState = 's';
                    break;
                }
                
                else if((isSymbol(tk->tokenChecker))>0) {	// string starts with a operator
                    if(isSymbol(tk->tokeninput[tk->ptrInput+1])>0) {
                        char *test;
                        test=malloc(3*sizeof(char));
                        test[0]=tk->tokenChecker;
                        test[1]=tk->tokeninput[tk->ptrInput+1];
                        test[2]='\0';
                        if(iscombinedSymbol(test)==0) {
                            Mcheck=1;
                            tk->prevState=tk->currentState;
                            tk->currentState='m';
                            tk->tokenLengthCounter++;
                            tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                            break;
                        }
                        else {
                            tk->ptrInput++;
                            tk->tokenLengthCounter++;
                            tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                            tk->prevState='r';
                            tk->currentState='d';
                            tokenReturn(tk);
                            return test;
                        }
                				}
                    else{
                        tk->currentState= 'r';
                        break;}
                    
                }
                
                else if(tk->tokenChecker=='\\')
                {
                    int index=0;
                    char *seq=malloc(3*sizeof(char));
                    seq[0]='\\';
                    if(tk->tokeninput[tk->ptrInput+1]=='\\')
                        seq[1]='\\';
                    else
                        seq[1]=tk->tokeninput[tk->ptrInput+1];
                    seq[2]='\0';
                    if(isEscape(seq)>0)
                    {
                        tk->ptrInput=tk->ptrInput+2;
                        tk->tokenLengthCounter++;
                        tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                        tk->prevState='q';
                        tk->currentState='d';
                        tokenReturn(tk);
                        return seq;
                        break;
                				}
                    else {
                        Mcheck=1;
                        tk->prevState=tk->currentState;
                        tk->currentState='m';
                    }
                    break;
                }
                
                else tk->currentState = 'c';                  //string starts with other character
                break;
                
                /**/       case 'w':     //word case
                while(isalnum((int)tk->tokenChecker) && tk->tokenChecker != '\0'){      //loop until non-alphanumeric
                    tk->tokenLengthCounter++;
                    tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                }
                tk->prevState = tk->currentState;
                tokenReturn(tk);
                return tk->nextToken;
                
                /**/       case 's':      //whitespace case
                
                //go to next char
                tk->tokenLengthCounter++;
                tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                tk->tokenLengthCounter=0;
                tk->ptrInput++;
                tk->prevState = tk->currentState;
                tk->currentState = 'd'; //reset case to default after whitespace is skipped
                break;
                
                /**/       case '0':       //begins with a zero, possibly octal
                //see next digit
                tk->tokenLengthCounter++;
                tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                
                //if x is seen, go to hex
                if (tk->tokenChecker=='x' || tk->tokenChecker=='X') {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'h'; //set case to hex
                    break;
                }
                
                //if 8 or 9 is seen, go to integer
                if (tk->tokenChecker=='8' || tk->tokenChecker=='9') {
                    printf("Please format floats and integers without extraneous zeroes or letters\n");
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                
                //if . is seen, go to float
                if (tk->tokenChecker=='.') {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'f'; //set case to float
                    break;
                }
                
                //if next character is not alphanumeric, token is zero, return it
                if (!isalnum((int)tk->tokenChecker)) {
                    tk->prevState = 'i';
                    tk->currentState = 'd'; //set case to default after token is returned
                    tokenReturn(tk);
                    return tk->nextToken;
                }
                //otherwise, character is octal
                tk->prevState = tk->currentState;
                tk->currentState = 'o'; //set case to default after token is returned
                break;
                
                /**/       case '1':       //begins with a 1, decimal integer or float
                
                Mcheck=0;
                tk->tokenLengthCounter++;
                tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                
                while(1){      //loop until non-numeric
                    
                    //if it has a decimal point, send it to float
                    if (tk->tokenChecker=='.') {
                        tk->prevState = tk->currentState;
                        tk->currentState = 'f'; //set case to float
                        break;
                    }
                    
                    //if its a alphanumeric, keep going
                    if (!isalnum((int)tk->tokenChecker)) {
                        break;
                    }
                    
                    //but make sure it's not null
                    if (tk->tokenChecker == '\0') {
                        break;
                    }
                    
                    //if there is an E, move to exponential float type
                    if (tk->tokenChecker == 'e' || tk->tokenChecker == 'E') {
                        tk->prevState = tk->currentState;
                        tk->currentState = 'e'; //set case to exp float
                        break;
                    }
                    
                    
                    //if it is alpha, give error to correctly format numbers
                    if (isalpha((int)tk->tokenChecker)) {
                        Mcheck= 1;
                        printf("Please format floats and integers without extraneous zeroes or letters\n");
                        break;
                    }
                    
                    tk->tokenLengthCounter++;
                    tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                }
                
                if (tk->currentState == 'f' || tk->currentState == 'e') {
                    break;
                }
                if (Mcheck == 1) {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                else{
                    tk->prevState = tk->currentState;
                    tokenReturn(tk);
                    return tk->nextToken;
                }
                
                
                
                
                
                /**/       case 'h':       //hexadecimal case
                
                Mcheck=0;
                //skip past 'x'
                tk->tokenLengthCounter++;
                tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                
                //checks if there exists a digit after decimal point
                if (!isdigit((int)tk->tokenChecker)) {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    printf("Invalid input, please format hexadecimals with values after the 0x");
                    break;
                }
                
                while(1){      //loop until non-numeric
                    
                    if (tk->tokenChecker == '.') {
                        printf("Please provide valid hex values, and format floats without extraneous zeroes or letters\n");
                        Mcheck= 1;
                        break;
                    }
                    
                    //if its a alphanumeric, keep going
                    if (!isalnum((int)tk->tokenChecker)) {
                        break;
                    }
                    //but make sure it's not null
                    if (tk->tokenChecker == '\0') {
                        break;
                    }
                    
                    //if it is alpha, give error to correctly format octals/numbers
                    if (!isxdigit((int)tk->tokenChecker)) {
                        Mcheck= 1;
                        printf("Please provide valid hex values, and do not begin words with 0\n");
                        break;
                    }
                    
                    
                    tk->tokenLengthCounter++;
                    tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                }
                
                
                if (Mcheck == 1) {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                else{
                    tk->prevState = tk->currentState;
                    tokenReturn(tk);
                    return tk->nextToken;
                }
                
                /**/       case 'o':       //octal case
                Mcheck=0;
                while(1){      //loop until non-numeric
                    
                    if (tk->tokenChecker == '.') {
                        printf("Please provide valid octals (0-7), and format floats without extraneous zeroes\n");
                        Mcheck= 1;
                        break;
                    }
                    
                    //if its a alphanumeric, keep going
                    if (!isalnum((int)tk->tokenChecker)) {
                        break;
                    }
                    //but make sure it's not null
                    if (tk->tokenChecker == '\0') {
                        break;
                    }
                    
                    //if it is alpha, give error to correctly format octals/numbers
                    if (isalpha((int)tk->tokenChecker)) {
                        Mcheck= 1;
                        printf("Please provide valid octals (0-7), and do not begin words with 0\n");
                        break;
                    }
                    
                    // if there are invalid numbers, give error for correct format
                    if (tk->tokenChecker == '8' || tk->tokenChecker == '9') {
                        Mcheck= 1;
                        printf("Please provide valid octals (0-7), and do not begin non-octal/hex numerical values with 0\n");
                        break;
                    }
                    
                    tk->tokenLengthCounter++;
                    tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                }
                
                
                if (Mcheck == 1) {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                else{
                    tk->prevState = tk->currentState;
                    tokenReturn(tk);
                    return tk->nextToken;
                }
                
                /**/       case 'f':       //float case
                Mcheck=0;
                //skip past '.'
                tk->tokenLengthCounter++;
                tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                
                //checks if there exists a digit after decimal point
                if (!isdigit((int)tk->tokenChecker)) {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                
                while(1){      //loop until non-numeric
                    
                    //if its a alphanumeric, keep going
                    if (!isalnum((int)tk->tokenChecker)) {
                        break;
                    }
                    //but make sure it's not null
                    if (tk->tokenChecker == '\0') {
                        break;
                    }
                    
                    //if there is an E, move to exponential float type
                    if (tk->tokenChecker == 'e' || tk->tokenChecker == 'E') {
                        tk->prevState = tk->currentState;
                        tk->currentState = 'e'; //set case to exp float
                        break;
                    }
                    
                    //if it is alpha still, give error to correctly format octals/numbers
                    if (isalpha((int)tk->tokenChecker)) {
                        Mcheck= 1;
                        printf("Please provide valid floats with numbers and one exponential only\n");
                        break;
                    }
                    
                    
                    tk->tokenLengthCounter++;
                    tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                }
                
                
                if (Mcheck == 1) {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                else if(tk->currentState != 'e'){
                    tk->prevState = tk->currentState;
                    tokenReturn(tk);
                    return tk->nextToken;
                }
                
                /**/       case 'e':       //exponential float
                Mcheck=0;
                //skip past 'e'
                tk->tokenLengthCounter++;
                tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                
                //if first character is a positive or negative sign, skip past it and see if there's a number afterwards
                if (tk->tokenChecker=='-'||tk->tokenChecker=='+') {
                    tk->tokenLengthCounter++;
                    tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                }
                if (!isdigit((int)tk->tokenChecker)) {
                    printf("Please provide valid floats with numbers and ONE exponential only, followed by an integer value\n");
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                
                
                while(1){      //loop until non-numeric
                    
                    
                    //if its a alphanumeric, keep going
                    if (!isalnum((int)tk->tokenChecker)) {
                        break;
                    }
                    //but make sure it's not null
                    if (tk->tokenChecker == '\0') {
                        break;
                    }
                    
                    //if it is alpha still, give error to correctly format octals/numbers
                    if (isalpha((int)tk->tokenChecker)) {
                        Mcheck= 1;
                        printf("Please provide valid floats with numbers and ONE exponential only, followed by an integer value\n");
                        break;
                    }
                    
                    
                    tk->tokenLengthCounter++;
                    tk->tokenChecker = tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                }
                
                
                if (Mcheck == 1) {
                    tk->prevState = tk->currentState;
                    tk->currentState = 'm'; //set case to malfunction
                    break;
                }
                else{
                    tk->prevState = tk->currentState;
                    tokenReturn(tk);
                    return tk->nextToken;
                }
                
                /**/ 		case 'r': 		// operators case
                tk->tokenLengthCounter++;
                tk->tokenChecker=tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                tk->prevState=tk->currentState;
                tokenReturn(tk);
                return tk->nextToken;
                break;
                
            case 'q':		// Escape sequences
                tk->tokenLengthCounter++;
                tk->tokenChecker=tk->tokeninput[tk->ptrInput+tk->tokenLengthCounter];
                tk->prevState=tk->currentState;
                tokenReturn(tk);
                return tk->nextToken;
                break;
                /**/       case 'm':       //malfunction, bad input case
                tk->prevState = tk->currentState;
                tokenReturn(tk);
                return tk->nextToken;
                break;
                
        }
        
    }
    
    return NULL;
}
Beispiel #3
0
// TODO: number escape sequence
bool Tokenizer::consumeString(std::streambuf::int_type ch)
{
    mpCurrent.reset(new Token);
    mpCurrent->setLine(line());
    mpCurrent->setBeginColumn(column());
    mpCurrent->setType(Token::TYPE_STRING_LITERAL);

    absorbed(ch);

    auto  openQuotationMark = ch;
    for (;;)
    {
        if (eof())
        {
            mpMessageCollector->addMessage(
                    Message::SEVERITY_ERROR, Message::t_missingTerminatingQuotationMark,
                    mpSourceId, line(), column());
            mpCurrent.reset();
            return false;
        }

        ch = mpInput->get();
        if (isEOL(ch))
        {
            mpMessageCollector->addMessage(
                    Message::SEVERITY_ERROR, Message::t_missingTerminatingQuotationMark,
                    mpSourceId, line(), column());
            mpCurrent.reset();
            return false;
        }

        absorbed(ch);
        if (ch == openQuotationMark)
        {
            break;
        }

        if (isEscape(ch))
        {
            if (eof())
            {
                mpMessageCollector->addMessage(
                        Message::SEVERITY_ERROR, Message::t_unknownEscapeSequence,
                        mpSourceId, line(), column());
                mpCurrent.reset();
                return false;
            }
            auto nch = mpInput->get();
            if (!isEscapee(nch))
            {
                mpMessageCollector->addMessage(
                        Message::SEVERITY_ERROR, Message::t_unknownEscapeSequence,
                        mpSourceId, line(), column());
                mpCurrent.reset();
                return false;
            }
            mpCurrent->addChar(ch);
            absorbed(nch);
            ch = nch;
        }

        mpCurrent->addChar(ch);
    }

    mpCurrent->setEndColumn(column());
    return true;
}