void Tokenizer::next() { // check end of line if ( left >= line.length() ) { current = Token::ENDOFLINE; word.clear(); return; } // Only scan forward when not at start of line if ( left != 0 ) { left = line.find_first_not_of( tokenDelimiters, right ); if ( left == string::npos ) { left = line.length(); right = left; current = Token::ENDOFLINE; word.clear(); return; } } // check for line starting symbols current = identifySymbol( line.at( left ) ); if ( current != Token::UNKNOWN ) { word = ZString(1, line.at( left )); left += 1; right = left; return; } // multi-symbol token, so get the whole thing. right = line.find_first_of( tokenDelimiters, left ); if ( right == string::npos ) { right = line.length(); } word = line.substr( left, right-left ); // check if it's a number if ( word.isNumber() ) { current = Token::INTEGER; return; } // identify a word TokenMapper::iterator iter = mapper.find(word.upper()); if ( iter != mapper.end() ) { current = iter->second; return; } // couldn't identify it. current = Token::UNKNOWN; }
//Functions accepts program as input file and generates lexeme list as output int scan(FILE* input) { FILE* lex; lex = fopen("lexlist.txt", "w"); if(input == NULL){ fprintf(output, "File does not exist.\n"); return -1; } char file[2000]; int i = 0; while(!feof(input)) { fscanf(input, "%c", &file[i]); i++; } //Declare arrays to store symbolic values, numbers, and identifiers char program[i-1]; int lexemeList[i-1]; int numberList[i-1]; int currLex = 0; int currNum = 0; int currID = 0; identifier* idList = NULL; //Copy all of file into properly sized character array int j; for (j = 0; j < i-1; j++) program[j] = file[j]; char token[200]; int t = 0; //Read each character in program for (j = 0; j < i-1; j++) { if (is_alpha(program[j])) //Letter and numbers are added to current token until symbol or space is reached { token[t] = program[j]; t++; } else if (is_digit(program[j])) { token[t] = program[j]; t++; } else //When symbol or space is reached { int tokenVal; if (t != 0) //If the token is one or more characters in length { //Add null character to terminate string token[t] = '\0'; //Get symbol value for token tokenVal = identifyToken(lex, token, t, lexemeList, &currLex, numberList, &currNum, &idList, &currID); //If the token was invalid, the program terminates if (tokenVal == -1) return -1; } if (!is_space(program[j])) //If one symbol has been read that is not white space, get its symbolic value j = identifySymbol(lex, program, i-1, j, lexemeList, &currLex); //If the symbol was invalid, the program terminates if (j == -1) return -1; t = 0; } } //Print list of lexemes with numbers and identifiers printLexList(lex, lexemeList, currLex, numberList, currNum, idList, currID); fclose(lex); return 1; }