void Scanner::getWord(char *str, char *token_ptr, Token *tok) { /* Write some code to Extract the word */ char ch = *line_ptr; while ((char_table[ch] == LETTER) || (char_table[ch] == DIGIT)) { *token_ptr++ = *line_ptr++; ch = *line_ptr; } *token_ptr = '\0'; //Downshift the word, to make it lower case downshiftWord(str); /* Write some code to Check if the word is a reserved word. if it is not a reserved word its an identifier. */ if (!isReservedWord(str, tok)) { //set token to identifier tok->setCode(IDENTIFIER); } tok->setTokenString(string(str)); }
void Scanner::getWord(char *str, char *token_ptr, Token **tok) { /* Write some code to Extract the word */ char ch = *line_ptr; while ((char_table[ch] == LETTER) || (char_table[ch] == DIGIT)) { *token_ptr++ = *line_ptr++; ch = *line_ptr; } *token_ptr = '\0'; //Downshift the word, to make it lower case downshiftWord(str); /* Write some code to Check if the word is a reserved word. if it is not a reserved word its an identifier. */ //This is not working properly. TokenCode code; if (isReservedWord(str, &code)) { //NOT an identifier *tok = new Token(code); (*tok)->setTokenString(string(str)); } else { *tok = new IdentifierToken(string(str)); } }
void parseFunction(struct token *functionName)
/* Starting with ( parse function and make call graph.
 * Tokens are consumed up to and including the first ')' and one token
 * beyond it. If that token starts with '{', the body is scanned until the
 * matching '}' and a line "caller -> callee" is printed for every
 * identifier-like token that is directly followed by '(' and is not a
 * reserved word. */
{
struct hash *uniqHash = hashNew(0);  /* Created and freed only; duplicate filtering is disabled. */
char first;

/* Skip ahead to the token just past the first ')'. */
do
    nextToken();
while (token->string[0] != ')');
nextToken();

first = token->string[0];
if (first == '{')
    {
    int depth = 1;                    /* Current brace nesting level. */
    struct token *candidate = NULL;   /* Previous token when it looked like a name. */

    while (depth > 0)
        {
        char lead;
        nextToken();
        lead = token->string[0];
        switch (lead)
            {
            case '{':
                candidate = NULL;
                ++depth;
                break;
            case '}':
                candidate = NULL;
                --depth;
                break;
            case '(':
                if (candidate != NULL && !isReservedWord(candidate->string))
                    printf("%s -> %s\n", functionName->string, candidate->string);
                candidate = NULL;
                break;
            default:
                /* Only a token starting with a letter or '_' can name a call. */
                candidate = (lead == '_' || isalpha(lead)) ? token : NULL;
                break;
            }
        }
    }
hashFree(&uniqHash);
}
/*
 * Consumes the remainder of a word and builds its token.
 *
 * nextChar(tk) is called at least once and then repeatedly for as long as
 * the character at tk->inputIter[0] is alphanumeric or '_'. The collected
 * text in tk->tokenBuffer is then looked up with isReservedWord() and the
 * token is built with the description "reserved word" or "word".
 *
 * Implemented as a loop so that stack usage does not grow with the length
 * of the word.
 */
TokenT *_word(TokenizerT *tk) {
    do {
        nextChar(tk);
        /* <ctype.h> classifiers require a value representable as unsigned char. */
    } while (isalnum((unsigned char) tk->inputIter[0]) || tk->inputIter[0] == '_');

    if (isReservedWord(tk->tokenBuffer)) {
        return makeToken(tk, "reserved word");
    }
    return makeToken(tk, "word");
}
void getWord(void) { while ((calcCharCode(curChar) == CHR_LETTER) || (calcCharCode(curChar) == CHR_DIGIT) || (curChar == '_')) { *tokenp = curChar; tokenp++; getChar(); } *tokenp = nullptr; downShiftWord(); if (curChar == '.') { TokenCodeType endToken = TKN_END_MODULE; if (CurLibrary) endToken = TKN_END_LIBRARY; else if (CurModuleIdPtr->defn.info.routine.flags & ROUTINE_FLAG_FSM) endToken = TKN_END_FSM; //------------------------------------------------------------------- // If we have an "endmodule" string, stop. Otherwise, keep reading... if (strcmp(wordString, TokenStrings[endToken])) { *tokenp = curChar; tokenp++; getChar(); while ((calcCharCode(curChar) == CHR_LETTER) || (calcCharCode(curChar) == CHR_DIGIT) || (curChar == '_')) { *tokenp = curChar; tokenp++; getChar(); } *tokenp = nullptr; downShiftWord(); } } if (!isReservedWord()) curToken = TKN_IDENTIFIER; }
/**
 * Decides whether `name` may be used for a new node in `dag`.
 *
 * The checks run in this order: NULL name, empty name, DAGMan reserved
 * word, name already present in the DAG. The first failing check writes
 * its explanation to `whynot` and the function returns false. `whynot` is
 * left untouched when the name is accepted. `dag` is asserted non-NULL only
 * after the first three checks have passed.
 */
bool IsValidNodeName( Dag *dag, const char *name, MyString &whynot )
{
	bool acceptable = false;

	if( !name ) {
		whynot = "missing node name";
	}
	else if( name[0] == '\0' ) {
		whynot = "empty node name (name == \"\")";
	}
	else if( isReservedWord( name ) ) {
		whynot.formatstr( "invalid node name: '%s' is a DAGMan reserved word", name );
	}
	else {
		ASSERT( dag != NULL );
		if( dag->NodeExists( name ) ) {
			whynot.formatstr( "node name '%s' already exists in DAG", name );
		}
		else {
			acceptable = true;
		}
	}

	return acceptable;
}
/* Multi-character tokens recognised when a special symbol is examined. */
static const char *const lexMultiCharTokens[] = {
    "/*", "*/", "!=", "<=", ">=", ":=", "null", "odd"
};

/* Returns the length of the multi-character token starting at `at`, or 0 when none starts there. */
static int lexMultiCharTokenLength( const char *at ){
    size_t k;
    for( k = 0; k < sizeof lexMultiCharTokens / sizeof lexMultiCharTokens[0]; k++ ){
        size_t n = strlen( lexMultiCharTokens[ k ] );
        /* strncmp stops at the terminating NUL, so this never reads past the string. */
        if( strncmp( at, lexMultiCharTokens[ k ], n ) == 0 )
            return (int) n;
    }
    return 0;
}

/* Writes the first `count` characters of `buffer`, followed by a space, to `out`. */
static void lexEmitPrefix( FILE *out, const char *buffer, int count ){
    fprintf( out, "%.*s ", count, buffer );
}

/* Writes the `opLen` characters at buffer[at], followed by a space, then drops everything up to and including them from `buffer`. */
static void lexEmitAndShift( FILE *out, char *buffer, int at, int opLen ){
    char *rest = buffer + at + opLen;
    fprintf( out, "%.*s ", opLen, buffer + at );
    /* Source and destination overlap, so memmove (not strncpy/strcpy) is required. */
    memmove( buffer, rest, strlen( rest ) + 1 );
}

/*
 * Lexer driver.
 *
 * 1. Asks for the source file name and splits every whitespace-separated
 *    chunk of the source into tokens, written space-separated to
 *    "temp_output.txt".
 * 2. Reads those tokens back, skips everything between the tokens
 *    "/" "*" and "*" "/" (comment delimiters), and writes "lexeme_table.txt" and
 *    "lexeme_list.txt". If detectError() reports an error the program closes
 *    its files and exits with status 0, as before.
 * 3. Copies the source file to "source_program.txt" and removes the
 *    intermediate file.
 *
 * Returns 0 on success and 1 when the file name cannot be read or a file
 * cannot be opened.
 */
int main( void ){
    int i;                  //Loop counter.
    char buffer[ 10000 ];   //For reading in tokens.
    char sp[ 50 ];          //Name of input file.

    //Prompt user for source program name; the width keeps the read inside sp.
    printf( "What is the file name for your source program?\n" );
    if( scanf( "%49s", sp ) != 1 ){
        fprintf( stderr, "Error: no source file name was given.\n" );
        return 1;
    }

    //Open file input stream to main input (the program).
    FILE *fin = fopen( sp, "r" );
    if( fin == NULL ){
        fprintf( stderr, "Error: cannot open '%s' for reading.\n", sp );
        return 1;
    }

    //Intermediary output: unprocessed tokens are stored here for further processing.
    FILE *temp_fout = fopen( "temp_output.txt", "w" );
    if( temp_fout == NULL ){
        fprintf( stderr, "Error: cannot create 'temp_output.txt'.\n" );
        fclose( fin );
        return 1;
    }

    /** BEGIN PROCEDURE: create intermediary output **/
    while( fscanf( fin, "%9999s", buffer ) == 1 ){
        int len = (int) strlen( buffer );   //Length of what is left of the chunk.

        for( i = 0; i < len; i++ ){
            const int special = isSpecialSymbol( buffer[ i ] );

            //End of the chunk reached without a special symbol: print what is left.
            //Invalid symbols are ignored here and taken care of later.
            if( i == len-1 && !special ){
                fprintf( temp_fout, "%s ", buffer );
            }
            else if( special ){
                const int opLen = lexMultiCharTokenLength( buffer + i );

                if( opLen > 0 ){
                    //Multi-character token: the text before it (possibly empty) is printed first.
                    lexEmitPrefix( temp_fout, buffer, i );
                    lexEmitAndShift( temp_fout, buffer, i, opLen );
                }
                else if( i == 0 ){
                    //Special symbol at the beginning: print it as an individual token.
                    lexEmitAndShift( temp_fout, buffer, 0, 1 );
                }
                else{
                    //Symbol splits the chunk: print the text left of it, then the symbol.
                    lexEmitPrefix( temp_fout, buffer, i );
                    lexEmitAndShift( temp_fout, buffer, i, 1 );
                }

                //Rescan the shortened buffer from its start (the loop increment makes i 0).
                len = (int) strlen( buffer );
                i = -1;
            }
        }
    }
    /** END OF PROCEDURE **/

    //Resource management.
    fclose( temp_fout );
    fclose( fin );

    //Re-open the intermediary output and create the two result files.
    fin = fopen( "temp_output.txt", "r" );
    FILE *table = fopen( "lexeme_table.txt", "w" );   //Will hold the lexeme table.
    FILE *list = fopen( "lexeme_list.txt", "w" );     //Will hold the lexeme list.
    if( fin == NULL || table == NULL || list == NULL ){
        fprintf( stderr, "Error: cannot open the lexeme output files.\n" );
        if( fin != NULL ) fclose( fin );
        if( table != NULL ) fclose( table );
        if( list != NULL ) fclose( list );
        return 1;
    }

    //Print header for lexeme table.
    fprintf( table, "Lexeme Table:\n%15s%15s\n", "lexeme", "token type");

    /** BEGIN PROCEDURE: examine printed tokens **/
    int isResWord = -1;   //Value returned by isReservedWord(); -1 means "not a reserved word".
    int isComment = 0;    //1 while inside a comment block; otherwise 0.

    while( fscanf( fin, "%9999s ", buffer ) == 1 ){
        //Recognize beginning of comment block.
        if( !strcmp( buffer, "/*" ) ){
            isComment = 1;
        }
        //Recognize end of comment block.
        else if( isComment == 1 && !strcmp( buffer, "*/" ) ){
            isComment = 0;
        }
        //Only examine tokens outside of a comment block.
        else if( !isComment ){
            //Error detection by token.
            if( detectError( buffer ) ){
                fclose( fin );
                fclose( table );
                fclose( list );
                exit( 0 );
            }

            //Reserved word: print its code to table and list.
            isResWord = isReservedWord( buffer );
            if( isResWord != -1 ){
                fprintf( table, "%15s%15d\n", buffer, isResWord );
                fprintf( list, "%d ", isResWord );
            }
            //Identifier (code 2). <ctype.h> classifiers need an unsigned char value.
            else if( isalpha( (unsigned char) buffer[0] ) ){
                fprintf( table, "%15s%15d\n", buffer, 2 );
                fprintf( list, "2 %s ", buffer );
            }
            //Number (code 3).
            else if( isdigit( (unsigned char) buffer[0] ) ){
                fprintf( table, "%15s%15d\n", buffer, 3 );
                fprintf( list, "3 %s ", buffer );
            }
            //Otherwise the token is some other valid symbol (checked for errors above).
            else{
                const int symbolCode = isOtherValid( buffer );
                fprintf( table, "%15s%15d\n", buffer, symbolCode );
                fprintf( list, "%d ", symbolCode );
            }
        }
    }

    //Resource management.
    fclose( fin );
    fclose( table );
    fclose( list );
    /** END OF PROCEDURE **/

    /** BEGIN PROCEDURE: copy the source program **/
    fin = fopen( sp, "r" );                               //File containing source program.
    FILE *source = fopen( "source_program.txt", "w" );    //File to copy source program to.
    if( fin == NULL || source == NULL ){
        fprintf( stderr, "Error: cannot copy '%s' to 'source_program.txt'.\n", sp );
        if( fin != NULL ) fclose( fin );
        if( source != NULL ) fclose( source );
        return 1;
    }

    //fgetc returns an int; storing it in a char would make the EOF comparison unreliable.
    int copy;
    while( ( copy = fgetc( fin ) ) != EOF )
        fputc( copy, source );

    //Resource management.
    fclose( fin );
    fclose( source );
    remove( "temp_output.txt" );   //Delete intermediary output.
    /** END OF PROCEDURE **/

    return 0;
} //End of main.
/* Writes one completed word to lex_file as "category(word),", choosing the category with the classifier functions. */
static void writeClassifiedWord(FILE *lex_file, char *word)
{
    const char *category;

    if (strcmp("writeln", word) == 0 || strcmp("write", word) == 0)
        category = "output(";
    else if (strcmp("readln", word) == 0 || strcmp("read", word) == 0)
        category = "input(";
    else if (isReservedWord(word) == 1)
        category = "reservedWord(";
    else if (isAdvMathFunc(word) == 1)
        category = "advancedMathFunction(";
    else if (isOrdinalFunc(word) == 1)
        category = "ordinalFunction(";
    else if (isVariableType(word) == 1)
        category = "variableType(";
    else if (isBoolOperator(word) == 1)
        category = "booleanOperator(";
    else if (isFileHandlingFunc(word) == 1)
        category = "fileHandlingFunction(";
    else if (strcmp(word, "div") == 0 || strcmp(word, "mod") == 0)
        category = "arithmeticOperation(";
    else
        category = "variable(";

    fputs(category, lex_file);
    fputs(word, lex_file);
    fputs("),", lex_file);
}

/*
 * Lexer driver: asks for a base file name, reads "<name>.pas", writes the
 * token stream to "<name>.lex" and finally echoes that file to stdout.
 *
 * Returns 0 after a normal run and also when the source file cannot be
 * opened (the message is printed to stdout, as before); returns 1 when the
 * .lex file cannot be created or re-opened.
 */
int main()
{
    FILE *pascal_file, *lex_file;
    char base_name[36];              /* 35 characters + NUL, so that adding ".pas"/".lex" fits in 40 bytes. */
    char file_name[40], file_name2[40];
    char is_token[40] = {0};         /* Letters of the word currently being collected. */
    int ch;                          /* int, not char, so that EOF stays distinguishable. */
    int i = 0;                       /* Number of letters stored in is_token. */
    int after_open_paren = 0;        /* 1 when the previous character was '(' and has not been reported yet. */

    printf("Please enter the source file name: ");
    fflush(stdout);                  /* Make the prompt visible before blocking on input. */
    if (fgets(base_name, sizeof base_name, stdin) == NULL)
        base_name[0] = '\0';
    base_name[strcspn(base_name, "\r\n")] = '\0';   /* Drop the line terminator kept by fgets. */

    strcpy(file_name, base_name);
    strcat(file_name, ".pas");
    strcpy(file_name2, base_name);
    strcat(file_name2, ".lex");

    if ((pascal_file = fopen(file_name, "r")) == NULL)
    {
        printf("dosya acilamadi!\n");
        return 0;
    }

    lex_file = fopen(file_name2, "w");
    if (lex_file == NULL)
    {
        fprintf(stderr, "cannot create '%s'\n", file_name2);
        fclose(pascal_file);
        return 1;
    }

    for (;;)
    {
        int letter;

        ch = getc(pascal_file);
        letter = (ch == EOF) ? 0 : isLetter((char)ch);

        if (letter == 1)
        {
            /* A letter extends the current word; letters beyond the buffer capacity are dropped. */
            if (i < (int)sizeof is_token - 1)
                is_token[i++] = (char)ch;
        }
        else if (letter == 0 && i != 0)
        {
            /* A non-letter (or end of input) ends the word: classify and write it. */
            writeClassifiedWord(lex_file, is_token);
            i = 0;
            memset(is_token, 0, sizeof is_token);
        }

        if (ch == EOF)
        {
            /* A '(' that was the very last character still has to be reported. */
            if (after_open_paren == 1)
                fputs("leftParentheses((),", lex_file);
            break;
        }

        if (ch == ':')
        {
            int next = getc(pascal_file);
            if (next == '=')
            {
                fputs("assignmentOperator(:=),", lex_file);
            }
            else
            {
                fputs("colon(:),", lex_file);
                /* Push the lookahead back; ch stays ':' so that the lookahead
                   is handled once, by the next iteration, not here as well. */
                if (next != EOF)
                    ungetc(next, pascal_file);
            }
        }

        if (ch == '{')
        {
            comment(pascal_file, (char)ch, lex_file);
        }

        if (ch == '*' && after_open_paren == 1)
        {
            /* "(*" sequence: handed to comment() instead of reporting '('.
               NOTE(review): the '*' is still reported as arithOperator below — confirm that is intended. */
            comment(pascal_file, (char)ch, lex_file);
            after_open_paren = 0;
        }

        if (after_open_paren == 1)
        {
            fputs("leftParentheses((),", lex_file);
        }

        if (ch == ')')
        {
            fputs("rightParentheses()),", lex_file);
        }

        if (ch == ';')
        {
            fputs("endOfLine(;),", lex_file);
        }

        if (ch == '[')
        {
            fputs("openingBracket([)", lex_file);
        }

        if (ch == ']')
        {
            fputs("closingBracket(])", lex_file);
        }

        if (ch == '<' || ch == '>' || ch == '=')
        {
            int next = getc(pascal_file);
            /* Two-character comparison operators: "<=", "<>" and ">=". */
            int two_chars = (ch == '<' && (next == '=' || next == '>')) ||
                            (ch == '>' && next == '=');

            fputs("compOperator(", lex_file);
            fputc(ch, lex_file);
            if (two_chars)
                fputc(next, lex_file);
            else if (next != EOF)
                ungetc(next, pascal_file);   /* Not part of the operator: read it again next iteration. */
            fputs("),", lex_file);
        }

        if (ch == '+' || ch == '-' || ch == '*' || ch == '/')
        {
            fputs("arithOperator(", lex_file);
            fputc(ch, lex_file);
            fputs("),", lex_file);
        }

        /* '(' is reported one character later, once it is known not to start "(*". */
        after_open_paren = (ch == '(');

        if (ch == 39)   /* 39 is the apostrophe character. */
        {
            skip_string(pascal_file, lex_file);
        }

        if (ch == 9 || ch == 10 || ch == ' ')   /* Tab, line feed and space are copied through. */
        {
            fputc(ch, lex_file);
        }

        if (isNumeric((char)ch) == 1)
        {
            constant(pascal_file, lex_file, (char)ch);
        }
    }

    fclose(lex_file);
    fclose(pascal_file);

    /* Echo the generated file to stdout. */
    lex_file = fopen(file_name2, "r");
    if (lex_file == NULL)
    {
        fprintf(stderr, "cannot re-open '%s'\n", file_name2);
        return 1;
    }
    while ((ch = getc(lex_file)) != EOF)
        putchar(ch);
    fclose(lex_file);

    return 0;
}
Token Scanner::handleWord()
{
    /*
     * Reads an identifier or reserved word with a small state machine.
     * Assumes that the next character from peek() is the first character
     * of the word. Consumed characters are appended to _lexeme.
     *
     * Letters and digits are always consumed and lead to the accepting
     * state; a '_' is consumed except directly after another '_'; anything
     * else ends the scan. If the scan does not end in the accepting state and
     * _lexeme holds exactly one character, MP_ERROR is returned. Otherwise
     * isReservedWord(_lexeme) is called and _token is returned.
     */
    enum { kStart = 0, kAccepting = 1, kAfterUnderscore = 2, kFinished = 3 };

    int phase = kStart;
    bool accepted = false;

    for (;;) {
        // Look at the next character without consuming it.
        char upcoming = peek();
        if (phase == kFinished) {
            break;
        }

        accepted = (phase == kAccepting);
        if (accepted) {
            _token = MP_IDENTIFIER;
        }

        const bool underscore = (upcoming == '_');
        const bool alnum = !underscore && (isalpha(upcoming) || isdigit(upcoming));

        if (alnum) {
            upcoming = get();
            _lexeme.push_back(upcoming);
            phase = kAccepting;
        } else if (underscore && phase != kAfterUnderscore) {
            upcoming = get();
            _lexeme.push_back(upcoming);
            phase = kAfterUnderscore;
        } else {
            phase = kFinished;
        }
    }

    // A lexeme consisting of a single, non-accepted character (a lone '_') is an error.
    if (!accepted && _lexeme.size() == 1) {
        _token = MP_ERROR;
        return _token;
    }

    // Check whether this lexeme is a reserved word (presumably updates _token on a match).
    isReservedWord(_lexeme);
    return _token;
}