//Functions accepts program as input file and generates lexeme list as output int scan(FILE* input) { FILE* lex; lex = fopen("lexlist.txt", "w"); if(input == NULL){ fprintf(output, "File does not exist.\n"); return -1; } char file[2000]; int i = 0; while(!feof(input)) { fscanf(input, "%c", &file[i]); i++; } //Declare arrays to store symbolic values, numbers, and identifiers char program[i-1]; int lexemeList[i-1]; int numberList[i-1]; int currLex = 0; int currNum = 0; int currID = 0; identifier* idList = NULL; //Copy all of file into properly sized character array int j; for (j = 0; j < i-1; j++) program[j] = file[j]; char token[200]; int t = 0; //Read each character in program for (j = 0; j < i-1; j++) { if (is_alpha(program[j])) //Letter and numbers are added to current token until symbol or space is reached { token[t] = program[j]; t++; } else if (is_digit(program[j])) { token[t] = program[j]; t++; } else //When symbol or space is reached { int tokenVal; if (t != 0) //If the token is one or more characters in length { //Add null character to terminate string token[t] = '\0'; //Get symbol value for token tokenVal = identifyToken(lex, token, t, lexemeList, &currLex, numberList, &currNum, &idList, &currID); //If the token was invalid, the program terminates if (tokenVal == -1) return -1; } if (!is_space(program[j])) //If one symbol has been read that is not white space, get its symbolic value j = identifySymbol(lex, program, i-1, j, lexemeList, &currLex); //If the symbol was invalid, the program terminates if (j == -1) return -1; t = 0; } } //Print list of lexemes with numbers and identifiers printLexList(lex, lexemeList, currLex, numberList, currNum, idList, currID); fclose(lex); return 1; }
/** * Reads the next Token from prog beginning with position cursor and moves * cursor to the next Token. */ Token * Tokenizer::readNextToken(const char * prog, int & cursor,int &line,size_t & startPos,tokenList_t & tokens) throw (Exception *) { char c=prog[cursor]; // Step over whitespace characters while ( isWhitechar(c) || c=='\0') { if (c=='\n') line++; if (c=='\0') return new TEndScript(); cursor++; c=prog[cursor]; } startPos = static_cast<size_t>(cursor); // Multiline Comment // Returns 0 if a comment is read. if (c=='/'&& prog[cursor+1]=='*') { cursor+=2; if (prog[cursor]=='\0') throw(new Error("Unclosed Comment",line)); while (true) { if (prog[cursor]=='\n') line++; cursor++; if ( prog[cursor] =='/' && prog[cursor-1] =='*') { cursor++; return NULL; //return new TEndCommand(); // Sure of this? } if (prog[cursor]=='\0') throw(new Error("Unclosed Comment",line)); } } // SingleLine Comment // Returns 0 if a comment is read. else if (c=='/'&& prog[cursor+1]=='/') { cursor++; while (true) { if (prog[cursor]=='\0'||prog[cursor]=='\n') return NULL; cursor++; } } // Numbers else if (isNumber(c)) { int to=cursor; double number=StringUtils::getNumber(prog,to); if (to>cursor && !isChar(prog[to])) { cursor=to; return new TObject(Number::create(number)); } else { std::cout << number ; throw(new Error( std::string("Syntax Error in Number."),line)); } // Identifiers, Control commands, true/false } else if (isChar(c)) { std::string accum; while ( isNumber(c) || isChar(c)) { accum+=c; cursor++; c=prog[cursor]; } identifierId id=EScript::stringToIdentifierId(accum); Token * o=identifyToken(id); if (o!=NULL) { return o->clone(); }else if (id==Consts::IDENTIFIER_LINE) { // __LINE__ return new TObject(Number::create(line)); } else { const Operator *op=Operator::getOperator(id); if (op!=NULL) return new TOperator(op); return new TIdentifier(id); } } else if (c==';') { cursor++; return new TEndCommand(); } else if (c=='{') { cursor++; return new TStartBlock(); } else if (c=='}') { cursor++; return new TEndBlock(); } else if (c=='(') { cursor++; return new TStartBracket(); } else if (c==')') { cursor++; return new TEndBracket(); } else if (c==',') { cursor++; return new TDelimiter(); } else if (c=='[') { cursor++; return new TStartIndex(); } else if (c==']') { cursor++; return new TEndIndex(); } else if (c==':' && prog[cursor+1]!='=' && prog[cursor+1]!=':' ) { cursor++; return new TColon(); } else if (c=='$' && isChar(prog[cursor+1]) ){ c=prog[++cursor]; // consume '$' std::string accum; while ( isNumber(c) || isChar(c)) { accum+=c; cursor++; c=prog[cursor]; } // std::cout << "FOUND ID :"<<accum<<":"<<cursor<<"\n"; return new TObject(Identifier::create(accum)); } else if ( isOperator(c) ) { int i=cursor; std::string accum; while (isOperator(c)) { accum+=c; i++; c=prog[i]; // if(accum!="-"&& c=='-') // break; } int size=accum.size(); const Operator * op=NULL; while (true) { std::string ops=accum.substr(0,size); op=Operator::getOperator(ops); if (op!=NULL) { cursor+=size; //if(size>1) cursor--; break; } size--; if (size<=0) { std::cout << std::endl<< accum << std::endl; throw(new Error(std::string("Unknown Operator: ")+accum,line)); } } // test for unary minus if (op->getString()=="-") { Token * last=tokens.size()>0?tokens.at(tokens.size()-1).get():NULL; // Bugfix[BUG:20090107] if ( last==NULL || (!(dynamic_cast<TEndBracket *>(last) || dynamic_cast<TEndIndex *>(last)|| dynamic_cast<TIdentifier *>(last) || dynamic_cast<TObject *>(last)))){ // dynamic_cast<Number *>(last)|| dynamic_cast<String *>(last)||dynamic_cast<Bool *>(last)))){ // TODO ++,-- op=Operator::getOperator("_-"); } } return new TOperator(op); } // String: ".*" | '.*' else if (c=='"' || c=='\'') { char stringEncloser=c; cursor++; c=prog[cursor]; std::ostringstream s; while (c!='\0' && c!= stringEncloser) { if (c=='\n') line++; if (c=='\\' ) { // http://de.wikipedia.org/wiki/Steuerzeichen cursor++; c=prog[cursor]; if (c==0) throw(new Error(std::string("Unclosed String. 1")+s.str().substr(0,10),line)); else if (c=='0') // NULL c='\0'; else if (c=='a') // BELL c='\a'; else if (c=='b') // BACKSPACE c='\b'; else if (c=='n') // LINEFEED c='\n'; else if (c=='r') // CARRIAGE RETURN c='\r'; else if (c=='t') // TAB c='\t'; else if (c=='\\') c='\\'; else if (c=='"') c='\"'; else if (c=='\'') c='\''; } s<<c; cursor++; c=prog[cursor]; } if (c=='\0') throw(new Error(std::string("Unclosed String. 2")+s.str().substr(0,10),line)); cursor++; return new TObject(String::create(s.str())); } // else if (c=='.') { // //cursor++; // return new TDot(); // } // cursor++; //std::cout << "\n"<<(int)(prog+cursor); std::cout << " \a !!!"; throw(new Error(std::string("Unknown Syntax Error near \n...")+(prog+ (cursor>10?(cursor-10):0) ),line)); return new Token(); }
int main() { char *valid[] = { "id34567890", "1234567890", "12345.12345", "12345.02345", "12345.12345E11", "12345.12345E+11", "12345.12345E-11", ">", "<", ":=", " ", "\n", "if", "[", "(", ":", ";", "..", }; char *invalid[] = { "id345678901", "12345678901", "02345678901", "12345.123451", "12345.12345E111", "12345.12345E+111", "12345.12345E-01", }; char *extra[] = { "...", "12345E11", }; ReservedWordList *rwl = parseResWordFile(fopen("../../data/reserved-words.txt", "r")); SymbolTable *s = (SymbolTable*)malloc(sizeof(SymbolTable)); for(int i = 0; i < sizeof(valid)/sizeof(valid[0]); i++) { MachineResult res = identifyToken(valid[i], rwl, s); if(res.error || *res.newString != 0) { printf("Something went wrong with string %s: ", valid[i]); printf("{%s, %s, %d, %d, %d}\n", res.lexeme, res.newString, res.type, res.attribute, res.error); } else { //printf("%s -> %s\n", res.lexeme, convertConstantToString(res.type)); } } for(int i = 0; i < sizeof(invalid)/sizeof(invalid[0]); i++) { MachineResult res = identifyToken(invalid[i], rwl, s); if(!res.error) { printf("Something went wrong with string %s: ", invalid[i]); printf("{%s, %s, %d, %d, %d}\n", res.lexeme, res.newString, res.type, res.attribute, res.error); } else { //printf("%s -> %s, %s\n", invalid[i], convertConstantToString(res.type), convertConstantToString(res.error)); } } for(int i = 0; i < sizeof(extra)/sizeof(extra[0]); i++) { MachineResult res = identifyToken(extra[i], rwl, s); if(res.newString == 0) { printf("Something went wrong with string %s: ", extra[i]); printf("{%s, %s, %d, %d, %d}\n", res.lexeme, res.newString, res.type, res.attribute, res.error); } else { //printf("%s -> %s, %s\n", extra[i], res.lexeme, res.newString); } } printf("If no other output, all tests passed.\n"); return 0; }
int main(int argc, char **argv) { //open the files given as an argument char *sfSrc, *sfListing, *sfToken, *sfSymbolTable, *sfReservedWords; sfSrc = sfListing = sfToken = sfSymbolTable = sfReservedWords = NULL; FILE *fSrc, *fListing, *fToken, *fSymbolTable, *fReservedWords; fSrc = fListing = fToken = fSymbolTable = fReservedWords = NULL; // format: ./a.out -r sfReservedWords [-l sfListing] [-t sfToken] [[-s] sfSrc] int i; for(i = 0; i < argc; i++) { if(!strcmp(argv[i], "-l")) { sfListing = argv[i+1]; } if(!strcmp(argv[i], "-t")) { sfToken = argv[i+1]; } if(!strcmp(argv[i], "-r")) { sfReservedWords = argv[i+1]; } if(!strcmp(argv[i], "-s")) { sfSymbolTable = argv[i+1]; } if(i == argc-1) { sfSrc = argv[i]; } } if(!sfSrc || (fSrc = fopen(sfSrc, "r")) == NULL) { fprintf(stderr, "Warning: source file not given or not found, using stdin.\n"); fSrc = stdin; } if(!sfToken || (fToken = fopen(sfToken, "w")) == NULL) { fprintf(stderr, "Warning: token file not given or not found, using stdout.\n"); fToken = stdout; } if(!sfSymbolTable || (fSymbolTable = fopen(sfSymbolTable, "w")) == NULL) { fprintf(stderr, "Warning: symbol table file not given or not found, not outputting symbol table.\n"); } if(!sfListing || (fListing = fopen(sfListing, "w")) == NULL) { fprintf(stderr, "Warning: listing file not given or not found, not outputting listing.\n"); } //create symbol table machinesInit(sfReservedWords); //begin reading a line at a time char sLine[80]; int cLine = 1; bool eof = false; while(!eof) { fgets(sLine, sizeof(sLine), fSrc); if(feof(fSrc)) { sLine[0] = EOF; sLine[1] = 0; } int length = strlen(sLine); if(fListing && !feof(fSrc)) fprintf(fListing, "%d\t%s", cLine, sLine); //split line into tokens char *psLine = sLine; while(psLine < sLine + length && fToken) { MachineResult res = identifyToken(psLine); if(res.type == T_WS) { //we don't care about whitespace } else if(res.type == T_ID) { fprintf(fToken, "%d\t%s\t%s\t%p\n", cLine, res.lexeme, convertConstantToString(res.type), res.pointer); if(res.error && fListing) { fprintf(fListing, "%s:\t%p:\t%s\n", convertConstantToString(res.type), res.pointer, res.lexeme); } } else if(res.type == T_EOF) { fprintf(fToken, "%d\t%s\t%s\t%s\n", cLine, "(EOF)", convertConstantToString(res.type), convertConstantToString(res.attribute)); eof = true; } else { fprintf(fToken, "%d\t%s\t%s\t%s\n", cLine, res.lexeme, convertConstantToString(res.type), convertConstantToString(res.attribute)); if(res.error && fListing) { fprintf(fListing, "%s:\t%s:\t%s\n", convertConstantToString(res.type), convertConstantToString(res.attribute), res.lexeme); } } psLine = res.newString; free(res.lexeme); } cLine++; } //print symbol table if(fSymbolTable) { for(SymbolTable* s = tab; s && s->entry && s->entry->word; s = s->next) { fprintf(fSymbolTable, "%s\t%p\n", s->entry->word, s->entry); } } }