void tokenizeAndStoreFile(FILE *fp, const char* filePath, struct hashtable *table){ char *words; int size=0; struct TokenizerT_ *root,*last; size_t temp; char *aToken; int i; struct TokenizerT_ *x; /*Jump to end, grab the relative location of EOF then jump back to beginning*/ fseek(fp,0L,SEEK_END); size=ftell(fp); fseek(fp,0L,SEEK_SET); words = (char*)malloc(size+1); /*read contents of entire file into words, appending a \0 at the end (reason for +1 above)*/ temp = fread(words,1,size,fp);/*temp is to hold the number of bytes written to words so we can append the \0 at the end*/ fclose(fp); words[temp]='\0'; x = TKCreate(words); aToken = TKGetNextToken(x); if(aToken==0){free(words);return;} while(aToken==0){//Skips over any unhandled characters. x->data++; if(x->data=='\0'){return;}//if you never got a root there is nothing to output so you should just return. aToken = TKGetNextToken(x); } root=last=TKCreate(aToken); root->type = x->type;//as discussed above I'm pulling the data from the token passed into TKGetNextToken i=0; while((last->data[i]==(*x->data)) && ((x->data[0])!='\0')) {x->data++; i++;}//move x's data until it is past the token that was just retrieved by TKGetNextToken (unless they hit end of string char) while(x->data[0]!='\0'){//until our pointer hits the null character aToken = TKGetNextToken(x); //get the next token from x if(aToken==0){ x->data++; continue; } last->next = TKCreate(aToken); //make this token into a Tokenizer and put it as the next in the linked list last = last->next; //move last to the new end of the list last->type = x->type; //pull the type of the new token from x and set it in the last i=0; while(isspace(x->data[0])){ x->data++; } //Eliminate whitespace before moving on while((last->data[i]==(*x->data)) && ((x->data[0])!='\0')){ x->data++; i++; }//};//move x->data pointer to the beginning of the string not yet tokenized } last = root;//no longer adding so don't need a pointer to the end of the list. 
Repurposing last as a curr pointer for output while(last!=NULL){ root=last; update(last->data,filePath,table); last = last->next; TKDestroy(root); } free(x); free(words); return; }
int hashToken(const char *fileName){ int fileCheck; char c, *sep, *token; char *string = (char*)malloc(sizeof(char)); string = ""; sep = " ~`!@#$%^&*()-_=+{[}]|;:\"<,.>/?\n"; fileCheck = access(fileName, F_OK); if(fileCheck == 0){ /*open file */ file_read = fopen(fileName, "r"); c = getc(file_read); while(c != EOF){ /*create string to send to tokenizer*/ c = tolower(c); string = Concat(string, c); c = getc(file_read); } } tokenizer = TKCreate(sep, string); /*free(string);*/ token = TKGetNextToken(tokenizer); while(token != NULL ){ /*parse and insert tokens into hash table*/ IndexInsert(indx, token, fileName); token = TKGetNextToken(tokenizer); } /*free(token);*/ fclose(file_read); return 0; }
/*
 * Walks the list starting at tk through the `next` links and prints, one per
 * line and tab-indented, every entry whose TKGetNextToken() text is not "0".
 */
void printList(TokenizerT tk){
    TokenizerT node;

    for (node = tk; node != NULL; node = node->next) {
        if (strcmp(TKGetNextToken(node), "0") == 0) {
            continue; /* "0" entries are not printed */
        }
        printf("\t%s\n", TKGetNextToken(node));
    }
}
/*
 * Comment text is never a token, so it is not kept in the token buffer:
 * comments can be arbitrarily long and do not need to be recognised. The
 * buffer is therefore cleared after every character consumed.
 *
 * Consumes characters up to and including the next '\n', or up to (but not
 * past) the terminating '\0', and then returns whatever TKGetNextToken()
 * produces from that position.
 */
TokenT *_line_comment(TokenizerT *tk) {
    /* Consume at least one character, then keep going until the line ends. */
    do {
        nextChar(tk);
        clearBuffer(tk);
    } while (tk->inputIter[0] != '\n' && tk->inputIter[0] != '\0');

    if (tk->inputIter[0] == '\n') {
        /* Step over the newline itself. */
        nextChar(tk);
        clearBuffer(tk);
    }
    return TKGetNextToken(tk);
}
int main(int argc, char **argv) { // Begin Main Method /** * Build a structure of type TokenizerT. */ struct TokenizerT_ *tk = TKCreate(argv[1]); printf("Original string: %s\n", tk->str); while(tk->str[tk->curChar] != '\0') { // Begin while-loop if (tk->curChar > tk->strSize-1) { // Begin if-statement break; } // End if-statement tk->tokens[tk->manyTokens] = TKGetNextToken(tk); if(tk->tokenDesc[tk->manyTokens] != NULL) { // Begin if-statement printf("%s is a %s\n", tk->tokens[tk->manyTokens], tk->tokenDesc[tk->manyTokens]) ; tk->manyTokens++; } // End if-statement } // End while-loop // Destroy the Tokenizer structure. TKDestroy(tk); return 0; } // End Main Method
int main(int argc, char **argv) { struct TokenizerT_ *tk = TKCreate(argv[1]); printf("made it to while loop\n"); //printf("%s\n%d\n", tk->str, tk->manyTokens); while(tk->str[tk->curChar] != '\0') { if (tk->curChar > tk->strSize-1) { printf("%c\n", (char)tk->str[tk->curChar]); break; } printf("made it in the first loop\n"); // tk->tokens[tk->manyTokens] = malloc(sizeof(char)*tk->strSize+1); tk->tokens[tk->manyTokens] = calloc(tk->strSize+1 , sizeof(char)); tk->tokens[tk->manyTokens] = TKGetNextToken(tk); printf("returned success %s\n", tk->tokens[tk->manyTokens]); tk->manyTokens++; printf("manytokens = %d\n", tk->manyTokens); } printf("made it to second while loop\n"); printf("strSize = %d\n", tk->strSize); int num = tk->manyTokens; while(num > 0) { printf("manytokens = %d\n", num); //printf("%s\n", tk->tokens[--tk->manyTokens]); printf("%s\n", tk->tokens[--num]); } TKDestroy(tk); return 0; }
int main(int argc, char **argv) { //Starts with error check if no string at all is inputted return print. if (argc ==1){ printf("no String inputted\n"); return 0; } if (argc ==3) { printf("Please input ONE set of strings \n"); return 0; } char* string= argv[1]; TokenizerT *finaltok = TKCreate(string); //creates the string array into stream char* datToke; //char array that is sent over after TKGetNextToken while(finaltok->str[finaltok->pindex] != '\0'){ datToke = TKGetNextToken(finaltok); } TKDestroy(finaltok); free(datToke); return 0; }
int main(int argc, char **argv) { if (argc == 1) { printf("error: did not enter a string\n"); return 0; } if (argc != 2) { printf("error: too many arguments\n"); return 0; } char* str = argv[1]; TokenizerT* tokenizer = TKCreate(str); while (tokenizer->cursor<(tokenizer->strLength)) { tokenizer->curr = TKGetNextToken(tokenizer); if(tokenizer->error == 1) { tokenizer->error = 0; continue; } printf("\"%s\"\n", tokenizer->curr); } TKDestroy(tokenizer); return 0; }
int main(int argc, char **argv) { //Checks if the user enters the right amount of inputs if(argc != 2){ printf("Invalid Number of Input\n"); //If the amount of inputs is not 2, then prints out an error message and returns 1 return 1; } TokenizerT *tk = TKCreate(argv[1]); if((strcmp(argv[1], "0") == 0) || (strcmp(argv[1], " 0 ") == 0)){ printf("decimal constant \"0\" \n"); //If the amount of inputs is not 2, then prints out an error message and returns 1 return 0; } while (tk->cursorPosition < tk->tokenLength){ char *token = TKGetNextToken(tk); if (token == NULL){ tk->cursorPosition++; continue; } printf("%s\n", token); } TKDestroy(tk); return 0; }
/*
 * Entry point. Expects two string arguments:
 *   argv[1] - the separator characters
 *   argv[2] - the text to split into tokens
 * Prints the tokens of argv[2] in left-to-right order, one per line.
 */
int main ( int argc, char **argv ) {
    TokenizerT *tk;
    char *token;

    /* Both arguments are mandatory. */
    if ( argc != 3 ) {
        printf("Incorrect number of arguments\n");
        return EXIT_FAILURE;
    }

    tk = TKCreate( argv[1], argv[2] );
    if ( tk == NULL ) {
        printf("Could not create tokenizer\n");
        return EXIT_FAILURE;
    }

    /* Each token is printed and then released with free(). */
    for ( token = TKGetNextToken(tk); token != NULL; token = TKGetNextToken(tk) ) {
        printf("%s\n", token);
        free(token);
    }

    TKDestroy(tk);
    return EXIT_SUCCESS;
}
int main(int argc, char **argv) { TokenizerT *token; char * word; token = TKCreate(argv[1]); while ((word = TKGetNextToken(token)) != NULL) { switch(token->tokType) { case Octal: printf("Octal %s\n", word); break; case Decimal: printf("Decimal %s\n", word); break; case Hex: printf("Hexadecimal %s\n", word); break; case Zero: printf("Zero %s\n", word); break; case Float: printf("Float %s\n", word); break; case Malformed: printf("ERROR: Malformed [0x%02x]\n", * word); break; default: printf("Incorrect Input!\n"); break; } free(word); token->currState = State_0; } TKDestroy(token); return 0; }
/*
 * Reads whitespace-delimited words from fp, tokenizes each one on the space
 * character and stores every token in hash with filename as its value.
 *
 * Exits the process with status 1 if fp is NULL or the read buffer cannot be
 * allocated. Words longer than 255 characters are read in 255-character
 * pieces. Prints "done" when finished.
 */
void filetok(FILE *fp, StrMap *hash, char *filename) {
    if (fp == NULL) {
        printf("ERROR: Could not open file. %s\n", strerror(errno));
        exit(1);
    }

    char *word = malloc(sizeof(char) * 256);
    if (word == NULL) {
        printf("ERROR: Out of memory.\n");
        exit(1);
    }
    char *token = NULL;

    /* The %255s width stops fscanf from writing past the 256-byte buffer. */
    while (fscanf(fp, "%255s", word) == 1) {
        TokenizerT *tokenizer = TKCreate(" ", word);
        while ( (token = TKGetNextToken(tokenizer)) != NULL) {
            sm_put(hash, token, filename);
        }
        /* NOTE(review): each tokenizer (and each token) is left unreleased;
         * confirm ownership and add the matching destroy/free calls. */
    }

    free(word);
    printf("done\n");
}
/*
 * Tokenizes an already-open input file and hands every token to
 * processToken().
 *
 * The caller opens and closes the file; the tokenizer object is created and
 * destroyed here.
 *
 * @param file      : file pointer to parse
 * @param inputname : name of the file behind the pointer, passed along with
 *                    each token
 * @return always 0
 */
int parseFile(FILE* file, char* inputname) {
    TokenizerT* tk = TKCreate(file);
    char* token;

    /* Stop at the first NULL token. */
    while ((token = TKGetNextToken(tk)) != NULL) {
        processToken(token, inputname);
    }

    TKDestroy(tk);
    return 0;
}
/*
 * Returns a freshly allocated, NUL-terminated copy of text with its first
 * `skip` characters dropped (an empty string when skip reaches or passes the
 * end). Returns NULL if allocation fails.
 */
static char *output_dup_tail(const char *text, size_t skip) {
    size_t len = strlen(text);
    size_t keep = (skip < len) ? len - skip : 0;
    char *dup = malloc(keep + 1);

    if (dup != NULL) {
        if (keep > 0) {
            memcpy(dup, text + skip, keep);
        }
        dup[keep] = '\0';
    }
    return dup;
}

/*
 * Prints the non-empty tokens of `token`, one per line.
 *
 * After each token is retrieved, the remaining text (everything after the
 * token and the one delimiter that follows it) is copied into a new buffer
 * and a new tokenizer is created over it with the original separators.
 * numOfCharacters is the number of characters still to be consumed; the loop
 * ends when it reaches zero or the tokenizer stops producing tokens.
 *
 * The tokenizer passed in is destroyed by this function.
 */
void output(TokenizerT * token, int numOfCharacters){
    char *returnedToken = TKGetNextToken(token); /* token up to the delimiter */
    char *copy;      /* private copy of the separator set */
    char *rest;      /* text that has not been tokenized yet */
    size_t consumed; /* token length plus one delimiter */

    if (returnedToken == NULL) {
        TKDestroy(token);
        return;
    }

    /* Buffers are sized from the strings they receive and always terminated. */
    copy = output_dup_tail(token->separators, 0);

    consumed = strlen(returnedToken) + 1;
    numOfCharacters = numOfCharacters - (int)consumed;
    rest = output_dup_tail(token->tokens, consumed);

    /* Print the token found before the first delimiter. */
    if (strlen(returnedToken) > 0) {
        printf("%s\n", returnedToken);
    }

    TKDestroy(token);
    if (copy == NULL || rest == NULL) {
        free(copy);
        free(rest);
        return;
    }

    /* Continue with the remaining text and the same delimiters. */
    token = TKCreate(copy, rest);
    while (numOfCharacters > 0) {
        char *next_rest;

        returnedToken = TKGetNextToken(token);
        if (returnedToken == NULL) {
            break;
        }
        if (strlen(returnedToken) > 0) {
            printf("%s\n", returnedToken);
        }

        consumed = strlen(returnedToken) + 1;
        numOfCharacters = numOfCharacters - (int)consumed;

        /* Build the next remainder before releasing the buffer the current
         * tokenizer may still be reading from. */
        next_rest = output_dup_tail(token->tokens, consumed);
        TKDestroy(token);
        free(rest);
        rest = next_rest;
        if (rest == NULL) {
            free(copy);
            return;
        }
        token = TKCreate(copy, rest);
    }

    TKDestroy(token);
    free(rest);
    free(copy);
    return;
}
int main(int argc, char **argv) { if(argc!=2){ printf("Error! More then one input!"); return 0; } char* outputStream; TokenizerT* token = TKCreate(argv[1]); while((outputStream = TKGetNextToken(token))!=NULL){ //while outputStream (which is set to the val of TKGetNextToken) is not 0 THEN switch(token->type){ //switch case based on type of token set inside call of TKGetNextToken case 1: printf("word \"%s\"\n",outputStream); free(outputStream); //word break; case 2: //deciaml printf("decimal integer \"%s\"\n",outputStream); free(outputStream); break; case 3: //octal printf("octal integer \"%s\"\n",outputStream); free(outputStream); break; case 4: //Hexa printf("hexadecimal integer \"%s\"\n",outputStream); free(outputStream); break; case 5: //Float printf("float point \"%s\"\n",outputStream); free(outputStream); break; case 6: //COP printf("%s\n",outputStream); break; case 7: //quote printf("String %s\n",outputStream); free(outputStream); break; case 0: printf("Bad token %c\n",outputStream); break; case -1: //Error! printf("something happened that wasn't suppose to happen!\n"); break; } } return 0; }
/*
 * Allocates a tokenizer over the text `ts` using the separator set
 * `separators`, and primes it by fetching the first token into currTok.
 *
 * Both strings are stored by pointer, not copied, so they must stay valid
 * for as long as the tokenizer is used.
 *
 * Returns the new tokenizer, or NULL if memory could not be allocated.
 */
TokenizerT *TKCreate(char *separators, char *ts) {
    TokenizerT *tokOb = malloc(sizeof *tokOb);

    if (tokOb == NULL) {
        return NULL;
    }

    tokOb->stream = ts;
    tokOb->separ = separators;
    tokOb->currTok = TKGetNextToken(tokOb);
    return tokOb;
}
/*
 * Prints every token of tk as `<kind> "<token>"`, one per line, where the
 * kind label is chosen from tk->prevState after each TKGetNextToken() call.
 *
 * Stops at the end of the input, at the first NULL token, or - after
 * printing "Invalid input" - when prevState is 'm'.
 */
void printTokens(TokenizerT *tk){
    int slot = 0; /* row of the symbol/escape table matching the token */

    while (tk->tokeninput[tk->ptrInput] != '\0') {
        char *temp = TKGetNextToken(tk);

        if (temp == NULL) {
            break;
        }
        if (tk->prevState == 'm') {
            printf("Invalid input\n");
            break;
        }

        /* Emit the kind label that precedes the quoted token text. */
        switch (tk->prevState) {
        case 'w':
            printf("word ");
            break;
        case '1':
            printf("integer ");
            break;
        case 'f':
        case 'e':
            printf("float ");
            break;
        case 'h':
            printf("hex constant ");
            break;
        case 'o':
            printf("octal constant ");
            break;
        case 'r':
            slot = iscombinedSymbol(temp);
            printf("%s ", symbols[slot][1]);
            break;
        case 'q':
            slot = isEscape(temp);
            printf("%s", esc[slot][1]);
            break;
        }

        printf("\"%s\"\n", temp);
    }
}
/*
 * Expects the separator set in argv[1] and the text in argv[2]. Trims the
 * tokenizer's string at both ends, then prints every token through
 * printToken(). Exits with status 0 in all cases.
 */
int main(int argc, char **argv) {
    TokenizerT *tokenizer;
    char *newToken;

    /* Validate the command-line argument count. */
    if (argc != 3) {
        fprintf(stderr,"****Incorrect number of CMD line arguments****\n");
        return 0;
    }

    tokenizer = TKCreate(argv[1], argv[2]);

    /* Trim leading, then trailing, separators. */
    trimStringLead(tokenizer);
    trimStringTrail(tokenizer);

    /* Print each token individually. */
    for (newToken = TKGetNextToken(tokenizer);
         newToken != NULL;
         newToken = TKGetNextToken(tokenizer)) {
        printToken(newToken);
    }

    free(newToken); /* newToken is NULL here, so this releases nothing */
    TKDestroy(tokenizer);
    return 0;
}
/*
 * Test driver. Takes no command-line input: it builds a tokenizer over a
 * fixed sentence with a single space as the separator, requests one token,
 * and then spins forever.
 */
int main() {
    TokenizerT *hello = TKCreate(" ", "hey g(uys) this is just a test");

    TKGetNextToken(hello);

    /* Never exits: the process stays alive until it is killed. */
    for (;;) {
    }

    return 0;
}
/*
 * Expects the delimiters in argv[1] and the text in argv[2]. Prints every
 * token followed by a blank line; the characters \a \b \f \n \r \t \v " and
 * \ inside a token are printed as [0xNN] instead of literally.
 *
 * Exits with EXIT_FAILURE when the argument count is wrong or the tokenizer
 * cannot be created.
 */
int main(int argc, char **argv) {
    TokenizerT *tokenizer;
    char *delims;
    char *textStream;
    char *theTok;
    int m = 0;

    if (argc != 3) {
        fprintf(stderr, "ERROR: INVALID NUMBER OF INPUTS\n");
        exit(EXIT_FAILURE);
    }
    if (strcmp(argv[2], "") == 0) {
        printf("NO STRING TO TOKENIZE\n");
    }

    delims = argv[1];
    textStream = argv[2];

    /* TKCreate() allocates the tokenizer itself; allocating one here first
     * would only leak it. */
    tokenizer = TKCreate(delims, textStream);
    if (tokenizer == NULL) {
        fprintf(stderr, "ERROR: COULD NOT CREATE TOKENIZER\n");
        exit(EXIT_FAILURE);
    }
    theTok = tokenizer->currTok;

    /* Both operands are pointers, so the end test is against NULL. */
    while (tokenizer->stream != NULL && theTok != NULL) {
        for (m = 0; theTok[m] != '\0'; m++) {
            /* theTok[m] is never '\0' here, so strchr() cannot match the
             * terminator of the escape set. */
            if (strchr("\a\b\f\n\r\t\v\"\\", theTok[m]) != NULL) {
                printf("[0x%.2x]", (unsigned)(unsigned char)theTok[m]);
            } else {
                printf("%c", theTok[m]);
            }
        }
        printf("\n");
        printf("\n");
        theTok = TKGetNextToken(tokenizer);
    }

    TKDestroy(tokenizer);
    return 0;
}
/*
 * Expects at least two arguments: argv[1] and argv[2] are handed to
 * TKCreate(). Prints each token as "main token: <token>" and frees it.
 * Returns 1 when too few arguments are given, 0 otherwise.
 */
int main(int argc, char **argv) {
    TokenizerT *tk;
    char *tokens;

    if (argc < 3) {
        printf("You need more arguments you have: %d\n", argc);
        return 1;
    }

    tk = TKCreate(argv[1], argv[2]);

    /* A NULL token marks the end of the stream. */
    while ((tokens = TKGetNextToken(tk)) != NULL) {
        printf("main token: %s\n", tokens);
        free(tokens);
    }

    TKDestroy(tk);
    return 0;
}
/* * main will have a string argument (in argv[1]). * The string argument contains the tokens. * Print out the tokens in the second string in left-to-right order. * Each token should be printed on a separate line. */ int main(int argc, char **argv) { // make sure theres exactly 1 argument if(argc != 2) { printf("Invalid number of arguments. \n"); printf("Usage: \n ./tokenizer <C-code string>\n"); exit(1); } TokenizerT *tokenizer = TKCreate(argv[1]); TokenT *token; while((token = TKGetNextToken(tokenizer)) != NULL) { printToken(token); destroyToken(token); } TKDestroy(tokenizer); return 0; }
/*
 * Takes exactly one argument. For every token of argv[1], prints the
 * tokenizer's current type via printState(), then the token via
 * printToken(), and frees the token.
 *
 * With any other argument count, reports the error and exits with status 1.
 */
int main (int argc, char **argv){
    TokenizerT *tokenPTR;
    char *tokenString;

    /* Reject a wrong argument count up front. */
    if (argc != 2) {
        fprintf(stderr, "Error: Incorrect number of arguments");
        printf("\n %i arguments given", argc);
        exit(1);
    }

    tokenPTR = TKCreate(argv[1]);

    for (tokenString = TKGetNextToken(tokenPTR);
         tokenString != NULL;
         tokenString = TKGetNextToken(tokenPTR)) {
        printState(tokenPTR->type);
        printToken(tokenString);
        free(tokenString);
    }

    TKDestroy(tokenPTR);
    return 0;
}
/*
 * Expects the delimiters in argv[1] and the text in argv[2]. Prints a
 * numbered list of the tokens. With any other argument count a usage message
 * is printed. Returns 0 in both cases; exits with status 1 if the tokenizer
 * cannot be created.
 */
int main ( int argc, char **argv ) {
    TokenizerT *testObject;
    char *string_start;
    char *delim_start;
    char *output;
    int count;

    if ( argc != 3 ) {
        printf("\nUsage:");
        printf("\ntokenizer <delimiters> <string>\n\n");
        return 0;
    }

    /* Build the object that holds all token data. */
    testObject = TKCreate( argv[ 1 ], argv[ 2 ] );
    if ( testObject == NULL ) {
        printf( "\nInitialization failed. Insufficient memory" );
        exit( 1 );
    }

    printf( "\ntokens:" );

    /* Remember where both strings start so the fields can be put back
     * before the object is destroyed. */
    string_start = testObject->inputString;
    delim_start = testObject->delimiter;

    for ( count = 0; (output = TKGetNextToken( testObject )) != NULL; count++ ) {
        printf("\n%d: %s", count, output);
    }
    printf("\n\n");

    /* Restore the original pointers, then clean up. */
    testObject->inputString = string_start;
    testObject->delimiter = delim_start;
    TKDestroy(testObject);

    return 0;
}
int main(int argc, char **argv) { //Create tokeinzer object with input arguments TokenizerT *start = TKCreate(argv[1], argv[2]); //If there are no separators, then print out the whole stream and return; if(strlen(argv[1]) == 0) { char *token = argv[2]; token = checkEscChar(token); printf("%s\n", token); return 0; } //If there is no string given in the argument if(strlen(argv[2]) == 0){ return 0; } //Call GetNextToken and print out each token until stream reaches a null terminator while((start->stream) != '\0' ) { char *token = TKGetNextToken(start); if(token == NULL) { break; } token = checkEscChar(token); printf("%s\n", token); } TKDestroy(start); return 0; }
////change 3 >>>>>>> upstream/master char * read (char *filen,char*key,struct list * head){ char *my_string; char *filecontent=malloc(100); FILE *fp; if ((fp = fopen(filen, "r")) == NULL) { printf(" can't open:%s.\n", filen); return 1; } else { struct list * temp=malloc(sizeof( struct list));; int bi=0; char *a; char *s;char *b; int loop=0; char *out=malloc(10000); struct nodeE * nod; struct nodeE * nodtemp; while (fgets(filecontent, 1000, fp)!= NULL) { TokenizerT *tk; tk= TKCreate(filecontent); a=TKGetNextToken(tk); if(strcmp(a,"<list>")==0){
int readFile(struct List *list, const char* filename){ int fileSize = 0; char* str; int i = 0; char c; char* buffer; int rval; rval =access(filename,R_OK); if (rval==0){ //you have read permission } else if(errno==EACCES){ printf("you do not have access to %s\n",filename); return 0; } //read in file, put in big string FILE *filePtr = fopen(filename, "r"); if(filePtr == NULL){ return -1; } fseek(filePtr, 0, SEEK_END); fileSize = ftell(filePtr); fseek(filePtr, 0, SEEK_SET); str = (char*)malloc(sizeof(char)*fileSize+1); while((c = fgetc(filePtr)) != EOF){ str[i] = tolower(c); i++; } str[i] = '\0'; fclose(filePtr); //tokenize string TokenizerT *tok; tok = TKCreate(str); indexPointer = tok->input; while (indexPointer != '\0'){ buffer = TKGetNextToken(tok); if(strlen(buffer) > 0){ SLInsert(list, buffer, filename); } free(buffer); } TKDestroy(tok); free(str); return 0; }
/* C keywords recognised by the tokenizer and the text returned for each. */
static const struct {
    const char *word;
    char *label;
} tk_keywords[] = {
    { "if",       "C Key Word \"if\"" },
    { "sizeof",   "C Key Word \"sizeof\"" },
    { "while",    "C Key Word \"while\"" },
    { "for",      "C Key Word \"for\"" },
    { "else",     "C Key Word \"else\"" },
    { "return",   "C Key Word \"return\"" },
    { "switch",   "C Key Word \"switch\"" },
    { "case",     "C Key Word \"case\"" },
    { "int",      "C Key Word \"int\"" },
    { "char",     "C Key Word \"char\"" },
    { "double",   "C Key Word \"double\"" },
    { "float",    "C Key Word \"float\"" },
    { "short",    "C Key Word \"short\"" },
    { "static",   "C Key Word \"static\"" },
    { "struct",   "C Key Word \"struct\"" },
    { "union",    "C Key Word \"union\"" },
    { "enum",     "C Key Word \"enum\"" },
    { "do",       "C Key Word \"do\"" },
    { "continue", "C Key Word \"continue\"" },
    { "default",  "C Key Word \"default\"" },
    { "extern",   "C Key Word \"extern\"" },
    { "break",    "C Key Word \"break\"" },
    { "goto",     "C Key Word \"goto\"" },
    { "void",     "C Key Word \"void\"" },
    { "const",    "C Key Word \"const\"" },
    { "signed",   "C Key Word \"signed\"" },
    { "volatile", "C Key Word \"volatile\"" },
    { "auto",     "C Key Word \"auto\"" },
    { "long",     "C Key Word \"long\"" },
    { "typedef",  "C Key Word \"typedef\"" },
    { "unsigned", "C Key Word \"unsigned\"" }
};

/* True for the ASCII letters A-Z and a-z. */
static int tk_is_alpha(char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

/* True for the ASCII digits 0-9. */
static int tk_is_digit(char c) {
    return c >= '0' && c <= '9';
}

/* True for the ASCII hexadecimal digits 0-9, A-F and a-f. */
static int tk_is_hex(char c) {
    return tk_is_digit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}

/* Returns a malloc'ed, NUL-terminated copy of text[from..to), or NULL if
 * allocation fails. */
static char *tk_copy_span(const char *text, int from, int to) {
    size_t len = (size_t)(to - from);
    char *copy = malloc(len + 1);

    if (copy != NULL) {
        memcpy(copy, text + from, len);
        copy[len] = '\0';
    }
    return copy;
}

/*
 * Scans a quoted run. On entry `beginning` is the index of the opening quote
 * and tk->index is just past it. If a closing `quote` is found, the cursor
 * moves past it, the type becomes QUOTE and a malloc'ed copy of the run
 * (quotes included) is returned. Otherwise the cursor is put back just after
 * the opening quote and unmatched_label is returned.
 */
static char *tk_scan_quoted(TokenizerT *tk, int beginning, char quote, char *unmatched_label) {
    while (tk->token[tk->index] != quote && tk->token[tk->index] != '\0') {
        tk->index++;
    }
    if (tk->token[tk->index] != quote) {
        tk->index = beginning + 1;
        return unmatched_label;
    }
    tk->index++;
    tk->type = QUOTE;
    return tk_copy_span(tk->token, beginning, tk->index);
}

/*
 * Returns the next token of tk->token starting at tk->index, advances
 * tk->index past it and records the token's kind in tk->type.
 *
 * Returns NULL when only whitespace (or nothing) remains.
 *
 * The storage of the returned string depends on the kind:
 *   - HEXA, WORD, DECIMAL, OCTAL, FLOAT and QUOTE tokens are malloc'ed copies;
 *   - COP results (operators, keywords, unmatched quotes) and the bare "0x"
 *     case (type left at 0) are string literals;
 *   - BAD_TOKEN results point into tk->token at the offending character.
 *
 * Line comments and block comments are skipped.
 */
char *TKGetNextToken( TokenizerT * tk ) {
    int beginning;
    char first;
    char next;

    if (tk->token[tk->index] == '\0') {
        return NULL;
    }
    tk->type = 0;

    /* Skip leading whitespace. */
    while (tk->token[tk->index] == '\n' || tk->token[tk->index] == '\t' ||
           tk->token[tk->index] == ' '  || tk->token[tk->index] == '\v' ||
           tk->token[tk->index] == '\f' || tk->token[tk->index] == '\r') {
        tk->index++;
    }

    /* Trailing whitespace leaves nothing to return. Stopping here keeps the
     * cursor from being stepped past the string terminator. */
    if (tk->token[tk->index] == '\0') {
        return NULL;
    }

    beginning = tk->index;
    first = tk->token[beginning];

    /* ---- hexadecimal: "0x" followed by hex digits ---- */
    if (first == '0' && tk->token[beginning + 1] == 'x') {
        tk->index += 2;
        while (tk_is_hex(tk->token[tk->index])) {
            tk->index++;
        }
        tk->type = HEXA;
        if (tk->index - beginning == 2) {
            /* "0x" with no digits after it. */
            tk->type = 0;
            return "0x";
        }
        return tk_copy_span(tk->token, beginning, tk->index);
    }

    /* ---- word or keyword: a letter followed by letters/digits ---- */
    if (tk_is_alpha(first)) {
        size_t k;
        size_t len;

        while (tk_is_alpha(tk->token[tk->index]) || tk_is_digit(tk->token[tk->index])) {
            tk->index++;
        }
        len = (size_t)(tk->index - beginning);

        /* A keyword must match the whole identifier, not just a prefix of
         * it (so "integer" is a word, not "int" followed by "eger"). */
        for (k = 0; k < sizeof tk_keywords / sizeof tk_keywords[0]; k++) {
            if (strlen(tk_keywords[k].word) == len &&
                strncmp(tk_keywords[k].word, tk->token + beginning, len) == 0) {
                tk->type = COP;
                return tk_keywords[k].label;
            }
        }

        tk->type = WORD;
        return tk_copy_span(tk->token, beginning, tk->index);
    }

    /* ---- number: octal, decimal or float ---- */
    if (tk_is_digit(first)) {
        int foundE = 0;

        tk->type = (first == '0') ? OCTAL : DECIMAL;

        while (tk_is_digit(tk->token[tk->index]) || tk->token[tk->index] == '.') {
            if (tk->type == DECIMAL) {
                if (tk->token[tk->index] == '.') {
                    tk->type = FLOAT;
                }
            } else if (tk->type == FLOAT) {
                if (tk->token[tk->index] == '.') {
                    /* Second period: the token ends here. */
                    break;
                } else if (tk->token[tk->index + 1] == 'e') {
                    /* Exponent marker follows this digit. */
                    if (foundE == 0) {
                        foundE = 1;
                    } else {
                        break;
                    }
                    if (tk->token[tk->index + 2] == '-') {
                        /* Also step over the minus sign. */
                        tk->index++;
                    }
                    tk->index++;
                }
            } else if (tk->type == OCTAL) {
                if (tk->token[tk->index] > '7' && tk->token[tk->index] <= '9') {
                    /* Not an octal digit: the token ends here. */
                    break;
                } else if (tk->token[tk->index] == '.') {
                    break;
                }
            }
            tk->index++;
        }
        return tk_copy_span(tk->token, beginning, tk->index);
    }

    /* ---- everything else: operators, punctuation, quotes, comments ---- */
    tk->type = COP;
    tk->index++;                      /* consume the first character */
    next = tk->token[tk->index];      /* one character of lookahead */

    switch (first) {
    case '=':
        if (next == '=') { tk->index++; return "equal to \"==\""; }
        return "basic assignment \"=\"";

    case '<':
        if (next == '=') { tk->index++; return "less than or equal to \"<=\""; }
        if (next == '<') {
            tk->index++;
            if (tk->token[tk->index] == '=') {
                tk->index++;
                return "bitwise left shift assignment \"<<=\"";
            }
            return "bitwise left shift \"<<\"";
        }
        return "less than \"<\"";

    case '>':
        if (next == '=') { tk->index++; return "greater than or equal to \">=\""; }
        if (next == '>') {
            tk->index++;
            if (tk->token[tk->index] == '=') {
                tk->index++;
                return "bitwise right shift assignment \">>=\"";
            }
            return "bitwise right shift \">>\"";
        }
        return "greater than \">\"";

    case '+':
        if (next == '+') { tk->index++; return "increment \"++\""; }
        if (next == '=') { tk->index++; return "addition assignment \"+=\""; }
        return "addition \"+\"";

    case '-':
        if (next == '-') { tk->index++; return "decrement \"--\""; }
        if (next == '=') { tk->index++; return "subtraction assignment \"-=\""; }
        if (next == '>') { tk->index++; return "structure pointer \"->\""; }
        return "subtraction \"-\"";

    case '*':
        if (next == '=') { tk->index++; return "multiplication assignment \"*=\""; }
        return "multiplication \"*\"";

    case '/':
        if (next == '/') {
            /* Line comment: skip to the end of the line, then carry on. */
            tk->index++;
            while (tk->token[tk->index] != '\n' && tk->token[tk->index] != '\0') {
                tk->index++;
            }
            return TKGetNextToken(tk);
        }
        if (next == '*') {
            /* Block comment: skip to the closing marker (or end of input). */
            tk->index++;
            while (tk->token[tk->index] != '\0') {
                if (tk->token[tk->index] == '*' && tk->token[tk->index + 1] == '/') {
                    tk->index = tk->index + 2;
                    break;
                }
                tk->index++;
            }
            return TKGetNextToken(tk);
        }
        return "division \"/\"";

    case '%':
        if (next == '=') { tk->index++; return "modulo assignment \"%=\""; }
        return "modulo %";

    case '!':
        if (next == '=') { tk->index++; return "not equal to \"!=\""; }
        return "logical NOT \"!\"";

    case '&':
        if (next == '&') { tk->index++; return "logical AND \"&&\""; }
        if (next == '=') { tk->index++; return "bitwise AND assignment \"&=\""; }
        return "bitwise AND \"&\"";

    case '|':
        if (next == '|') { tk->index++; return "logical OR \"||\""; }
        if (next == '=') { tk->index++; return "bitwise OR assignment \"|=\""; }
        return "bitwise OR \"|\"";

    case '~':
        return "bitwise NOT \"~\"";
    case '[':
        return "left brace \"[\"";
    case ']':
        return "right brace \"]\"";
    case '(':
        return "left parentheses \"(\"";
    case ')':
        return "right parentheses \")\"";
    case ',':
        return "comma \",\"";

    case '?':
        /* Only "?:" is recognised; a lone '?' is a bad token. The lookahead
         * is the character directly after the '?'. */
        if (next == ':') { tk->index++; return "ternary conditional \"?:\""; }
        tk->type = BAD_TOKEN;
        return tk->token + beginning;

    case '^':
        if (next == '=') { tk->index++; return "bitwise XOR assignment \"^=\""; }
        return "bitwise XOR \"^\"";

    case '"':
        return tk_scan_quoted(tk, beginning, '"', "Quote \"\"\"");

    case '\'':
        return tk_scan_quoted(tk, beginning, '\'', "Single Quote \"\'\"");

    default:
        break;
    }

    /* Unrecognised character: already consumed above. */
    tk->type = BAD_TOKEN;
    return tk->token + beginning;
}
int main(int argc, char **argv) { char newline[7]="[0x0a]"; char hor_tab[7]="[0x09]"; char ver_tab[7]="[0x0b]"; char backspace[7]= "[0x08]"; char carriage[7]= "[0x0d]"; char form[7]= "[0x0c]"; char audible[7]="[0x07]"; char backslash[7]= "[0x5c]"; char double_quote[7]= "[0x22]"; if(argc<3) { fprintf(stderr, "Error: Invalid Amount of Arguments.\n"); return 0; } TokenizerT *overall= TKCreate(argv[1],argv[2]); overall->counter=0; int i=0; int j=0; int sep_flag=0; int print_flag=0; /*traverse through the token stream and create tokens out of them * Separators and Hex Modifications are handled within the bottom loop. * The returning token only has non-escape characters that acted as a separator*/ while(overall->counter<strlen(overall->next_token_ptr)) { char *token= TKGetNextToken(overall); i=0; if(token==NULL) { continue; /* this occurs when a single separator was found */ } print_flag=0; if(strlen(token)==2) { /*when the token string is just an escape character * acting as a separator */ j=0; int tester=0; if(token[i]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(token[i+1]!='\0') { if(overall->next_sep_ptr[j]==token[i+1]) { tester=1; } } j++; } if(tester==1) { free(token); continue; } } } while(token[i]!='\0') { j=0; sep_flag=0; /* Each of the 9 escape characters are individually handled below to * print either hex, ignore, or split as a separator. 
*/ if(token[i]=='n') { if(i!=0) { if(token[i-1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='n') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",newline); print_flag=1; i++; continue; } } } } if(token[i]=='t') { if(i!=0) { if(token[i-1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='t') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",hor_tab); print_flag=1; i++; continue; } } } } if(token[i]=='v') { if(i!=0) { if(token[i-1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='v') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",ver_tab); print_flag=1; i++; continue; } } } } if(token[i]=='b') { if(i!=0) { if(token[i-1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='b') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",backspace); print_flag=1; i++; continue; } } } } if(token[i]=='r') { if(i!=0) { if(token[i-1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='r') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",carriage); print_flag=1; i++; continue; } } } } if(token[i]=='f') { if(i!=0) { if(token[i-1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='f') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",form); print_flag=1; i++; continue; } } } } if(token[i]=='a') { if(i!=0) { if(token[i-1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='a') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",audible); print_flag=1; i++; continue; } } } } if(token[i]=='"') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='"') { sep_flag=1; } j++; } if(sep_flag==0) { 
printf("%s",double_quote); print_flag=1; i++; continue; } } if(token[i]=='\\') { if(token[i+1]=='\\') { while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\' && overall->next_sep_ptr[j+1]=='\\') { sep_flag=1; } j++; } if(sep_flag==0) { printf("%s",backslash); print_flag=1; i++; continue; } } } if(sep_flag!=1 && token[i]!='\\') { /* a normal character being printed */ printf("%c",token[i]); print_flag=1; } else if(sep_flag==1 && i!=strlen(token)-1) { /*checking to see if the next character is a separator. If so, it needs * to be skipped. */ if(token[i+1]!='\0' && token[i+2]!='\0') { if(token[i+1]=='\\') { if(token[i+2]=='n' || token[i+2]=='t'||token[i+2]=='v'||token[i+2]=='b'||token[i+2]=='r'||token[i+2]=='f'||token[i+2]=='a'|| token[i+2]=='"' ||token[i+2]=='\\') { j=0; int checker=0; while(overall->next_sep_ptr[j]!='\0') { if(overall->next_sep_ptr[j]=='\\') { if(overall->next_sep_ptr[j+1]!='\0') { if(overall->next_sep_ptr[j+1]==token[i+2]) { checker=1; } } } j++; } if(checker==1) { i++; continue; } else { if(print_flag==1) { printf("\n"); i++; continue; } else { i++; continue; } } } } } if(i!=1 && print_flag!=0) { printf("\n"); } } i++; } if(strlen(token)==1 && token[0]=='\\') { free(token); continue; } printf("\n"); free(token); } TKDestroy(overall); return 0; }
/*
 * Splits fS on spaces and records every token in the sorted list SL under
 * pathName, printing progress output along the way and a short dump of the
 * first list nodes at the end.
 *
 * fS       - string to tokenize (handed to TKCreate with " " as separators).
 * SL       - list to insert into. When NULL, a list is created from the first
 *            token; because SL is passed by value, that new list is NOT
 *            visible to the caller.
 * pathName - file name stored alongside each token.
 *
 * Tokens are not freed here: whether SLCreate/SLInsert keep the pointer is
 * not visible from this function -- TODO confirm ownership.
 */
void addToList(char *fS, SortedList1Ptr SL, char *pathName)
{
    /* Labels used by the final dump for the first, second and third node. */
    static const char *const tags[] = { "", "22222", "33333" };
    TokenizerT *tokenizer = TKCreate(" ", fS);
    Node1 *temp = NULL;
    char *token = NULL;
    size_t k;

    if (tokenizer == NULL) {
        printf("Error: unable to create tokenizer\n");
        return;   /* previously fell through and used the NULL tokenizer */
    }

    while ((token = TKGetNextToken(tokenizer)) != NULL) {
        if (SL == NULL) {
            printf("SL CREATED\n");
            SL = SLCreate(token, pathName);
            /* Print the freshly created list; skipped if creation failed. */
            for (temp = (SL != NULL) ? SL->root : NULL; temp != NULL; temp = temp->next) {
                printf("%s->", temp->token);
            }
            printf("\n");
        } else {
#ifdef DEBUG
            printf("***************TOKEN ADDED: %s\n", token);
            printf("***************Pathname ADDED: %s\n", pathName);
#endif
            SLInsert(SL, token, pathName);
            if (strcmp(pathName, "") == 0) {
                printf("\n%spathname is blank%s\n", KRED, KNORM);
            } else {
                printf("\n%s%s%s\n", KRED, pathName, KNORM);
            }
            printlist(SL);
            /* A leftover traversal loop used to sit here with its starting
             * assignment commented out; its cursor was NULL or uninitialized,
             * so it never printed anything. Only the newline is kept. */
            printf("\n");
        }
    }

    /* Debug dump of up to four leading nodes. Every link is checked so that
     * an empty or short list no longer causes a NULL dereference. */
    temp = (SL != NULL) ? SL->root : NULL;
    for (k = 0; k < sizeof tags / sizeof tags[0] && temp != NULL; k++, temp = temp->next) {
        printf("Main():%s Final result, print the ROOT from Vertical SL: %s\n",
               tags[k], temp->token);
        if (temp->accessToNode2 != NULL) {
            printf("Main():%s Final result, print the ROOT's filename from Vertical SL: %s\n",
                   tags[k], temp->accessToNode2->fileName);
            printf("Main():%s Final result, print the ROOT's frequency from Vertical SL: %d\n",
                   tags[k], temp->accessToNode2->frequency);
        }
    }
    if (temp != NULL) {
        /* Only the token is shown for the fourth node. */
        printf("Main():55555 Final result, print the ROOT from Vertical SL: %s\n",
               temp->token);
    }

    TKDestroy(tokenizer);
}