void constructorTokenizer(char *filename) { //TODO: As check for filename extenstion .asm char *strToken, *dupLine, tempStr[100], *strEndInverted; int multiLineComment = 0, counter, tempStrCounter;//, strFound=0, distance=0; int innerCounter, strConstStart, strConstEnd, strConstLen, strProcessed; file = fopen ( filename, "r" ); filelineno = 0; head = current = 0; if ( file == NULL ) { perror(filename); } /*Basic logic for tokenizing a file * 1. Read a line with fgets * 2. increment file line no * 3. Use strtok function to loop through all the * string tokens file current file read * 4. Look for multiline comments * 5. If found loop thorough string tokens and even * fgets function till end of multiline character * found. Imp keep updading filelineno on ever * successfull read of fgets * 6. Then look for Single line comments if found skip the * current line * 7. If none of the above is found, then it may be a valid * jack token or combination of two or three tokens or a * string token which would need special attentiong. * 8. Check string lenght of strToken. If its 1 then it might * be possible symbol of jack language. Add it to linked list * and proceed further. * 9. If not it may be a combination of symbols and other jack language * constructs. Now loop through strToken string and look for occurance of * symbols, string constants quotes * 10.If symbol is found add it to linked list. And loop through rest of * string. * * Add it to a linked list of tokens which * records token string, fileline no where string was found * 8. * 8. End */ while ( fgets ( line, sizeof(line), file ) != NULL ) // read a line { filelineno++; //printf("Tokens at line %d: ", filelineno); dupLine = strdup(line); for(strToken = strtok(dupLine," \t\n"); strToken != NULL; strToken = strtok(NULL, " \t\n")) { if(multiLineComment) { if(strstr(strToken,"*/")) //look for end of mulitline comment { multiLineComment = 0; //if found continue to next token in line } } else if(strstr(strToken,"/*")) //its a start of multiline comment { multiLineComment = 1; if(strstr(strToken,"*/")) //look for end of mulitline comment { multiLineComment = 0; //if found continue to next token in line } } else if(strstr(strToken,"//")) //single line comment found break out of for loop { break; } else // it may be a valid token or combination of symbols and tokens { if(strlen(strToken) == 1) //then it may a valid symbol add it { addTokenToList(strToken); //may be a need of static pointer here } else //it may contain symbols and string constant quotes " { memset(tempStr,0,100); //first clear tempString buffer tempStrCounter = 0; strProcessed = 0; for(counter=0;counter<strlen(strToken);counter++) { if(strToken[counter] == '{' || strToken[counter] == '}' || \ strToken[counter] == '(' || strToken[counter] == ')' || \ strToken[counter] == '[' || strToken[counter] == ']' || \ strToken[counter] == '.' || strToken[counter] == ',' || \ strToken[counter] == ';' || strToken[counter] == '+' || \ strToken[counter] == '-' || strToken[counter] == '*' || \ strToken[counter] == '/' || strToken[counter] == '&' || \ strToken[counter] == '|' || strToken[counter] == '<' || \ strToken[counter] == '>' || strToken[counter] == '=' || \ strToken[counter] == '~') { //we just found a symbol //first check if it was first character if(tempStrCounter > 0) //check if there was a previous { //token being extracted tempStr[tempStrCounter] = '\0'; //set the null indicator addTokenToList(tempStr); memset(tempStr, 0, 100); //clear tempStr buffer tempStrCounter = 0; //reset tempStrCounter } tempStr[0] = strToken[counter]; tempStr[1] = '\0'; addTokenToList(tempStr); memset(tempStr, 0, 100); } else if(strToken[counter] == '\"') //logic for string constant { if(tempStrCounter > 0) //check if there was a previous { //token being extracted tempStr[tempStrCounter] = '\0'; //set the null indicator addTokenToList(tempStr); memset(tempStr, 0, 100); //clear tempStr buffer tempStrCounter = 0; //reset tempStrCounter } strProcessed = 1; strConstStart = (strToken+counter) - dupLine; strEndInverted = strchr(line+strConstStart+1, '\"'); if(!strEndInverted) //not found end inverted commas copy till end of line { //may be a bug in this block of if code strcpy(tempStr, line+strConstStart); addTokenToList(tempStr); break; } else //we found end inverted commas { strConstEnd = strEndInverted - line; strConstLen = strConstEnd - strConstStart - 1; strToken = dupLine + strConstEnd + 1; strncpy(tempStr, line+strConstStart, strConstLen+2); //accomadate for two inverted commas addTokenToList(tempStr); memset(tempStr, 0, 100); for(innerCounter=0;innerCounter<strlen(strToken);innerCounter++) { if(strToken[innerCounter] == '{' || strToken[innerCounter] == '}' || \ strToken[innerCounter] == '(' || strToken[innerCounter] == ')' || \ strToken[innerCounter] == '[' || strToken[innerCounter] == ']' || \ strToken[innerCounter] == '.' || strToken[innerCounter] == ',' || \ strToken[innerCounter] == ';' || strToken[innerCounter] == '+' || \ strToken[innerCounter] == '-' || strToken[innerCounter] == '*' || \ strToken[innerCounter] == '/' || strToken[innerCounter] == '&' || \ strToken[innerCounter] == '|' || strToken[innerCounter] == '<' || \ strToken[innerCounter] == '>' || strToken[innerCounter] == '=' || \ strToken[innerCounter] == '~') { //we found a symbol if(tempStrCounter > 0) //check if there was a previous { //token being extracted tempStr[tempStrCounter] = '\0'; //set the null indicator addTokenToList(tempStr); memset(tempStr, 0, 100); //clear tempStr buffer tempStrCounter = 0; //reset tempStrCounter } tempStr[0] = strToken[innerCounter]; tempStr[1] = '\0'; addTokenToList(tempStr); memset(tempStr, 0, 100); } else { tempStr[tempStrCounter] = strToken[innerCounter]; tempStrCounter++; } } if(tempStrCounter > 0) //string had no symbol or string ended with no symbols { //this block of if may be a redudant code tempStr[tempStrCounter] = '\0'; printf("token: %s\n", tempStr); tempStrCounter = 0; memset(tempStr, 0, 100); } break; } } //sting constant logic ends here else { tempStr[tempStrCounter] = strToken[counter]; tempStrCounter++; } } if(tempStrCounter > 0) //string had no symbol or string ended with no symbols { tempStr[tempStrCounter] = '\0'; addTokenToList(tempStr); tempStrCounter = 0; memset(tempStr, 0, 100); } if(strProcessed) { strProcessed = 0; break; } } } } } displayTokens(); current = head; }
int main(int argc, char** argv){ /*main expression is one string of all args put together*/ char* mainExpression = getExpressionFromArgs(argc, argv); /*list of tokens*/ tokenNode *mainList = malloc(sizeof(tokenNode)); /*not useful yet *TODO: Something with variables*/ int totalVariables = 0; /*keep iterating i until it reaches a ; in main expression*/ for(int i = 0; mainExpression[i] != ';'; i++){ /*get necesarry tokens from expression*/ if(isalpha(mainExpression[i])){ addTokenToList(mainList, getVariableLexeme(mainExpression, &i)); } if(isdigit(mainExpression[i] )){ addTokenToList(mainList, getNumericLexeme(mainExpression, &i)); } if(isOperator(mainExpression[i])){ addTokenToList(mainList, getOperatorLexeme(mainExpression, &i)); } if(mainExpression[i] == '$'){ addTokenToList(mainList, getSpecialLexeme(mainExpression, &i)); } } /*conductor is the iterator to move across the list*/ tokenNode *conductor; /*tempNode is used for freeing memory*/ tokenNode *tempNode; /*used to determine if there are anymore *RELEVANT operators to compute with */ _Bool noMoreOperators = false; /*check if we have one number left over*/ while(mainList->size > 1){ /*i represents the current order of precedence to be checked, *in EMDAS order*/ for(int i = 0; i < 3; i++){ /*set/reset conductor back to start of list*/ conductor = mainList; /*set/reset noMoreOperators so that next loop can continue*/ noMoreOperators = false; while(!noMoreOperators){ /*default; if the list is traversed and there are no more operators, loop will end *this was inspired by traditional bubble sort*/ noMoreOperators = true; /*while not at end of list*/ while(conductor->next != NULL){ /*check if the next token is an operator*/ if(conductor->next->value.type == operator){ /*check if the operator token is the same precedence as the operator we are looking for*/ if(checkPrecedence(conductor->next->value) == i){ /*if so, we have found a RELEVANT operator and noMoreOperators is false*/ noMoreOperators = false; /*perform necesarry calculation depending on current precedence (i)*/ switch(getOperatorID(conductor->next->value)){ case 0: /*set conductor value to conductor^conductor->next->next*/ conductor->value = tokenExponentation(conductor->value, conductor->next->next->value); /*set tempNode to the 3 items down the list (next number)*/ tempNode = conductor->next->next->next; /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/ free(conductor->next); free(conductor->next->next); /*link conductor with tempNode*/ conductor->next = tempNode; /*subtract two tokens from mainList*/ mainList->size -= 2; break; case 1: /*set conductor value to conductor*conductor->next->next*/ conductor->value = tokenMultiplication(conductor->value, conductor->next->next->value); /*set tempNode to the 3 items down the list (next number)*/ tempNode = conductor->next->next->next; /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/ free(conductor->next); free(conductor->next->next); /*link conductor with tempNode*/ conductor->next = tempNode; /*subtract two tokens from mainList*/ mainList->size -= 2; break; case 2: /*set conductor value to conductor/conductor->next->next*/ conductor->value = tokenDivision(conductor->value, conductor->next->next->value); /*set tempNode to the 3 items down the list (next number)*/ tempNode = conductor->next->next->next; /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/ free(conductor->next); free(conductor->next->next); /*link conductor with tempNode*/ conductor->next = tempNode; /*subtract two tokens from mainList*/ mainList->size -= 2; break; case 3: /*set conductor value to conductor+conductor->next->next*/ conductor->value = tokenAddition(conductor->value, conductor->next->next->value); /*set tempNode to the 3 items down the list (next number)*/ tempNode = conductor->next->next->next; /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/ free(conductor->next); free(conductor->next->next); /*link conductor with tempNode*/ conductor->next = tempNode; /*subtract two tokens from mainList*/ mainList->size -= 2; break; case 4: /*set conductor value to conductor-conductor->next->next*/ conductor->value = tokenSubtraction(conductor->value, conductor->next->next->value); /*set tempNode to the 3 items down the list (next number)*/ tempNode = conductor->next->next->next; /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/ free(conductor->next); free(conductor->next->next); /*link conductor with tempNode*/ conductor->next = tempNode; /*subtract two tokens from mainList*/ mainList->size -= 2; break; default: /*if current token is not really a token*/ printf(ANSI_COLOR_MAGENTA"Error: Erroneous token: %s\n"ANSI_COLOR_RESET, conductor->next->value.lexeme); exit(1); } /*show your work!*/ for(int j = 0; j < mainList->size; j++){ printf("%s", getTokenFromList(mainList, j).lexeme); } printf("\n"); } } /*iterate down the list unless at end of list, unless the current operator is of the desired precedence this is to avoid skipping operators on accident*/ if(conductor->next != NULL && checkPrecedence(conductor->next->value) != i){ conductor = conductor->next; } } } } } /*print list*/ printf("Answer: %s\n",getTokenFromList(mainList, 0).lexeme); }