Ejemplo n.º 1
0
/* Returns non-zero when c is one of the single-character symbols the tokenizer splits on. */
static int tokenizerIsSymbol(char c)
{
	return c != '\0' && strchr("{}()[].,;+-*/&|<>=~", c) != NULL;
}

/* Appends c to the pending token in buf; the character is dropped when buf is already full. */
static void tokenizerAppendChar(char *buf, size_t cap, int *len, char c)
{
	if ((size_t)*len + 1 < cap)
	{
		buf[*len] = c;
		(*len)++;
	}
}

/* Adds the pending token in buf (if there is one) to the token list, then clears buf. */
static void tokenizerFlushPending(char *buf, size_t cap, int *len)
{
	if (*len > 0)
	{
		buf[*len] = '\0';
		addTokenToList(buf);
		memset(buf, 0, cap);
		*len = 0;
	}
}

/* Adds the one-character symbol c to the token list, using buf as scratch space. */
static void tokenizerEmitSymbol(char *buf, size_t cap, char c)
{
	buf[0] = c;
	buf[1] = '\0';
	addTokenToList(buf);
	memset(buf, 0, cap);
}

/*
 * Opens `filename`, splits its contents into tokens and hands every token to
 * addTokenToList(). Once the whole file has been read it calls displayTokens()
 * and sets `current` back to `head`.
 *
 * Per line read with fgets():
 *   1. filelineno is incremented.
 *   2. A copy of the line is split on spaces, tabs and newlines with strtok().
 *   3. While inside a multi-line comment, words are skipped until one of them
 *      contains the closing marker.
 *   4. A word containing the opening marker of a multi-line comment starts
 *      one (and ends it again if the same word also contains the closing marker).
 *   5. A word containing "//" ends processing of the line.
 *   6. A one-character word is added to the list as is.
 *   7. Any longer word is scanned character by character: every symbol
 *      character becomes its own token, the characters between symbols are
 *      collected into a token, and a double quote starts a string constant
 *      that is copied (quotes included) from the unsplit line.
 *
 * If the file cannot be opened, perror() reports it and the function returns
 * with the token list left empty.
 *
 * Tokens longer than the internal 100-byte buffer are truncated.
 *
 * NOTE(review): the stream stored in `file` is not closed here; `file` is
 * declared outside this function, so it is presumably closed elsewhere - confirm.
 */
void constructorTokenizer(char *filename)
{
	//TODO: Add check for filename extension .asm
	char *strToken, *dupLine, tempStr[100], *strEndInverted;
	int multiLineComment = 0, tempStrCounter, strProcessed;
	int strConstStart, strConstEnd;
	size_t counter, innerCounter, tokenLen, restLen, copyLen;

	file = fopen(filename, "r");
	filelineno = 0;
	head = current = 0;
	if (file == NULL)
	{
		perror(filename);
		return; /* nothing to read; calling fgets() on a NULL stream is undefined */
	}

	while (fgets(line, sizeof(line), file) != NULL) // read a line
	{
		filelineno++;
		/* strtok() writes into its input, so work on a copy and keep `line` intact
		 * for extracting string constants (which may contain spaces). */
		dupLine = strdup(line);
		if (dupLine == NULL)
		{
			perror("strdup");
			break;
		}

		for (strToken = strtok(dupLine, " \t\n"); strToken != NULL; strToken = strtok(NULL, " \t\n"))
		{
			if (multiLineComment)
			{
				if (strstr(strToken, "*/")) //end of multi-line comment
				{
					multiLineComment = 0;
				}
				continue;
			}
			if (strstr(strToken, "/*")) //start of multi-line comment
			{
				//the comment may also end inside this very word
				multiLineComment = (strstr(strToken, "*/") == NULL);
				continue;
			}
			if (strstr(strToken, "//")) //single-line comment: skip the rest of the line
			{
				break;
			}
			if (strlen(strToken) == 1) //single character: add it unchanged
			{
				addTokenToList(strToken);
				continue;
			}

			//the word may mix symbols, identifiers and string constant quotes
			memset(tempStr, 0, sizeof tempStr);
			tempStrCounter = 0;
			strProcessed = 0;
			tokenLen = strlen(strToken);
			for (counter = 0; counter < tokenLen; counter++)
			{
				if (tokenizerIsSymbol(strToken[counter]))
				{
					//emit whatever was collected before the symbol, then the symbol
					tokenizerFlushPending(tempStr, sizeof tempStr, &tempStrCounter);
					tokenizerEmitSymbol(tempStr, sizeof tempStr, strToken[counter]);
				}
				else if (strToken[counter] == '\"') //string constant
				{
					tokenizerFlushPending(tempStr, sizeof tempStr, &tempStrCounter);
					strProcessed = 1;
					//offset of the opening quote, valid in both dupLine and line
					strConstStart = (int)((strToken + counter) - dupLine);
					strEndInverted = strchr(line + strConstStart + 1, '\"');
					if (!strEndInverted)
					{
						//no closing quote: take everything up to the end of the line
						snprintf(tempStr, sizeof tempStr, "%s", line + strConstStart);
						addTokenToList(tempStr);
						break;
					}

					strConstEnd = (int)(strEndInverted - line);
					//copy the constant including both quotes, bounded by the buffer
					copyLen = (size_t)(strConstEnd - strConstStart) + 1;
					if (copyLen > sizeof tempStr - 1)
					{
						copyLen = sizeof tempStr - 1;
					}
					memcpy(tempStr, line + strConstStart, copyLen);
					tempStr[copyLen] = '\0';
					addTokenToList(tempStr);
					memset(tempStr, 0, sizeof tempStr);

					//continue right behind the closing quote
					strToken = dupLine + strConstEnd + 1;
					restLen = strlen(strToken);
					for (innerCounter = 0; innerCounter < restLen; innerCounter++)
					{
						if (tokenizerIsSymbol(strToken[innerCounter]))
						{
							tokenizerFlushPending(tempStr, sizeof tempStr, &tempStrCounter);
							tokenizerEmitSymbol(tempStr, sizeof tempStr, strToken[innerCounter]);
						}
						else
						{
							tokenizerAppendChar(tempStr, sizeof tempStr, &tempStrCounter, strToken[innerCounter]);
						}
					}
					/* NOTE(review): trailing text after the last symbol is only
					 * printed, not added to the token list, and whitespace in this
					 * remainder is not treated as a separator. Behaviour is kept
					 * as it was; confirm whether that is intended. */
					if (tempStrCounter > 0)
					{
						tempStr[tempStrCounter] = '\0';
						printf("token: %s\n", tempStr);
						tempStrCounter = 0;
						memset(tempStr, 0, sizeof tempStr);
					}
					break;
				}
				else
				{
					tokenizerAppendChar(tempStr, sizeof tempStr, &tempStrCounter, strToken[counter]);
				}
			}
			//word had no symbol at all, or ended with non-symbol characters
			tokenizerFlushPending(tempStr, sizeof tempStr, &tempStrCounter);
			if (strProcessed)
			{
				//the string constant logic already consumed the rest of the line
				break;
			}
		}
		/* Assumes addTokenToList() copies the text it is given; it is also called
		 * with the reused tempStr buffer, which is cleared right after the call. */
		free(dupLine);
	}
	displayTokens();
	current = head;
}
Ejemplo n.º 2
0
int main(int argc, char** argv){


    /*main expression is one string of all args put together*/
  char* mainExpression = getExpressionFromArgs(argc, argv);

    /*list of tokens*/
  tokenNode *mainList = malloc(sizeof(tokenNode));

    /*not useful yet
     *TODO: Something with variables*/
  int totalVariables = 0;

    /*keep iterating i until it reaches a ; in main expression*/
  for(int i = 0; mainExpression[i] != ';'; i++){

      /*get necesarry tokens from expression*/

    if(isalpha(mainExpression[i])){
      addTokenToList(mainList, getVariableLexeme(mainExpression, &i));      
    }
    if(isdigit(mainExpression[i] )){
      addTokenToList(mainList, getNumericLexeme(mainExpression, &i));      
    }
    if(isOperator(mainExpression[i])){
      addTokenToList(mainList, getOperatorLexeme(mainExpression, &i));      
    }
    if(mainExpression[i] == '$'){
      addTokenToList(mainList, getSpecialLexeme(mainExpression, &i));      
    }
  }
  
    /*conductor is the iterator to move across the list*/
  tokenNode *conductor;
    /*tempNode is used for freeing memory*/
  tokenNode *tempNode;
    /*used to determine if there are anymore
     *RELEVANT operators to compute with */
  _Bool noMoreOperators = false;

    /*check if we have one number left over*/
  while(mainList->size > 1){  

      /*i represents the current order of precedence to be checked,
       *in EMDAS order*/
    for(int i = 0; i < 3; i++){    
        /*set/reset conductor back to start of list*/
      conductor = mainList;

        /*set/reset noMoreOperators so that next loop can continue*/
      noMoreOperators = false;
      while(!noMoreOperators){           
          /*default; if the list is traversed and there are no more operators, loop will end
           *this was inspired by traditional bubble sort*/
        noMoreOperators = true;

          /*while not at end of list*/
        while(conductor->next != NULL){                          

            /*check if the next token is an operator*/            
          if(conductor->next->value.type == operator){
              /*check if the operator token is the same precedence as the operator we are looking for*/              
            if(checkPrecedence(conductor->next->value) == i){              
                /*if so, we have found a RELEVANT operator and noMoreOperators is false*/
              noMoreOperators = false;
                /*perform necesarry calculation depending on current precedence (i)*/            
              switch(getOperatorID(conductor->next->value)){
                case 0:                

                    /*set conductor value to conductor^conductor->next->next*/
                  conductor->value = tokenExponentation(conductor->value, conductor->next->next->value);
                    /*set tempNode to the 3 items down the list (next number)*/                  
                  tempNode = conductor->next->next->next;
                    /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/
                  free(conductor->next);
                  free(conductor->next->next);

                    /*link conductor with tempNode*/
                  conductor->next = tempNode;
                    /*subtract two tokens from mainList*/
                  mainList->size -= 2;

                  break;
                case 1:
                    /*set conductor value to conductor*conductor->next->next*/
                  conductor->value = tokenMultiplication(conductor->value, conductor->next->next->value);
                    /*set tempNode to the 3 items down the list (next number)*/                  
                  tempNode = conductor->next->next->next;
                    /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/
                  free(conductor->next);                  
                  free(conductor->next->next);

                    /*link conductor with tempNode*/
                  conductor->next = tempNode;

                    /*subtract two tokens from mainList*/
                  mainList->size -= 2;

                  break;
                case 2:
                    /*set conductor value to conductor/conductor->next->next*/
                  conductor->value = tokenDivision(conductor->value, conductor->next->next->value);
                    /*set tempNode to the 3 items down the list (next number)*/                  
                  tempNode = conductor->next->next->next;
                    /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/
                  free(conductor->next);                  
                  free(conductor->next->next);

                    /*link conductor with tempNode*/
                  conductor->next = tempNode;

                    /*subtract two tokens from mainList*/
                  mainList->size -= 2;

                  break;
                case 3:
                    /*set conductor value to conductor+conductor->next->next*/
                  conductor->value = tokenAddition(conductor->value, conductor->next->next->value);
                    /*set tempNode to the 3 items down the list (next number)*/                  
                  tempNode = conductor->next->next->next;
                    /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/
                  free(conductor->next);                  
                  free(conductor->next->next);

                    /*link conductor with tempNode*/
                  conductor->next = tempNode;

                    /*subtract two tokens from mainList*/
                  mainList->size -= 2;          

                  break;
                case 4:
                    /*set conductor value to conductor-conductor->next->next*/
                  conductor->value = tokenSubtraction(conductor->value, conductor->next->next->value);
                    /*set tempNode to the 3 items down the list (next number)*/                  
                  tempNode = conductor->next->next->next;
                    /*free the nodes between current node (conductor) and tempNode (conductor->next->next->next)*/
                  free(conductor->next);                  
                  free(conductor->next->next);

                    /*link conductor with tempNode*/
                  conductor->next = tempNode;

                    /*subtract two tokens from mainList*/
                  mainList->size -= 2;          

                  break;
                default:
                    /*if current token is not really a token*/
                  printf(ANSI_COLOR_MAGENTA"Error: Erroneous token: %s\n"ANSI_COLOR_RESET, conductor->next->value.lexeme);
                  exit(1);

              }

                /*show your work!*/
              for(int j = 0; j < mainList->size; j++){
                printf("%s", getTokenFromList(mainList, j).lexeme);
              }
              printf("\n");              
            }
          }
            /*iterate down the list unless at end of list,
            unless the current operator is of the desired precedence
            this is to avoid skipping operators on accident*/          
          if(conductor->next != NULL  && checkPrecedence(conductor->next->value) != i){            
            conductor = conductor->next;            
          }
        }
      }          
    }
  }
    /*print list*/
  printf("Answer: %s\n",getTokenFromList(mainList, 0).lexeme);

}