コード例 #1
0
ファイル: stable.c プロジェクト: Drag0n5layer/SymbolTable
/*
Purpose: Installs a new entry (VID record) in the symbol table.
Authors: Kwok Hong Kelvin Chan, Kyle Hinskens
Version: 13.11.14
Called functions: st_lookup(), b_setmark(), b_getsize(), strlen(), b_addc(), printf(),
				b_get_r_flag(), b_get_chmemloc(), b_getmark(), st_incoffset()
Parameters:
			STD sym_table The symbol table
			char *lexeme The lexeme
			int line The line number
Return value: int The offset of the lexeme or -1 on error
*/
int st_install(STD sym_table, char *lexeme, int line) {
	int offset; /* lookup() return value*/
	unsigned int i; /* loop counter for strlen */
	int j; /* loop counter */
	unsigned short buffer_offset = 0; /* buffer_offset, used for reorganize plex on the symbol table */

	offset = st_lookup(sym_table, lexeme);

	/* check if the symbol table is full */
	if (sym_table.st_offset == sym_table.st_size) return R_FAIL_1;

	/* lexeme is not found from the symbol table */ 
	if (offset == -1) {
		b_setmark(sym_table.plsBD, b_getsize(sym_table.plsBD)); /* set mark on the Buffer at the addc_offset index */
		/* store lexeme into the symbol table Buffer */
		for (i = 0; i < strlen(lexeme) + 1; ++i) {
			/* check if b_addc() executed successfully, just in case! */
			if (b_addc(sym_table.plsBD, lexeme[i]) == NULL) {
				return R_FAIL_2;
			}

			/* Buffer reallocation, reorganize plex on the symbol table */
			if (b_get_r_flag(sym_table.plsBD)) {
				/* traverse the symbol table, and reassign the correct memory address from the new Buffer */
				 for (j = 0; j < sym_table.st_offset; ++j) {
					sym_table.pstvr[j].plex = b_get_chmemloc(sym_table.plsBD, buffer_offset); /* get the character pointer from the buffer_offset */
					buffer_offset += (unsigned short)strlen(b_get_chmemloc(sym_table.plsBD, buffer_offset)) + 1; /* increment buffer_offset by the strlen from current string  */
				}
			}
		}

		sym_table.pstvr[sym_table.st_offset].plex = b_get_chmemloc(sym_table.plsBD, b_getmark(sym_table.plsBD)); /* set plex as the beginning of the lexeme from the Buffer */
		sym_table.pstvr[sym_table.st_offset].o_line = line; /* set line number */
		sym_table.pstvr[sym_table.st_offset].status_field = DEFAULT; /* set default status field */

		/* variable of type string */ 
		if (lexeme[strlen(lexeme) -1] == '#') {
			sym_table.pstvr[sym_table.st_offset].status_field |= STRING_MASK; /* set status field as string by performing a OR operation with the STRING_MASK */
			sym_table.pstvr[sym_table.st_offset].status_field |= UPDATE_MASK; /* Set update flag */
			sym_table.pstvr[sym_table.st_offset].i_value.str_offset = -1; /* set i_value */
		}
		/* variable of type int */ 
		else if (lexeme[0] == 'i' || lexeme[0] == 'o' || lexeme[0] == 'd' || lexeme[0] == 'n') {
			sym_table.pstvr[sym_table.st_offset].status_field |= INT_MASK; /* set status field as int by performing a OR operation with the INT_MASK*/
			sym_table.pstvr[sym_table.st_offset].i_value.int_val = 0; /* set i_value */
		}
		/* variable of type float */ 
		else {
			sym_table.pstvr[sym_table.st_offset].status_field |= FLOAT_MASK; /* set status field as float by performing a OR operation with the FLOAT_MASK*/
			sym_table.pstvr[sym_table.st_offset].i_value.fpl_val = 0.0f; /* set i_value */
		}

		st_incoffset(); /* increment global symbol table st_offset field */
		return sym_table.st_offset; /* The function returns the current offset of that entry from the beginning of the array of STVR (the array pointed by pstvr). */
	}

	return offset;
}
コード例 #2
0
ファイル: scanner.c プロジェクト: Nuuka/Compilers
/**********************************************************************************************************
Purpose:			Set the proper token depending on the lexeme read from the buffer. 
Author:				Thom Palmer and Chris Whitten
History/Versions:	10.20.13
Called functions:	b_setmark(), b_getc(), b_getmark(), SEOF(), WHTSPACE(), is_assop(), strncmp(),
					b_set_getc_offset(), b_retract(), b_get_getc_offset(),b_eob(), isalnum(), b_addc()
					get_next_state(), b_create(), b_pack(), b_destroy() 
Parameters:			Buffer * const pBD
Return value:		Token t
Algorithm:			Validate parameters, read char from buffer, check the char to see if its a legal character.
					depending on the char peak forward to see what the following char is in order to 
					determine if it's part of the lexeme or if it's time to return. once a valid lexeme is
					found set the token and return. if a valid lexeme is not found set the token to error 
					state and return. 
**********************************************************************************************************/
Token mlwpar_next_token(Buffer * sc_buf)
{
	Token t;			/* token to return after recognition */
	unsigned char c;	/* input symbol */
	int state = 0;		/* initial state of the FSM */
	unsigned int retrCtr;
	short lexstart;		/* start offset of a lexeme in the input buffer */
	short lexend;		/* end offset of a lexeme in the input buffer */
	int accept = NOAS;	/* type of state - initially not accepting */  
	unsigned int i=0;	/* Used throughout the function as iterator */
   
	/* Ensure the buffer is not null before trying to access it */

	if(sc_buf == NULL)
	{	   
		scerrnum = BUFFNULL;  
	     t_set_err_t(RUNTIMERR, t);
	}     
                
	while (1) /* Endless loop broken by token returns it will generate a warning */
	{ 
                
		/* Set mark before getting the next character. This prevents the need for unneccessary decrements. */
		b_setmark(sc_buf, b_get_getc_offset(sc_buf));
		lexstart = b_getmark(sc_buf);
	
		/* Get the next char from the buffer. */
		c = b_getc(sc_buf); 	
	
		/* Ensure SEOF has not been read before processing any further. */
		if(SEOF(c))
		{
			t.code = SEOF_T;
			return t;
		}

		/* If it's whitespace ignore and return to the start of the loop. */
		if(WHTSPC(c))
		{
			continue;
		}

		/* Drop into switch statement to determine what the character can potentially represent and handle it appropriatly. */
		switch(c)
		{
			/* If c is '=' the token can either be a relation operatory or an assignment operator,  so peak forward */
			case EQSIGN:
				{ 
					c = b_getc(sc_buf);
					/* If the next character is '=' then we have found a relational operator */
					if(c == EQSIGN) 
					{
						/* Set the code and attribute and return t */
						t.code = REL_OP_T;
						t.attribute.rel_op = EQ;
						return t;
					}
					/* Otherwise retract and return an assignment operator token */
					b_retract(sc_buf);
					t.code = ASS_OP_T;
					return t;
				} 
			/* If the token starts with ! it can either be a comment or the != relational operator, so peak forward and act appropriatly. */
			case EXCLAMTN:
				c = b_getc(sc_buf);
				/* If the next token is < then we have a comment therfore ignore everything in the line.*/
				if(c == LESSTHN)
				{
					do
					{
						c = b_getc(sc_buf);
						if(SEOF(c))
						{ 
							t.code = ERR_T;
							t.attribute.err_lex[0] = EXCLAMTN;
							t.attribute.err_lex[1] = LESSTHN;
							t.attribute.err_lex[2] = c;
							t.attribute.err_lex[3] = STRTERM;
							b_retract(sc_buf);
							return t;
						}
					}while ( c != NEWLINE && c != CARRTRN);

					++line;
					continue;
				}
				/* If the next token we have the NE relational operator. */
				if(c == EQSIGN)
				{
					t.code = REL_OP_T;
					t.attribute.rel_op = NE ;
					return t;
				}
				/* if the next char is neither = or < we have an error set ERR_T*/
				t.code = ERR_T;
				t.attribute.err_lex[0] = EXCLAMTN;
				t.attribute.err_lex[1] = c;
				t.attribute.err_lex[2] = STRTERM;
	   
				/* We assume the error was meant to be a token so ignore everything in the line. */
				do
				{
					c = b_getc(sc_buf);
					/* If SEOF or b_eob is found retract the buffer and return t */
					if(SEOF(c))
					{
						b_retract(sc_buf);
						return t;
					}
				}while ( c != NEWLINE);
				++line;
				return t;

			/*If we have a plus sign '+' set the token and it's attirbute then return. */
			case POS:
				t.code = ART_OP_T;
				t.attribute.arr_op = PLUS;
				return t;

			/*If we have a minus sign '-' set the token and it's attirbute then return. */
			case NEG:
				t.code = ART_OP_T;
				t.attribute.arr_op = MINUS;
				return t;

			/*If we have a a period '.' it could be a logical operator or an error. */
			case PERIOD:
				retrCtr = 1;
				i = 0;
				c = b_getc(sc_buf);
				
				/* Switch on the first character read after the period '.' */
				switch (c) 
				{
				/* If its an 'A' we might have .AND. */
				case'A' :
					/* Compare the string the string read from the buffer to the string literal .AND.  */
					++retrCtr;
					if(b_getc(sc_buf) == 'N')
					{
						++retrCtr;
						if(b_getc(sc_buf) == 'D')
						{
							++retrCtr;
							if(b_getc(sc_buf) == PERIOD)
							{
								t.code = LOG_OP_T;
								t.attribute.log_op = AND;
								return t;
							}
						}
					}
					break;
				case'O':
					/* Comapre the string the string read from the buffer to the string literal .OR.  */
					++retrCtr;
					if(b_getc(sc_buf) == 'R')
					{
						++retrCtr;
						if(b_getc(sc_buf) == PERIOD)
						{
							t.code = LOG_OP_T;
							t.attribute.log_op = OR;
							return t;
						}
					}

					break;
				}

				while(i<retrCtr)
				{
					b_retract(sc_buf);
					i++;
				}
				t.code = ERR_T;
				/* Add char which caused the error to the err_lex */
				t.attribute.err_lex[0] = PERIOD;
				t.attribute.err_lex[1] = STRTERM;
				return t;

			/* If we have an astrix sign '*' set the token and it's attirbute then return. */
			case ASTRX:
				t.code = ART_OP_T;
				t.attribute.arr_op = MULT;
				return t;

			/* If c is a forward slash '/' set the token and it's attirbute then return. */
			case FWDSLSH:
				t.code = ART_OP_T;
				t.attribute.arr_op = DIV;
				return t;

			/* If c is a left brace '{' set the token and it's attirbute then return. */
			case LBRACE:
				t.code = LBR_T;
				return t;

			/* If c is a right brace '}' set the token and it's attirbute then return. */
			case RBRACE:
				t.code = RBR_T;
				return t;

			/* If c is a left parenthesis '(' set the token and it's attirbute then return. */
			case LPRNTHS:
				t.code = LPR_T;
				return t;

			/* If c is a right parenthesis ')' set the token and it's attirbute then return. */
			case RPRNTHS:
				t.code = RPR_T;
				return t;

			/* If c is a less than symbol '<' check the next char. */
			case LESSTHN:
				c = b_getc(sc_buf);

				/* If the next char is the greater than symbol '>' set the proper token and return. */
				if(c == GRTRTHN){
					t.code = SCC_OP_T;
					return t;
				}

				/* If the next char is not recognized restract the buffer and set the token */
				t.code = REL_OP_T;
				t.attribute.rel_op = LT;
				b_retract(sc_buf);
				return t;

			/* If c is a greater than symbol '>' set the proper token and return. */
			case GRTRTHN:
				t.code = REL_OP_T;
				t.attribute.rel_op = GT;
				return t;

			/* If c is a new line '\n' increment the line number and return to the start of the loop. */
			case CARRTRN:
				NEWLINE_TEST
				continue;
				
			/* If c is a NEWLINE character increment the line number and continue */
			case NEWLINE:
				++line;
				continue;

			/* If c is a comma ',' set the proper token and return. */
			case COMMA:
				t.code = COM_T;  
				return t;

			/* If c is a semi colon ';' set the proper token and return. */
			case SEMICLN:
				t.code = EOS_T;
				return t;

			/* If c is a quotation mark '"' we have the start of a string, analyze the next chars until '"' or SEOF is hit. */
			case QUOTE:
				/* read all the chars in from the input buffer until a '"' */
				do
				{
					c = b_getc(sc_buf);
					lexend = b_get_getc_offset(sc_buf);
					/* If eob has be set or SEOF is read in from the buffer prior to closing the string */
					/* Break into the error token setup. */
					if( SEOF(c))
					{	
						/* Set the getc_offset to the start of the string */
						b_set_getc_offset(sc_buf,lexstart);
						/* Iterate over the buffer and copy the contents of the error string into the err_lex */
						for( i = 0; i < lexend-lexstart; i++) /* Comparison of unsigned int and short will generate warning. 
															     lexend-lexstart will always be positive no need to cast */
						{				
							c = b_getc(sc_buf);
							/* For the first 20 characters */
							if(i<=ERR_LEN)
							{
								/* Copy c into the current index of err_lex for first 16 characters */
								if(i<=16)
								t.attribute.err_lex[i] = c;
								/* Copy a decimal into the indexes between 17 and ERR_LEN */
								if(i>16 && i<ERR_LEN)
								{
									t.attribute.err_lex[i] = PERIOD;
								}
								/* Copy a string terminator into the last index of err_lex */
								if (i==ERR_LEN)
								{
									t.attribute.err_lex[i]= STRTERM;
								}

							}
					
						}
						t.code = ERR_T;							
						return t;
					}
					/* Increment the line number each time a line terminator is found */
					if(c == NEWLINE)
					{
						++line;
					}
					if(c == CARRTRN)
					{
						NEWLINE_TEST
					}
				}while ( c != QUOTE );
			
				/* Closing quote found. Valid String */
				/* Set the getc_offset back to the start of the string */
				b_set_getc_offset(sc_buf,lexstart);
				/* Set the str_offset attribute to the location of the current getc_offset in the str_LTBL */
				t.attribute.str_offset = b_getsize(str_LTBL);

				/* Add the characters of the string into the str_LTBL */
				for(  i = 0; i< lexend-lexstart;i++) /* Comparison of unsigned int and short will generate warning. 
														lexend-lexstart will always be positive no need to cast */
				{			
					c = b_getc(sc_buf);
					/* Ensure that quotes are not added to the string */
					if(c != '"')
					{
						if(!b_addc(str_LTBL, c))
						{
							scerrnum = FAILADDC;
							t_set_err_t(RUNTIMERR, t);
						}
					}
				}
				/* Add the string terminator to the string and set the Token Code */
				if (!b_addc(str_LTBL,STRTERM))
				{
					scerrnum = FAILADDC;
					t_set_err_t(RUNTIMERR, t);
				}
				t.code = STR_T;
				return t;
			}
	
		/* Special symbol scanning completed. Now checking for lexeme type */
		if (isalnum(c))
		{
			/* Use for loop to iterate over the transition table based on the current state and character */
			/* Continue iterating until an accepting state has been found */
			for(state = get_next_state(state,c,&accept);accept==NOAS; state = get_next_state(state,c,&accept))
			{
				c = b_getc(sc_buf);
			}
		
			/* Retract the buffer if is is an accepting state with a retract */
			if(accept==ASWR)
			{
				b_retract(sc_buf);
			}
			/* Set the end of the lexeme at the current getc_offset */
			lexend = b_get_getc_offset(sc_buf);
			/* Create a temporary buffer to store the lexeme */
			lex_buf = b_create((lexend - lexstart +1),0,'f');
			/* If buffer creation was not successful. Set the error token for a runtime error. */
			if (!lex_buf)
			{
				scerrnum = BUFFNULL;
				t_set_err_t(RUNTIMERR, t);
			}

			
			
			/* Reset the getc_offset to the start of the lexeme */
			b_set_getc_offset(sc_buf,lexstart);
			/* Add the characters of the lexeme to lex_buf */
			for( i = 0;i<lexend-lexstart;i++) /* Comparison of unsigned int and short will generate warning. 
												 lexend-lexstart will always be positive no need to cast */
			{
				if (!b_addc(lex_buf,b_getc(sc_buf)))
				{
					scerrnum = FAILADDC;
					t_set_err_t(RUNTIMERR, t);
				}
			}
			/* Pack lex_buf and add the string terminator to it */
			b_pack(lex_buf);
			/* If b_addc fails set the token for a runtime error and return t  */
			if (!b_addc(lex_buf,STRTERM))
			{
				scerrnum = FAILADDC;
				t_set_err_t(RUNTIMERR, t);
			}
			/* Call the accepting function at the current state index and pass the lexeme */
			t = aa_table[state](b_get_chmemloc(lex_buf,0));
			b_destroy(lex_buf);
			return t;
		}
		/* This code will be executed if c was an invalid symbol*/
		/* Set error token and return t. */
		t.code = ERR_T;
		t.attribute.err_lex[0] = c;
		t.attribute.err_lex[1] = STRTERM; /*Probably a better way to do this.*/
		return t;             
   }
}