示例#1
0
/*
 Reads a word (a run of characters that char_table classifies as LETTER or
 DIGIT) from the scanner's current line position, lower-cases it and
 classifies it as a reserved word or an identifier.

 str       - buffer holding the word; it is passed to downshiftWord() and
             isReservedWord() and becomes the token string.
 token_ptr - write cursor for the extracted characters (presumably points
             into str -- confirm against the caller).
 tok       - token that receives the code and the token string.
 */
void Scanner::getWord(char *str, char *token_ptr, Token *tok)
{
    // Copy characters while they are word characters. The table is indexed
    // through unsigned char: a plain (possibly signed) char holding a byte
    // >= 0x80 would otherwise become a negative index.
    for (;;)
    {
        const unsigned char ch = static_cast<unsigned char>(*line_ptr);
        if (char_table[ch] != LETTER && char_table[ch] != DIGIT)
        {
            break;
        }
        *token_ptr++ = *line_ptr++;
    }
    *token_ptr = '\0';

    // Downshift the word, to make it lower case
    downshiftWord(str);

    // Anything that isReservedWord() does not recognise is an identifier.
    // (isReservedWord() presumably sets the code on tok itself when it
    // matches -- confirm.)
    const bool reserved = isReservedWord(str, tok);
    if (!reserved)
    {
        tok->setCode(IDENTIFIER);
    }
    tok->setTokenString(string(str));
}
示例#2
0
文件: Scanner.cpp 项目: achasse/Lab5
/*
 Reads a word (a run of characters that char_table classifies as LETTER or
 DIGIT) from the scanner's current line position, lower-cases it and
 allocates the matching token.

 str       - buffer holding the word; it is passed to downshiftWord() and
             isReservedWord() and becomes the token string.
 token_ptr - write cursor for the extracted characters (presumably points
             into str -- confirm against the caller).
 tok       - out-parameter; *tok is set to a token allocated with new,
             either a Token for a reserved word or an IdentifierToken.
             Any previous value of *tok is overwritten without being freed.
 */
void Scanner::getWord(char *str, char *token_ptr, Token **tok)
{
    // Copy characters while they are word characters. The table is indexed
    // through unsigned char: a plain (possibly signed) char holding a byte
    // >= 0x80 would otherwise become a negative index.
    for (;;)
    {
        const unsigned char ch = static_cast<unsigned char>(*line_ptr);
        if (char_table[ch] != LETTER && char_table[ch] != DIGIT)
        {
            break;
        }
        *token_ptr++ = *line_ptr++;
    }
    *token_ptr = '\0';

    // Downshift the word, to make it lower case
    downshiftWord(str);

    // NOTE(review): the original author flagged this classification step as
    // "not working properly"; the cause is not visible from this function.
    TokenCode code;
    if (!isReservedWord(str, &code))
    {
        // Not a reserved word, so it is an identifier.
        *tok = new IdentifierToken(string(str));
        return;
    }

    // Reserved word: plain token carrying the code reported through `code`.
    *tok = new Token(code);
    (*tok)->setTokenString(string(str));
}
void parseFunction(struct token *functionName)
/* Starting with ( parse function and make call graph.
 * Skips tokens up to and including the first ')' (the parameter list).  If
 * the token after that is '{', walks the body until the matching '}' and
 * prints "functionName -> callee" for every name token that is directly
 * followed by '(' and is not a reserved word. */
{
char c;
struct hash *uniqHash = hashNew(0);

/* Skip the parameter list; leave the token after ')' as current. */
for (;;)
    {
    nextToken();
    if (token->string[0] == ')')
        {
        nextToken();
        break;
        }
    }

c = token->string[0];
if (c == '{')
    {
    int blockDepth = 1;
    struct token *lastName = NULL;   /* Name token seen just before the current token, if any. */
    for (;;)
        {
        nextToken();
        c = token->string[0];
        if (c == '{')
            {
            lastName = NULL;
            ++blockDepth;
            }
        else if (c == '}')
            {
            lastName = NULL;
            --blockDepth;
            if (blockDepth == 0)
                break;
            }
        /* isalpha() is only defined for unsigned char values (or EOF), so
         * convert before the call. */
        else if (c == '_' || isalpha((unsigned char)c))
            lastName = token;
        else if (c == '(')
            {
            if (lastName != NULL && !isReservedWord(lastName->string))
                {
                /* De-duplication through uniqHash is currently disabled:
                 * every call site is printed. */
                // if (!hashLookup(uniqHash, lastName->string))
                    {
                  //   hashAdd(uniqHash, lastName->string, NULL);
                    printf("%s -> %s\n", functionName->string, lastName->string);
                    }
                }
            lastName = NULL;
            }
        else
            lastName = NULL;
        }
    }
hashFree(&uniqHash);
}
示例#4
0
/*
 * Consumes the rest of a word and returns its token.
 *
 * Calls nextChar() at least once, then keeps calling it while the character
 * at tk->inputIter[0] is alphanumeric or '_'.  The token is built with
 * makeToken() and labelled "reserved word" when isReservedWord() accepts
 * tk->tokenBuffer, otherwise "word".
 *
 * Written as a loop rather than self-recursion so that stack use does not
 * grow with the length of the word.
 */
TokenT *_word(TokenizerT *tk) {
    do {
        nextChar(tk);
        /* isalnum() requires an unsigned char value (or EOF). */
    } while(isalnum((unsigned char)tk->inputIter[0]) || tk->inputIter[0] == '_');

    if(isReservedWord(tk->tokenBuffer)) {
        return makeToken(tk, "reserved word");
    }
    return makeToken(tk, "word");
}
示例#5
0
//---------------------------------------------------------------------------
// Scans a word into the token buffer through the global write cursor tokenp.
//
// A word is a run of letters, digits and '_' starting at curChar.  When the
// word is immediately followed by '.', the '.' and the following run of word
// characters are appended as well, unless the word scanned so far equals the
// "end" keyword selected for the current context (library, FSM or plain
// module).  The buffer is downshifted via downShiftWord(), and curToken is
// set to TKN_IDENTIFIER when isReservedWord() does not recognise the word.
//
// NOTE(review): wordString is presumably the buffer tokenp writes into --
// confirm.
void getWord(void)
{
	while ((calcCharCode(curChar) == CHR_LETTER) || (calcCharCode(curChar) == CHR_DIGIT) ||
		(curChar == '_'))
	{
		*tokenp = curChar;
		tokenp++;
		getChar();
	}
	// Terminate the string. This must be the character '\0': assigning
	// nullptr to a char is ill-formed.
	*tokenp = '\0';
	downShiftWord();
	if (curChar == '.')
	{
		// Pick the keyword that closes the current compilation unit.
		TokenCodeType endToken = TKN_END_MODULE;
		if (CurLibrary)
			endToken = TKN_END_LIBRARY;
		else if (CurModuleIdPtr->defn.info.routine.flags & ROUTINE_FLAG_FSM)
			endToken = TKN_END_FSM;
		//-------------------------------------------------------------------
		// If we have an "endmodule" string, stop. Otherwise, keep reading...
		if (strcmp(wordString, TokenStrings[endToken]) != 0)
		{
			// Append the '.' itself, then the word that follows it.
			*tokenp = curChar;
			tokenp++;
			getChar();
			while ((calcCharCode(curChar) == CHR_LETTER) || (calcCharCode(curChar) == CHR_DIGIT) ||
				(curChar == '_'))
			{
				*tokenp = curChar;
				tokenp++;
				getChar();
			}
			*tokenp = '\0';
			downShiftWord();
		}
	}
	if (!isReservedWord())
		curToken = TKN_IDENTIFIER;
}
示例#6
0
// Checks whether `name` may be used for a new node in `dag`.
//
// Returns true when the name is acceptable.  Otherwise returns false and
// stores an explanation in `whynot`.  The checks run in this order: null
// name, empty name, reserved word (per isReservedWord()), and finally a
// name that already exists in the DAG.  `dag` is only asserted non-null
// once the name itself has passed the first three checks.
bool
IsValidNodeName( Dag *dag, const char *name, MyString &whynot )
{
	bool valid = false;

	if( !name ) {
		whynot = "missing node name";
	}
	else if( name[0] == '\0' ) {
		whynot = "empty node name (name == \"\")";
	}
	else if( isReservedWord( name ) ) {
		whynot.formatstr( "invalid node name: '%s' is a DAGMan reserved word",
					name );
	}
	else {
		ASSERT( dag != NULL );
		if( dag->NodeExists( name ) ) {
			whynot.formatstr( "node name '%s' already exists in DAG", name );
		}
		else {
			valid = true;
		}
	}

	return valid;
}
//Multi-character lexemes that must be split out of a chunk as one token.
//They are tried in this order at every special-symbol position.
static const char *const multiCharTokens[] = {
    "/*", "*/", "!=", "<=", ">=", ":=", "null", "odd"
};

//Returns the multi-character lexeme that starts at buffer[ i ], or NULL if there is none.
//A lexeme is only considered when the whole chunk (length len) is at least as long as the lexeme.
static const char *multiCharTokenAt( const char *buffer, int len, int i ){

    size_t k;

    for( k = 0; k < sizeof multiCharTokens / sizeof multiCharTokens[ 0 ]; k++ ){
        size_t tokLen = strlen( multiCharTokens[ k ] );
        if( (size_t) len >= tokLen && strncmp( buffer + i, multiCharTokens[ k ], tokLen ) == 0 )
            return multiCharTokens[ k ];
    }

    return NULL;

}

//Removes the first count characters of buffer (current length len) in place; returns the new length.
//memmove is used because source and destination overlap, which strncpy does not allow.
static int dropPrefix( char *buffer, int len, int count ){

    memmove( buffer, buffer + count, (size_t)( len - count ) + 1 );
    return len - count;

}

//Splits one whitespace-delimited chunk into raw tokens and writes them to out, each followed by a space.
//The chunk is consumed from the left: every time a special symbol is found, the text before it and the
//symbol (or the multi-character lexeme starting there) are written, and the scan restarts on the remainder.
//An empty prefix is written as a lone space, which is harmless because the output is re-read token by token.
static void writeRawTokens( FILE *out, char *buffer ){

    int len = (int) strlen( buffer );
    int i;

    for( i = 0; i < len; i++ ){

        const char *multi;

        if( !isSpecialSymbol( buffer[ i ] ) ){
            //Reached the end without another special symbol: the remainder is one token.
            //Invalid symbols are not filtered here; they are dealt with by the second pass.
            if( i == len-1 )
                fprintf( out, "%s ", buffer );
            continue;
        }

        multi = multiCharTokenAt( buffer, len, i );

        if( multi != NULL ){
            fprintf( out, "%.*s ", i, buffer );
            fprintf( out, "%s ", multi );
            len = dropPrefix( buffer, len, i + (int) strlen( multi ) );
        }
        else if( i == 0 ){
            //Special symbol at the very beginning: it is a token on its own.
            fprintf( out, "%c ", buffer[ 0 ] );
            len = dropPrefix( buffer, len, 1 );
        }
        else{
            //Special symbol splits the chunk: write the text before it, then the symbol.
            fprintf( out, "%.*s ", i, buffer );
            fprintf( out, "%c ", buffer[ i ] );
            len = dropPrefix( buffer, len, i + 1 );
        }

        i = -1; //Restart the scan at the beginning of the remainder.

    }

}

//Lexical analyzer driver.
//Pass 1 splits the source program into raw tokens (temp_output.txt).
//Pass 2 classifies them into lexeme_table.txt and lexeme_list.txt, skipping comment blocks.
//Pass 3 copies the source program to source_program.txt and deletes the intermediary file.
//Returns 0 on success, 1 when the file name cannot be read or a file cannot be opened.
int main( void ){

    char buffer[ 10000 ]; //Holds one whitespace-delimited token at a time.

    //Prompt user for source program name.
    char sp[ 50 ]; //Name of input file.
    printf( "What is the file name for your source program?\n" );
    if( scanf( "%49s", sp ) != 1 ){ //Width keeps the name inside sp.
        fprintf( stderr, "No source file name given.\n" );
        return 1;
    }

    //Open file input stream to main input (the program).
    FILE *fin = fopen( sp, "r" );
    if( fin == NULL ){
        perror( sp );
        return 1;
    }

    //Open file output stream to intermediary output file.
    //In this file, unprocessed tokens will be stored for further processing.
    FILE *temp_fout = fopen( "temp_output.txt", "w" );
    if( temp_fout == NULL ){
        perror( "temp_output.txt" );
        fclose( fin );
        return 1;
    }

    //Procedure for creating intermediary output.
    /** BEGIN PROCEDURE **/
    while( fscanf( fin, "%9999s", buffer ) == 1 ) //Width keeps the token inside buffer.
        writeRawTokens( temp_fout, buffer );
    /** END OF PROCEDURE **/

    //Resource management.
    fclose( temp_fout );
    fclose( fin );

    //Open file input stream to intermediary output created by the procedure above (unprocessed tokens).
    fin = fopen( "temp_output.txt", "r" );
    FILE *table = fopen( "lexeme_table.txt", "w" ); //Will hold the lexeme table.
    FILE *list = fopen( "lexeme_list.txt", "w" ); //Will hold the lexeme list.
    if( fin == NULL || table == NULL || list == NULL ){
        fprintf( stderr, "Could not open the intermediary or lexeme output files.\n" );
        if( fin != NULL ) fclose( fin );
        if( table != NULL ) fclose( table );
        if( list != NULL ) fclose( list );
        return 1;
    }

    //Print header for lexeme table.
    fprintf( table, "Lexeme Table:\n%15s%15s\n", "lexeme", "token type");

    //Procedure to examine printed tokens.
    /** BEGIN PROCEDURE **/

    int isResWord = -1; //If currently examined token is a reserved word, != -1; otherwise, = -1.
    int isComment = 0; //If currently reading in a comment, = 1; otherwise, = 0.

    while( fscanf( fin, "%9999s ", buffer ) == 1 ){

        //Recognize beginning of comment block.
        if( !strcmp( buffer, "/*" ) ){
            isComment = 1;
        }

        //Recognize end of comment block.
        else if( isComment == 1 && !strcmp( buffer, "*/" ) ){
            isComment = 0;
        }

        //Only examine tokens while not inside a comment block.
        else if( !isComment ){

            //Error detection by token. The exit status stays 0 here, as before.
            if( detectError( buffer ) ){
                fclose( fin );
                fclose( table );
                fclose( list );
                exit( 0 );
            }

            //Is the current token a reserved word? Print to table and list appropriately.
            isResWord = isReservedWord( buffer );

            if( isResWord != -1 ){

                fprintf( table, "%15s%15d\n", buffer, isResWord );
                fprintf( list, "%d ", isResWord );

            }

            //Is the current token an identifier? Print to table and list appropriately.
            //The ctype classifiers need an unsigned char value.
            else if( isalpha( (unsigned char) buffer[0] ) ){

                fprintf( table, "%15s%15d\n", buffer, 2 );
                fprintf( list, "2 %s ", buffer );

            }

            //Is the current token a number? Print to table and list appropriately.
            else if( isdigit( (unsigned char) buffer[0] ) ){

                fprintf( table, "%15s%15d\n", buffer, 3 );
                fprintf( list, "3 %s ", buffer );

            }

            //Otherwise, our token is some other valid symbol (checked for errors above). Print appropriately.
            else{

                int symbolCode = isOtherValid( buffer );

                fprintf( table, "%15s%15d\n", buffer, symbolCode );
                fprintf( list, "%d ", symbolCode );

            }
        }

    }

    //Resource management.
    fclose( fin );
    fclose( table );
    fclose( list );

    /** END OF PROCEDURE **/

    //Procedure to copy the source program.
    /** BEGIN PROCEDURE **/

    fin = fopen( sp, "r" ); //File containing source program.
    FILE *source = fopen( "source_program.txt", "w" ); //File to copy source program to.
    if( fin == NULL || source == NULL ){
        fprintf( stderr, "Could not copy the source program.\n" );
        if( fin != NULL ) fclose( fin );
        if( source != NULL ) fclose( source );
        remove( "temp_output.txt" );
        return 1;
    }

    //Copy character by character, until end of file.
    //fgetc returns an int so that EOF can be told apart from every valid byte.
    int copy;
    while( ( copy = fgetc( fin ) ) != EOF )
        fputc( copy, source );

    //Resource management.
    fclose( fin );
    fclose( source );
    remove( "temp_output.txt" ); //Delete intermediary output.

    /** END OF PROCEDURE **/

    return 0;

} //End of main.
示例#8
0
/* Writes one completed word to lex_file as "category(word),".
 * The categories are tried in a fixed order; a word that matches none of
 * them is written as a variable. */
static void emitWord(FILE *lex_file, char *word)
{
    const char *category;

    if(strcmp("writeln",word)==0 || strcmp("write",word)==0)
        category="output(";
    else if(strcmp("readln",word)==0 || strcmp("read",word)==0)
        category="input(";
    else if(isReservedWord(word)==1)
        category="reservedWord(";
    else if(isAdvMathFunc(word)==1)
        category="advancedMathFunction(";
    else if(isOrdinalFunc(word)==1)
        category="ordinalFunction(";
    else if(isVariableType(word)==1)
        category="variableType(";
    else if(isBoolOperator(word)==1)
        category="booleanOperator(";
    else if(isFileHandlingFunc(word)==1)
        category="fileHandlingFunction(";
    else if(strcmp(word,"div")==0 || strcmp(word,"mod")==0)
        category="arithmeticOperation(";
    else
        category="variable(";

    fputs(category,lex_file);
    fputs(word,lex_file);
    fputs("),",lex_file);
}

/* Reads <name>.pas character by character, writes the recognised tokens to
 * <name>.lex and then echoes the .lex file to standard output.
 * Returns 0, also when a file cannot be opened (a message is printed in
 * that case); returns 1 only when no file name can be read. */
int main()
{
    FILE *pascal_file,*lex_file;
    char file_name[40], file_name2[40];
    char is_token[40];
    char current_char;
    int next_char;
    int i=0,is_comment=0,is_assignment=0;

    printf("Please enter the source file name: ");
    fflush(stdout);
    /* fgets bounds the read; 4 bytes are kept free for the extension that is
     * appended below. */
    if(fgets(file_name,(int)sizeof(file_name)-4,stdin)==NULL)
        return 1;
    file_name[strcspn(file_name,"\r\n")]='\0';
    strcpy(file_name2,file_name);
    strcat(file_name,".pas");
    strcat(file_name2,".lex");

    memset(is_token,0,sizeof(is_token));

    if((pascal_file = fopen(file_name,"r")) == NULL)
    {
        printf("dosya acilamadi!\n");
        return 0;
    }
    if((lex_file = fopen(file_name2,"w")) == NULL)
    {
        printf("dosya acilamadi!\n");
        fclose(pascal_file);
        return 0;
    }

    /* The body also runs once for the EOF read: that pass flushes a pending
     * word and a pending left parenthesis. */
    while(!feof(pascal_file))
    {
        current_char=getc(pascal_file);
        if(isLetter(current_char)==1) /* a letter is appended to the current word */
        {
            /* Keep one byte for the terminator; letters beyond the capacity
             * of is_token are dropped. */
            if(i<(int)sizeof(is_token)-1)
            {
                is_token[i]=current_char;
                i++;
            }
        }
        else if(isLetter(current_char)==0 && i!=0) /* not a letter: write out the word collected so far */
        {
            emitWord(lex_file,is_token);
            i=0;
            memset(is_token,0,sizeof(is_token));
        }

        if(current_char==':')
        {
            next_char=getc(pascal_file);
            if(next_char=='=')
            {
                fputs("assignmentOperator(:=),",lex_file);
                is_assignment=1;
                current_char='=';
            }
            else
            {
                /* The look-ahead character goes back to the stream and
                 * current_char stays ':', so the checks below do not process
                 * a character that will be read again next iteration. */
                fputs("colon(:),",lex_file);
                ungetc(next_char,pascal_file);
            }
        }

        if(current_char=='{')
        {
            comment(pascal_file,current_char,lex_file);
        }

        /* is_comment==1 means the previous character was '('. */
        if(current_char=='*' && is_comment==1)
        {
            comment(pascal_file,current_char,lex_file);
            is_comment=0;
        }
        if(is_comment==1)
        {
            fputs("leftParentheses((),",lex_file);
        }
        if(current_char==')')
        {
            fputs("rightParentheses()),",lex_file);
        }
        if(current_char==';')
        {
            fputs("endOfLine(;),",lex_file);
        }
        if(current_char=='[')
        {
            fputs("openingBracket([)",lex_file);
        }
        if(current_char==']')
        {
            fputs("closingBracket(])",lex_file);
        }

        /* The '=' of ":=" was already written as part of the assignment. */
        if(is_assignment==0 && (current_char=='<' || current_char=='>' || current_char=='='))
        {
            next_char=getc(pascal_file);
            fputs("compOperator(",lex_file);
            fputc(current_char,lex_file);
            if((current_char=='<' && (next_char=='=' || next_char=='>')) ||
               (current_char=='>' && next_char=='='))
            {
                /* Two-character operator: <=, <> or >=. */
                fputc(next_char,lex_file);
            }
            else
            {
                /* Single-character operator: give the look-ahead back and
                 * keep current_char on the operator itself. */
                ungetc(next_char,pascal_file);
            }
            fputs("),",lex_file);
        }
        is_assignment=0;

        if(current_char=='+' || current_char =='-' || current_char=='*' || current_char=='/')
        {
            fputs("arithOperator(",lex_file);
            fputc(current_char,lex_file);
            fputs("),",lex_file);
        }

        /* Remember '(' for one iteration: it is either the start of a
         * "(*" comment or a left parenthesis. */
        is_comment=0;
        if(current_char=='(')
        {
            is_comment=1;
        }
        if(current_char==39) /* single quote */
        {
            skip_string(pascal_file,lex_file);
        }

        if(current_char==9 || current_char==10 || current_char==' ')
        {
            fputc(current_char,lex_file);
        }
        if(isNumeric(current_char)==1)
        {
            constant(pascal_file,lex_file,current_char);
        }
    }
    fclose(pascal_file);
    fclose(lex_file);

    /* Echo the generated file to standard output. */
    lex_file = fopen(file_name2,"r");
    if(lex_file == NULL)
    {
        printf("dosya acilamadi!\n");
        return 0;
    }
    while((next_char=getc(lex_file))!=EOF)
    {
        printf("%c",next_char);
    }
    fclose(lex_file);

    return 0;
}
示例#9
0
Token Scanner::handleWord()
{
	/*
		Parses input file to try to read an identifier or reserved word.
		Assumes that file pointer is on first available character.

		Implemented as a small state machine driven by the peeked character:
		  0 - start, nothing consumed yet
		  1 - last consumed character was a letter or digit (accepting)
		  2 - last consumed character was '_' (not accepting)
		  3 - stop; the peeked character is left unconsumed
		Every move into state 1 or 2 consumes the peeked character and
		appends it to _lexeme.

		Returns MP_ERROR when the scan ends in a non-accepting state with a
		lexeme of exactly one character; otherwise returns _token after
		isReservedWord() has been called on the lexeme.
	*/
	bool done = false;
	bool accept = false;
	int state = 0;

	while (!done)
	{
		// see whats next, dont consume
		const char next = peek();

		// The <cctype> classifiers are only defined for unsigned char
		// values (or EOF), so convert before calling them.
		const unsigned char uc = static_cast<unsigned char>(next);
		const bool alnum = isalpha(uc) || isdigit(uc);
		const bool underscore = (next == '_');

		switch (state)
		{
		case 0:
			// the start of an identifier
			accept = false;
			if (underscore)
				state = 2;
			else if (alnum)
				state = 1;
			else
				state = 3;
			break;
		case 1:
			// Accept state.
			accept = true;
			_token = MP_IDENTIFIER;
			if (alnum)
				state = 1;
			else if (underscore)
				state = 2;
			else
				state = 3;
			break;
		case 2:
			// do not accept
			accept = false;
			state = alnum ? 1 : 3;
			break;
		case 3:
			done = true;
			break;
		}

		// Any state other than 3 means the peeked character was accepted
		// into the lexeme: consume it now.
		if (state != 3)
			_lexeme.push_back(get());
	}

	// check to see if lexeme == '_'
	if (!accept && _lexeme.size() == 1){
		_token = MP_ERROR;
		return _token;
	}
	// must check to see if this lexeme is a reserved word or not
	// (isReservedWord() presumably updates _token when it matches -- confirm)
	isReservedWord(_lexeme);
	return _token;
}