//Validates one token and reports the first lexical error found on stdout.
//
//  s: NUL-terminated token text to examine.
//
//Returns 1 after printing an error message when the token is rejected, 0 otherwise.
//A token is rejected when it is:
//  - the single character ":",
//  - contains a character that is neither alphanumeric nor accepted by isSpecialSymbol(),
//  - starts with a letter and is longer than MAX_ID_LEN characters,
//  - starts with a digit and contains a letter, or
//  - starts with a digit and is longer than MAX_NUM_LEN characters.
int detectError( char *s ){

    int i;
    int len = (int) strlen( s );

    //Check for single colon token.
    if( len == 1 && s[0] == ':' ){
        printf( "Error: Invalid Symbol: :\n" );
        return 1;
    }

    //Check for invalid symbol.
    for( i = 0; i < len; i++ ){

        //The <ctype.h> classifiers need an unsigned char value; a plain char may be negative.
        if( !isalnum( (unsigned char) s[i] ) && !isSpecialSymbol( s[i] ) ){
            printf( "Error: Invalid Symbol: %c\n", s[i] );
            return 1;
        }
    }

    //Single letter & single number accepted, so only longer tokens need the checks below.
    if( len <= 1 ){
        return 0;
    }

    if( isalpha( (unsigned char) s[0] ) ){

        //Check identifier length against the whole token length, exactly as the
        //number check below does (comparing the last index let one extra character through).
        if( len > MAX_ID_LEN ){
            printf( "Error: Variable name: %s is too long.\n", s );
            return 1;
        }

    }
    else if( isdigit( (unsigned char) s[0] ) ){

        //Check invalid identifier (starts with number).
        for( i = 1; i < len; i++ ){

            if( isalpha( (unsigned char) s[i] ) ){
                printf( "Error: Variable name: %s begins with a number.\n", s );
                return 1;
            }

        }

        //Check number length.
        if( len > MAX_NUM_LEN ){
            printf( "Error: Number %s is too long.\n", s );
            return 1;
        }

    }

    return 0;

} //End of detectError.
Exemplo n.º 2
0
enum status typeCheck(char* lookAhead)
{
	if (isLetter(*lookAhead))
		return ID;
	if (isDigit(*lookAhead))
		return DIGIT;
	if (isWhiteSpace(*lookAhead))
		return WHITE_SPACE;
	if (isSpecialSymbol(*lookAhead))
		return SPECIAL_SYMBOL;
	if (*lookAhead == -1)
		return ENDFILE;

	return ERROR;
}
//Lexemes longer than one character that the splitter must emit as a single token.
static const char *const multiCharLexemes[] = {
    "/*", "*/", "!=", "<=", ">=", ":=", "null", "odd"
};

//Removes the first `count` characters of `buffer` (a string of length `len`) in place
//and returns the new length. memmove is used because source and destination overlap.
static int dropLeading( char *buffer, int len, int count ){
    memmove( buffer, buffer + count, (size_t)( len - count ) + 1 ); //+1 copies the terminator.
    return len - count;
}

//Closes `stream` unless it is NULL.
static void closeIfOpen( FILE *stream ){
    if( stream != NULL ){
        fclose( stream );
    }
}

//Reads whitespace-separated words from `fin` and writes them to `out`, one space after each,
//splitting every word around the special symbols it contains. Invalid symbols are left in
//place; they are reported later by detectError().
static void splitRawTokens( FILE *fin, FILE *out ){

    char buffer[ 10000 ]; //For reading in tokens; the %9999s width below keeps reads inside it.
    const size_t lexemeCount = sizeof multiCharLexemes / sizeof multiCharLexemes[ 0 ];

    while( fscanf( fin, "%9999s", buffer ) == 1 ){

        int len = (int) strlen( buffer ); //Length of what is still unprocessed in buffer.
        int i;

        for( i = 0; i < len; i++ ){

            size_t k;
            int matched = 0;

            if( !isSpecialSymbol( buffer[ i ] ) ){
                //Reached the end without meeting a special symbol: the remainder is one token.
                if( i == len - 1 ){
                    fprintf( out, "%s ", buffer );
                }
                continue;
            }

            //Multi-character lexeme starting here? Emit what precedes it, then the lexeme.
            //The text before the lexeme is printed even when it is empty (i == 0).
            for( k = 0; k < lexemeCount; k++ ){
                const char *lexeme = multiCharLexemes[ k ];
                size_t lexLen = strlen( lexeme );

                if( strncmp( buffer + i, lexeme, lexLen ) == 0 ){
                    fprintf( out, "%.*s ", i, buffer );
                    fprintf( out, "%s ", lexeme );
                    len = dropLeading( buffer, len, i + (int) lexLen );
                    matched = 1;
                    break;
                }
            }

            if( !matched ){
                //Single special symbol: emit the text before it (if any), then the symbol itself.
                if( i > 0 ){
                    fprintf( out, "%.*s ", i, buffer );
                }
                fprintf( out, "%c ", buffer[ i ] );
                len = dropLeading( buffer, len, i + 1 );
            }

            i = -1; //Buffer was shifted; the loop increment restarts the scan at index 0.
        }

    }
}

//Reads the pre-split tokens from `fin` and writes the lexeme table to `table` and the
//lexeme list to `list`. Tokens between a "/*" token and the next "*/" token are skipped.
//Returns 1 when every token was accepted, 0 as soon as detectError() rejects one.
static int writeLexemeFiles( FILE *fin, FILE *table, FILE *list ){

    char buffer[ 10000 ]; //Current token; the %9999s width below keeps reads inside it.
    int isComment = 0; //If currently reading in a comment, = 1; otherwise, = 0.

    //Print header for lexeme table.
    fprintf( table, "Lexeme Table:\n%15s%15s\n", "lexeme", "token type");

    while( fscanf( fin, "%9999s ", buffer ) == 1 ){

        //Recognize beginning of comment block.
        if( !strcmp( buffer, "/*" ) ){
            isComment = 1;
        }

        //Recognize end of comment block.
        else if( isComment == 1 && !strcmp( buffer, "*/" ) ){
            isComment = 0;
        }

        //Only examine tokens while not inside a comment block.
        else if( !isComment ){

            //Error detection by token.
            if( detectError( buffer ) ){
                return 0;
            }

            //Reserved word: isReservedWord() yields -1 for anything that is not one.
            int isResWord = isReservedWord( buffer );

            if( isResWord != -1 ){
                fprintf( table, "%15s%15d\n", buffer, isResWord );
                fprintf( list, "%d ", isResWord );
            }

            //Identifier. The cast keeps the <ctype.h> call defined for negative char values.
            else if( isalpha( (unsigned char) buffer[0] ) ){
                fprintf( table, "%15s%15d\n", buffer, 2 );
                fprintf( list, "2 %s ", buffer );
            }

            //Number.
            else if( isdigit( (unsigned char) buffer[0] ) ){
                fprintf( table, "%15s%15d\n", buffer, 3 );
                fprintf( list, "3 %s ", buffer );
            }

            //Otherwise, the token is some other valid symbol (checked for errors above).
            else{
                fprintf( table, "%15s%15d\n", buffer, isOtherValid( buffer ) );
                fprintf( list, "%d ", isOtherValid( buffer ) );
            }
        }

    }

    return 1;
}

//Program entry point.
//
//Asks for the name of a source program on stdin, then:
//  1. splits the program into raw tokens, written to temp_output.txt,
//  2. writes lexeme_table.txt and lexeme_list.txt from those tokens,
//  3. copies the source program to source_program.txt and deletes temp_output.txt.
//
//Returns 1 when the file name cannot be read or a file cannot be opened. Returns 0 otherwise,
//including when detectError() rejects a token; in that case step 3 is skipped, as before.
int main( void ){

    //Prompt user for source program name.
    char sp[50]; //Name of input file.
    printf( "What is the file name for your source program?\n" );
    if( scanf( "%49s", sp ) != 1 ){ //Width keeps the name inside sp.
        fprintf( stderr, "Error: could not read the source program name.\n" );
        return 1;
    }

    /** Step 1: create the intermediary output (unprocessed tokens). **/
    FILE *fin = fopen( sp, "r" );
    FILE *temp_fout = fopen( "temp_output.txt", "w" );

    if( fin == NULL || temp_fout == NULL ){
        fprintf( stderr, "Error: could not open %s or temp_output.txt.\n", sp );
        closeIfOpen( fin );
        closeIfOpen( temp_fout );
        return 1;
    }

    splitRawTokens( fin, temp_fout );

    fclose( temp_fout );
    fclose( fin );

    /** Step 2: examine the tokens and write the lexeme table and list. **/
    fin = fopen( "temp_output.txt", "r" );
    FILE *table = fopen( "lexeme_table.txt", "w" ); //Will hold the lexeme table.
    FILE *list = fopen( "lexeme_list.txt", "w" ); //Will hold the lexeme list.

    if( fin == NULL || table == NULL || list == NULL ){
        fprintf( stderr, "Error: could not open the lexeme output files.\n" );
        closeIfOpen( fin );
        closeIfOpen( table );
        closeIfOpen( list );
        return 1;
    }

    int accepted = writeLexemeFiles( fin, table, list );

    fclose( fin );
    fclose( table );
    fclose( list );

    if( !accepted ){
        return 0; //Same exit status the program has always used after a lexical error.
    }

    /** Step 3: copy the source program. **/
    fin = fopen( sp, "r" ); //File containing source program.
    FILE *source = fopen( "source_program.txt", "w" ); //File to copy source program to.

    if( fin == NULL || source == NULL ){
        fprintf( stderr, "Error: could not copy %s to source_program.txt.\n", sp );
        closeIfOpen( fin );
        closeIfOpen( source );
        remove( "temp_output.txt" );
        return 1;
    }

    //Copy character by character. fgetc() returns an int so that EOF can be told apart
    //from every valid byte value; storing it in a char loses that distinction.
    int copy;
    while( ( copy = fgetc( fin ) ) != EOF ){
        fputc( copy, source );
    }

    //Resource management.
    fclose( fin );
    fclose( source );
    remove( "temp_output.txt" ); //Delete intermediary output.

    return 0;

} //End of main.
Exemplo n.º 4
0
int scanner(char* fileName)
{
	tokenPointer tokenList = NULL;

	FILE* sourceCode = fopen(fileName, "r");
	char lookAhead; /* 탐색을 위한 Character */

	if (sourceCode == NULL) /* 파일 입출력 실패 */
	{
		printf("File open fail\n");
		return FALSE;
	}
	else /* 파일 입출력 성공 */
	{
		printf("=====File open successfully=====\n");

		/* File read */
		while ((lookAhead = fgetc(sourceCode)) != EOF) /* lookAhead로 문자열을 하나씩 컨슘함 */
		{
			/* 타입 체크 */
			if (type != COMMENT)
				type = typeCheck(&lookAhead);

			do
			{
				noConsume = FALSE; /* 서로 다른 타입의 심볼이 붙어 있는지 판별 */

				switch (type)
				{
				case WHITE_SPACE:
					break;

				case COMMENT: /* 코멘트일 경우 끝날 때까지 모두 컨슘 */
					checkInComment(sourceCode, &lookAhead, &line, &type);
					break;

				case ID:
					if (checkInID(sourceCode, &lookAhead, identifier))
						makeToken(&tokenList, identifier, type, line, &count);

					if (!isLetter(lookAhead) && !isWhiteSpace(lookAhead))
					{
						noConsume = TRUE;
						type = typeCheck(&lookAhead);
					}
					break;

				case DIGIT:
					if (checkInDigit(sourceCode, &lookAhead, identifier))
						makeToken(&tokenList, identifier, type, line, &count);

					if (!isDigit(lookAhead) && !isWhiteSpace(lookAhead))
					{
						noConsume = TRUE;
						type = typeCheck(&lookAhead);
					}
					break;

				case SPECIAL_SYMBOL:
					switch (checkInSpecialSymbol(sourceCode, &lookAhead, identifier))
					{
					case TRUE: /* 일반적인 특수문자 일 경우 */
						makeToken(&tokenList, identifier, type, line, &count);
						break;

					case COMMENT: /* 코멘트일 경우 */
						type = COMMENT;
						break;

					case ERROR: /* 에러가 났을 경우 */
						type = ERROR;
						break;
					}

					if (!isSpecialSymbol(lookAhead) && !isWhiteSpace(lookAhead))
					{
						noConsume = TRUE;
						type = typeCheck(&lookAhead);
					}
					break;

				case ERROR:
					checkInError(sourceCode, &lookAhead, identifier);
					makeToken(&tokenList, identifier, ERROR, line, &count);
					type = typeCheck(&lookAhead);

					if (type != ERROR) /* 에러를 처리한 후 타입이 에러가 아니라도 미리 앞을 봤으므로 컨슘을 또 하지 않음 */
						noConsume = TRUE;
					break;
				}
				
				if (lookAhead == '\n')
					line++;

			} while (noConsume || type == ERROR); /* 서로 다른 타입의 심볼이 붙어 있을 경우, 에러일 경우 fgetc로 컨슘 하지 않고 바로 다시 체크 */
		}

		printOut(tokenList, sourceCode);

		if (fclose(sourceCode) == 0)
			printf("=====File close successfully=====\n");
		else
			printf("=====File close fail=====\n");
	}
}