C++ (Cpp) identifyToken 예제들

예제 #1

0

파일 보기

파일: Compiler.c 프로젝트: mcastagna/PL0-Compiler

//Function accepts program as input file and generates lexeme list as output.
//
//The whole input is read into memory and walked one character at a time.
//Runs of letters/digits are collected into a token and handed to
//identifyToken; every other non-space character is handed to identifySymbol.
//The collected lists are finally passed to printLexList together with the
//"lexlist.txt" handle.
//
//Returns 1 on success. Returns -1 if input is NULL, if lexlist.txt cannot be
//opened, if the program or a single token does not fit the scanner buffers,
//or if identifyToken / identifySymbol reports -1.
int scan(FILE* input)
{
    //Capacities of the fixed-size scanner buffers
    enum { MAX_PROGRAM_CHARS = 2000, MAX_TOKEN_CHARS = 200 };

    FILE* lex = fopen("lexlist.txt", "w");

    if (input == NULL) {
        fprintf(output, "File does not exist.\n");
        if (lex != NULL)
            fclose(lex);
        return -1;
    }

    if (lex == NULL) {
        fprintf(output, "Could not open lexlist.txt for writing.\n");
        return -1;
    }

    //Read the program, stopping on EOF *or* a read error, and never writing
    //past the end of the buffer
    char program[MAX_PROGRAM_CHARS];
    int length = 0;
    int ch;

    while ((ch = fgetc(input)) != EOF)
    {
        if (length >= MAX_PROGRAM_CHARS) {
            fprintf(output, "Program exceeds %d characters.\n", MAX_PROGRAM_CHARS);
            fclose(lex);
            return -1;
        }
        program[length] = (char)ch;
        length++;
    }

//Declare arrays to store symbolic values, numbers, and identifiers
    //NOTE(review): sized on the assumption that each token/symbol adds at most
    //one entry, as the original per-character sizing did - confirm
    int lexemeList[MAX_PROGRAM_CHARS];
    int numberList[MAX_PROGRAM_CHARS];
    int currLex = 0;
    int currNum = 0;
    int currID = 0;
    identifier* idList = NULL;

    char token[MAX_TOKEN_CHARS];
    int t = 0;
    int j;

    //Read each character in program
    for (j = 0; j < length; j++)
    {
        //Letters and numbers are added to current token until symbol or space is reached
        if (is_alpha(program[j]) || is_digit(program[j]))
        {
            //Leave room for the terminating null character
            if (t >= MAX_TOKEN_CHARS - 1) {
                fprintf(output, "Token exceeds %d characters.\n", MAX_TOKEN_CHARS - 1);
                fclose(lex);
                return -1;
            }
            token[t] = program[j];
            t++;
            continue;
        }

        //A symbol or space was reached: flush the pending token, if any
        if (t != 0)
        {
            token[t] = '\0';

            //If the token was invalid, the program terminates
            if (identifyToken(lex, token, t, lexemeList, &currLex, numberList, &currNum, &idList, &currID) == -1) {
                fclose(lex);
                return -1;
            }
        }

        //A non-space symbol is identified; identifySymbol returns the index
        //scanning resumes from, or -1 if the symbol was invalid
        if (!is_space(program[j]))
            j = identifySymbol(lex, program, length, j, lexemeList, &currLex);

        if (j == -1) {
            fclose(lex);
            return -1;
        }

        t = 0;
    }

    //A token that runs up to the very end of the input has no terminating
    //symbol or space, so it must be flushed here or it would be lost
    if (t != 0)
    {
        token[t] = '\0';

        if (identifyToken(lex, token, t, lexemeList, &currLex, numberList, &currNum, &idList, &currID) == -1) {
            fclose(lex);
            return -1;
        }
    }

    //Print list of lexemes with numbers and identifiers
    printLexList(lex, lexemeList, currLex, numberList, currNum, idList, currID);

    fclose(lex);

    return 1;
}

예제 #2

0

파일 보기

파일: Tokenizer.cpp 프로젝트: BackupTheBerlios/escript-svn

/**
 *   Reads the next Token from prog beginning with position cursor and moves
 *   cursor to the next Token.
 *
 *   @param prog      Null-terminated program text being tokenized.
 *   @param cursor    In/out: index into prog; advanced past whatever was consumed.
 *   @param line      In/out: line counter, incremented for every '\n' that is
 *                    stepped over in whitespace, block comments and string literals.
 *   @param startPos  Out: set to the cursor position after leading whitespace
 *                    has been skipped.
 *   @param tokens    Tokens produced so far; only the last entry is inspected,
 *                    to decide whether a "-" is a unary minus.
 *   @return A newly allocated Token, or NULL when a comment was consumed
 *           (no token is produced in that case).
 *   @throws Error*   (allocated with new) on an unclosed comment or string, a
 *                    malformed number, an unknown operator, or any character
 *                    that matches no rule.
 */
Token * Tokenizer::readNextToken(const char * prog, int & cursor,int &line,size_t & startPos,tokenList_t & tokens)  throw (Exception *) {

	char c=prog[cursor];

	// Step over whitespace characters
	// ('\0' is part of the loop condition so that end of input is detected here
	// and reported as a TEndScript token.)
	while ( isWhitechar(c) || c=='\0') {
		if (c=='\n') line++;
		if (c=='\0')
			return new TEndScript();
		cursor++;
		c=prog[cursor];
	}
	startPos = static_cast<size_t>(cursor);

	// Multiline Comment
	// Returns 0 if a comment is read.
	if (c=='/'&& prog[cursor+1]=='*') {
		cursor+=2;
		if (prog[cursor]=='\0')
			throw(new Error("Unclosed Comment",line));

		while (true) {
			if (prog[cursor]=='\n') line++;
			cursor++;
			// The closing "*/" is detected by looking back one character, so the
			// '*' of the opening "/*" can never double as the closing one.
			if ( prog[cursor] =='/' && prog[cursor-1] =='*') {
				cursor++;
				return NULL;
				//return new TEndCommand(); // Sure of this?
			}

			if (prog[cursor]=='\0')
				throw(new Error("Unclosed Comment",line));

		}
	}
	// SingleLine Comment
	// Returns 0 if a comment is read.
	// The cursor is left on the terminating '\n' or '\0', not behind it.
	else  if (c=='/'&& prog[cursor+1]=='/') {
		cursor++;
		while (true) {
			if (prog[cursor]=='\0'||prog[cursor]=='\n')
				return NULL;
			cursor++;
		}
	}
	// Numbers
	else if (isNumber(c)) {
		// NOTE(review): getNumber takes `to` as an argument and the check below
		// expects it to have moved past the number - presumably it is passed by
		// reference; confirm against StringUtils.
		int to=cursor;
		double number=StringUtils::getNumber(prog,to);
		// Accept only if something was consumed and the number is not directly
		// followed by an identifier character.
		if (to>cursor && !isChar(prog[to])) {
			cursor=to;
			return new TObject(Number::create(number));
		} else {
			std::cout << number ;
			throw(new Error(  std::string("Syntax Error in Number."),line));
		}

		// Identifiers, Control commands, true/false
	} else if (isChar(c)) {
		std::string accum;
		while ( isNumber(c) || isChar(c)) {
			accum+=c;
			cursor++;
			c=prog[cursor];
		}
		identifierId id=EScript::stringToIdentifierId(accum);
		// Resolution order: token returned by identifyToken (cloned), then
		// __LINE__, then a named operator, otherwise a plain identifier.
		Token * o=identifyToken(id);
		if (o!=NULL) {
			return o->clone();
		}else if (id==Consts::IDENTIFIER_LINE) { // __LINE__
			return new TObject(Number::create(line));
		}  else  {
			const Operator *op=Operator::getOperator(id);
			if (op!=NULL)
				return new TOperator(op);
			return new TIdentifier(id);
		}
	// Single-character structural tokens
	} else if (c==';') {
		cursor++;
		return new TEndCommand();
	} else if (c=='{') {
		cursor++;
		return new TStartBlock();
	} else if (c=='}') {
		cursor++;
		return new TEndBlock();
	} else if (c=='(') {
		cursor++;
		return new TStartBracket();
	} else if (c==')') {
		cursor++;
		return new TEndBracket();
	} else if (c==',') {
		cursor++;
		return new TDelimiter();
	} else if (c=='[') {
		cursor++;
		return new TStartIndex();
	} else if (c==']') {
		cursor++;
		return new TEndIndex();
	// A ':' directly followed by '=' or ':' is not a TColon; it falls through
	// to the remaining branches.
	} else if (c==':' && prog[cursor+1]!='=' && prog[cursor+1]!=':' ) {
		cursor++;
		return new TColon();
	// '$' followed by an identifier character: the name after the '$' is
	// returned as an Identifier object wrapped in a TObject.
	} else if (c=='$' && isChar(prog[cursor+1]) ){
	c=prog[++cursor]; // consume '$'
	std::string accum;
		while ( isNumber(c) || isChar(c)) {
			accum+=c;
			cursor++;
			c=prog[cursor];
		}
//        std::cout << "FOUND ID :"<<accum<<":"<<cursor<<"\n";
		return new TObject(Identifier::create(accum));

	} else if ( isOperator(c) ) {
		// Collect the whole run of operator characters using a local index;
		// cursor is only advanced once a known operator has been matched.
		int i=cursor;
		std::string accum;
		while (isOperator(c)) {
			accum+=c;
			i++;
			c=prog[i];
			//  if(accum!="-"&& c=='-')
			//      break;
		}
		int size=accum.size();
		const Operator * op=NULL;
		// Longest match: try the full run first, then ever shorter prefixes.
		while (true) {
			std::string ops=accum.substr(0,size);
			op=Operator::getOperator(ops);
			if (op!=NULL) {
				cursor+=size;
				//if(size>1) cursor--;
				break;
			}
			size--;
			if (size<=0) {
				std::cout  << std::endl<< accum << std::endl;
				throw(new Error(std::string("Unknown Operator: ")+accum,line));
			}
		}
		// test for unary minus
		// "-" is replaced by the operator "_-" unless the previous token is a
		// closing bracket, closing index, identifier or object.
		if (op->getString()=="-") {
			Token * last=tokens.size()>0?tokens.at(tokens.size()-1).get():NULL; // Bugfix[BUG:20090107]
			if ( last==NULL ||
					(!(dynamic_cast<TEndBracket *>(last) || dynamic_cast<TEndIndex *>(last)||
					   dynamic_cast<TIdentifier *>(last) || dynamic_cast<TObject *>(last)))){
//					dynamic_cast<Number *>(last)|| dynamic_cast<String *>(last)||dynamic_cast<Bool *>(last)))){
				// TODO ++,--

				op=Operator::getOperator("_-");
			}
		}

		return new TOperator(op);
	}
	// String: ".*" | '.*'
	else if (c=='"' || c=='\'') {
		char stringEncloser=c;

		cursor++;
		c=prog[cursor];
		std::ostringstream s;

		while (c!='\0' && c!= stringEncloser) {
			if (c=='\n')
				line++;
			// Backslash escape: the character after the backslash is translated;
			// characters without a mapping below are appended unchanged.
			if (c=='\\' ) { // http://de.wikipedia.org/wiki/Steuerzeichen
				cursor++;
				c=prog[cursor];
				if (c==0)
					throw(new Error(std::string("Unclosed String. 1")+s.str().substr(0,10),line));
				else if (c=='0') // NULL
					c='\0';
				else if (c=='a') // BELL
					c='\a';
				else if (c=='b') // BACKSPACE
					c='\b';
				else if (c=='n') // LINEFEED
					c='\n';
				else if (c=='r') // CARRIAGE RETURN
					c='\r';
				else if (c=='t') // TAB
					c='\t';
				else if (c=='\\')
					c='\\';
				else if (c=='"')
					c='\"';
				else if (c=='\'')
					c='\'';
			}
			s<<c;
			cursor++;
			c=prog[cursor];
		}
		if (c=='\0')
			throw(new Error(std::string("Unclosed String. 2")+s.str().substr(0,10),line));
		// Step over the closing quote.
		cursor++;
		return new TObject(String::create(s.str()));
	}
//        else if (c=='.') {
//            //cursor++;
//            return new TDot();
//        }
	//    cursor++;
	//std::cout << "\n"<<(int)(prog+cursor);
	// No rule matched: report up to 10 characters of preceding context.
	std::cout << " \a !!!";
	throw(new Error(std::string("Unknown Syntax Error near \n...")+(prog+ (cursor>10?(cursor-10):0) ),line));
	// Never reached: the throw above always leaves the function.
	return new Token();
}

예제 #3

0

파일 보기

파일: machineTest.c 프로젝트: christian-mann/compilers_lexer

int main() {
	char *valid[] = {
		"id34567890",
		"1234567890",
		"12345.12345",
		"12345.02345",
		"12345.12345E11",
		"12345.12345E+11",
		"12345.12345E-11",
		">",
		"<",
		":=",
		" ",
		"\n",
		"if",
		"[",
		"(",
		":",
		";",
		"..",
	};
	char *invalid[] = {
		"id345678901",
		"12345678901",
		"02345678901",
		"12345.123451",
		"12345.12345E111",
		"12345.12345E+111",
		"12345.12345E-01",
	};
	char *extra[] = {
		"...",
		"12345E11",
	};

	ReservedWordList *rwl = parseResWordFile(fopen("../../data/reserved-words.txt", "r"));
	SymbolTable *s = (SymbolTable*)malloc(sizeof(SymbolTable));
	for(int i = 0; i < sizeof(valid)/sizeof(valid[0]); i++) {
		MachineResult res = identifyToken(valid[i], rwl, s);
		if(res.error || *res.newString != 0) {
			printf("Something went wrong with string %s: ", valid[i]);
			printf("{%s, %s, %d, %d, %d}\n", res.lexeme, res.newString, res.type, res.attribute, res.error);
		} else {
			//printf("%s -> %s\n", res.lexeme, convertConstantToString(res.type));
		}
	}

	for(int i = 0; i < sizeof(invalid)/sizeof(invalid[0]); i++) {
		MachineResult res = identifyToken(invalid[i], rwl, s);
		if(!res.error) {
			printf("Something went wrong with string %s: ", invalid[i]);
			printf("{%s, %s, %d, %d, %d}\n", res.lexeme, res.newString, res.type, res.attribute, res.error);
		} else {
			//printf("%s -> %s, %s\n", invalid[i], convertConstantToString(res.type), convertConstantToString(res.error));
		}
	}

	for(int i = 0; i < sizeof(extra)/sizeof(extra[0]); i++) {
		MachineResult res = identifyToken(extra[i], rwl, s);
		if(res.newString == 0) {
			printf("Something went wrong with string %s: ", extra[i]);
			printf("{%s, %s, %d, %d, %d}\n", res.lexeme, res.newString, res.type, res.attribute, res.error);
		} else {
			//printf("%s -> %s, %s\n", extra[i], res.lexeme, res.newString);
		}
	}
	printf("If no other output, all tests passed.\n");
	return 0;
}

예제 #4

0

파일 보기

파일: lexer.c 프로젝트: christian-mann/compilers_lexer

/*
 * Lexer driver: reads the source a line at a time, splits each line into
 * tokens with identifyToken, and writes
 *   - one row per non-whitespace token to the token file (default: stdout),
 *   - the numbered source lines plus lexical errors to the listing file,
 *   - the words of the global symbol table `tab` to the symbol table file.
 *
 * Usage: ./a.out -r sfReservedWords [-l sfListing] [-t sfToken] [-s sfSymbolTable] [sfSrc]
 * If sfSrc is missing or cannot be opened, the source is read from stdin.
 */
int main(int argc, char **argv) {
	//open the files given as an argument
	char *sfSrc, *sfListing, *sfToken, *sfSymbolTable, *sfReservedWords;
	sfSrc = sfListing = sfToken = sfSymbolTable = sfReservedWords = NULL;

	FILE *fSrc, *fListing, *fToken, *fSymbolTable, *fReservedWords;
	fSrc = fListing = fToken = fSymbolTable = fReservedWords = NULL;

	// Start at 1: argv[0] is the program name and must never be mistaken for
	// the source file.
	int i;
	for(i = 1; i < argc; i++) {
		char **target = NULL;
		if(!strcmp(argv[i], "-l")) {
			target = &sfListing;
		} else if(!strcmp(argv[i], "-t")) {
			target = &sfToken;
		} else if(!strcmp(argv[i], "-r")) {
			target = &sfReservedWords;
		} else if(!strcmp(argv[i], "-s")) {
			target = &sfSymbolTable;
		}

		if(target) {
			if(i + 1 < argc) {
				// Consume the value so it is not also taken as the source
				// file (which would later be truncated when opened for writing).
				*target = argv[++i];
			} else {
				fprintf(stderr, "Warning: option %s requires an argument.\n", argv[i]);
			}
		} else if(i == argc-1) {
			// A trailing positional argument is the source file.
			sfSrc = argv[i];
		}
	}
	if(!sfSrc || (fSrc = fopen(sfSrc, "r")) == NULL) {
		fprintf(stderr, "Warning: source file not given or not found, using stdin.\n");
		fSrc = stdin;
	}
	if(!sfToken || (fToken = fopen(sfToken, "w")) == NULL) {
		fprintf(stderr, "Warning: token file not given or not found, using stdout.\n");
		fToken = stdout;
	}
	if(!sfSymbolTable || (fSymbolTable = fopen(sfSymbolTable, "w")) == NULL) {
		fprintf(stderr, "Warning: symbol table file not given or not found, not outputting symbol table.\n");
	}
	if(!sfListing || (fListing = fopen(sfListing, "w")) == NULL) {
		fprintf(stderr, "Warning: listing file not given or not found, not outputting listing.\n");
	}


	//create symbol table
	// NOTE(review): sfReservedWords is NULL when -r is omitted; whether
	// machinesInit tolerates that is not visible here - confirm.
	machinesInit(sfReservedWords);

	//begin reading a line at a time
	char sLine[80];
	int cLine = 1;
	bool eof = false;
	while(!eof) {
		// Decide "end of input" from fgets' return value: feof() is already set
		// after a final line without trailing newline has been read
		// successfully, and is never set on a read error.
		bool atEnd = (fgets(sLine, sizeof(sLine), fSrc) == NULL);
		if(atEnd) {
			// Feed the tokenizer a lone EOF character so it emits T_EOF.
			sLine[0] = EOF;
			sLine[1] = 0;
		}
		int length = (int)strlen(sLine);
		if(fListing && !atEnd) {
			fprintf(fListing, "%d\t%s", cLine, sLine);
			// Terminate an unterminated final line so error rows start on
			// their own line.
			if(length > 0 && sLine[length-1] != '\n' && feof(fSrc)) fputc('\n', fListing);
		}


		//split line into tokens
		char *psLine = sLine;
		while(psLine < sLine + length && fToken) {
			MachineResult res = identifyToken(psLine);
			if(res.type == T_WS) { //we don't care about whitespace

			} else if(res.type == T_ID) {
				fprintf(fToken, "%d\t%s\t%s\t%p\n", cLine, res.lexeme, convertConstantToString(res.type), res.pointer);
				if(res.error && fListing) {
					fprintf(fListing, "%s:\t%p:\t%s\n", convertConstantToString(res.type), res.pointer, res.lexeme);
				}
			} else if(res.type == T_EOF) {
				fprintf(fToken, "%d\t%s\t%s\t%s\n", cLine, "(EOF)", convertConstantToString(res.type), convertConstantToString(res.attribute));
				eof = true;
			} else {
				fprintf(fToken, "%d\t%s\t%s\t%s\n", cLine, res.lexeme, convertConstantToString(res.type), convertConstantToString(res.attribute));
				if(res.error && fListing) {
					fprintf(fListing, "%s:\t%s:\t%s\n", convertConstantToString(res.type), convertConstantToString(res.attribute), res.lexeme);
				}
			}
			char *next = res.newString;
			free(res.lexeme);
			// A tokenizer that does not advance would otherwise spin forever.
			if(next == NULL || next <= psLine) {
				fprintf(stderr, "Warning: tokenizer made no progress on line %d, skipping rest of line.\n", cLine);
				break;
			}
			psLine = next;
		}
		cLine++;
	}

	//print symbol table
	if(fSymbolTable) {
		for(SymbolTable* s = tab; s && s->entry && s->entry->word; s = s->next) {
			fprintf(fSymbolTable, "%s\t%p\n", s->entry->word, s->entry);
		}
	}

	// Release every stream this function opened; stdin/stdout stay open.
	if(fSymbolTable) fclose(fSymbolTable);
	if(fListing) fclose(fListing);
	if(fToken != stdout) fclose(fToken);
	if(fSrc != stdin) fclose(fSrc);
	return 0;
}