Beispiel #1
0
Datei: lexer.c Projekt: 8l/ark-c
/**
 * Advances the lexer cursor by one character.
 *
 * Reports an error and leaves the lexer untouched when the cursor is
 * already beyond the input length. Does nothing when the current character
 * is the end-of-input marker. Otherwise moves to the next character,
 * bumping the line counter (and restarting the column counter) when the
 * character being left behind is a newline.
 */
void consumeCharacter(Lexer *self) {
	// guard: the cursor has run past the end of the input buffer
	if (self->pos > (int) self->inputLength) {
		errorMessage("Reached end of input, pos(%d) ... len(%d)", self->pos, self->inputLength);
		return;
	}

	// guard: nothing left to consume once the end of the file is reached
	if (isEndOfInput(self->currentChar)) {
		return;
	}

	// leaving a newline: start counting columns afresh on the next line
	if (self->currentChar == '\n') {
		self->lineNumber++;
		self->charNumber = 0;
	}

	// step forward and load the character now under the cursor
	self->pos += 1;
	self->currentChar = self->input[self->pos];
	self->charNumber++;
}
Beispiel #2
0
Datei: lexer.c Projekt: 8l/ark-c
/**
 * Lexes a double-quoted string literal and pushes a TOKEN_STRING.
 *
 * Expects the current character to be the opening quote, consumes
 * everything up to the next character accepted by isString(), expects the
 * closing quote and pushes the token.
 *
 * If the input ends before the literal is closed, the error is reported at
 * the position just after the opening quote and the function returns
 * without pushing a token. consumeCharacter() does not advance once the end
 * of input is reached, so without this early return the loop could never
 * finish if errorMessageWithPosition() returns to its caller.
 *
 * NOTE(review): escape sequences are not interpreted here, so a `\"` inside
 * the literal ends it -- confirm whether that is intended.
 */
void recognizeStringToken(Lexer *self) {
	expectCharacter(self, '"');

	// remember where the literal body starts for error reporting
	int errpos = self->charNumber;
	int errline = self->lineNumber;

	// just consume everything up to the closing quote
	while (!isString(self->currentChar)) {
		consumeCharacter(self);
		if (isEndOfInput(self->currentChar)) {
			errorMessageWithPosition(self->fileName, errline, errpos, "Unterminated string literal");
			// the cursor cannot move any further; bail out instead of spinning
			return;
		}
	}

	expectCharacter(self, '"');

	pushToken(self, TOKEN_STRING);
}
Beispiel #3
0
Datei: lexer.c Projekt: 8l/ark-c
/**
 * Lexes a single-quoted character literal and pushes a TOKEN_CHARACTER.
 *
 * Expects the current character to be the opening quote. An immediately
 * following quote is reported as an empty character literal (lexing then
 * continues and the token is still pushed). Otherwise characters are
 * consumed up to the first unescaped closing quote.
 *
 * A backslash and the character after it are consumed as one unit. This
 * keeps `'\''` working and also lets `'\\'` terminate at its real closing
 * quote; merely checking whether the previous character is a backslash
 * mistakes that closing quote for an escaped one.
 *
 * If the input ends before the literal is closed, the error is reported at
 * the position just after the opening quote and the function returns
 * without pushing a token. consumeCharacter() does not advance at end of
 * input, so continuing to loop there would never terminate.
 */
void recognizeCharacterToken(Lexer *self) {
	expectCharacter(self, '\'');

	// remember where the literal body starts for error reporting
	int errpos = self->charNumber;
	int errline = self->lineNumber;
	if (self->currentChar == '\'') {
		errorMessageWithPosition(self->fileName, errline, errpos, "Empty character literal");
	}

	while (self->currentChar != '\'') {
		if (isEndOfInput(self->currentChar)) {
			errorMessageWithPosition(self->fileName, errline, errpos, "Unterminated character literal");
			// the cursor cannot move any further; bail out instead of spinning
			return;
		}

		// skip the backslash so the escaped character is consumed unconditionally
		if (self->currentChar == '\\') {
			consumeCharacter(self);
		}
		consumeCharacter(self);
	}

	expectCharacter(self, '\'');

	pushToken(self, TOKEN_CHARACTER);
}
Beispiel #4
0
Datei: lexer.c Projekt: 8l/ark-c
/**
 * Produces the next token from the input.
 *
 * Skips layout and comments, records the token start position, then
 * dispatches on the current character to the matching recognizer. The
 * checks run in a fixed priority order; the first one that matches wins.
 * Reaching the end of input emits the end-of-input token and stops the
 * lexer by clearing `running`.
 */
void getNextToken(Lexer *self) {
	self->startPos = 0;
	skipLayoutAndComments(self);
	self->startPos = self->pos;

	// end of input: emit the final token and stop lexing
	if (isEndOfInput(self->currentChar)) {
		recognizeEndOfInputToken(self);
		self->running = false;
		return;
	}

	// number: a digit, or a '.' immediately followed by a digit
	if (isDigit(self->currentChar) || (self->currentChar == '.' && isDigit(peekAhead(self, 1)))) {
		recognizeNumberToken(self);
		return;
	}

	// identifier
	if (isLetterOrDigit(self->currentChar) || self->currentChar == '_') {
		recognizeIdentifierToken(self);
		return;
	}

	// string literal
	if (isString(self->currentChar)) {
		recognizeStringToken(self);
		return;
	}

	// character literal
	if (isCharacter(self->currentChar)) {
		recognizeCharacterToken(self);
		return;
	}

	// operator
	if (isOperator(self->currentChar)) {
		recognizeOperatorToken(self);
		return;
	}

	// end of line
	if (isEndOfLine(self->currentChar)) {
		recognizeEndOfLineToken(self);
		return;
	}

	// separator
	if (isSeparator(self->currentChar)) {
		recognizeSeparatorToken(self);
		return;
	}

	// anything else is erroneous
	recognizeErroneousToken(self);
}
Beispiel #5
0
Datei: lexer.c Projekt: 8l/ark-c
/**
 * Skips one run of layout characters followed by at most one comment.
 *
 * Three comment forms are handled: `#` line comments, block comments
 * delimited by slash-star and star-slash, and `//` line comments. Layout
 * after a skipped comment is consumed as well.
 *
 * Returns true when a comment was skipped. Returns false when no comment
 * starts at the cursor, or when the input ends inside a comment; an
 * unterminated block comment is additionally reported through
 * errorMessage().
 */
static bool skipLayoutAndCommentsOnce(Lexer *self) {
	while (isLayout(self->currentChar)) {
		consumeCharacter(self);
	}

	// consume a '#' line comment
	if (self->currentChar == '#') {
		consumeCharacter(self);

		while (!isCommentCloser(self->currentChar)) {
			if (isEndOfInput(self->currentChar)) return false;
			consumeCharacter(self);
		}

		while (isLayout(self->currentChar)) {
			consumeCharacter(self);
		}

		return true;
	}

	// consume a block comment and its contents
	if (self->currentChar == '/' && peekAhead(self, 1) == '*') {
		// consume the opening comment symbols
		consumeCharacter(self);
		consumeCharacter(self);

		while (true) {
			if (isEndOfInput(self->currentChar)) {
				errorMessage("Unterminated block comment");
				return false;
			}

			// Look for the closer BEFORE consuming anything: the character
			// right after the opener may already start it (an empty block
			// comment), and consuming first would step over that '*'.
			if (self->currentChar == '*' && peekAhead(self, 1) == '/') {
				// consume the closing comment symbols
				consumeCharacter(self);
				consumeCharacter(self);

				// eat layout stuff like space etc
				while (isLayout(self->currentChar)) {
					consumeCharacter(self);
				}
				break;
			}

			consumeCharacter(self);
		}

		return true;
	}

	// consume a single line comment
	if (self->currentChar == '/' && peekAhead(self, 1) == '/') {
		consumeCharacter(self);	// eat the /
		consumeCharacter(self);	// eat the /

		while (!isCommentCloser(self->currentChar)) {
			if (isEndOfInput(self->currentChar)) return false;
			consumeCharacter(self);
		}

		while (isLayout(self->currentChar)) {
			consumeCharacter(self);
		}

		return true;
	}

	return false;
}
Beispiel #6
0
/*
 * Returns the length, in characters, of the punctuator starting at s.
 *
 * The table lists every multi-character punctuator this lexer recognises,
 * longest first, so the first prefix match is also the longest one. Any
 * character that starts none of them is a one-character punctuator.
 * Matching stops at the first mismatch, so a NUL terminator in s is never
 * read past.
 */
static int lexemePunctuatorLength(const char *s)
{
	static const char *const multiChar[] = {
		"<<=", ">>=", "...",
		"!=", "%:", "%=", "%>", "&&", "&=", "*=", "++", "+=",
		"--", "-=", "->", "/=", ":>", "<%", "<:", "<<", "<=",
		"==", ">=", ">>", "^=", "|=", "||"
	};
	unsigned i;

	for (i = 0; i < sizeof multiChar / sizeof multiChar[0]; i++) {
		const char *pat = multiChar[i];
		int n = 0;

		while (pat[n] != '\0' && s[n] == pat[n]) {
			n++;
		}
		if (pat[n] == '\0') {
			return n;
		}
	}
	return 1;
}

/*
 * Scans the body of a quoted literal whose opening quote is at `open` and
 * returns a pointer to the character that stopped the scan: the closing
 * quote, or the first character rejected by isSourceCharSet().
 *
 * A backslash followed by the quote character or by another backslash is
 * skipped as a pair. Pairing the double backslash matters: otherwise its
 * second backslash would pair with the closing quote and hide it.
 */
static char *lexemeQuotedEnd(char *open, char quote)
{
	char *p = open + 1;

	while (isSourceCharSet(*p) && *p != quote) {
		if (*p == '\\' && (*(p + 1) == quote || *(p + 1) == '\\')) {
			p += 2;
		} else {
			p++;
		}
	}
	return p;
}

/*
 * Classifies the lexeme that starts at s.
 *
 * s      - text to scan; scanning relies on the helper predicates to stop
 *          at the end of the text.
 * length - receives the number of characters in the lexeme.
 * type   - receives the lexeme kind (one of the LT_* constants).
 *
 * Returns -1 (with *length = 0 and *type = LT_INV) when s is at the end of
 * input, 0 otherwise. An unterminated character constant or string literal
 * prints a diagnostic to stderr and terminates the process with exit(1).
 *
 * Checks are made in this order: end of input, new line, white space,
 * number, identifier, character constant, string literal, punctuator.
 * Anything not matched earlier is treated as a punctuator.
 */
int nextLexeme(char *s, int *length, int *type){
	char *p = s;

	if(isEndOfInput(*p)){
		*length = 0;
		*type = LT_INV;
		return -1;
	}

	if(isNewLine(*p)){
		*length = 1;
		*type = LT_NL;
		return 0;
	}

	if(isWhiteSpace(*p)){
		while(isWhiteSpace(*p)){
			p++;
		}
		*type = LT_WS;
	} else if(isNumberBegining(*p, *(p+1))){
		while(isPPNumber(*p)){
			/* an exponent marker and its sign belong together */
			if(isPPNumberExp(*p) && isPPNumberExpSign(*(p+1))){
				p += 2;
			} else {
				p++;
			}
		}
		*type = LT_NUM;
	} else if(isIdentifierStart(*p)){
		while(isIdentifier(*p)){
			p++;
		}
		*type = LT_ID;
	} else if(*p == '\''){
		p = lexemeQuotedEnd(p, '\'');
		if(*p != '\''){
			/* go through unsigned char so bytes >= 0x80 print as two hex digits */
			fprintf(stderr, "invalid token: %02x\n", (unsigned int)(unsigned char)*p);
			exit(1);
		}
		p++;
		*type = LT_CC;
	} else if(*p == '\"'){
		p = lexemeQuotedEnd(p, '\"');
		if(*p != '\"'){
			fprintf(stderr, "invalid string literal token\n");
			exit(1);
		}
		p++;
		*type = LT_STRLIT;
	} else {
		p += lexemePunctuatorLength(p);
		*type = LT_PUNC;
	}

	*length = (int)(p - s);
	return 0;
}