Exemple #1
0
/**
 * gsdl_tokenizer_next:
 * @self: A valid %GSDLTokenizer.
 * @result: (out callee-allocates): A %GSDLToken to initialize and fill in.
 * @err: (out) (allow-none): Location to store any error, may be %NULL.
 *
 * Fetches the next token from the input. Depending on the source of input, may set an error in one
 * of the %GSDL_SYNTAX_ERROR, %G_IO_CHANNEL_ERROR, or %G_CONVERT_ERROR domains.
 *
 * Spaces, tabs, backslash-newline continuations, block comments and line comments (introduced by
 * "//", "--" or "#") are skipped. A line comment stops before its newline, so the newline is still
 * reported as a token. A carriage return, optionally followed by a line feed, is reported as a
 * single '\n' token. For delimited tokens (binary, string, backquote string, character) the closing
 * delimiter is consumed here and a %GSDL_SYNTAX_ERROR_MISSING_DELIMITER error is set when it is
 * absent.
 *
 * Returns: Whether a token could be successfully read.
 */
bool gsdl_tokenizer_next(GSDLTokenizer *self, GSDLToken **result, GError **err) {
	gunichar c, nc;
	int line;
	int col;

	retry:
	// Remember where the token starts before anything is consumed
	line = self->line;
	col = self->col;
	if (!_read(self, &c, err)) return false;

	if (G_UNLIKELY(c == EOF)) {
		*result = _maketoken(T_EOF, line, col);
		return true;
	} else if (c == '\r') {
		// Fold "\r" and "\r\n" into a single '\n' token
		if (_peek(self, &c, err) && c == '\n') _consume(self);

		*result = _maketoken('\n', line, col);
		FAIL_IF_ERR();

		return true;
	} else if ((c == '/' && _peek(self, &nc, err) && nc == '/') || (c == '-' && _peek(self, &nc, err) && nc == '-') || c == '#') {
		// Two-character introducers still have their second character pending
		if (c != '#') _consume(self);
		// Stop in front of the newline so it is tokenized on the retry
		while (_peek(self, &c, err) && !(c == '\n' || c == EOF)) _consume(self);

		goto retry;
	} else if (c == '/' && _peek(self, &nc, err) && nc == '*') {
		// Swallow the opening '*' so it cannot double as the first half of the terminator
		_consume(self);

		for (;;) {
			// A failed read has already reported its error; do not retry on top of it
			if (!_read(self, &c, err)) return false;

			if (c == EOF) {
				_set_error(err,
					self,
					GSDL_SYNTAX_ERROR_UNEXPECTED_CHAR,
					"Unterminated comment"
				);

				return false;
			}

			if (c == '*' && _peek(self, &nc, err) && nc == '/') {
				_consume(self);
				break;
			}
		}

		goto retry;
	} else if (c != 0 && c < 128 && strchr("-+:;./{}=\n", (int) c)) {
		// c != 0 matters: strchr() would otherwise match the string's terminator
		*result = _maketoken(c, line, col);
		return true;
	} else if (c >= '0' && c <= '9') {
		// Plain range test: isdigit() on a (char) cast is undefined for values above 127
		*result = _maketoken(T_NUMBER, line, col);
		return _tokenize_number(self, *result, c, err);
	} else if (g_unichar_isalpha(c) || g_unichar_type(c) == G_UNICODE_CONNECT_PUNCTUATION || g_unichar_type(c) == G_UNICODE_CURRENCY_SYMBOL) {
		*result = _maketoken(T_IDENTIFIER, line, col);
		return _tokenize_identifier(self, *result, c, err);
	} else if (c == '[') {
		*result = _maketoken(T_BINARY, line, col);
		if (!_tokenize_binary(self, *result, err)) return false;

		REQUIRE(_read(self, &c, err));
		if (c == ']') {
			return true;
		} else {
			_set_error(err,
				self,
				GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
				"Missing ']'"
			);
			return false;
		}
	} else if (c == '"') {
		*result = _maketoken(T_STRING, line, col);
		if (!_tokenize_string(self, *result, err)) return false;

		REQUIRE(_read(self, &c, err));
		if (c == '"') {
			return true;
		} else {
			_set_error(err,
				self,
				GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
				"Missing '\"'"
			);
			return false;
		}
	} else if (c == '`') {
		*result = _maketoken(T_STRING, line, col);
		if (!_tokenize_backquote_string(self, *result, err)) return false;

		REQUIRE(_read(self, &c, err));
		if (c == '`') {
			return true;
		} else {
			_set_error(err,
				self,
				GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
				"Missing '`'"
			);
			return false;
		}
	} else if (c == '\'') {
		*result = _maketoken(T_CHAR, line, col);
		// g_unichar_to_utf8() may write up to 6 bytes; one extra zeroed byte keeps the value terminated
		(*result)->val = g_malloc0(7);

		if (!_read(self, &c, err)) return false;

		if (c == '\\') {
			if (!_read(self, &c, err)) return false;

			// Unrecognized escapes fall through and keep the escaped character itself
			switch (c) {
				case 'n': c = '\n'; break;
				case 'r': c = '\r'; break;
				case 't': c = '\t'; break;
				case '"': c = '"'; break;
				case '\'': c = '\''; break;
				case '\\': c = '\\'; break;
			}
		}

		// Input ended inside the literal: there is no character to encode
		if (c == EOF) {
			_set_error(err,
				self,
				GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
				"Missing \"'\""
			);
			return false;
		}

		g_unichar_to_utf8(c, (*result)->val);

		REQUIRE(_read(self, &c, err));
		if (c == '\'') {
			return true;
		} else {
			_set_error(err,
				self,
				GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
				"Missing \"'\""
			);
			return false;
		}
	} else if (c == '\\' && _peek(self, &nc, err) && (nc == '\r' || nc == '\n')) {
		// Line continuation: drop the line break that follows the backslash
		_consume(self);

		// A CRLF pair counts as one line break
		if (nc == '\r' && _peek(self, &nc, err) && nc == '\n') _consume(self);

		goto retry;
	} else if (c == ' ' || c == '\t') {
		// Do nothing
		goto retry;
	} else {
		// Encode into a stack buffer (6 bytes max plus terminator) so nothing needs freeing
		char utf8[7] = {0};
		g_unichar_to_utf8(c, utf8);

		// NOTE(review): the formatted message is heap-allocated; confirm whether _set_error() takes
		// ownership of it, otherwise it leaks
		_set_error(err,
			self,
			GSDL_SYNTAX_ERROR_UNEXPECTED_CHAR,
			g_strdup_printf("Invalid character '%s'(%d)", utf8, c)
		);
		return false;
	}
}
Exemple #2
0
/*
 * Break the NUL-terminated string s into a newly created token queue.
 *
 * Whitespace, '#' and '//' line comments, and block comments are skipped via
 * their helper routines.  The characters '{', '}', '(', ')' and ';' each
 * append a single-character token; keyword characters and quoted strings are
 * handed to their dedicated tokenizers.
 *
 * Returns the queue on success.  Returns NULL when the queue cannot be
 * created, when a helper reports a non-zero status, or when a character
 * matches none of the rules above; in the latter two cases the partially
 * built queue is deleted first.
 */
static struct scfg_token_queue *
_tokenize(char *s)
{
    struct scfg_token_queue *queue;
    int status;

    queue = scfg_token_queue_new();
    if ( NULL == queue ) {
        return NULL;
    }

    while ( '\0' != *s ) {
        /* Leading blanks carry no token */
        _skip_whitespaces(&s);
        if ( '\0' == *s ) {
            break;
        }

        /* Every branch leaves its outcome here; non-zero aborts the scan */
        status = 0;

        switch ( *s ) {
        case '{':
            status = _token_queue_append_single(queue, TOK_LBRACE);
            s++;
            break;
        case '}':
            status = _token_queue_append_single(queue, TOK_RBRACE);
            s++;
            break;
        case '(':
            status = _token_queue_append_single(queue, TOK_LPAREN);
            s++;
            break;
        case ')':
            status = _token_queue_append_single(queue, TOK_RPAREN);
            s++;
            break;
        case ';':
            status = _token_queue_append_single(queue, TOK_SEMICOLON);
            s++;
            break;
        case '#':
            _skip_linecomment(&s);
            break;
        default:
            if ( '/' == s[0] && '/' == s[1] ) {
                _skip_linecomment(&s);
            } else if ( '/' == s[0] && '*' == s[1] ) {
                _skip_blockcomment(&s);
            } else if ( _isckeyword(*s) ) {
                status = _tokenize_keword(queue, &s);
            } else if ( '"' == *s || '\'' == *s ) {
                status = _tokenize_string(queue, &s);
            } else {
                /* Character fits no rule: treat as a tokenize error */
                status = -1;
            }
            break;
        }

        if ( 0 != status ) {
            goto fail;
        }
    }

    return queue;

fail:
    /* Single cleanup point for every error path */
    scfg_token_queue_delete(queue);
    return NULL;
}