/**
 * gsdl_tokenizer_next:
 * @self: A valid %GSDLTokenizer.
 * @result: (out callee-allocates): A %GSDLToken to initialize and fill in.
 * @err: (out) (allow-none): Location to store any error, may be %NULL.
 *
 * Fetches the next token from the input. Depending on the source of input, may set an error in one
 * of the %GSDL_SYNTAX_ERROR, %G_IO_CHANNEL_ERROR, or %G_CONVERT_ERROR domains.
 *
 * Line comments ("//", "--", "#"), block comments, spaces, tabs and backslash line
 * continuations are skipped; scanning then restarts at the following character.
 *
 * Note that on several failure paths @result has already been assigned a token created by
 * _maketoken() before %false is returned.
 *
 * Returns: Whether a token could be successfully read.
 */
bool gsdl_tokenizer_next(GSDLTokenizer *self, GSDLToken **result, GError **err) {
    gunichar c, nc;
    int line;
    int col;

retry:
    /* Remember where the token starts before any character is consumed. */
    line = self->line;
    col = self->col;
    if (!_read(self, &c, err)) return false;

    if (G_UNLIKELY(c == EOF)) {
        *result = _maketoken(T_EOF, line, col);
        return true;

    } else if (c == '\r') {
        /* Fold "\r\n" (and a lone "\r") into a single '\n' token. */
        if (_peek(self, &c, err) && c == '\n') _consume(self);

        *result = _maketoken('\n', line, col);
        FAIL_IF_ERR();
        return true;

    } else if ((c == '/' && _peek(self, &nc, err) && nc == '/') ||
               (c == '-' && _peek(self, &nc, err) && nc == '-') ||
               c == '#') {
        /* Line comment: two-character openers still have their second character pending. */
        if (c != '#') _consume(self);

        /* Stop in front of the newline so that it is still reported as a token. */
        while (_peek(self, &c, err) && !(c == '\n' || c == EOF)) _consume(self);

        goto retry;

    } else if (c == '/' && _peek(self, &nc, err) && nc == '*') {
        /* Block comment: skip everything up to and including the closing star-slash. */
        while (_read(self, &c, err)) {
            if (c == EOF) {
                _set_error(err,
                           self,
                           GSDL_SYNTAX_ERROR_UNEXPECTED_CHAR,
                           "Unterminated comment"
                );

                return false;
            } else if (c == '*' && _peek(self, &c, err) && c == '/') {
                _consume(self);
                break;
            }
        }

        goto retry;

    } else if (c != 0 && c < 256 && strchr("-+:;./{}=\n", (char) c)) {
        /* c != 0: strchr() also matches the string's terminating NUL, which would otherwise
         * turn a NUL byte in the input into a punctuation token. */
        *result = _maketoken(c, line, col);
        return true;

    } else if (c < 256 && isdigit((unsigned char) c)) {
        /* <ctype.h> functions require a value representable as unsigned char (or EOF). */
        *result = _maketoken(T_NUMBER, line, col);
        return _tokenize_number(self, *result, c, err);

    } else if (g_unichar_isalpha(c) ||
               g_unichar_type(c) == G_UNICODE_CONNECT_PUNCTUATION ||
               g_unichar_type(c) == G_UNICODE_CURRENCY_SYMBOL) {
        *result = _maketoken(T_IDENTIFIER, line, col);
        return _tokenize_identifier(self, *result, c, err);

    } else if (c == '[') {
        *result = _maketoken(T_BINARY, line, col);
        if (!_tokenize_binary(self, *result, err)) return false;

        REQUIRE(_read(self, &c, err));
        if (c == ']') {
            return true;
        } else {
            _set_error(err,
                       self,
                       GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
                       "Missing ']'"
            );
            return false;
        }

    } else if (c == '"') {
        *result = _maketoken(T_STRING, line, col);
        if (!_tokenize_string(self, *result, err)) return false;

        REQUIRE(_read(self, &c, err));
        if (c == '"') {
            return true;
        } else {
            _set_error(err,
                       self,
                       GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
                       "Missing '\"'"
            );
            return false;
        }

    } else if (c == '`') {
        *result = _maketoken(T_STRING, line, col);
        if (!_tokenize_backquote_string(self, *result, err)) return false;

        REQUIRE(_read(self, &c, err));
        if (c == '`') {
            return true;
        } else {
            _set_error(err,
                       self,
                       GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
                       "Missing '`'"
            );
            return false;
        }

    } else if (c == '\'') {
        *result = _maketoken(T_CHAR, line, col);
        /* g_unichar_to_utf8() may write up to 6 bytes and does not NUL-terminate;
         * the 7th zeroed byte keeps the value a valid C string. */
        (*result)->val = g_malloc0(7);

        if (!_read(self, &c, err)) return false;

        if (c == '\\') {
            if (!_read(self, &c, err)) return false;

            switch (c) {
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                case '"': c = '"'; break;
                case '\'': c = '\''; break;
                case '\\': c = '\\'; break;
                default: /* unknown escape: keep the character as written */ break;
            }
        }

        /* Input ended inside the literal: report it rather than encoding the EOF sentinel. */
        if (c == EOF) {
            _set_error(err,
                       self,
                       GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
                       "Missing \"'\""
            );
            return false;
        }

        g_unichar_to_utf8(c, (*result)->val);

        REQUIRE(_read(self, &c, err));
        if (c == '\'') {
            return true;
        } else {
            _set_error(err,
                       self,
                       GSDL_SYNTAX_ERROR_MISSING_DELIMITER,
                       "Missing \"'\""
            );
            return false;
        }

    } else if (c == '\\' && _peek(self, &nc, err) && (nc == '\r' || nc == '\n')) {
        /* Line continuation: drop the line break that follows the backslash. */
        _consume(self);

        /* For "\r\n" the '\n' must go too, otherwise it would surface as a newline token.
         * The peeked character (nc) is the one to test; c is still the backslash here. */
        if (nc == '\r' && _peek(self, &nc, err) && nc == '\n') _consume(self);

        goto retry;

    } else if (c == ' ' || c == '\t') {
        // Insignificant whitespace between tokens
        goto retry;

    } else {
        /* NOTE(review): the g_strdup_printf() and g_ucs4_to_utf8() results are never freed in
         * this function; they leak unless _set_error() takes ownership of its message -- confirm. */
        _set_error(err,
                   self,
                   GSDL_SYNTAX_ERROR_UNEXPECTED_CHAR,
                   g_strdup_printf("Invalid character '%s'(%d)", g_ucs4_to_utf8(&c, 1, NULL, NULL, NULL), c)
        );
        return false;
    }
}
/* * tokenize the input string */ static struct scfg_token_queue * _tokenize(char *s) { struct scfg_token_queue *tq; int ret; /* New token queue */ tq = scfg_token_queue_new(); if ( NULL == tq ) { return NULL; } while ( '\0' != *s ) { /* Skip white space */ _skip_whitespaces(&s); if ( '\0' == *s ) { break; } switch ( *s ) { case '{': /* LBRACE */ ret = _token_queue_append_single(tq, TOK_LBRACE); if ( 0 != ret ) { /* Error */ scfg_token_queue_delete(tq); return NULL; } s++; break; case '}': /* RBRACE */ ret = _token_queue_append_single(tq, TOK_RBRACE); if ( 0 != ret ) { /* Error */ scfg_token_queue_delete(tq); return NULL; } s++; break; case '(': /* LPAREN */ ret = _token_queue_append_single(tq, TOK_LPAREN); if ( 0 != ret ) { /* Error */ scfg_token_queue_delete(tq); return NULL; } s++; break; case ')': /* RPAREN */ ret = _token_queue_append_single(tq, TOK_RPAREN); if ( 0 != ret ) { /* Error */ scfg_token_queue_delete(tq); return NULL; } s++; break; case ';': /* SEMICOLON */ ret = _token_queue_append_single(tq, TOK_SEMICOLON); if ( 0 != ret ) { /* Error */ scfg_token_queue_delete(tq); return NULL; } s++; break; case '#': /* Comment */ _skip_linecomment(&s); break; default: /* Comment, keyword or string */ if ( '/' == *s && '/' == *(s+1) ) { _skip_linecomment(&s); } else if ( '/' == *s && '*' == *(s+1) ) { _skip_blockcomment(&s); } else if ( _isckeyword(*s) ) { ret = _tokenize_keword(tq, &s); if ( 0 != ret ) { /* Error */ scfg_token_queue_delete(tq); return NULL; } } else if ( '"' == *s || '\'' == *s ) { ret = _tokenize_string(tq, &s); if ( 0 != ret ) { /* Error */ scfg_token_queue_delete(tq); return NULL; } } else { /* Tokenize error */ scfg_token_queue_delete(tq); return NULL; } break; } } return tq; }