Exemplo n.º 1
0
/**
 * Return the next lexical token of the control string.
 *
 * A token held in ``s->unread'' is handed out first (and the slot cleared).
 * Otherwise leading blanks are skipped and a new token is allocated through
 * ctl_token_alloc(), based on the character found at the read pointer.
 *
 * @param s		the string being tokenized; its read pointer may be advanced
 *
 * @return the token: EOF at the end of the string, one of the punctuation
 * tokens, an identifier (text copied into ``val.s'') or an error token
 * (offending character stored in ``val.c'').
 */
static struct ctl_tok *
ctl_next_token(struct ctl_string *s)
{
	struct ctl_tok *token;
	const char *begin;
	size_t n;

	/* A read-ahead token takes precedence over scanning the string */

	token = s->unread;
	if (token != NULL) {
		s->unread = NULL;
		return token;
	}

	s->p = skip_ascii_blanks(s->p);

	/*
	 * End of string and single-character punctuation.
	 *
	 * For punctuation, the read pointer is moved past the character
	 * before being given to ctl_token_alloc().
	 */

	switch (*s->p) {
	case '\0':	return ctl_token_alloc(CTL_TOK_EOF, s->p);
	case '{':	return ctl_token_alloc(CTL_TOK_LBRACE, ++s->p);
	case '}':	return ctl_token_alloc(CTL_TOK_RBRACE, ++s->p);
	case ':':	return ctl_token_alloc(CTL_TOK_COLON, ++s->p);
	case ',':	return ctl_token_alloc(CTL_TOK_COMMA, ++s->p);
	default:	break;
	}

	/*
	 * Anything that cannot start an identifier is reported as an error
	 * token; the read pointer is left on the offending character.
	 */

	if (!is_ascii_alnum(*s->p)) {
		token = ctl_token_alloc(CTL_TOK_ERROR, s->p);
		token->val.c = *s->p;
		return token;
	}

	/*
	 * Identifier: a run of alphanumeric characters, copied into a
	 * NUL-terminated buffer of exactly the required size.
	 */

	begin = s->p;
	token = ctl_token_alloc(CTL_TOK_ID, s->p);
	s->p = skip_ascii_alnum(s->p);
	n = s->p - begin;
	token->val.s = halloc(n + 1);
	clamp_strncpy(token->val.s, n + 1, begin, n);

	return token;
}
Exemplo n.º 2
0
/**
 * Get next token, as delimited by one of the characters given in ``delim'' or
 * by the end of the string, whichever comes first.  Same as strtok_next(),
 * only we can specify whether we wish to ignore leading and/or trailing spaces
 * for this lookup.
 *
 * When ``looked'' is non-NULL, we're looking whether the token matches the
 * string, and we do not bother constructing the token as soon as we have
 * determined that the current token cannot match.  Therefore, the returned
 * token string is meaningless and forced to "", the empty string.
 *
 * When the current token does not match ``looked'', the rest of that token
 * is skipped, ``found'' is set to FALSE, the length is set to 0 and the
 * empty string is returned (not NULL): NULL is only returned by a later
 * call, once the end of the parsed string has been reached.
 *
 * @param s			the string tokenizing object
 * @param delim		the string containing one-character delimiters, e.g. ",;"
 * @param no_lead	whether leading spaces in token should be stripped
 * @param no_end	whether trailing spaces in token should be stripped
 * @param length	if non-NULL, gets filled with the returned token length
 * @param looked	the token which we're looking for, NULL if none
 * @param caseless	whether token matching is to be done case-insensitively
 * @param found		if non-NULL, gets filled with whether we found ``looked''
 *					(always TRUE when ``looked'' is NULL and a token is returned)
 *
 * @return pointer to the next token, which must be duplicated if it needs to
 * be perused, or NULL if there are no more tokens.  The token lifetime lasts
 * until the next call to one of the strtok_* functions on the same object.
 */
static const char *
strtok_next_internal(strtok_t *s, const char *delim,
	bool no_lead, bool no_end, size_t *length,
	const char *looked, bool caseless, bool *found)
{
	size_t tlen;
	int c;
	int d_min, d_max;
	const char *l = NULL;			/* Read pointer within ``looked'' */
	bool seen_non_blank = FALSE;
	int deferred_blank = 0;			/* Blanks seen but not compared yet */
	char *tstart;

	strtok_check(s);
	g_assert(delim != NULL);

	if (NULL == s->p)
		return NULL;		/* Finished parsing */

	/*
	 * Pre-compile delimiter string to see what are the min and max character
	 * codes on which we delimit tokens.  When handling a low amount of
	 * delimiters which are close enough in the 8-bit code space, this lowers
	 * significantly the amount of character comparisons we have to do.
	 */

	d_min = 256;
	d_max = 0;

	{
		const char *q = delim;
		int d;

		while ((d = peek_u8(q++))) {
			if (d < d_min)
				d_min = d;
			if (d > d_max)
				d_max = d;
		}
	}

	/*
	 * Now parse the string until we reach one of the delimiters or its end.
	 */

	s->t = s->token;
	tlen = 0;

	while ((c = peek_u8(s->p++))) {

		/* Have we reached one of the delimiters? */

		if (c >= d_min && c <= d_max) {
			const char *q = delim;
			int d;

			while ((d = peek_u8(q++))) {
				if (d == c)
					goto end_token;
			}
		}

		/* Check whether token can match the ``looked'' up string */

		if (looked != NULL) {
			if (!seen_non_blank && !is_ascii_blank(c))
				seen_non_blank = TRUE;

			if (!no_lead || seen_non_blank) {
				int x;

				if (l == NULL)
					l = looked;

				if (no_end) {
					/*
					 * Blanks are not compared right away since they could
					 * be trailing ones, which must be ignored.  They are
					 * only matched once a non-blank character follows.
					 */

					if (is_ascii_blank(c)) {
						deferred_blank++;
						continue;
					} else {
						for (/**/; deferred_blank > 0; deferred_blank--) {
							/* All blanks deemed equal here */
							if (!is_ascii_blank(*l++))
								goto skip_until_delim;
						}
					}
				}

				/*
				 * If ``l'' is at the end of ``looked'', x is 0 and cannot
				 * match the non-NUL c, so we never read past its end.
				 */

				x = peek_u8(l++);
				if (caseless) {
					if (ascii_tolower(c) != ascii_tolower(x))
						goto skip_until_delim;
				} else if (c != x) {
					goto skip_until_delim;
				}
			}
			continue;		/* No need to collect token when looking... */
		}

		/* Character was not a delimiter, add to token */

		if (tlen >= s->len)
			extend_token(s);

		g_assert(tlen < s->len);

		s->t = poke_u8(s->t, c);
		tlen++;
	}

	s->p = NULL;			/* Signals: reached end of string */

end_token:
	/* Make sure there is room for the trailing NUL */

	if (tlen >= s->len)
		extend_token(s);

	g_assert(tlen < s->len);
	g_assert(s->len > 0);

	/*
	 * Strip trailing white spaces if required.
	 */

	if (no_end) {
		while (s->t > s->token) {
			if (!is_ascii_blank(*(s->t - 1)))
				break;
			s->t--;
		}
	}
	*s->t = '\0';			/* End token string */

	/* Check whether token can match the ``looked'' up string */

	if (looked != NULL) {
		if (l == NULL)
			l = looked;
		if (*l != '\0')
			goto not_found;		/* Token was only a prefix of ``looked'' */
		*s->token = '\0';		/* Always return empty string */
	}

	/*
	 * Leading white spaces are skipped if required.
	 */

	tstart = no_lead ? skip_ascii_blanks(s->token) : s->token;

	/* Fill to-be-returned information */

	if (found)  *found  = TRUE;
	if (length) *length = s->t - tstart;

	return tstart;

skip_until_delim:

	/*
	 * Looked-up string did not match the token we were constructing.
	 * Move to the next delimiter or the end of the string, skipping
	 * the token construction.
	 */

	while ((c = peek_u8(s->p++))) {
		if (c >= d_min && c <= d_max) {
			const char *q = delim;
			int d;

			while ((d = peek_u8(q++))) {
				if (d == c)
					goto not_found;		/* Found delimiter, not the string */
			}
		}
	}

	/*
	 * The loop above ended on the terminating NUL and left s->p pointing
	 * one character past it.  Flag the end of the string, as done in the
	 * main parsing loop, so that the next call returns NULL instead of
	 * reading beyond the end of the parsed string.
	 */

	s->p = NULL;			/* Signals: reached end of string */

	/* FALL THROUGH */

not_found:

	/*
	 * We did not find the looked-up string and reached either the next
	 * delimiter or the end of the parsed string.
	 */

	if (0 == s->len)
		extend_token(s);	/* Need room to store the empty string */

	*s->token = '\0';		/* Always return empty string */

	if (length) *length = 0;
	if (found)  *found  = FALSE;

	return s->token;
}