Beispiel #1
0
/*
 * Lex one token from the input stream.
 */
static inline void
json_lex(JsonLexContext *lex)
{
	char	   *s;
	int			len;

	/* Skip leading whitespace. */
	s = lex->token_terminator;
	len = s - lex->input;
	while (len < lex->input_length &&
		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
	{
		if (*s == '\n')
			++lex->line_number;
		++s;
		++len;
	}
	lex->token_start = s;

	/* Determine token type. */
	if (len >= lex->input_length)
	{
		lex->token_start = NULL;
		lex->prev_token_terminator = lex->token_terminator;
		lex->token_terminator = s;
		lex->token_type = JSON_TOKEN_END;
	}
	else
		switch (*s)
		{
				/* Single-character token, some kind of punctuation mark. */
			case '{':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_START;
				break;
			case '}':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_END;
				break;
			case '[':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_START;
				break;
			case ']':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_END;
				break;
			case ',':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COMMA;
				break;
			case ':':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COLON;
				break;
			case '"':
				/* string */
				json_lex_string(lex);
				lex->token_type = JSON_TOKEN_STRING;
				break;
			case '-':
				/* Negative number. */
				json_lex_number(lex, s + 1);
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* Positive number. */
				json_lex_number(lex, s);
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			default:
				{
					char	   *p;

					/*
					 * We're not dealing with a string, number, legal
					 * punctuation mark, or end of string.	The only legal
					 * tokens we might find here are true, false, and null,
					 * but for error reporting purposes we scan until we see a
					 * non-alphanumeric character.	That way, we can report
					 * the whole word as an unexpected token, rather than just
					 * some unintuitive prefix thereof.
					 */
					for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
						 /* skip */ ;

					/*
					 * We got some sort of unexpected punctuation or an
					 * otherwise unexpected character, so just complain about
					 * that one character.
					 */
					if (p == s)
					{
						lex->prev_token_terminator = lex->token_terminator;
						lex->token_terminator = s + 1;
						report_invalid_token(lex);
					}

					/*
					 * We've got a real alphanumeric token here.  If it
					 * happens to be true, false, or null, all is well.  If
					 * not, error out.
					 */
					lex->prev_token_terminator = lex->token_terminator;
					lex->token_terminator = p;
					if (p - s == 4)
					{
						if (memcmp(s, "true", 4) == 0)
							lex->token_type = JSON_TOKEN_TRUE;
						else if (memcmp(s, "null", 4) == 0)
							lex->token_type = JSON_TOKEN_NULL;
						else
							report_invalid_token(lex);
					}
					else if (p - s == 5 && memcmp(s, "false", 5) == 0)
						lex->token_type = JSON_TOKEN_FALSE;
					else
						report_invalid_token(lex);

				}
		}						/* end of switch */
}
Beispiel #2
0
/*
 * Lex one token from the input stream.
 */
static void
json_lex(JsonLexContext *lex)
{
	char	   *s;

	/* Skip leading whitespace. */
	s = lex->token_terminator;
	while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
	{
		if (*s == '\n')
			++lex->line_number;
		++s;
	}
	lex->token_start = s;

	/* Determine token type. */
	if (strchr("{}[],:", s[0]))
	{
		/* strchr() doesn't return false on a NUL input. */
		if (s[0] == '\0')
		{
			/* End of string. */
			lex->token_start = NULL;
			lex->token_terminator = NULL;
		}
		else
		{
			/* Single-character token, some kind of punctuation mark. */
			lex->token_terminator = s + 1;
		}
		lex->token_type = JSON_VALUE_INVALID;
	}
	else if (*s == '"')
	{
		/* String. */
		json_lex_string(lex);
		lex->token_type = JSON_VALUE_STRING;
	}
	else if (*s == '-')
	{
		/* Negative number. */
		json_lex_number(lex, s + 1);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else if (*s >= '0' && *s <= '9')
	{
		/* Positive number. */
		json_lex_number(lex, s);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else
	{
		char   *p;

		/*
		 * We're not dealing with a string, number, legal punctuation mark,
		 * or end of string.  The only legal tokens we might find here are
		 * true, false, and null, but for error reporting purposes we scan
		 * until we see a non-alphanumeric character.  That way, we can report
		 * the whole word as an unexpected token, rather than just some
		 * unintuitive prefix thereof.
		 */
 		for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
			|| (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p);
			++p)
			;

		/*
		 * We got some sort of unexpected punctuation or an otherwise
		 * unexpected character, so just complain about that one character.
		 */
		if (p == s)
		{
			lex->token_terminator = s + 1;
			report_invalid_token(lex);
		}

		/*
		 * We've got a real alphanumeric token here.  If it happens to be
		 * true, false, or null, all is well.  If not, error out.
		 */
		lex->token_terminator = p;
		if (p - s == 4)
		{
			if (memcmp(s, "true", 4) == 0)
				lex->token_type = JSON_VALUE_TRUE;
			else if (memcmp(s, "null", 4) == 0)
				lex->token_type = JSON_VALUE_NULL;
			else
				report_invalid_token(lex);
		}
		else if (p - s == 5 && memcmp(s, "false", 5) == 0)
			lex->token_type = JSON_VALUE_FALSE;
		else
			report_invalid_token(lex);
	}
}
Beispiel #3
0
/*
 * Lex one token from the input stream.
 */
static void
json_lex(JsonLexContext *lex)
{
	char	   *s;

	/* Skip leading whitespace. */
	s = lex->token_terminator;
	while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
		s++;
	lex->token_start = s;

	/* Determine token type. */
	if (strchr("{}[],:", s[0]) != NULL)
	{
		/* strchr() is willing to match a zero byte, so test for that. */
		if (s[0] == '\0')
		{
			/* End of string. */
			lex->token_start = NULL;
			lex->token_terminator = s;
		}
		else
		{
			/* Single-character token, some kind of punctuation mark. */
			lex->token_terminator = s + 1;
		}
		lex->token_type = JSON_VALUE_INVALID;
	}
	else if (*s == '"')
	{
		/* String. */
		json_lex_string(lex);
		lex->token_type = JSON_VALUE_STRING;
	}
	else if (*s == '-')
	{
		/* Negative number. */
		json_lex_number(lex, s + 1);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else if (*s >= '0' && *s <= '9')
	{
		/* Positive number. */
		json_lex_number(lex, s);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else
	{
		char	   *p;

		/*
		 * We're not dealing with a string, number, legal punctuation mark, or
		 * end of string.  The only legal tokens we might find here are true,
		 * false, and null, but for error reporting purposes we scan until we
		 * see a non-alphanumeric character.  That way, we can report the
		 * whole word as an unexpected token, rather than just some
		 * unintuitive prefix thereof.
		 */
		for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
			/* skip */ ;

		if (p == s)
		{
			/*
			 * We got some sort of unexpected punctuation or an otherwise
			 * unexpected character, so just complain about that one
			 * character.  (It can't be multibyte because the above loop
			 * will advance over any multibyte characters.)
			 */
			lex->token_terminator = s + 1;
			report_invalid_token(lex);
		}

		/*
		 * We've got a real alphanumeric token here.  If it happens to be
		 * true, false, or null, all is well.  If not, error out.
		 */
		lex->token_terminator = p;
		if (p - s == 4)
		{
			if (memcmp(s, "true", 4) == 0)
				lex->token_type = JSON_VALUE_TRUE;
			else if (memcmp(s, "null", 4) == 0)
				lex->token_type = JSON_VALUE_NULL;
			else
				report_invalid_token(lex);
		}
		else if (p - s == 5 && memcmp(s, "false", 5) == 0)
			lex->token_type = JSON_VALUE_FALSE;
		else
			report_invalid_token(lex);
	}
}