/*
 * pg_parse_json
 *
 * Publicly visible entry point for the JSON parser.
 *
 * lex is a lexing context, set up for the JSON to be processed by calling
 * makeJsonLexContext().  sem is a structure of function pointers to semantic
 * action routines to be called at appropriate spots during parsing, plus a
 * pointer to a state object that is passed to those routines.
 */
void
pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
{
	JsonTokenType first_token;

	/* Prime the lexer so that a look-ahead token is available. */
	json_lex(lex);
	first_token = lex_peek(lex);

	/* Recursive descent: dispatch on the token that opens the document. */
	if (first_token == JSON_TOKEN_OBJECT_START)
		parse_object(lex, sem);
	else if (first_token == JSON_TOKEN_ARRAY_START)
		parse_array(lex, sem);
	else
		parse_scalar(lex, sem);		/* top-level JSON may be a bare scalar */

	/* After the top-level value, the next token must be JSON_TOKEN_END. */
	lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
}
/*
 * lex_accept
 *
 * If the look-ahead token has the type given by `token`, consume it (advance
 * the lexer to the next token) and return true; otherwise leave the lexer
 * untouched and return false.
 *
 * When the token matches and `lexeme` is not NULL, a freshly allocated copy
 * of the token text is handed back through it:
 *	- for string tokens, a copy of the de-escaped value in lex->strval;
 *	- for every other token, a NUL-terminated copy of the raw input bytes
 *	  between token_start and token_terminator.
 *
 * Note: for a string token with lex->strval == NULL nothing is stored in
 * *lexeme, so callers must initialize it themselves before calling.
 */
static inline bool
lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
{
	/* Guard clause: a non-matching look-ahead token is not consumed. */
	if (lex->token_type != token)
		return false;

	if (lexeme != NULL)
	{
		if (lex->token_type != JSON_TOKEN_STRING)
		{
			/* Copy the raw token text out of the input buffer. */
			int			rawlen = (lex->token_terminator - lex->token_start);
			char	   *copy = palloc(rawlen + 1);

			memcpy(copy, lex->token_start, rawlen);
			copy[rawlen] = '\0';
			*lexeme = copy;
		}
		else if (lex->strval != NULL)
		{
			/* String token: hand back the de-escaped text. */
			*lexeme = pstrdup(lex->strval->data);
		}
	}

	/* Advance to the next token only after the lexeme has been captured. */
	json_lex(lex);
	return true;
}
/* * SQL function json_typeof(json) -> text * * Returns the type of the outermost JSON value as TEXT. Possible types are * "object", "array", "string", "number", "boolean", and "null". * * Performs a single call to json_lex() to get the first token of the supplied * value. This initial token uniquely determines the value's type. As our * input must already have been validated by json_in() or json_recv(), the * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END, * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END. */ Datum json_typeof(PG_FUNCTION_ARGS) { text *json = PG_GETARG_TEXT_P(0); JsonLexContext *lex = makeJsonLexContext(json, false); JsonTokenType tok; char *type; /* Lex exactly one token from the input and check its type. */ json_lex(lex); tok = lex_peek(lex); switch (tok) { case JSON_TOKEN_OBJECT_START: type = "object"; break; case JSON_TOKEN_ARRAY_START: type = "array"; break; case JSON_TOKEN_STRING: type = "string"; break; case JSON_TOKEN_NUMBER: type = "number"; break; case JSON_TOKEN_TRUE: case JSON_TOKEN_FALSE: type = "boolean"; break; case JSON_TOKEN_NULL: type = "null"; break; default: elog(ERROR, "unexpected json token: %d", tok); } PG_RETURN_TEXT_P(cstring_to_text(type)); }
/* * Check whether supplied input is valid JSON. */ static void json_validate_cstring(char *input) { JsonLexContext lex; JsonParseStack *stack, *stacktop; int stacksize; /* Set up lexing context. */ lex.input = input; lex.token_terminator = lex.input; lex.line_number = 1; lex.line_start = input; /* Set up parse stack. */ stacksize = 32; stacktop = palloc(sizeof(JsonParseStack) * stacksize); stack = stacktop; stack->state = JSON_PARSE_VALUE; /* Main parsing loop. */ for (;;) { JsonStackOp op; /* Fetch next token. */ json_lex(&lex); /* Check for unexpected end of input. */ if (lex.token_start == NULL) report_parse_error(stack, &lex); redo: /* Figure out what to do with this token. */ op = JSON_STACKOP_NONE; switch (stack->state) { case JSON_PARSE_VALUE: if (lex.token_type != JSON_VALUE_INVALID) op = JSON_STACKOP_POP; else if (lex.token_start[0] == '[') stack->state = JSON_PARSE_ARRAY_START; else if (lex.token_start[0] == '{') stack->state = JSON_PARSE_OBJECT_START; else report_parse_error(stack, &lex); break; case JSON_PARSE_ARRAY_START: if (lex.token_type != JSON_VALUE_INVALID) stack->state = JSON_PARSE_ARRAY_NEXT; else if (lex.token_start[0] == ']') op = JSON_STACKOP_POP; else if (lex.token_start[0] == '[' || lex.token_start[0] == '{') { stack->state = JSON_PARSE_ARRAY_NEXT; op = JSON_STACKOP_PUSH_WITH_PUSHBACK; } else report_parse_error(stack, &lex); break; case JSON_PARSE_ARRAY_NEXT: if (lex.token_type != JSON_VALUE_INVALID) report_parse_error(stack, &lex); else if (lex.token_start[0] == ']') op = JSON_STACKOP_POP; else if (lex.token_start[0] == ',') op = JSON_STACKOP_PUSH; else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_START: if (lex.token_type == JSON_VALUE_STRING) stack->state = JSON_PARSE_OBJECT_LABEL; else if (lex.token_type == JSON_VALUE_INVALID && lex.token_start[0] == '}') op = JSON_STACKOP_POP; else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_LABEL: if (lex.token_type == JSON_VALUE_INVALID && lex.token_start[0] == 
':') { stack->state = JSON_PARSE_OBJECT_NEXT; op = JSON_STACKOP_PUSH; } else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_NEXT: if (lex.token_type != JSON_VALUE_INVALID) report_parse_error(stack, &lex); else if (lex.token_start[0] == '}') op = JSON_STACKOP_POP; else if (lex.token_start[0] == ',') stack->state = JSON_PARSE_OBJECT_COMMA; else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_COMMA: if (lex.token_type == JSON_VALUE_STRING) stack->state = JSON_PARSE_OBJECT_LABEL; else report_parse_error(stack, &lex); break; default: elog(ERROR, "unexpected json parse state: %d", (int) stack->state); } /* Push or pop the stack, if needed. */ switch (op) { case JSON_STACKOP_PUSH: case JSON_STACKOP_PUSH_WITH_PUSHBACK: ++stack; if (stack >= &stacktop[stacksize]) { int stackoffset = stack - stacktop; stacksize = stacksize + 32; stacktop = repalloc(stacktop, sizeof(JsonParseStack) * stacksize); stack = stacktop + stackoffset; } stack->state = JSON_PARSE_VALUE; if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK) goto redo; break; case JSON_STACKOP_POP: if (stack == stacktop) { /* Expect end of input. */ json_lex(&lex); if (lex.token_start != NULL) report_parse_error(NULL, &lex); return; } --stack; break; case JSON_STACKOP_NONE: /* nothing to do */ break; } } }