static char * eat_string(int starting_line) { int c; char buffer[500]; char *ptr = buffer; for (;;) { /* * Get the next input character, handling EOF: */ c = input(); if (!c) { unput(c); report_parse_error("unterminated string found beginning", starting_line); return(0); } /* * Deal with special characters ('\\', '"', and '\n'): */ if (c=='\\') { c = eat_escape_code(); if (!c) continue; } else if (c == '"') { *ptr = 0; return(string_Copy(buffer)); } else if (c == '\n') { unput(c); /* fix line # reference to right line # */ report_parse_error("carriage return found in string", yylineno); return(0); } /* * Add the character c to the current string: */ *ptr = c; ptr++; /* * If out of buffer space, do a recursive call then * concatanate the result to the string read in so far to get the * entire string and return that: */ if (ptr>buffer+sizeof(buffer)-20) { string rest_of_string, result; rest_of_string = eat_string(starting_line); if (!rest_of_string) return(0); *ptr = 0; result = string_Concat(buffer, rest_of_string); free(rest_of_string); return(result); } } }
/*
 * Recursive Descent parse routines. There is one for each structural
 * element in a json document:
 *   - scalar (string, number, true, false, null)
 *   - array  ( [ ] )
 *   - array element
 *   - object ( { } )
 *   - object field
 */

/*
 * parse_scalar
 *
 * Consume one scalar token from the lexer and, if a scalar semantic action
 * is registered in sem, invoke it with the token's value and type.  The
 * token text is only requested from lex_accept() when there is an action
 * to receive it.  Any non-scalar token is reported as a parse error.
 */
static inline void
parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
{
    char *val = NULL;
    json_scalar_action scalar_cb = sem->scalar;
    JsonTokenType tok = lex_peek(lex);
    char **valaddr = (scalar_cb != NULL) ? &val : NULL;

    switch (tok)
    {
        /* a scalar must be a string, a number, true, false, or null */
        case JSON_TOKEN_TRUE:
        case JSON_TOKEN_FALSE:
        case JSON_TOKEN_NULL:
        case JSON_TOKEN_NUMBER:
        case JSON_TOKEN_STRING:
            /* tok is the look-ahead token, so this accepts exactly it */
            lex_accept(lex, tok, valaddr);
            break;

        default:
            report_parse_error(JSON_PARSE_VALUE, lex);
    }

    if (scalar_cb != NULL)
        (*scalar_cb) (sem->semstate, val, tok);
}
/*
 * parse_object
 *
 * An object is a possibly empty sequence of object fields, separated by
 * commas and surrounded by curly braces.
 *
 * The object_start / object_end semantic actions from sem, when set, are
 * called before the opening brace is consumed and after the closing brace
 * is consumed, respectively.
 */
static void
parse_object(JsonLexContext *lex, JsonSemAction *sem)
{
    json_struct_action start_cb = sem->object_start;
    json_struct_action end_cb = sem->object_end;
    JsonTokenType first;

    if (start_cb != NULL)
        (*start_cb) (sem->semstate);

    /*
     * Data inside an object is at a higher nesting level than the object
     * itself.  The level is raised only after the start action has run and
     * is restored before the end action runs.
     */
    lex->lex_level++;

    /* we know this will succeed, just clearing the token */
    lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);

    first = lex_peek(lex);
    if (first == JSON_TOKEN_STRING)
    {
        /* one field, then one more for every comma that follows */
        do
        {
            parse_object_field(lex, sem);
        } while (lex_accept(lex, JSON_TOKEN_COMMA, NULL));
    }
    else if (first != JSON_TOKEN_OBJECT_END)
    {
        /* case of an invalid initial token inside the object */
        report_parse_error(JSON_PARSE_OBJECT_START, lex);
    }

    lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);

    lex->lex_level--;

    if (end_cb != NULL)
        (*end_cb) (sem->semstate);
}
/*
 * parse_object_field
 *
 * An object field is "fieldname" : value, where value can be a scalar,
 * an object or an array.
 *
 * The field name is captured from the lexer only when at least one of the
 * object_field_start / object_field_end actions is set.  Both actions
 * receive the field name and a flag telling whether the value's first
 * token is JSON_TOKEN_NULL.
 */
static void
parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
{
    json_ofield_action start_cb = sem->object_field_start;
    json_ofield_action end_cb = sem->object_field_end;
    char *fname = NULL;
    char **fnameaddr = (start_cb != NULL || end_cb != NULL) ? &fname : NULL;
    JsonTokenType value_tok;
    bool isnull;

    /* the label itself must be a string ... */
    if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
        report_parse_error(JSON_PARSE_STRING, lex);

    /* ... followed by a colon */
    lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);

    value_tok = lex_peek(lex);
    isnull = (value_tok == JSON_TOKEN_NULL);

    if (start_cb != NULL)
        (*start_cb) (sem->semstate, fname, isnull);

    /* dispatch on the first token of the value */
    if (value_tok == JSON_TOKEN_OBJECT_START)
        parse_object(lex, sem);
    else if (value_tok == JSON_TOKEN_ARRAY_START)
        parse_array(lex, sem);
    else
        parse_scalar(lex, sem);

    if (end_cb != NULL)
        (*end_cb) (sem->semstate, fname, isnull);

    /* release the name only if one was actually captured */
    if (fname != NULL)
        pfree(fname);
}
/*
 * eat_til_endshow -- collect the text of a show block, line by line.
 *
 * For every line, leading spaces and tabs are skipped and the rest of the
 * line is read with eat_show_line(1).  A null result from eat_show_line()
 * marks the endshow line, at which point the accumulated text is returned.
 *
 * start_line_no is used only in the "unterminated show" diagnostic.
 *
 * Returns the accumulated text, or 0 (after reporting a parse error and
 * freeing the partial text) if end of input is reached first.
 */
static char *
eat_til_endshow(int start_line_no)
{
    string collected = string_Copy("");
    int ch;

    for (;;) {
        string line;

        /* Skip the spaces & tabs at the start of the current line. */
        do {
            ch = input();
        } while (ch == ' ' || ch == '\t');
        unput(ch);          /* first non-blank character goes back */

        /* A zero character means the show was never terminated. */
        if (!ch) {
            report_parse_error("unterminated show beginning", start_line_no);
            free(collected);
            return 0;
        }

        /*
         * Read in the rest of the line (including the <cr> at the end),
         * allowing for escape codes and checking for "endshow{nonalpha}"
         * at the start of the line.  (Note: \<newline> is considered the
         * end of a line here!)
         */
        line = eat_show_line(1);
        if (!line)          /* i.e., this was the endshow line */
            return collected;

        collected = string_Concat2(collected, line);
        free(line);
    }
}
int yylex(void) { register int c, last_char; register char *ptr; int start_line_no; int_dictionary_binding *binding; char varname[MAX_IDENTIFIER_LENGTH+1]; for (;;) { switch (c = input()) { /* * Skip whitespace: */ case ' ': case '\t': case '\n': continue; /* * '#' comments out everything up to the and including * the next <cr>: */ case '#': while ( (c=input()) && (c!='\n') ) ; if (!c) unput(c); continue; /* * Handle c-style comments. Note that "/[^*]" is not the start * of any valid token. */ case '/': start_line_no = yylineno; /* verify that next character is a '*': */ if ((c=input()) != '*') return(ERROR); /* Scan until "*\/" or <EOF>: */ for (last_char=0; ; last_char=c) { c = input(); if (c == '/' && (last_char=='*')) break; if (!c) { unput(c); report_parse_error("unterminated c style comment found beginning", start_line_no); return(ERROR); } } continue; /* * The following characters lex as themselves: * '+', '|', '&', '(', ')', '.', ',' and <EOF>: */ case 0: case '+': case '|': case '&': case '(': case ')': case '.': case ',': return(c); /* * Handle "=[^~=]", "=~", and "==": */ case '=': switch (c = input()) { case '~': return(REGEQ); case '=': return(EQ); default: unput(c); return('='); } /* * Handle "![^~=]", "!~", and "!=": */ case '!': switch (c = input()) { case '~': return(REGNEQ); case '=': return(NEQ); default: unput(c); return('!'); } /* * Handle identifiers and keywords: * * Note that the below set of characters is hard coded from * is_identifier_char from parser.h. 
*/ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '_': /* * Read in the first MAX_IDENTIFIER_LENGTH characters of the * identifier into varname null terminated. Eat * the rest of the characters of the identifier: */ for (ptr = varname;;) { if (ptr<varname+MAX_IDENTIFIER_LENGTH) *(ptr++) = c; c = input(); if (!is_identifier_char(c)) break; } unput(c); *ptr = '\0'; /* * Look up the identifier in the keyword dictionary. * If its a match, return the keyword's #. In the case * of show, call handle_show to do more processing. * If not a match, treat as a variable name. */ binding = int_dictionary_Lookup(keyword_dict, varname); if (!binding) { yylval.text = string_Copy(varname); return(VARNAME); } if (binding->value == SHOW) return(handle_show()); else return(binding->value); /* * Handle "${identifier}". Note that $ followed by a * non-identifier character is not the start of any valid token. */ case '$': c = input(); if (!is_identifier_char(c)) return(ERROR); /* * Read in the first MAX_IDENTIFIER_LENGTH characters of the * identifier into varname null terminated. 
Eat * the rest of the characters of the identifier: */ for (ptr = varname;;) { if (ptr<varname+MAX_IDENTIFIER_LENGTH) *(ptr++) = c; c = input(); if (!is_identifier_char(c)) break; } unput(c); *ptr = '\0'; yylval.text = string_Copy(varname); return(VARREF); /* * Handle constant strings: */ case '"': yylval.text = eat_string(yylineno); if (yylval.text) return(STRING); else return(ERROR); /* * All other characters do not start valid tokens: */ default: return(ERROR); } } }
/* * Check whether supplied input is valid JSON. */ static void json_validate_cstring(char *input) { JsonLexContext lex; JsonParseStack *stack, *stacktop; int stacksize; /* Set up lexing context. */ lex.input = input; lex.token_terminator = lex.input; lex.line_number = 1; lex.line_start = input; /* Set up parse stack. */ stacksize = 32; stacktop = palloc(sizeof(JsonParseStack) * stacksize); stack = stacktop; stack->state = JSON_PARSE_VALUE; /* Main parsing loop. */ for (;;) { JsonStackOp op; /* Fetch next token. */ json_lex(&lex); /* Check for unexpected end of input. */ if (lex.token_start == NULL) report_parse_error(stack, &lex); redo: /* Figure out what to do with this token. */ op = JSON_STACKOP_NONE; switch (stack->state) { case JSON_PARSE_VALUE: if (lex.token_type != JSON_VALUE_INVALID) op = JSON_STACKOP_POP; else if (lex.token_start[0] == '[') stack->state = JSON_PARSE_ARRAY_START; else if (lex.token_start[0] == '{') stack->state = JSON_PARSE_OBJECT_START; else report_parse_error(stack, &lex); break; case JSON_PARSE_ARRAY_START: if (lex.token_type != JSON_VALUE_INVALID) stack->state = JSON_PARSE_ARRAY_NEXT; else if (lex.token_start[0] == ']') op = JSON_STACKOP_POP; else if (lex.token_start[0] == '[' || lex.token_start[0] == '{') { stack->state = JSON_PARSE_ARRAY_NEXT; op = JSON_STACKOP_PUSH_WITH_PUSHBACK; } else report_parse_error(stack, &lex); break; case JSON_PARSE_ARRAY_NEXT: if (lex.token_type != JSON_VALUE_INVALID) report_parse_error(stack, &lex); else if (lex.token_start[0] == ']') op = JSON_STACKOP_POP; else if (lex.token_start[0] == ',') op = JSON_STACKOP_PUSH; else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_START: if (lex.token_type == JSON_VALUE_STRING) stack->state = JSON_PARSE_OBJECT_LABEL; else if (lex.token_type == JSON_VALUE_INVALID && lex.token_start[0] == '}') op = JSON_STACKOP_POP; else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_LABEL: if (lex.token_type == JSON_VALUE_INVALID && lex.token_start[0] == 
':') { stack->state = JSON_PARSE_OBJECT_NEXT; op = JSON_STACKOP_PUSH; } else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_NEXT: if (lex.token_type != JSON_VALUE_INVALID) report_parse_error(stack, &lex); else if (lex.token_start[0] == '}') op = JSON_STACKOP_POP; else if (lex.token_start[0] == ',') stack->state = JSON_PARSE_OBJECT_COMMA; else report_parse_error(stack, &lex); break; case JSON_PARSE_OBJECT_COMMA: if (lex.token_type == JSON_VALUE_STRING) stack->state = JSON_PARSE_OBJECT_LABEL; else report_parse_error(stack, &lex); break; default: elog(ERROR, "unexpected json parse state: %d", (int) stack->state); } /* Push or pop the stack, if needed. */ switch (op) { case JSON_STACKOP_PUSH: case JSON_STACKOP_PUSH_WITH_PUSHBACK: ++stack; if (stack >= &stacktop[stacksize]) { int stackoffset = stack - stacktop; stacksize = stacksize + 32; stacktop = repalloc(stacktop, sizeof(JsonParseStack) * stacksize); stack = stacktop + stackoffset; } stack->state = JSON_PARSE_VALUE; if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK) goto redo; break; case JSON_STACKOP_POP: if (stack == stacktop) { /* Expect end of input. */ json_lex(&lex); if (lex.token_start != NULL) report_parse_error(NULL, &lex); return; } --stack; break; case JSON_STACKOP_NONE: /* nothing to do */ break; } } }
/*
 * lex_expect
 *
 * Move the lexer to the next token if the current look-ahead token matches
 * the parameter token (as decided by lex_accept).  Otherwise, report a
 * parse error for the supplied parse context.
 *
 *  ctx   - parse context handed to report_parse_error() on a mismatch
 *  lex   - lexer state
 *  token - the token type that must come next
 */
static inline void
lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
{
    if (!lex_accept(lex, token, NULL))
        report_parse_error(ctx, lex);
}