Datum dabs_int_lexize(PG_FUNCTION_ARGS) { char *in = (char *) PG_GETARG_POINTER(1); char *out = pnstrdup(in, PG_GETARG_INT32(2)); char *start; char *end; TSLexeme *res = palloc0(sizeof(TSLexeme) * 2); res[1].lexeme = NULL; while (*out && t_iseq(out, '-')) out += pg_mblen(out); start = out; while (*out) out += pg_mblen(out); end = out; out = pnstrdup(start, end - start); res[0].lexeme = out; PG_RETURN_POINTER(res); }
static char * getlexeme(char *start, char *end, int *len) { char *ptr; int charlen; while (start < end && (charlen = pg_mblen(start)) == 1 && t_iseq(start, '_')) start += charlen; ptr = start; if (ptr >= end) return NULL; while (ptr < end && !((charlen = pg_mblen(ptr)) == 1 && t_iseq(ptr, '_'))) ptr += charlen; *len = ptr - start; return start; }
/* * Test whether a regex is of the subset supported here. * Keep this in sync with RS_compile! */ bool RS_isRegis(const char *str) { int state = RS_IN_WAIT; const char *c = str; while (*c) { if (state == RS_IN_WAIT) { if (t_isalpha(c)) /* okay */ ; else if (t_iseq(c, '[')) state = RS_IN_ONEOF; else return false; } else if (state == RS_IN_ONEOF) { if (t_iseq(c, '^')) state = RS_IN_NONEOF; else if (t_isalpha(c)) state = RS_IN_ONEOF_IN; else return false; } else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) { if (t_isalpha(c)) /* okay */ ; else if (t_iseq(c, ']')) state = RS_IN_WAIT; else return false; } else elog(ERROR, "internal error in RS_isRegis: state %d", state); c += pg_mblen(c); } return (state == RS_IN_WAIT); }
/* * Finds the next whitespace-delimited word within the 'in' string. * Returns a pointer to the first character of the word, and a pointer * to the next byte after the last character in the word (in *end). * Character '*' at the end of word will not be threated as word * charater if flags is not null. */ static char * findwrd(char *in, char **end, uint16 *flags) { char *start; char *lastchar; /* Skip leading spaces */ while (*in && t_isspace(in)) in += pg_mblen(in); /* Return NULL on empty lines */ if (*in == '\0') { *end = NULL; return NULL; } lastchar = start = in; /* Find end of word */ while (*in && !t_isspace(in)) { lastchar = in; in += pg_mblen(in); } if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags) { *flags = TSL_PREFIX; *end = lastchar; } else { if (flags) *flags = 0; *end = in; } return start; }
static void thesaurusRead(char *filename, DictThesaurus *d) { tsearch_readline_state trst; uint16 idsubst = 0; bool useasis = false; char *line; filename = get_tsearch_config_filename(filename, "ths"); if (!tsearch_readline_begin(&trst, filename)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("could not open thesaurus file \"%s\": %m", filename))); while ((line = tsearch_readline(&trst)) != NULL) { char *ptr; int state = TR_WAITLEX; char *beginwrd = NULL; uint16 posinsubst = 0; uint16 nwrd = 0; ptr = line; /* is it a comment? */ while (*ptr && t_isspace(ptr)) ptr += pg_mblen(ptr); if (t_iseq(ptr, '#') || *ptr == '\0' || t_iseq(ptr, '\n') || t_iseq(ptr, '\r')) { pfree(line); continue; } while (*ptr) { if (state == TR_WAITLEX) { if (t_iseq(ptr, ':')) { if (posinsubst == 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("unexpected delimiter"))); state = TR_WAITSUBS; } else if (!t_isspace(ptr)) { beginwrd = ptr; state = TR_INLEX; } } else if (state == TR_INLEX) { if (t_iseq(ptr, ':')) { newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITSUBS; } else if (t_isspace(ptr)) { newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITLEX; } } else if (state == TR_WAITSUBS) { if (t_iseq(ptr, '*')) { useasis = true; state = TR_INSUBS; beginwrd = ptr + pg_mblen(ptr); } else if (t_iseq(ptr, '\\')) { useasis = false; state = TR_INSUBS; beginwrd = ptr + pg_mblen(ptr); } else if (!t_isspace(ptr)) { useasis = false; beginwrd = ptr; state = TR_INSUBS; } } else if (state == TR_INSUBS) { if (t_isspace(ptr)) { if (ptr == beginwrd) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("unexpected end of line or lexeme"))); addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis); state = TR_WAITSUBS; } } else elog(ERROR, "unrecognized thesaurus state: %d", state); ptr += pg_mblen(ptr); } if (state == TR_INSUBS) { if (ptr == beginwrd) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("unexpected end of line or lexeme"))); addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis); } idsubst++; if (!(nwrd && posinsubst)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("unexpected end of line"))); pfree(line); } d->nsubst = idsubst; tsearch_readline_end(&trst); }
void RS_compile(Regis *r, bool issuffix, const char *str) { int len = strlen(str); int state = RS_IN_WAIT; const char *c = str; RegisNode *ptr = NULL; memset(r, 0, sizeof(Regis)); r->issuffix = (issuffix) ? 1 : 0; while (*c) { if (state == RS_IN_WAIT) { if (t_isalpha(c)) { if (ptr) ptr = newRegisNode(ptr, len); else ptr = r->node = newRegisNode(NULL, len); COPYCHAR(ptr->data, c); ptr->type = RSF_ONEOF; ptr->len = pg_mblen(c); } else if (t_iseq(c, '[')) { if (ptr) ptr = newRegisNode(ptr, len); else ptr = r->node = newRegisNode(NULL, len); ptr->type = RSF_ONEOF; state = RS_IN_ONEOF; } else /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); } else if (state == RS_IN_ONEOF) { if (t_iseq(c, '^')) { ptr->type = RSF_NONEOF; state = RS_IN_NONEOF; } else if (t_isalpha(c)) { COPYCHAR(ptr->data, c); ptr->len = pg_mblen(c); state = RS_IN_ONEOF_IN; } else /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); } else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) { if (t_isalpha(c)) { COPYCHAR(ptr->data + ptr->len, c); ptr->len += pg_mblen(c); } else if (t_iseq(c, ']')) state = RS_IN_WAIT; else /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); } else elog(ERROR, "internal error in RS_compile: state %d", state); c += pg_mblen(c); } if (state != RS_IN_WAIT) /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); ptr = r->node; while (ptr) { r->nchar++; ptr = ptr->next; } }
Datum ltree_in(PG_FUNCTION_ARGS) { char *buf = (char *) PG_GETARG_POINTER(0); char *ptr; nodeitem *list, *lptr; int num = 0, totallen = 0; int state = LTPRS_WAITNAME; ltree *result; ltree_level *curlevel; int charlen; int pos = 0; ptr = buf; while (*ptr) { charlen = pg_mblen(ptr); if (charlen == 1 && t_iseq(ptr, '.')) num++; ptr += charlen; } if (num + 1 > MaxAllocSize / sizeof(nodeitem)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of levels (%d) exceeds the maximum allowed (%d)", num + 1, (int) (MaxAllocSize / sizeof(nodeitem))))); list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1)); ptr = buf; while (*ptr) { charlen = pg_mblen(ptr); if (state == LTPRS_WAITNAME) { if (ISALNUM(ptr)) { lptr->start = ptr; lptr->wlen = 0; state = LTPRS_WAITDELIM; } else UNCHAR; } else if (state == LTPRS_WAITDELIM) { if (charlen == 1 && t_iseq(ptr, '.')) { lptr->len = ptr - lptr->start; if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", lptr->wlen, pos))); totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE); lptr++; state = LTPRS_WAITNAME; } else if (!ISALNUM(ptr)) UNCHAR; } else /* internal error */ elog(ERROR, "internal error in parser"); ptr += charlen; lptr->wlen++; pos++; } if (state == LTPRS_WAITDELIM) { lptr->len = ptr - lptr->start; if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", lptr->wlen, pos))); totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE); lptr++; } else if (!(state == LTPRS_WAITNAME && lptr == list)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error"), errdetail("Unexpected end of line."))); result = (ltree *) palloc0(LTREE_HDRSIZE + totallen); SET_VARSIZE(result, LTREE_HDRSIZE + totallen); result->numlevel = lptr - list; curlevel = LTREE_FIRST(result); lptr = list; while (lptr - list < result->numlevel) { curlevel->len = (uint16) lptr->len; memcpy(curlevel->name, lptr->start, lptr->len); curlevel = LEVEL_NEXT(curlevel); lptr++; } pfree(list); PG_RETURN_POINTER(result); }
Datum lquery_in(PG_FUNCTION_ARGS) { char *buf = (char *) PG_GETARG_POINTER(0); char *ptr; int num = 0, totallen = 0, numOR = 0; int state = LQPRS_WAITLEVEL; lquery *result; nodeitem *lptr = NULL; lquery_level *cur, *curqlevel, *tmpql; lquery_variant *lrptr = NULL; bool hasnot = false; bool wasbad = false; int charlen; int pos = 0; ptr = buf; while (*ptr) { charlen = pg_mblen(ptr); if (charlen == 1) { if (t_iseq(ptr, '.')) num++; else if (t_iseq(ptr, '|')) numOR++; } ptr += charlen; } num++; if (num > MaxAllocSize / ITEMSIZE) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of levels (%d) exceeds the maximum allowed (%d)", num, (int) (MaxAllocSize / ITEMSIZE)))); curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num); ptr = buf; while (*ptr) { charlen = pg_mblen(ptr); if (state == LQPRS_WAITLEVEL) { if (ISALNUM(ptr)) { GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1)); lptr->start = ptr; state = LQPRS_WAITDELIM; curqlevel->numvar = 1; } else if (charlen == 1 && t_iseq(ptr, '!')) { GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1)); lptr->start = ptr + 1; state = LQPRS_WAITDELIM; curqlevel->numvar = 1; curqlevel->flag |= LQL_NOT; hasnot = true; } else if (charlen == 1 && t_iseq(ptr, '*')) state = LQPRS_WAITOPEN; else UNCHAR; } else if (state == LQPRS_WAITVAR) { if (ISALNUM(ptr)) { lptr++; lptr->start = ptr; state = LQPRS_WAITDELIM; curqlevel->numvar++; } else UNCHAR; } else if (state == LQPRS_WAITDELIM) { if (charlen == 1 && t_iseq(ptr, '@')) { if (lptr->start == ptr) UNCHAR; lptr->flag |= LVAR_INCASE; curqlevel->flag |= LVAR_INCASE; } else if (charlen == 1 && t_iseq(ptr, '*')) { if (lptr->start == ptr) UNCHAR; lptr->flag |= LVAR_ANYEND; curqlevel->flag |= LVAR_ANYEND; } else if (charlen == 1 && t_iseq(ptr, '%')) { if (lptr->start == ptr) UNCHAR; lptr->flag |= LVAR_SUBLEXEME; curqlevel->flag |= LVAR_SUBLEXEME; } else if (charlen == 1 && t_iseq(ptr, '|')) { lptr->len = ptr - lptr->start - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - ((lptr->flag & LVAR_ANYEND) ? 1 : 0); if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", lptr->wlen, pos))); state = LQPRS_WAITVAR; } else if (charlen == 1 && t_iseq(ptr, '.')) { lptr->len = ptr - lptr->start - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - ((lptr->flag & LVAR_ANYEND) ? 1 : 0); if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", lptr->wlen, pos))); state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); } else if (ISALNUM(ptr)) { if (lptr->flag) UNCHAR; } else UNCHAR; } else if (state == LQPRS_WAITOPEN) { if (charlen == 1 && t_iseq(ptr, '{')) state = LQPRS_WAITFNUM; else if (charlen == 1 && t_iseq(ptr, '.')) { curqlevel->low = 0; curqlevel->high = 0xffff; curqlevel = NEXTLEV(curqlevel); state = LQPRS_WAITLEVEL; } else UNCHAR; } else if (state == LQPRS_WAITFNUM) { if (charlen == 1 && t_iseq(ptr, ',')) state = LQPRS_WAITSNUM; else if (t_isdigit(ptr)) { curqlevel->low = atoi(ptr); state = LQPRS_WAITND; } else UNCHAR; } else if (state == LQPRS_WAITSNUM) { if (t_isdigit(ptr)) { curqlevel->high = atoi(ptr); state = LQPRS_WAITCLOSE; } else if (charlen == 1 && t_iseq(ptr, '}')) { curqlevel->high = 0xffff; state = LQPRS_WAITEND; } else UNCHAR; } else if (state == LQPRS_WAITCLOSE) { if (charlen == 1 && t_iseq(ptr, '}')) state = LQPRS_WAITEND; else if (!t_isdigit(ptr)) UNCHAR; } else if (state == LQPRS_WAITND) { if (charlen == 1 && t_iseq(ptr, '}')) { curqlevel->high = curqlevel->low; state = LQPRS_WAITEND; } else if (charlen == 1 && t_iseq(ptr, ',')) state = LQPRS_WAITSNUM; else if (!t_isdigit(ptr)) UNCHAR; } else if (state == LQPRS_WAITEND) { if (charlen == 1 && t_iseq(ptr, '.')) { state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); } else UNCHAR; } else /* internal error */ elog(ERROR, "internal error in parser"); ptr += charlen; if (state == LQPRS_WAITDELIM) lptr->wlen++; pos++; } if (state == LQPRS_WAITDELIM) { if (lptr->start == ptr) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error"), errdetail("Unexpected end of line."))); lptr->len = ptr - lptr->start - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - ((lptr->flag & LVAR_ANYEND) ? 1 : 0); if (lptr->len == 0) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error"), errdetail("Unexpected end of line."))); if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", lptr->wlen, pos))); } else if (state == LQPRS_WAITOPEN) curqlevel->high = 0xffff; else if (state != LQPRS_WAITEND) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error"), errdetail("Unexpected end of line."))); curqlevel = tmpql; totallen = LQUERY_HDRSIZE; while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE) { totallen += LQL_HDRSIZE; if (curqlevel->numvar) { lptr = GETVAR(curqlevel); while (lptr - GETVAR(curqlevel) < curqlevel->numvar) { totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len); lptr++; } } else if (curqlevel->low > curqlevel->high) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error"), errdetail("Low limit(%d) is greater than upper(%d).", curqlevel->low, curqlevel->high))); curqlevel = NEXTLEV(curqlevel); } result = (lquery *) palloc0(totallen); SET_VARSIZE(result, totallen); result->numlevel = num; result->firstgood = 0; result->flag = 0; if (hasnot) result->flag |= LQUERY_HASNOT; cur = LQUERY_FIRST(result); curqlevel = tmpql; while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE) { memcpy(cur, curqlevel, LQL_HDRSIZE); cur->totallen = LQL_HDRSIZE; if (curqlevel->numvar) { lrptr = LQL_FIRST(cur); lptr = GETVAR(curqlevel); while (lptr - GETVAR(curqlevel) < curqlevel->numvar) { cur->totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len); lrptr->len = lptr->len; lrptr->flag = lptr->flag; lrptr->val = ltree_crc32_sz(lptr->start, lptr->len); memcpy(lrptr->name, lptr->start, lptr->len); lptr++; lrptr = LVAR_NEXT(lrptr); } pfree(GETVAR(curqlevel)); if (cur->numvar > 1 || cur->flag != 0) wasbad = true; else if (wasbad == false) (result->firstgood)++; } else wasbad = true; curqlevel = NEXTLEV(curqlevel); cur = LQL_NEXT(cur); } pfree(tmpql); PG_RETURN_POINTER(result); }
Datum tsvectorout(PG_FUNCTION_ARGS) { TSVector out = PG_GETARG_TSVECTOR(0); char *outbuf; int32 i, lenbuf = 0, pp; WordEntry *ptr = ARRPTR(out); char *curbegin, *curin, *curout; lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ; for (i = 0; i < out->size; i++) { lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ; if (ptr[i].haspos) lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i])); } curout = outbuf = (char *) palloc(lenbuf); for (i = 0; i < out->size; i++) { curbegin = curin = STRPTR(out) + ptr->pos; if (i != 0) *curout++ = ' '; *curout++ = '\''; while (curin - curbegin < ptr->len) { int len = pg_mblen(curin); if (t_iseq(curin, '\'')) *curout++ = '\''; else if (t_iseq(curin, '\\')) *curout++ = '\\'; while (len--) *curout++ = *curin++; } *curout++ = '\''; if ((pp = POSDATALEN(out, ptr)) != 0) { WordEntryPos *wptr; *curout++ = ':'; wptr = POSDATAPTR(out, ptr); while (pp) { curout += sprintf(curout, "%d", WEP_GETPOS(*wptr)); switch (WEP_GETWEIGHT(*wptr)) { case 3: *curout++ = 'A'; break; case 2: *curout++ = 'B'; break; case 1: *curout++ = 'C'; break; case 0: default: break; } if (pp > 1) *curout++ = ','; pp--; wptr++; } } ptr++; } *curout = '\0'; PG_FREE_IF_COPY(out, 0); PG_RETURN_CSTRING(outbuf); }
/* * get token from query string */ static int4 gettoken_query(QPRS_STATE *state, int4 *val, int4 *lenval, char **strval, uint16 *flag) { int charlen; for (;;) { charlen = pg_mblen(state->buf); switch (state->state) { case WAITOPERAND: if (charlen == 1 && t_iseq(state->buf, '!')) { (state->buf)++; *val = (int4) '!'; return OPR; } else if (charlen == 1 && t_iseq(state->buf, '(')) { state->count++; (state->buf)++; return OPEN; } else if (ISALNUM(state->buf)) { state->state = INOPERAND; *strval = state->buf; *lenval = charlen; *flag = 0; } else if (!t_isspace(state->buf)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("operand syntax error"))); break; case INOPERAND: if (ISALNUM(state->buf)) { if (*flag) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("modificators syntax error"))); *lenval += charlen; } else if (charlen == 1 && t_iseq(state->buf, '%')) *flag |= LVAR_SUBLEXEME; else if (charlen == 1 && t_iseq(state->buf, '@')) *flag |= LVAR_INCASE; else if (charlen == 1 && t_iseq(state->buf, '*')) *flag |= LVAR_ANYEND; else { state->state = WAITOPERATOR; return VAL; } break; case WAITOPERATOR: if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))) { state->state = WAITOPERAND; *val = (int4) *(state->buf); (state->buf)++; return OPR; } else if (charlen == 1 && t_iseq(state->buf, ')')) { (state->buf)++; state->count--; return (state->count < 0) ? ERR : CLOSE; } else if (*(state->buf) == '\0') return (state->count) ? ERR : END; else if (charlen == 1 && !t_iseq(state->buf, ' ')) return ERR; break; default: return ERR; break; } state->buf += charlen; } return END; }
/* * Get next token from string being parsed. Returns true if successful, * false if end of input string is reached. On success, these output * parameters are filled in: * * *strval pointer to token * *lenval length of *strval * *pos_ptr pointer to a palloc'd array of positions and weights * associated with the token. If the caller is not interested * in the information, NULL can be supplied. Otherwise * the caller is responsible for pfreeing the array. * *poslen number of elements in *pos_ptr * *endptr scan resumption point * * Pass NULL for unwanted output parameters. */ bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr) { int oldstate = 0; char *curpos = state->word; int statecode = WAITWORD; /* * pos is for collecting the comma delimited list of positions followed by * the actual token. */ WordEntryPos *pos = NULL; int npos = 0; /* elements of pos used */ int posalen = 0; /* allocated size of pos */ while (1) { if (statecode == WAITWORD) { if (*(state->prsbuf) == '\0') return false; else if (t_iseq(state->prsbuf, '\'')) statecode = WAITENDCMPLX; else if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) PRSSYNTAXERROR; else if (!t_isspace(state->prsbuf)) { COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); statecode = WAITENDWORD; } } else if (statecode == WAITNEXTCHAR) { if (*(state->prsbuf) == '\0') ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("there is no escaped character: \"%s\"", state->bufstart))); else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); Assert(oldstate != 0); statecode = oldstate; } } else if (statecode == WAITENDWORD) { if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || (state->oprisdelim && ISOPERATOR(state->prsbuf))) { RESIZEPRSBUF; if (curpos == state->word) PRSSYNTAXERROR; *(curpos) = '\0'; RETURN_TOKEN; } else if (t_iseq(state->prsbuf, ':')) { if (curpos == state->word) PRSSYNTAXERROR; *(curpos) = '\0'; if (state->oprisdelim) RETURN_TOKEN; else statecode = INPOSINFO; } else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); } } else if (statecode == WAITENDCMPLX) { if (t_iseq(state->prsbuf, '\'')) { statecode = WAITCHARCMPLX; } else if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDCMPLX; } else if (*(state->prsbuf) == '\0') PRSSYNTAXERROR; else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); } } else if (statecode == WAITCHARCMPLX) { if (t_iseq(state->prsbuf, '\'')) { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); statecode = WAITENDCMPLX; } else { RESIZEPRSBUF; *(curpos) = '\0'; if (curpos == state->word) PRSSYNTAXERROR; if (state->oprisdelim) { /* state->prsbuf+=pg_mblen(state->prsbuf); */ RETURN_TOKEN; } else statecode = WAITPOSINFO; continue; /* recheck current character */ } } else if (statecode == WAITPOSINFO) { if (t_iseq(state->prsbuf, ':')) statecode = INPOSINFO; else RETURN_TOKEN; } else if (statecode == INPOSINFO) { if (t_isdigit(state->prsbuf)) { if (posalen == 0) { posalen = 4; pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen); npos = 0; } else if (npos + 1 >= posalen) { posalen *= 2; pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen); } npos++; WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf))); /* we cannot get here in tsquery, so no need for 2 errmsgs */ if (WEP_GETPOS(pos[npos - 1]) == 0) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("wrong position info in tsvector: \"%s\"", state->bufstart))); WEP_SETWEIGHT(pos[npos - 1], 0); statecode = WAITPOSDELIM; } else PRSSYNTAXERROR; } else if (statecode == WAITPOSDELIM) { if (t_iseq(state->prsbuf, ',')) statecode = INPOSINFO; else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 3); } else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 2); } else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 1); } else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 0); } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0') RETURN_TOKEN; else if (!t_isdigit(state->prsbuf)) PRSSYNTAXERROR; } else /* internal error */ elog(ERROR, "unrecognized state in gettoken_tsvector: %d", statecode); /* get next char */ state->prsbuf += pg_mblen(state->prsbuf); } }