Datum tsvectorin(PG_FUNCTION_ARGS) { char *buf = PG_GETARG_CSTRING(0); TSVectorParseState state; WordEntryIN *arr; int totallen; int arrlen; /* allocated size of arr */ WordEntry *inarr; int len = 0; TSVector in; int i; char *token; int toklen; WordEntryPos *pos; int poslen; char *strbuf; int stroff; /* * Tokens are appended to tmpbuf, cur is a pointer to the end of used * space in tmpbuf. */ char *tmpbuf; char *cur; int buflen = 256; /* allocated size of tmpbuf */ state = init_tsvector_parser(buf, false, false); arrlen = 64; arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); cur = tmpbuf = (char *) palloc(buflen); while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL)) { if (toklen >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long (%ld bytes, max %ld bytes)", (long) toklen, (long) (MAXSTRLEN - 1)))); if (cur - tmpbuf > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)", (long) (cur - tmpbuf), (long) MAXSTRPOS))); /* * Enlarge buffers if needed */ if (len >= arrlen) { arrlen *= 2; arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen); } while ((cur - tmpbuf) + toklen >= buflen) { int dist = cur - tmpbuf; buflen *= 2; tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); cur = tmpbuf + dist; } arr[len].entry.len = toklen; arr[len].entry.pos = cur - tmpbuf; memcpy((void *) cur, (void *) token, toklen); cur += toklen; if (poslen != 0) { arr[len].entry.haspos = 1; arr[len].pos = pos; arr[len].poslen = poslen; } else { arr[len].entry.haspos = 0; arr[len].pos = NULL; arr[len].poslen = 0; } len++; } close_tsvector_parser(state); if (len > 0) len = uniqueentry(arr, len, tmpbuf, &buflen); else buflen = 0; if (buflen > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS))); totallen = CALCDATASIZE(len, 
buflen); in = (TSVector) palloc0(totallen); SET_VARSIZE(in, totallen); in->size = len; inarr = ARRPTR(in); strbuf = STRPTR(in); stroff = 0; for (i = 0; i < len; i++) { memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len); arr[i].entry.pos = stroff; stroff += arr[i].entry.len; if (arr[i].entry.haspos) { if (arr[i].poslen > 0xFFFF) elog(ERROR, "positions array too long"); /* Copy number of positions */ stroff = SHORTALIGN(stroff); *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen; stroff += sizeof(uint16); /* Copy positions */ memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos)); stroff += arr[i].poslen * sizeof(WordEntryPos); pfree(arr[i].pos); } inarr[i] = arr[i].entry; } Assert((strbuf + stroff - (char *) in) == totallen); PG_RETURN_TSVECTOR(in); }
/*
 * Get the next token from a query string.
 *
 * The scanner alternates between two states held in state->state:
 *
 * WAITOPERAND:
 *   '!'  -> returns OPR with *val set to '!'
 *   '('  -> increments state->count and returns OPEN
 *   ':'  -> reports a syntax error ("error at start of operand")
 *   any other non-space character -> switches to WAITOPERATOR and lets
 *        gettoken_tsvector() read the operand; on success *strval and
 *        *lenval describe the word, get_weight() fills *weight and
 *        supplies the new buffer position, and VAL is returned; on
 *        failure a syntax error ("no operand") is reported
 *
 * WAITOPERATOR:
 *   '&' or '|' -> switches to WAITOPERAND and returns OPR with *val set to
 *        the operator character
 *   ')'  -> decrements state->count; returns ERR if it went negative,
 *        CLOSE otherwise
 *   '\0' -> returns ERR if state->count is non-zero, END otherwise
 *   any other non-space character -> returns ERR
 *
 * A space in either state is skipped.  Any other value of state->state
 * yields ERR.
 */
static int4
gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
{
    for (;;)
    {
        if (state->state == WAITOPERAND)
        {
            if (state->buf[0] == '!')
            {
                state->buf++;
                *val = (int4) '!';
                return OPR;
            }
            else if (state->buf[0] == '(')
            {
                state->count++;
                state->buf++;
                return OPEN;
            }
            else if (state->buf[0] == ':')
            {
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("error at start of operand")));
            }
            else if (state->buf[0] != ' ')
            {
                state->valstate.prsbuf = state->buf;
                state->state = WAITOPERATOR;

                if (!gettoken_tsvector(&(state->valstate)))
                {
                    ereport(ERROR,
                            (errcode(ERRCODE_SYNTAX_ERROR),
                             errmsg("no operand")));
                }
                else
                {
                    *strval = state->valstate.word;
                    *lenval = state->valstate.curpos - state->valstate.word;
                    state->buf = get_weight(state->valstate.prsbuf, weight);
                    return VAL;
                }
            }
        }
        else if (state->state == WAITOPERATOR)
        {
            if (state->buf[0] == '&' || state->buf[0] == '|')
            {
                state->state = WAITOPERAND;
                *val = (int4) state->buf[0];
                state->buf++;
                return OPR;
            }
            else if (state->buf[0] == ')')
            {
                state->buf++;
                state->count--;
                if (state->count < 0)
                    return ERR;
                return CLOSE;
            }
            else if (state->buf[0] == '\0')
            {
                if (state->count)
                    return ERR;
                return END;
            }
            else if (state->buf[0] != ' ')
                return ERR;
        }
        else
            return ERR;

        /* Nothing was returned for this character: step past it. */
        state->buf++;
    }

    return END;
}
Datum tsvector_in(PG_FUNCTION_ARGS) { char *buf = PG_GETARG_CSTRING(0); TI_IN_STATE state; WordEntryIN *arr; WordEntry *inarr; int4 len = 0, totallen = 64; tsvector *in; char *tmpbuf, *cur; int4 i, buflen = 256; state.prsbuf = buf; state.len = 32; state.word = (char *) palloc(state.len); state.oprisdelim = false; arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); cur = tmpbuf = (char *) palloc(buflen); while (gettoken_tsvector(&state)) { if (len >= totallen) { totallen *= 2; arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); } while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) { int4 dist = cur - tmpbuf; buflen *= 2; tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); cur = tmpbuf + dist; } if (state.curpos - state.word >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("word is too long"))); arr[len].entry.len = state.curpos - state.word; if (cur - tmpbuf > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("too long value"))); arr[len].entry.pos = cur - tmpbuf; memcpy((void *) cur, (void *) state.word, arr[len].entry.len); cur += arr[len].entry.len; if (state.alen) { arr[len].entry.haspos = 1; arr[len].pos = state.pos; } else arr[len].entry.haspos = 0; len++; } pfree(state.word); if (len > 0) len = uniqueentry(arr, len, tmpbuf, &buflen); else buflen=0; totallen = CALCDATASIZE(len, buflen); in = (tsvector *) palloc(totallen); memset(in, 0, totallen); in->len = totallen; in->size = len; cur = STRPTR(in); inarr = ARRPTR(in); for (i = 0; i < len; i++) { memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); arr[i].entry.pos = cur - STRPTR(in); cur += SHORTALIGN(arr[i].entry.len); if (arr[i].entry.haspos) { memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos)); cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos); pfree(arr[i].pos); } memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry)); } pfree(tmpbuf); pfree(arr); 
PG_RETURN_POINTER(in); }