/*
 * Check whether a varlena can be interpreted as an old-format hstore.
 *
 * Returns:
 *	0 = not valid
 *	1 = valid, but the varlena is longer than the data needs ("slop")
 *	2 = exactly valid
 */
static int
hstoreValidOldFormat(HStore *hs)
{
	int			nentries = hs->size_;
	HOldEntry  *old = (HOldEntry *) ARRPTR(hs);
	int			expected_pos = 0;
	int			total;
	int			idx;

	/* a value carrying the new-version flag cannot be old-format */
	if (hs->size_ & HS_FLAG_NEWVERSION)
		return 0;

	/* New format uses an HEntry for key and another for value */
	StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
					 "old hstore format is not upward-compatible");

	/* an empty hstore is trivially valid */
	if (nentries == 0)
		return 2;

	/* reject implausible counts and arrays that do not fit the varlena */
	if (nentries > 0xFFFFFFF)
		return 0;
	if (CALCDATASIZE(nentries, 0) > VARSIZE(hs))
		return 0;

	/* the first entry must start at offset zero */
	if (old[0].pos != 0)
		return 0;

	/* key length must be nondecreasing */
	idx = 1;
	while (idx < nentries)
	{
		if (old[idx].keylen < old[idx - 1].keylen)
			return 0;
		idx++;
	}

	/*
	 * Every entry must begin exactly where the previous one ended; a null
	 * value contributes no data bytes.
	 */
	for (idx = 0; idx < nentries; idx++)
	{
		int			datalen = old[idx].keylen;

		if (!old[idx].valisnull)
			datalen += old[idx].vallen;

		if (old[idx].pos != expected_pos)
			return 0;
		expected_pos += datalen;
	}

	total = CALCDATASIZE(nentries, expected_pos);

	if (total > VARSIZE(hs))
		return 0;

	return (total == VARSIZE(hs)) ? 2 : 1;
}
Datum to_tsvector(PG_FUNCTION_ARGS) { text *in = PG_GETARG_TEXT_P(1); PRSTEXT prs; tsvector *out = NULL; TSCfgInfo *cfg = findcfg(PG_GETARG_INT32(0)); prs.lenwords = 32; prs.curwords = 0; prs.pos = 0; prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); PG_FREE_IF_COPY(in, 1); if (prs.curwords) out = makevalue(&prs); else { pfree(prs.words); out = palloc(CALCDATASIZE(0, 0)); out->len = CALCDATASIZE(0, 0); out->size = 0; } PG_RETURN_POINTER(out); }
Datum to_tsvector_byid(PG_FUNCTION_ARGS) { Oid cfgId = PG_GETARG_OID(0); text *in = PG_GETARG_TEXT_P(1); ParsedText prs; TSVector out; prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6; /* just estimation of * word's number */ if (prs.lenwords == 0) prs.lenwords = 2; prs.curwords = 0; prs.pos = 0; prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); PG_FREE_IF_COPY(in, 1); if (prs.curwords) out = make_tsvector(&prs); else { pfree(prs.words); out = palloc(CALCDATASIZE(0, 0)); SET_VARSIZE(out, CALCDATASIZE(0, 0)); out->size = 0; } PG_RETURN_POINTER(out); }
Datum hstore_in(PG_FUNCTION_ARGS) { HSParser state; int4 len, buflen, i; HStore *out; HEntry *entries; char *ptr; state.begin = PG_GETARG_CSTRING(0); parse_hstore(&state); if (state.pcur == 0) { freeHSParse(&state); len = CALCDATASIZE(0, 0); out = palloc(len); out->len = len; out->size = 0; PG_RETURN_POINTER(out); } state.pcur = uniquePairs(state.pairs, state.pcur, &buflen); len = CALCDATASIZE(state.pcur, buflen); out = palloc(len); out->len = len; out->size = state.pcur; entries = ARRPTR(out); ptr = STRPTR(out); for (i = 0; i < out->size; i++) { entries[i].keylen = state.pairs[i].keylen; entries[i].pos = ptr - STRPTR(out); memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen); ptr += entries[i].keylen; entries[i].valisnull = state.pairs[i].isnull; if (entries[i].valisnull) entries[i].vallen = 4; /* null */ else { entries[i].vallen = state.pairs[i].vallen; memcpy(ptr, state.pairs[i].val, state.pairs[i].vallen); ptr += entries[i].vallen; } } freeHSParse(&state); PG_RETURN_POINTER(out); }
Datum tconvert(PG_FUNCTION_ARGS) { text *key; text *val = NULL; int len; HStore *out; if (PG_ARGISNULL(0)) PG_RETURN_NULL(); key = PG_GETARG_TEXT_P(0); if (PG_ARGISNULL(1)) len = CALCDATASIZE(1, VARSIZE(key)); else { val = PG_GETARG_TEXT_P(1); len = CALCDATASIZE(1, VARSIZE(key) + VARSIZE(val) - 2 * VARHDRSZ); } out = palloc(len); SET_VARSIZE(out, len); out->size = 1; ARRPTR(out)->keylen = hstoreCheckKeyLen(VARSIZE(key) - VARHDRSZ); if (PG_ARGISNULL(1)) { ARRPTR(out)->vallen = 0; ARRPTR(out)->valisnull = true; } else { ARRPTR(out)->vallen = hstoreCheckValLen(VARSIZE(val) - VARHDRSZ); ARRPTR(out)->valisnull = false; } ARRPTR(out)->pos = 0; memcpy(STRPTR(out), VARDATA(key), ARRPTR(out)->keylen); if (!PG_ARGISNULL(1)) { memcpy(STRPTR(out) + ARRPTR(out)->keylen, VARDATA(val), ARRPTR(out)->vallen); PG_FREE_IF_COPY(val, 1); } PG_FREE_IF_COPY(key, 0); PG_RETURN_POINTER(out); }
Datum tsvector_strip(PG_FUNCTION_ARGS) { TSVector in = PG_GETARG_TSVECTOR(0); TSVector out; int i, len = 0; WordEntry *arrin = ARRPTR(in), *arrout; char *cur; for (i = 0; i < in->size; i++) len += arrin[i].len; len = CALCDATASIZE(in->size, len); out = (TSVector) palloc0(len); SET_VARSIZE(out, len); out->size = in->size; arrout = ARRPTR(out); cur = STRPTR(out); for (i = 0; i < in->size; i++) { memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len); arrout[i].haspos = 0; arrout[i].len = arrin[i].len; arrout[i].pos = cur - STRPTR(out); cur += arrout[i].len; } PG_FREE_IF_COPY(in, 0); PG_RETURN_POINTER(out); }
/*
 * Build an HStore from an array of pairs.
 *
 * pairs  - the pairs to store
 * pcount - number of pairs
 * buflen - total number of key/value data bytes needed
 *
 * Returns a freshly palloc'd HStore.  With pcount == 0 only the header
 * (size and count) is initialized.
 */
HStore *
hstorePairs(Pairs *pairs, int4 pcount, int4 buflen)
{
	int4		total = CALCDATASIZE(pcount, buflen);
	HStore	   *result = palloc(total);

	SET_VARSIZE(result, total);
	HS_SETCOUNT(result, pcount);

	if (pcount != 0)
	{
		HEntry	   *ent = ARRPTR(result);
		char	   *base = STRPTR(result);
		char	   *wptr = base;
		int4		n;

		for (n = 0; n < pcount; n++)
			HS_ADDITEM(ent, base, wptr, pairs[n]);

		HS_FINALIZE(result, pcount, base, wptr);
	}

	return result;
}
Datum hstore_out(PG_FUNCTION_ARGS) { HStore *in = PG_GETARG_HS(0); int buflen, i; char *out, *ptr; char *base = STRPTR(in); HEntry *entries = ARRPTR(in); if (in->size == 0) { out = palloc(1); *out = '\0'; PG_FREE_IF_COPY(in, 0); PG_RETURN_CSTRING(out); } buflen = (4 /* " */ + 2 /* => */ + 2 /* , */ ) * in->size + 2 /* esc */ * (in->len - CALCDATASIZE(in->size, 0)); out = ptr = palloc(buflen); for (i = 0; i < in->size; i++) { *ptr++ = '"'; ptr = cpw(ptr, base + entries[i].pos, entries[i].keylen); *ptr++ = '"'; *ptr++ = '='; *ptr++ = '>'; if (entries[i].valisnull) { *ptr++ = 'N'; *ptr++ = 'U'; *ptr++ = 'L'; *ptr++ = 'L'; } else { *ptr++ = '"'; ptr = cpw(ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen); *ptr++ = '"'; } if (i + 1 != in->size) { *ptr++ = ','; *ptr++ = ' '; } } *ptr = '\0'; PG_FREE_IF_COPY(in, 0); PG_RETURN_CSTRING(out); }
/*
 * Check whether a varlena can be interpreted as a new-format hstore.
 *
 * Returns:
 *	0 = not valid
 *	1 = valid, but the varlena is longer than the data needs ("slop")
 *	2 = exactly valid
 *
 * A value that carries HS_FLAG_NEWVERSION is reported as 2 without any
 * further inspection.
 */
static int
hstoreValidNewFormat(HStore *hs)
{
	int			npairs = HS_COUNT(hs);
	HEntry	   *ent = ARRPTR(hs);
	int			datalen = (npairs) ? HSE_ENDPOS(ent[2 * (npairs) - 1]) : 0;
	int			needed = CALCDATASIZE(npairs, datalen);
	int			k;

	if (hs->size_ & HS_FLAG_NEWVERSION)
		return 2;

	if (npairs == 0)
		return 2;

	/* the very first entry must carry the "first" marker */
	if (!HSE_ISFIRST(ent[0]))
		return 0;

	if (needed > VARSIZE(hs))
		return 0;

	/*
	 * No later entry may carry the "first" marker, and end positions must be
	 * nondecreasing across all 2 * npairs entries.
	 */
	for (k = 1; k < 2 * npairs; k++)
	{
		if (HSE_ISFIRST(ent[k]))
			return 0;
		if (HSE_ENDPOS(ent[k]) < HSE_ENDPOS(ent[k - 1]))
			return 0;
	}

	/* key length must be nondecreasing and keys must not be null */
	for (k = 1; k < npairs; k++)
	{
		if (HS_KEYLEN(ent, k) < HS_KEYLEN(ent, k - 1))
			return 0;
		if (HSE_ISNULL(ent[2 * k]))
			return 0;
	}

	return (needed != VARSIZE(hs)) ? 1 : 2;
}
Datum hstore_hash(PG_FUNCTION_ARGS) { HStore *hs = PG_GETARG_HS(0); Datum hval = hash_any((unsigned char *) VARDATA(hs), VARSIZE(hs) - VARHDRSZ); /* * this is the only place in the code that cares whether the overall * varlena size exactly matches the true data size; this assertion should * be maintained by all the other code, but we make it explicit here. */ Assert(VARSIZE(hs) == (HS_COUNT(hs) != 0 ? CALCDATASIZE(HS_COUNT(hs), HSE_ENDPOS(ARRPTR(hs)[2 * HS_COUNT(hs) - 1])) : HSHRDSIZE)); PG_FREE_IF_COPY(hs, 0); PG_RETURN_DATUM(hval); }
/*
 * Trigger
 *
 * tsearch2(tsvector_field, text_field1, ...)
 *
 * Row-level trigger function that rebuilds a tsvector column from one or
 * more character columns, using the configuration returned by
 * findcfg(get_currcfg()).
 *
 * It raises an error unless called by the trigger manager, for a row-level
 * event that is not an AFTER event, on INSERT or UPDATE, with at least two
 * trigger arguments.  tgargs[0] names the target tsvector column; each
 * further argument is either a column name or, if no such column exists,
 * a name passed to findFunc().
 *
 * Once a function has been found, funcoid stays set for the rest of the
 * loop, so it is applied to the value of every column processed afterwards.
 * Columns that are not text/varchar/bpchar produce a WARNING and are
 * skipped; null values are skipped silently.
 *
 * Returns the modified tuple; if no words were parsed, the tsvector column
 * is set to an empty (header-only) value.
 */
Datum tsearch2(PG_FUNCTION_ARGS) { TriggerData *trigdata; Trigger *trigger; Relation rel; HeapTuple rettuple = NULL; TSCfgInfo *cfg = findcfg(get_currcfg()); int numidxattr, i; PRSTEXT prs; Datum datum = (Datum) 0; Oid funcoid = InvalidOid; if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */ elog(ERROR, "TSearch: Not fired by trigger manager"); trigdata = (TriggerData *) fcinfo->context; if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event)) /* internal error */ elog(ERROR, "TSearch: Can't process STATEMENT events"); if (TRIGGER_FIRED_AFTER(trigdata->tg_event)) /* internal error */ elog(ERROR, "TSearch: Must be fired BEFORE event"); if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) rettuple = trigdata->tg_trigtuple; else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) rettuple = trigdata->tg_newtuple; else /* internal error */ elog(ERROR, "TSearch: Unknown event"); trigger = trigdata->tg_trigger; rel = trigdata->tg_relation; if (trigger->tgnargs < 2) /* internal error */ elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)"); numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); if (numidxattr == SPI_ERROR_NOATTRIBUTE) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("tsvector column \"%s\" does not exist", trigger->tgargs[0]))); prs.lenwords = 32; prs.curwords = 0; prs.pos = 0; prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); /* find all words in indexable column */ for (i = 1; i < trigger->tgnargs; i++) { int numattr; Oid oidtype; Datum txt_toasted; bool isnull; text *txt; numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); if (numattr == SPI_ERROR_NOATTRIBUTE) { funcoid = findFunc(trigger->tgargs[i]); if (funcoid == InvalidOid) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("could not find function or field \"%s\"", trigger->tgargs[i]))); continue; } oidtype = SPI_gettypeid(rel->rd_att, numattr); /* We assume char() and varchar() are binary-equivalent to text */ if (!(oidtype == TEXTOID || 
oidtype == VARCHAROID || oidtype == BPCHAROID)) { elog(WARNING, "TSearch: '%s' is not of character type", trigger->tgargs[i]); continue; } txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); if (isnull) continue; if (funcoid != InvalidOid) { text *txttmp = (text *) DatumGetPointer(OidFunctionCall1( funcoid, PointerGetDatum(txt_toasted) )); txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp))); if (txt == txttmp) txt_toasted = PointerGetDatum(txt); } else txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted))); parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); if (txt != (text *) DatumGetPointer(txt_toasted)) pfree(txt); } /* make tsvector value */ if (prs.curwords) { datum = PointerGetDatum(makevalue(&prs)); rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, &datum, NULL); pfree(DatumGetPointer(datum)); } else { tsvector *out = palloc(CALCDATASIZE(0, 0)); out->len = CALCDATASIZE(0, 0); out->size = 0; datum = PointerGetDatum(out); pfree(prs.words); rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, &datum, NULL); } if (rettuple == NULL) /* internal error */ elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result); return PointerGetDatum(rettuple); }
/*
 * Common implementation of the tsvector update triggers.
 *
 * Trigger arguments: tgargs[0] names the target tsvector column, tgargs[1]
 * identifies the text search configuration, and tgargs[2..] name the
 * character columns to index.  At least three arguments are required.
 *
 * config_column selects how tgargs[1] is interpreted:
 *	true  - it names a regconfig column of the row; a null there is an error
 *	false - it is a configuration name, which must be schema-qualified
 *
 * Raises an error unless called by the trigger manager as a BEFORE row-level
 * trigger on INSERT or UPDATE, and when any named column is missing or has
 * an unsuitable type.  Null text columns are skipped.
 *
 * Returns the modified tuple; if no words were parsed, the tsvector column
 * is set to an empty (header-only) value.
 */
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column) { TriggerData *trigdata; Trigger *trigger; Relation rel; HeapTuple rettuple = NULL; int tsvector_attr_num, i; ParsedText prs; Datum datum; bool isnull; text *txt; Oid cfgId; /* Check call context */ if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */ elog(ERROR, "tsvector_update_trigger: not fired by trigger manager"); trigdata = (TriggerData *) fcinfo->context; if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) elog(ERROR, "tsvector_update_trigger: must be fired for row"); if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event)) elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event"); if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) rettuple = trigdata->tg_trigtuple; else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) rettuple = trigdata->tg_newtuple; else elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE"); trigger = trigdata->tg_trigger; rel = trigdata->tg_relation; if (trigger->tgnargs < 3) elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)"); /* Find the target tsvector column */ tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("tsvector column \"%s\" does not exist", trigger->tgargs[0]))); if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num), TSVECTOROID)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("column \"%s\" is not of tsvector type", trigger->tgargs[0]))); /* Find the configuration to use */ if (config_column) { int config_attr_num; config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]); if (config_attr_num == SPI_ERROR_NOATTRIBUTE) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("configuration column \"%s\" does not exist", trigger->tgargs[1]))); if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num), REGCONFIGOID)) 
ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("column \"%s\" is not of regconfig type", trigger->tgargs[1]))); datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull); if (isnull) ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("configuration column \"%s\" must not be null", trigger->tgargs[1]))); cfgId = DatumGetObjectId(datum); } else { List *names; names = stringToQualifiedNameList(trigger->tgargs[1]); /* require a schema so that results are not search path dependent */ if (list_length(names) < 2) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("text search configuration name \"%s\" must be schema-qualified", trigger->tgargs[1]))); cfgId = get_ts_config_oid(names, false); } /* initialize parse state */ prs.lenwords = 32; prs.curwords = 0; prs.pos = 0; prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); /* find all words in indexable column(s) */ for (i = 2; i < trigger->tgnargs; i++) { int numattr; numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); if (numattr == SPI_ERROR_NOATTRIBUTE) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("column \"%s\" does not exist", trigger->tgargs[i]))); if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("column \"%s\" is not of a character type", trigger->tgargs[i]))); datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); if (isnull) continue; txt = DatumGetTextP(datum); parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); if (txt != (text *) DatumGetPointer(datum)) pfree(txt); } /* make tsvector value */ if (prs.curwords) { datum = PointerGetDatum(make_tsvector(&prs)); rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num, &datum, NULL); pfree(DatumGetPointer(datum)); } else { TSVector out = palloc(CALCDATASIZE(0, 0)); SET_VARSIZE(out, CALCDATASIZE(0, 0)); out->size = 0; datum = PointerGetDatum(out); rettuple = 
SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num, &datum, NULL); pfree(prs.words); } if (rettuple == NULL) /* internal error */ elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple", SPI_result); return PointerGetDatum(rettuple); }
Datum tsvector_concat(PG_FUNCTION_ARGS) { TSVector in1 = PG_GETARG_TSVECTOR(0); TSVector in2 = PG_GETARG_TSVECTOR(1); TSVector out; WordEntry *ptr; WordEntry *ptr1, *ptr2; WordEntryPos *p; int maxpos = 0, i, j, i1, i2, dataoff; char *data, *data1, *data2; ptr = ARRPTR(in1); i = in1->size; while (i--) { if ((j = POSDATALEN(in1, ptr)) != 0) { p = POSDATAPTR(in1, ptr); while (j--) { if (WEP_GETPOS(*p) > maxpos) maxpos = WEP_GETPOS(*p); p++; } } ptr++; } ptr1 = ARRPTR(in1); ptr2 = ARRPTR(in2); data1 = STRPTR(in1); data2 = STRPTR(in2); i1 = in1->size; i2 = in2->size; /* conservative estimate of space needed */ out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2)); SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2)); out->size = in1->size + in2->size; ptr = ARRPTR(out); data = STRPTR(out); dataoff = 0; while (i1 && i2) { int cmp = compareEntry(data1, ptr1, data2, ptr2); if (cmp < 0) { /* in1 first */ ptr->haspos = ptr1->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } ptr++; ptr1++; i1--; } else if (cmp > 0) { /* in2 first */ ptr->haspos = ptr2->haspos; ptr->len = ptr2->len; memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); ptr->pos = dataoff; dataoff += ptr2->len; if (ptr->haspos) { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } ptr++; ptr2++; i2--; } else { ptr->haspos = ptr1->haspos | ptr2->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { if (ptr1->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, 
ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); if (ptr2->haspos) dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); } else /* must have ptr2->haspos */ { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } } ptr++; ptr1++; ptr2++; i1--; i2--; } } while (i1) { ptr->haspos = ptr1->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } ptr++; ptr1++; i1--; } while (i2) { ptr->haspos = ptr2->haspos; ptr->len = ptr2->len; memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); ptr->pos = dataoff; dataoff += ptr2->len; if (ptr->haspos) { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } ptr++; ptr2++; i2--; } /* * Instead of checking each offset individually, we check for overflow of * pos fields once at the end. */ if (dataoff > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS))); out->size = ptr - ARRPTR(out); SET_VARSIZE(out, CALCDATASIZE(out->size, dataoff)); if (data != STRPTR(out)) memmove(STRPTR(out), data, dataoff); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); PG_RETURN_POINTER(out); }
/*
 * tsvectorin(cstring)
 *
 * Input function for tsvector.  Tokens returned by gettoken_tsvector() are
 * appended to a growing scratch buffer (tmpbuf) while their entries and
 * position arrays are collected in arr.  After uniqueentry() has returned
 * the final entry count and data length, the result is assembled: each
 * lexeme is copied into the data area and, for entries with positions, a
 * SHORTALIGNed uint16 count followed by the WordEntryPos array.
 *
 * Raises an error when a single word is MAXSTRLEN bytes or longer, when the
 * accumulated string data exceeds MAXSTRPOS, or when an entry has more than
 * 0xFFFF positions.
 */
Datum tsvectorin(PG_FUNCTION_ARGS) { char *buf = PG_GETARG_CSTRING(0); TSVectorParseState state; WordEntryIN *arr; int totallen; int arrlen; /* allocated size of arr */ WordEntry *inarr; int len = 0; TSVector in; int i; char *token; int toklen; WordEntryPos *pos; int poslen; char *strbuf; int stroff; /* * Tokens are appended to tmpbuf, cur is a pointer to the end of used * space in tmpbuf. */ char *tmpbuf; char *cur; int buflen = 256; /* allocated size of tmpbuf */ state = init_tsvector_parser(buf, false, false); arrlen = 64; arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); cur = tmpbuf = (char *) palloc(buflen); while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL)) { if (toklen >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long (%ld bytes, max %ld bytes)", (long) toklen, (long) (MAXSTRLEN - 1)))); if (cur - tmpbuf > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)", (long) (cur - tmpbuf), (long) MAXSTRPOS))); /* * Enlarge buffers if needed */ if (len >= arrlen) { arrlen *= 2; arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen); } while ((cur - tmpbuf) + toklen >= buflen) { int dist = cur - tmpbuf; buflen *= 2; tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); cur = tmpbuf + dist; } arr[len].entry.len = toklen; arr[len].entry.pos = cur - tmpbuf; memcpy((void *) cur, (void *) token, toklen); cur += toklen; if (poslen != 0) { arr[len].entry.haspos = 1; arr[len].pos = pos; arr[len].poslen = poslen; } else { arr[len].entry.haspos = 0; arr[len].pos = NULL; arr[len].poslen = 0; } len++; } close_tsvector_parser(state); if (len > 0) len = uniqueentry(arr, len, tmpbuf, &buflen); else buflen = 0; if (buflen > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS))); totallen = CALCDATASIZE(len, 
buflen); in = (TSVector) palloc0(totallen); SET_VARSIZE(in, totallen); in->size = len; inarr = ARRPTR(in); strbuf = STRPTR(in); stroff = 0; for (i = 0; i < len; i++) { memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len); arr[i].entry.pos = stroff; stroff += arr[i].entry.len; if (arr[i].entry.haspos) { if (arr[i].poslen > 0xFFFF) elog(ERROR, "positions array too long"); /* Copy number of positions */ stroff = SHORTALIGN(stroff); *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen; stroff += sizeof(uint16); /* Copy positions */ memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos)); stroff += arr[i].poslen * sizeof(WordEntryPos); pfree(arr[i].pos); } inarr[i] = arr[i].entry; } Assert((strbuf + stroff - (char *) in) == totallen); PG_RETURN_TSVECTOR(in); }
/*
 * make value of tsvector, given parsed text
 *
 * The word list is first passed through uniqueWORD(), which returns the new
 * word count.  A first pass sums the data area size (lexeme bytes plus, for
 * words with alen set, a SHORTALIGNed uint16 count and the position array);
 * an error is raised if that exceeds MAXSTRPOS.  A second pass fills the
 * entries, writing every position with weight 0.
 *
 * pos.apos[0] holds the number of positions and pos.apos[1..] the positions
 * themselves.  Each word's string and position array, and finally
 * prs->words itself, are pfree'd here, so prs must not be reused by the
 * caller afterwards.
 */
TSVector make_tsvector(ParsedText *prs) { int i, j, lenstr = 0, totallen; TSVector in; WordEntry *ptr; char *str; int stroff; prs->curwords = uniqueWORD(prs->words, prs->curwords); for (i = 0; i < prs->curwords; i++) { lenstr += prs->words[i].len; if (prs->words[i].alen) { lenstr = SHORTALIGN(lenstr); lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); } } if (lenstr > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS))); totallen = CALCDATASIZE(prs->curwords, lenstr); in = (TSVector) palloc0(totallen); SET_VARSIZE(in, totallen); in->size = prs->curwords; ptr = ARRPTR(in); str = STRPTR(in); stroff = 0; for (i = 0; i < prs->curwords; i++) { ptr->len = prs->words[i].len; ptr->pos = stroff; memcpy(str + stroff, prs->words[i].word, prs->words[i].len); stroff += prs->words[i].len; pfree(prs->words[i].word); if (prs->words[i].alen) { int k = prs->words[i].pos.apos[0]; WordEntryPos *wptr; if (k > 0xFFFF) elog(ERROR, "positions array too long"); ptr->haspos = 1; stroff = SHORTALIGN(stroff); *(uint16 *) (str + stroff) = (uint16) k; wptr = POSDATAPTR(in, ptr); for (j = 0; j < k; j++) { WEP_SETWEIGHT(wptr[j], 0); WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]); } stroff += sizeof(uint16) + k * sizeof(WordEntryPos); pfree(prs->words[i].pos.apos); } else ptr->haspos = 0; ptr++; } pfree(prs->words); return in; }
/*
 * hs_concat(hstore, hstore)
 *
 * Merge two hstores into a new one.  Both entry arrays are walked in
 * parallel; keys are ordered first by length and, for equal lengths, by
 * strncmp() over the key bytes.  When both inputs hold the same key, the
 * entry from s2 is copied and the one from s1 is dropped.
 *
 * The result is allocated at VARSIZE(s1) + VARSIZE(s2).  Only when
 * duplicates were dropped (fewer entries written than s1->size + s2->size)
 * is the data area moved down behind the shorter entry array and the varlena
 * size recomputed.
 */
Datum hs_concat(PG_FUNCTION_ARGS) { HStore *s1 = PG_GETARG_HS(0); HStore *s2 = PG_GETARG_HS(1); HStore *out = palloc(VARSIZE(s1) + VARSIZE(s2)); char *ps1, *ps2, *pd; HEntry *es1, *es2, *ed; SET_VARSIZE(out, VARSIZE(s1) + VARSIZE(s2)); out->size = s1->size + s2->size; ps1 = STRPTR(s1); ps2 = STRPTR(s2); pd = STRPTR(out); es1 = ARRPTR(s1); es2 = ARRPTR(s2); ed = ARRPTR(out); while (es1 - ARRPTR(s1) < s1->size && es2 - ARRPTR(s2) < s2->size) { int difference; if (es1->keylen == es2->keylen) difference = strncmp(ps1, ps2, es1->keylen); else difference = (es1->keylen > es2->keylen) ? 1 : -1; if (difference == 0) { memcpy(ed, es2, sizeof(HEntry)); memcpy(pd, ps2, es2->keylen + ((es2->valisnull) ? 0 : es2->vallen)); ed->pos = pd - STRPTR(out); pd += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen); ed++; ps1 += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen); es1++; ps2 += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen); es2++; } else if (difference > 0) { memcpy(ed, es2, sizeof(HEntry)); memcpy(pd, ps2, es2->keylen + ((es2->valisnull) ? 0 : es2->vallen)); ed->pos = pd - STRPTR(out); pd += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen); ed++; ps2 += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen); es2++; } else { memcpy(ed, es1, sizeof(HEntry)); memcpy(pd, ps1, es1->keylen + ((es1->valisnull) ? 0 : es1->vallen)); ed->pos = pd - STRPTR(out); pd += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen); ed++; ps1 += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen); es1++; } } while (es1 - ARRPTR(s1) < s1->size) { memcpy(ed, es1, sizeof(HEntry)); memcpy(pd, ps1, es1->keylen + ((es1->valisnull) ? 0 : es1->vallen)); ed->pos = pd - STRPTR(out); pd += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen); ed++; ps1 += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen); es1++; } while (es2 - ARRPTR(s2) < s2->size) { memcpy(ed, es2, sizeof(HEntry)); memcpy(pd, ps2, es2->keylen + ((es2->valisnull) ? 
0 : es2->vallen)); ed->pos = pd - STRPTR(out); pd += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen); ed++; ps2 += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen); es2++; } if (ed - ARRPTR(out) != out->size) { int buflen = pd - STRPTR(out); pd = STRPTR(out); out->size = ed - ARRPTR(out); memmove(STRPTR(out), pd, buflen); SET_VARSIZE(out, CALCDATASIZE(out->size, buflen)); } PG_FREE_IF_COPY(s1, 0); PG_FREE_IF_COPY(s2, 1); PG_RETURN_POINTER(out); }
/*
 * make value of tsvector
 *
 * The word list is first passed through uniqueWORD(), which returns the new
 * word count.  In this layout every lexeme occupies SHORTALIGN(len) bytes of
 * the data area; words with alen set are followed by a uint16 position count
 * and the position array, written with weight 0.
 *
 * pos.apos[0] holds the number of positions and pos.apos[1..] the positions
 * themselves.  An error is raised if a lexeme would start beyond MAXSTRPOS.
 * Each word's string and position array, and finally prs->words itself, are
 * pfree'd here, so prs must not be reused by the caller afterwards.
 */
static tsvector * makevalue(PRSTEXT * prs) { int4 i, j, lenstr = 0, totallen; tsvector *in; WordEntry *ptr; char *str, *cur; prs->curwords = uniqueWORD(prs->words, prs->curwords); for (i = 0; i < prs->curwords; i++) { lenstr += SHORTALIGN(prs->words[i].len); if (prs->words[i].alen) lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); } totallen = CALCDATASIZE(prs->curwords, lenstr); in = (tsvector *) palloc(totallen); memset(in, 0, totallen); in->len = totallen; in->size = prs->curwords; ptr = ARRPTR(in); cur = str = STRPTR(in); for (i = 0; i < prs->curwords; i++) { ptr->len = prs->words[i].len; if (cur - str > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("value is too big"))); ptr->pos = cur - str; memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len); pfree(prs->words[i].word); cur += SHORTALIGN(prs->words[i].len); if (prs->words[i].alen) { WordEntryPos *wptr; ptr->haspos = 1; *(uint16 *) cur = prs->words[i].pos.apos[0]; wptr = POSDATAPTR(in, ptr); for (j = 0; j < *(uint16 *) cur; j++) { wptr[j].weight = 0; wptr[j].pos = prs->words[i].pos.apos[j + 1]; } cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); pfree(prs->words[i].pos.apos); } else ptr->haspos = 0; ptr++; } pfree(prs->words); return in; }
/*
 * tsvector_concat(tsvector, tsvector)
 *
 * Merge two tsvectors into one.  Entries are merged in the order given by
 * compareEntry(); an entry present in both inputs appears once in the
 * output.  Position data taken from in2 is added through add_pos() with the
 * largest position found in in1 passed as the offset.
 *
 * The output is allocated from a conservative size estimate and trimmed to
 * its real size at the end.  Raises an error if the resulting data area
 * exceeds MAXSTRPOS bytes.
 */
Datum tsvector_concat(PG_FUNCTION_ARGS) { TSVector in1 = PG_GETARG_TSVECTOR(0); TSVector in2 = PG_GETARG_TSVECTOR(1); TSVector out; WordEntry *ptr; WordEntry *ptr1, *ptr2; WordEntryPos *p; int maxpos = 0, i, j, i1, i2, dataoff, output_bytes, output_size; char *data, *data1, *data2; /* Get max position in in1; we'll need this to offset in2's positions */ ptr = ARRPTR(in1); i = in1->size; while (i--) { if ((j = POSDATALEN(in1, ptr)) != 0) { p = POSDATAPTR(in1, ptr); while (j--) { if (WEP_GETPOS(*p) > maxpos) maxpos = WEP_GETPOS(*p); p++; } } ptr++; } ptr1 = ARRPTR(in1); ptr2 = ARRPTR(in2); data1 = STRPTR(in1); data2 = STRPTR(in2); i1 = in1->size; i2 = in2->size; /* * Conservative estimate of space needed. We might need all the data in * both inputs, and conceivably add a pad byte before position data for * each item where there was none before. */ output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2; out = (TSVector) palloc0(output_bytes); SET_VARSIZE(out, output_bytes); /* * We must make out->size valid so that STRPTR(out) is sensible. We'll * collapse out any unused space at the end. 
*/ out->size = in1->size + in2->size; ptr = ARRPTR(out); data = STRPTR(out); dataoff = 0; while (i1 && i2) { int cmp = compareEntry(data1, ptr1, data2, ptr2); if (cmp < 0) { /* in1 first */ ptr->haspos = ptr1->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } ptr++; ptr1++; i1--; } else if (cmp > 0) { /* in2 first */ ptr->haspos = ptr2->haspos; ptr->len = ptr2->len; memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); ptr->pos = dataoff; dataoff += ptr2->len; if (ptr->haspos) { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } ptr++; ptr2++; i2--; } else { ptr->haspos = ptr1->haspos | ptr2->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { if (ptr1->haspos) { dataoff = SHORTALIGN(dataoff); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); if (ptr2->haspos) dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); } else /* must have ptr2->haspos */ { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } } ptr++; ptr1++; ptr2++; i1--; i2--; } } while (i1) { ptr->haspos = ptr1->haspos; ptr->len = ptr1->len; memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len); ptr->pos = dataoff; dataoff += ptr1->len; if (ptr->haspos) { dataoff = SHORTALIGN(dataoff); 
memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } ptr++; ptr1++; i1--; } while (i2) { ptr->haspos = ptr2->haspos; ptr->len = ptr2->len; memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len); ptr->pos = dataoff; dataoff += ptr2->len; if (ptr->haspos) { int addlen = add_pos(in2, ptr2, out, ptr, maxpos); if (addlen == 0) ptr->haspos = 0; else { dataoff = SHORTALIGN(dataoff); dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16); } } ptr++; ptr2++; i2--; } /* * Instead of checking each offset individually, we check for overflow of * pos fields once at the end. */ if (dataoff > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS))); /* * Adjust sizes (asserting that we didn't overrun the original estimates) * and collapse out any unused array entries. */ output_size = ptr - ARRPTR(out); Assert(output_size <= out->size); out->size = output_size; if (data != STRPTR(out)) memmove(STRPTR(out), data, dataoff); output_bytes = CALCDATASIZE(out->size, dataoff); Assert(output_bytes <= VARSIZE(out)); SET_VARSIZE(out, output_bytes); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); PG_RETURN_POINTER(out); }
/*
 * tsvector_in(cstring)
 *
 * Input function for the tsvector type in this module.  Words produced by
 * gettoken_tsvector() are appended to a growing scratch buffer (tmpbuf)
 * while their entries are collected in arr; both are doubled in size as
 * needed.  After uniqueentry() has returned the final entry count and data
 * length, the result is assembled with each lexeme occupying
 * SHORTALIGN(len) bytes, followed for entries with positions by the
 * position block copied from arr[i].pos.
 *
 * The first uint16 of a position block is read as the number of positions
 * that follow it.  Raises an error when a word is MAXSTRLEN bytes or longer
 * or when a word would start beyond MAXSTRPOS in the scratch buffer.
 */
Datum tsvector_in(PG_FUNCTION_ARGS) { char *buf = PG_GETARG_CSTRING(0); TI_IN_STATE state; WordEntryIN *arr; WordEntry *inarr; int4 len = 0, totallen = 64; tsvector *in; char *tmpbuf, *cur; int4 i, buflen = 256; state.prsbuf = buf; state.len = 32; state.word = (char *) palloc(state.len); state.oprisdelim = false; arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); cur = tmpbuf = (char *) palloc(buflen); while (gettoken_tsvector(&state)) { if (len >= totallen) { totallen *= 2; arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); } while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) { int4 dist = cur - tmpbuf; buflen *= 2; tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); cur = tmpbuf + dist; } if (state.curpos - state.word >= MAXSTRLEN) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("word is too long"))); arr[len].entry.len = state.curpos - state.word; if (cur - tmpbuf > MAXSTRPOS) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("too long value"))); arr[len].entry.pos = cur - tmpbuf; memcpy((void *) cur, (void *) state.word, arr[len].entry.len); cur += arr[len].entry.len; if (state.alen) { arr[len].entry.haspos = 1; arr[len].pos = state.pos; } else arr[len].entry.haspos = 0; len++; } pfree(state.word); if (len > 0) len = uniqueentry(arr, len, tmpbuf, &buflen); else buflen=0; totallen = CALCDATASIZE(len, buflen); in = (tsvector *) palloc(totallen); memset(in, 0, totallen); in->len = totallen; in->size = len; cur = STRPTR(in); inarr = ARRPTR(in); for (i = 0; i < len; i++) { memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); arr[i].entry.pos = cur - STRPTR(in); cur += SHORTALIGN(arr[i].entry.len); if (arr[i].entry.haspos) { memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos)); cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos); pfree(arr[i].pos); } memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry)); } pfree(tmpbuf); pfree(arr); 
PG_RETURN_POINTER(in); }