Example #1
0
/*
 * Validity test for an old-format hstore.
 *	0 = not valid
 *	1 = valid but with "slop" in the length
 *	2 = exactly valid
 */
static int
hstoreValidOldFormat(HStore *hs)
{
    int			count = hs->size_;
    HOldEntry  *entries = (HOldEntry *) ARRPTR(hs);
    int			vsize;
    int			lastpos = 0;
    int			i;

    if (hs->size_ & HS_FLAG_NEWVERSION)
        return 0;

    /* New format uses an HEntry for key and another for value */
    StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
                     "old hstore format is not upward-compatible");

    if (count == 0)
        return 2;

    if (count > 0xFFFFFFF)
        return 0;

    if (CALCDATASIZE(count, 0) > VARSIZE(hs))
        return 0;

    if (entries[0].pos != 0)
        return 0;

    /* key length must be nondecreasing */

    for (i = 1; i < count; ++i)
    {
        if (entries[i].keylen < entries[i - 1].keylen)
            return 0;
    }

    /*
     * entry position must be strictly increasing, except for the first entry
     * (which can be ""=>"" and thus zero-length); and all entries must be
     * properly contiguous
     */

    for (i = 0; i < count; ++i)
    {
        if (entries[i].pos != lastpos)
            return 0;
        lastpos += (entries[i].keylen
                    + ((entries[i].valisnull) ? 0 : entries[i].vallen));
    }

    vsize = CALCDATASIZE(count, lastpos);

    if (vsize > VARSIZE(hs))
        return 0;

    if (vsize != VARSIZE(hs))
        return 1;

    return 2;
}
Example #2
0
Datum
to_tsvector(PG_FUNCTION_ARGS)
{
	text	   *in = PG_GETARG_TEXT_P(1);
	PRSTEXT		prs;
	tsvector   *out = NULL;
	TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));

	prs.lenwords = 32;
	prs.curwords = 0;
	prs.pos = 0;
	prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);

	parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
	PG_FREE_IF_COPY(in, 1);

	if (prs.curwords)
		out = makevalue(&prs);
	else
	{
		pfree(prs.words);
		out = palloc(CALCDATASIZE(0, 0));
		out->len = CALCDATASIZE(0, 0);
		out->size = 0;
	}
	PG_RETURN_POINTER(out);
}
Example #3
0
Datum
to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgId = PG_GETARG_OID(0);
	text	   *in = PG_GETARG_TEXT_P(1);
	ParsedText	prs;
	TSVector	out;

	prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6;		/* just estimation of
														 * word's number */
	if (prs.lenwords == 0)
		prs.lenwords = 2;
	prs.curwords = 0;
	prs.pos = 0;
	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);

	parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
	PG_FREE_IF_COPY(in, 1);

	if (prs.curwords)
		out = make_tsvector(&prs);
	else
	{
		pfree(prs.words);
		out = palloc(CALCDATASIZE(0, 0));
		SET_VARSIZE(out, CALCDATASIZE(0, 0));
		out->size = 0;
	}

	PG_RETURN_POINTER(out);
}
Example #4
0
Datum
hstore_in(PG_FUNCTION_ARGS)
{
	HSParser	state;
	int4		len,
				buflen,
				i;
	HStore	   *out;
	HEntry	   *entries;
	char	   *ptr;

	state.begin = PG_GETARG_CSTRING(0);

	parse_hstore(&state);

	if (state.pcur == 0)
	{
		freeHSParse(&state);
		len = CALCDATASIZE(0, 0);
		out = palloc(len);
		out->len = len;
		out->size = 0;
		PG_RETURN_POINTER(out);
	}

	state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);

	len = CALCDATASIZE(state.pcur, buflen);
	out = palloc(len);
	out->len = len;
	out->size = state.pcur;

	entries = ARRPTR(out);
	ptr = STRPTR(out);

	for (i = 0; i < out->size; i++)
	{
		entries[i].keylen = state.pairs[i].keylen;
		entries[i].pos = ptr - STRPTR(out);
		memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
		ptr += entries[i].keylen;

		entries[i].valisnull = state.pairs[i].isnull;
		if (entries[i].valisnull)
			entries[i].vallen = 4;		/* null */
		else
		{
			entries[i].vallen = state.pairs[i].vallen;
			memcpy(ptr, state.pairs[i].val, state.pairs[i].vallen);
			ptr += entries[i].vallen;
		}
	}

	freeHSParse(&state);
	PG_RETURN_POINTER(out);
}
Example #5
0
Datum
tconvert(PG_FUNCTION_ARGS)
{
	text	   *key;
	text	   *val = NULL;
	int			len;
	HStore	   *out;

	if (PG_ARGISNULL(0))
		PG_RETURN_NULL();

	key = PG_GETARG_TEXT_P(0);

	if (PG_ARGISNULL(1))
		len = CALCDATASIZE(1, VARSIZE(key));
	else
	{
		val = PG_GETARG_TEXT_P(1);
		len = CALCDATASIZE(1, VARSIZE(key) + VARSIZE(val) - 2 * VARHDRSZ);
	}
	out = palloc(len);
	SET_VARSIZE(out, len);
	out->size = 1;

	ARRPTR(out)->keylen = hstoreCheckKeyLen(VARSIZE(key) - VARHDRSZ);
	if (PG_ARGISNULL(1))
	{
		ARRPTR(out)->vallen = 0;
		ARRPTR(out)->valisnull = true;
	}
	else
	{
		ARRPTR(out)->vallen = hstoreCheckValLen(VARSIZE(val) - VARHDRSZ);
		ARRPTR(out)->valisnull = false;
	}
	ARRPTR(out)->pos = 0;

	memcpy(STRPTR(out), VARDATA(key), ARRPTR(out)->keylen);
	if (!PG_ARGISNULL(1))
	{
		memcpy(STRPTR(out) + ARRPTR(out)->keylen, VARDATA(val), ARRPTR(out)->vallen);
		PG_FREE_IF_COPY(val, 1);
	}

	PG_FREE_IF_COPY(key, 0);

	PG_RETURN_POINTER(out);
}
Example #6
0
Datum
tsvector_strip(PG_FUNCTION_ARGS)
{
	TSVector	in = PG_GETARG_TSVECTOR(0);
	TSVector	out;
	int			i,
				len = 0;
	WordEntry  *arrin = ARRPTR(in),
			   *arrout;
	char	   *cur;

	for (i = 0; i < in->size; i++)
		len += arrin[i].len;

	len = CALCDATASIZE(in->size, len);
	out = (TSVector) palloc0(len);
	SET_VARSIZE(out, len);
	out->size = in->size;
	arrout = ARRPTR(out);
	cur = STRPTR(out);
	for (i = 0; i < in->size; i++)
	{
		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
		arrout[i].haspos = 0;
		arrout[i].len = arrin[i].len;
		arrout[i].pos = cur - STRPTR(out);
		cur += arrout[i].len;
	}

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_POINTER(out);
}
Example #7
0
File: hstore_io.c Project: d/gpdb
HStore *
hstorePairs(Pairs *pairs, int4 pcount, int4 buflen)
{
	HStore	   *out;
	HEntry	   *entry;
	char	   *ptr;
	char	   *buf;
	int4		len;
	int4		i;

	len = CALCDATASIZE(pcount, buflen);
	out = palloc(len);
	SET_VARSIZE(out, len);
	HS_SETCOUNT(out, pcount);

	if (pcount == 0)
		return out;

	entry = ARRPTR(out);
	buf = ptr = STRPTR(out);

	for (i = 0; i < pcount; i++)
		HS_ADDITEM(entry, buf, ptr, pairs[i]);

	HS_FINALIZE(out, pcount, buf, ptr);

	return out;
}
Example #8
0
Datum
hstore_out(PG_FUNCTION_ARGS)
{
	HStore	   *in = PG_GETARG_HS(0);
	int			buflen,
				i;
	char	   *out,
			   *ptr;
	char	   *base = STRPTR(in);
	HEntry	   *entries = ARRPTR(in);

	if (in->size == 0)
	{
		out = palloc(1);
		*out = '\0';
		PG_FREE_IF_COPY(in, 0);
		PG_RETURN_CSTRING(out);
	}

	buflen = (4 /* " */ + 2 /* => */ + 2 /* , */ ) * in->size +
		2 /* esc */ * (in->len - CALCDATASIZE(in->size, 0));

	out = ptr = palloc(buflen);
	for (i = 0; i < in->size; i++)
	{
		*ptr++ = '"';
		ptr = cpw(ptr, base + entries[i].pos, entries[i].keylen);
		*ptr++ = '"';
		*ptr++ = '=';
		*ptr++ = '>';
		if (entries[i].valisnull)
		{
			*ptr++ = 'N';
			*ptr++ = 'U';
			*ptr++ = 'L';
			*ptr++ = 'L';
		}
		else
		{
			*ptr++ = '"';
			ptr = cpw(ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen);
			*ptr++ = '"';
		}

		if (i + 1 != in->size)
		{
			*ptr++ = ',';
			*ptr++ = ' ';
		}
	}
	*ptr = '\0';

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_CSTRING(out);
}
Example #9
0
/*
 * Validity test for a new-format hstore.
 *	0 = not valid
 *	1 = valid but with "slop" in the length
 *	2 = exactly valid
 */
static int
hstoreValidNewFormat(HStore *hs)
{
    int			count = HS_COUNT(hs);
    HEntry	   *entries = ARRPTR(hs);
    int			buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
    int			vsize = CALCDATASIZE(count, buflen);
    int			i;

    if (hs->size_ & HS_FLAG_NEWVERSION)
        return 2;

    if (count == 0)
        return 2;

    if (!HSE_ISFIRST(entries[0]))
        return 0;

    if (vsize > VARSIZE(hs))
        return 0;

    /* entry position must be nondecreasing */

    for (i = 1; i < 2 * count; ++i)
    {
        if (HSE_ISFIRST(entries[i])
                || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
            return 0;
    }

    /* key length must be nondecreasing and keys must not be null */

    for (i = 1; i < count; ++i)
    {
        if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1))
            return 0;
        if (HSE_ISNULL(entries[2 * i]))
            return 0;
    }

    if (vsize != VARSIZE(hs))
        return 1;

    return 2;
}
Datum
hstore_hash(PG_FUNCTION_ARGS)
{
    HStore	   *hs = PG_GETARG_HS(0);
    Datum		hval = hash_any((unsigned char *) VARDATA(hs),
                                VARSIZE(hs) - VARHDRSZ);

    /*
     * this is the only place in the code that cares whether the overall
     * varlena size exactly matches the true data size; this assertion should
     * be maintained by all the other code, but we make it explicit here.
     */
    Assert(VARSIZE(hs) ==
           (HS_COUNT(hs) != 0 ?
            CALCDATASIZE(HS_COUNT(hs),
                         HSE_ENDPOS(ARRPTR(hs)[2 * HS_COUNT(hs) - 1])) :
            HSHRDSIZE));

    PG_FREE_IF_COPY(hs, 0);
    PG_RETURN_DATUM(hval);
}
Example #11
0
/*
 * Trigger
 */
Datum
tsearch2(PG_FUNCTION_ARGS)
{
	TriggerData *trigdata;
	Trigger    *trigger;
	Relation	rel;
	HeapTuple	rettuple = NULL;
	TSCfgInfo  *cfg = findcfg(get_currcfg());
	int			numidxattr,
				i;
	PRSTEXT		prs;
	Datum		datum = (Datum) 0;
	Oid			funcoid = InvalidOid;

	if (!CALLED_AS_TRIGGER(fcinfo))
		/* internal error */
		elog(ERROR, "TSearch: Not fired by trigger manager");

	trigdata = (TriggerData *) fcinfo->context;
	if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
		/* internal error */
		elog(ERROR, "TSearch: Can't process STATEMENT events");
	if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
		/* internal error */
		elog(ERROR, "TSearch: Must be fired BEFORE event");

	if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
		rettuple = trigdata->tg_trigtuple;
	else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
		rettuple = trigdata->tg_newtuple;
	else
		/* internal error */
		elog(ERROR, "TSearch: Unknown event");

	trigger = trigdata->tg_trigger;
	rel = trigdata->tg_relation;

	if (trigger->tgnargs < 2)
		/* internal error */
		elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");

	numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
	if (numidxattr == SPI_ERROR_NOATTRIBUTE)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("tsvector column \"%s\" does not exist",
						trigger->tgargs[0])));

	prs.lenwords = 32;
	prs.curwords = 0;
	prs.pos = 0;
	prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);

	/* find all words in indexable column */
	for (i = 1; i < trigger->tgnargs; i++)
	{
		int			numattr;
		Oid			oidtype;
		Datum		txt_toasted;
		bool		isnull;
		text	   *txt;

		numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
		if (numattr == SPI_ERROR_NOATTRIBUTE)
		{
			funcoid = findFunc(trigger->tgargs[i]);
			if (funcoid == InvalidOid)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("could not find function or field \"%s\"",
								trigger->tgargs[i])));

			continue;
		}
		oidtype = SPI_gettypeid(rel->rd_att, numattr);
		/* We assume char() and varchar() are binary-equivalent to text */
		if (!(oidtype == TEXTOID ||
			  oidtype == VARCHAROID ||
			  oidtype == BPCHAROID))
		{
			elog(WARNING, "TSearch: '%s' is not of character type",
				 trigger->tgargs[i]);
			continue;
		}
		txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
		if (isnull)
			continue;

		if (funcoid != InvalidOid)
		{
			text	   *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
																 funcoid,
											 PointerGetDatum(txt_toasted)
																	  ));

			txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
			if (txt == txttmp)
				txt_toasted = PointerGetDatum(txt);
		}
		else
			txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));

		parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
		if (txt != (text *) DatumGetPointer(txt_toasted))
			pfree(txt);
	}

	/* make tsvector value */
	if (prs.curwords)
	{
		datum = PointerGetDatum(makevalue(&prs));
		rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
								   &datum, NULL);
		pfree(DatumGetPointer(datum));
	}
	else
	{
		tsvector   *out = palloc(CALCDATASIZE(0, 0));

		out->len = CALCDATASIZE(0, 0);
		out->size = 0;
		datum = PointerGetDatum(out);
		pfree(prs.words);
		rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
								   &datum, NULL);
	}

	if (rettuple == NULL)
		/* internal error */
		elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);

	return PointerGetDatum(rettuple);
}
Example #12
0
static Datum
tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
{
	TriggerData *trigdata;
	Trigger    *trigger;
	Relation	rel;
	HeapTuple	rettuple = NULL;
	int			tsvector_attr_num,
				i;
	ParsedText	prs;
	Datum		datum;
	bool		isnull;
	text	   *txt;
	Oid			cfgId;

	/* Check call context */
	if (!CALLED_AS_TRIGGER(fcinfo))		/* internal error */
		elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");

	trigdata = (TriggerData *) fcinfo->context;
	if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
		elog(ERROR, "tsvector_update_trigger: must be fired for row");
	if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
		elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");

	if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
		rettuple = trigdata->tg_trigtuple;
	else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
		rettuple = trigdata->tg_newtuple;
	else
		elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");

	trigger = trigdata->tg_trigger;
	rel = trigdata->tg_relation;

	if (trigger->tgnargs < 3)
		elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");

	/* Find the target tsvector column */
	tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
	if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("tsvector column \"%s\" does not exist",
						trigger->tgargs[0])));
	if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
						  TSVECTOROID))
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("column \"%s\" is not of tsvector type",
						trigger->tgargs[0])));

	/* Find the configuration to use */
	if (config_column)
	{
		int			config_attr_num;

		config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
		if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("configuration column \"%s\" does not exist",
							trigger->tgargs[1])));
		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
							  REGCONFIGOID))
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("column \"%s\" is not of regconfig type",
							trigger->tgargs[1])));

		datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
		if (isnull)
			ereport(ERROR,
					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
					 errmsg("configuration column \"%s\" must not be null",
							trigger->tgargs[1])));
		cfgId = DatumGetObjectId(datum);
	}
	else
	{
		List	   *names;

		names = stringToQualifiedNameList(trigger->tgargs[1]);
		/* require a schema so that results are not search path dependent */
		if (list_length(names) < 2)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("text search configuration name \"%s\" must be schema-qualified",
							trigger->tgargs[1])));
		cfgId = get_ts_config_oid(names, false);
	}

	/* initialize parse state */
	prs.lenwords = 32;
	prs.curwords = 0;
	prs.pos = 0;
	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);

	/* find all words in indexable column(s) */
	for (i = 2; i < trigger->tgnargs; i++)
	{
		int			numattr;

		numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
		if (numattr == SPI_ERROR_NOATTRIBUTE)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" does not exist",
							trigger->tgargs[i])));
		if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("column \"%s\" is not of a character type",
							trigger->tgargs[i])));

		datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
		if (isnull)
			continue;

		txt = DatumGetTextP(datum);

		parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);

		if (txt != (text *) DatumGetPointer(datum))
			pfree(txt);
	}

	/* make tsvector value */
	if (prs.curwords)
	{
		datum = PointerGetDatum(make_tsvector(&prs));
		rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
								   &datum, NULL);
		pfree(DatumGetPointer(datum));
	}
	else
	{
		TSVector	out = palloc(CALCDATASIZE(0, 0));

		SET_VARSIZE(out, CALCDATASIZE(0, 0));
		out->size = 0;
		datum = PointerGetDatum(out);
		rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
								   &datum, NULL);
		pfree(prs.words);
	}

	if (rettuple == NULL)		/* internal error */
		elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple",
			 SPI_result);

	return PointerGetDatum(rettuple);
}
Example #13
0
Datum
tsvector_concat(PG_FUNCTION_ARGS)
{
	TSVector	in1 = PG_GETARG_TSVECTOR(0);
	TSVector	in2 = PG_GETARG_TSVECTOR(1);
	TSVector	out;
	WordEntry  *ptr;
	WordEntry  *ptr1,
			   *ptr2;
	WordEntryPos *p;
	int			maxpos = 0,
				i,
				j,
				i1,
				i2,
				dataoff;
	char	   *data,
			   *data1,
			   *data2;

	ptr = ARRPTR(in1);
	i = in1->size;
	while (i--)
	{
		if ((j = POSDATALEN(in1, ptr)) != 0)
		{
			p = POSDATAPTR(in1, ptr);
			while (j--)
			{
				if (WEP_GETPOS(*p) > maxpos)
					maxpos = WEP_GETPOS(*p);
				p++;
			}
		}
		ptr++;
	}

	ptr1 = ARRPTR(in1);
	ptr2 = ARRPTR(in2);
	data1 = STRPTR(in1);
	data2 = STRPTR(in2);
	i1 = in1->size;
	i2 = in2->size;
	/* conservative estimate of space needed */
	out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2));
	SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
	out->size = in1->size + in2->size;
	ptr = ARRPTR(out);
	data = STRPTR(out);
	dataoff = 0;
	while (i1 && i2)
	{
		int			cmp = compareEntry(data1, ptr1, data2, ptr2);

		if (cmp < 0)
		{						/* in1 first */
			ptr->haspos = ptr1->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				dataoff = SHORTALIGN(dataoff);
				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
			}

			ptr++;
			ptr1++;
			i1--;
		}
		else if (cmp > 0)
		{						/* in2 first */
			ptr->haspos = ptr2->haspos;
			ptr->len = ptr2->len;
			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
			ptr->pos = dataoff;
			dataoff += ptr2->len;
			if (ptr->haspos)
			{
				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

				if (addlen == 0)
					ptr->haspos = 0;
				else
				{
					dataoff = SHORTALIGN(dataoff);
					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
				}
			}

			ptr++;
			ptr2++;
			i2--;
		}
		else
		{
			ptr->haspos = ptr1->haspos | ptr2->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				if (ptr1->haspos)
				{
					dataoff = SHORTALIGN(dataoff);
					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
					if (ptr2->haspos)
						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
				}
				else	/* must have ptr2->haspos */
				{
					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

					if (addlen == 0)
						ptr->haspos = 0;
					else
					{
						dataoff = SHORTALIGN(dataoff);
						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
					}
				}
			}

			ptr++;
			ptr1++;
			ptr2++;
			i1--;
			i2--;
		}
	}

	while (i1)
	{
		ptr->haspos = ptr1->haspos;
		ptr->len = ptr1->len;
		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
		ptr->pos = dataoff;
		dataoff += ptr1->len;
		if (ptr->haspos)
		{
			dataoff = SHORTALIGN(dataoff);
			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
		}

		ptr++;
		ptr1++;
		i1--;
	}

	while (i2)
	{
		ptr->haspos = ptr2->haspos;
		ptr->len = ptr2->len;
		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
		ptr->pos = dataoff;
		dataoff += ptr2->len;
		if (ptr->haspos)
		{
			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

			if (addlen == 0)
				ptr->haspos = 0;
			else
			{
				dataoff = SHORTALIGN(dataoff);
				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
			}
		}

		ptr++;
		ptr2++;
		i2--;
	}

	/*
	 * Instead of checking each offset individually, we check for overflow of
	 * pos fields once at the end.
	 */
	if (dataoff > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));

	out->size = ptr - ARRPTR(out);
	SET_VARSIZE(out, CALCDATASIZE(out->size, dataoff));
	if (data != STRPTR(out))
		memmove(STRPTR(out), data, dataoff);

	PG_FREE_IF_COPY(in1, 0);
	PG_FREE_IF_COPY(in2, 1);
	PG_RETURN_POINTER(out);
}
Example #14
0
Datum
tsvectorin(PG_FUNCTION_ARGS)
{
	char	   *buf = PG_GETARG_CSTRING(0);
	TSVectorParseState state;
	WordEntryIN *arr;
	int			totallen;
	int			arrlen;			/* allocated size of arr */
	WordEntry  *inarr;
	int			len = 0;
	TSVector	in;
	int			i;
	char	   *token;
	int			toklen;
	WordEntryPos *pos;
	int			poslen;
	char	   *strbuf;
	int			stroff;

	/*
	 * Tokens are appended to tmpbuf, cur is a pointer to the end of used
	 * space in tmpbuf.
	 */
	char	   *tmpbuf;
	char	   *cur;
	int			buflen = 256;	/* allocated size of tmpbuf */

	state = init_tsvector_parser(buf, false, false);

	arrlen = 64;
	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
	cur = tmpbuf = (char *) palloc(buflen);

	while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
	{
		if (toklen >= MAXSTRLEN)
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long (%ld bytes, max %ld bytes)",
							(long) toklen,
							(long) (MAXSTRLEN - 1))));

		if (cur - tmpbuf > MAXSTRPOS)
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
							(long) (cur - tmpbuf), (long) MAXSTRPOS)));

		/*
		 * Enlarge buffers if needed
		 */
		if (len >= arrlen)
		{
			arrlen *= 2;
			arr = (WordEntryIN *)
				repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
		}
		while ((cur - tmpbuf) + toklen >= buflen)
		{
			int			dist = cur - tmpbuf;

			buflen *= 2;
			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
			cur = tmpbuf + dist;
		}
		arr[len].entry.len = toklen;
		arr[len].entry.pos = cur - tmpbuf;
		memcpy((void *) cur, (void *) token, toklen);
		cur += toklen;

		if (poslen != 0)
		{
			arr[len].entry.haspos = 1;
			arr[len].pos = pos;
			arr[len].poslen = poslen;
		}
		else
		{
			arr[len].entry.haspos = 0;
			arr[len].pos = NULL;
			arr[len].poslen = 0;
		}
		len++;
	}

	close_tsvector_parser(state);

	if (len > 0)
		len = uniqueentry(arr, len, tmpbuf, &buflen);
	else
		buflen = 0;

	if (buflen > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));

	totallen = CALCDATASIZE(len, buflen);
	in = (TSVector) palloc0(totallen);
	SET_VARSIZE(in, totallen);
	in->size = len;
	inarr = ARRPTR(in);
	strbuf = STRPTR(in);
	stroff = 0;
	for (i = 0; i < len; i++)
	{
		memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
		arr[i].entry.pos = stroff;
		stroff += arr[i].entry.len;
		if (arr[i].entry.haspos)
		{
			if (arr[i].poslen > 0xFFFF)
				elog(ERROR, "positions array too long");

			/* Copy number of positions */
			stroff = SHORTALIGN(stroff);
			*(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
			stroff += sizeof(uint16);

			/* Copy positions */
			memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
			stroff += arr[i].poslen * sizeof(WordEntryPos);

			pfree(arr[i].pos);
		}
		inarr[i] = arr[i].entry;
	}

	Assert((strbuf + stroff - (char *) in) == totallen);

	PG_RETURN_TSVECTOR(in);
}
Example #15
0
/*
 * make value of tsvector, given parsed text
 */
TSVector
make_tsvector(ParsedText *prs)
{
	int			i,
				j,
				lenstr = 0,
				totallen;
	TSVector	in;
	WordEntry  *ptr;
	char	   *str;
	int			stroff;

	prs->curwords = uniqueWORD(prs->words, prs->curwords);
	for (i = 0; i < prs->curwords; i++)
	{
		lenstr += prs->words[i].len;
		if (prs->words[i].alen)
		{
			lenstr = SHORTALIGN(lenstr);
			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
		}
	}

	if (lenstr > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));

	totallen = CALCDATASIZE(prs->curwords, lenstr);
	in = (TSVector) palloc0(totallen);
	SET_VARSIZE(in, totallen);
	in->size = prs->curwords;

	ptr = ARRPTR(in);
	str = STRPTR(in);
	stroff = 0;
	for (i = 0; i < prs->curwords; i++)
	{
		ptr->len = prs->words[i].len;
		ptr->pos = stroff;
		memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
		stroff += prs->words[i].len;
		pfree(prs->words[i].word);
		if (prs->words[i].alen)
		{
			int			k = prs->words[i].pos.apos[0];
			WordEntryPos *wptr;

			if (k > 0xFFFF)
				elog(ERROR, "positions array too long");

			ptr->haspos = 1;
			stroff = SHORTALIGN(stroff);
			*(uint16 *) (str + stroff) = (uint16) k;
			wptr = POSDATAPTR(in, ptr);
			for (j = 0; j < k; j++)
			{
				WEP_SETWEIGHT(wptr[j], 0);
				WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
			}
			stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
			pfree(prs->words[i].pos.apos);
		}
		else
			ptr->haspos = 0;
		ptr++;
	}
	pfree(prs->words);
	return in;
}
Example #16
0
Datum
hs_concat(PG_FUNCTION_ARGS)
{
	HStore	   *s1 = PG_GETARG_HS(0);
	HStore	   *s2 = PG_GETARG_HS(1);
	HStore	   *out = palloc(VARSIZE(s1) + VARSIZE(s2));
	char	   *ps1,
			   *ps2,
			   *pd;
	HEntry	   *es1,
			   *es2,
			   *ed;

	SET_VARSIZE(out, VARSIZE(s1) + VARSIZE(s2));
	out->size = s1->size + s2->size;

	ps1 = STRPTR(s1);
	ps2 = STRPTR(s2);
	pd = STRPTR(out);
	es1 = ARRPTR(s1);
	es2 = ARRPTR(s2);
	ed = ARRPTR(out);

	while (es1 - ARRPTR(s1) < s1->size && es2 - ARRPTR(s2) < s2->size)
	{
		int			difference;

		if (es1->keylen == es2->keylen)
			difference = strncmp(ps1, ps2, es1->keylen);
		else
			difference = (es1->keylen > es2->keylen) ? 1 : -1;

		if (difference == 0)
		{
			memcpy(ed, es2, sizeof(HEntry));
			memcpy(pd, ps2, es2->keylen + ((es2->valisnull) ? 0 : es2->vallen));
			ed->pos = pd - STRPTR(out);
			pd += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen);
			ed++;

			ps1 += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen);
			es1++;
			ps2 += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen);
			es2++;
		}
		else if (difference > 0)
		{
			memcpy(ed, es2, sizeof(HEntry));
			memcpy(pd, ps2, es2->keylen + ((es2->valisnull) ? 0 : es2->vallen));
			ed->pos = pd - STRPTR(out);
			pd += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen);
			ed++;

			ps2 += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen);
			es2++;
		}
		else
		{
			memcpy(ed, es1, sizeof(HEntry));
			memcpy(pd, ps1, es1->keylen + ((es1->valisnull) ? 0 : es1->vallen));
			ed->pos = pd - STRPTR(out);
			pd += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen);
			ed++;

			ps1 += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen);
			es1++;
		}
	}

	while (es1 - ARRPTR(s1) < s1->size)
	{
		memcpy(ed, es1, sizeof(HEntry));
		memcpy(pd, ps1, es1->keylen + ((es1->valisnull) ? 0 : es1->vallen));
		ed->pos = pd - STRPTR(out);
		pd += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen);
		ed++;

		ps1 += es1->keylen + ((es1->valisnull) ? 0 : es1->vallen);
		es1++;
	}

	while (es2 - ARRPTR(s2) < s2->size)
	{
		memcpy(ed, es2, sizeof(HEntry));
		memcpy(pd, ps2, es2->keylen + ((es2->valisnull) ? 0 : es2->vallen));
		ed->pos = pd - STRPTR(out);
		pd += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen);
		ed++;

		ps2 += es2->keylen + ((es2->valisnull) ? 0 : es2->vallen);
		es2++;
	}

	if (ed - ARRPTR(out) != out->size)
	{
		int			buflen = pd - STRPTR(out);

		pd = STRPTR(out);

		out->size = ed - ARRPTR(out);

		memmove(STRPTR(out), pd, buflen);
		SET_VARSIZE(out, CALCDATASIZE(out->size, buflen));
	}

	PG_FREE_IF_COPY(s1, 0);
	PG_FREE_IF_COPY(s2, 1);

	PG_RETURN_POINTER(out);
}
Example #17
0
/*
 * make value of tsvector
 */
static tsvector *
makevalue(PRSTEXT * prs)
{
	int4		i,
				j,
				lenstr = 0,
				totallen;
	tsvector   *in;
	WordEntry  *ptr;
	char	   *str,
			   *cur;

	prs->curwords = uniqueWORD(prs->words, prs->curwords);
	for (i = 0; i < prs->curwords; i++)
	{
		lenstr += SHORTALIGN(prs->words[i].len);

		if (prs->words[i].alen)
			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
	}

	totallen = CALCDATASIZE(prs->curwords, lenstr);
	in = (tsvector *) palloc(totallen);
	memset(in, 0, totallen);
	in->len = totallen;
	in->size = prs->curwords;

	ptr = ARRPTR(in);
	cur = str = STRPTR(in);
	for (i = 0; i < prs->curwords; i++)
	{
		ptr->len = prs->words[i].len;
		if (cur - str > MAXSTRPOS)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("value is too big")));
		ptr->pos = cur - str;
		memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
		pfree(prs->words[i].word);
		cur += SHORTALIGN(prs->words[i].len);
		if (prs->words[i].alen)
		{
			WordEntryPos *wptr;

			ptr->haspos = 1;
			*(uint16 *) cur = prs->words[i].pos.apos[0];
			wptr = POSDATAPTR(in, ptr);
			for (j = 0; j < *(uint16 *) cur; j++)
			{
				wptr[j].weight = 0;
				wptr[j].pos = prs->words[i].pos.apos[j + 1];
			}
			cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
			pfree(prs->words[i].pos.apos);
		}
		else
			ptr->haspos = 0;
		ptr++;
	}
	pfree(prs->words);
	return in;
}
Example #18
0
Datum
tsvector_concat(PG_FUNCTION_ARGS)
{
	TSVector	in1 = PG_GETARG_TSVECTOR(0);
	TSVector	in2 = PG_GETARG_TSVECTOR(1);
	TSVector	out;
	WordEntry  *ptr;
	WordEntry  *ptr1,
			   *ptr2;
	WordEntryPos *p;
	int			maxpos = 0,
				i,
				j,
				i1,
				i2,
				dataoff,
				output_bytes,
				output_size;
	char	   *data,
			   *data1,
			   *data2;

	/* Get max position in in1; we'll need this to offset in2's positions */
	ptr = ARRPTR(in1);
	i = in1->size;
	while (i--)
	{
		if ((j = POSDATALEN(in1, ptr)) != 0)
		{
			p = POSDATAPTR(in1, ptr);
			while (j--)
			{
				if (WEP_GETPOS(*p) > maxpos)
					maxpos = WEP_GETPOS(*p);
				p++;
			}
		}
		ptr++;
	}

	ptr1 = ARRPTR(in1);
	ptr2 = ARRPTR(in2);
	data1 = STRPTR(in1);
	data2 = STRPTR(in2);
	i1 = in1->size;
	i2 = in2->size;

	/*
	 * Conservative estimate of space needed.  We might need all the data in
	 * both inputs, and conceivably add a pad byte before position data for
	 * each item where there was none before.
	 */
	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;

	out = (TSVector) palloc0(output_bytes);
	SET_VARSIZE(out, output_bytes);

	/*
	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
	 * collapse out any unused space at the end.
	 */
	out->size = in1->size + in2->size;

	ptr = ARRPTR(out);
	data = STRPTR(out);
	dataoff = 0;
	while (i1 && i2)
	{
		int			cmp = compareEntry(data1, ptr1, data2, ptr2);

		if (cmp < 0)
		{						/* in1 first */
			ptr->haspos = ptr1->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				dataoff = SHORTALIGN(dataoff);
				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
			}

			ptr++;
			ptr1++;
			i1--;
		}
		else if (cmp > 0)
		{						/* in2 first */
			ptr->haspos = ptr2->haspos;
			ptr->len = ptr2->len;
			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
			ptr->pos = dataoff;
			dataoff += ptr2->len;
			if (ptr->haspos)
			{
				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

				if (addlen == 0)
					ptr->haspos = 0;
				else
				{
					dataoff = SHORTALIGN(dataoff);
					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
				}
			}

			ptr++;
			ptr2++;
			i2--;
		}
		else
		{
			ptr->haspos = ptr1->haspos | ptr2->haspos;
			ptr->len = ptr1->len;
			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
			ptr->pos = dataoff;
			dataoff += ptr1->len;
			if (ptr->haspos)
			{
				if (ptr1->haspos)
				{
					dataoff = SHORTALIGN(dataoff);
					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
					if (ptr2->haspos)
						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
				}
				else	/* must have ptr2->haspos */
				{
					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

					if (addlen == 0)
						ptr->haspos = 0;
					else
					{
						dataoff = SHORTALIGN(dataoff);
						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
					}
				}
			}

			ptr++;
			ptr1++;
			ptr2++;
			i1--;
			i2--;
		}
	}

	while (i1)
	{
		ptr->haspos = ptr1->haspos;
		ptr->len = ptr1->len;
		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
		ptr->pos = dataoff;
		dataoff += ptr1->len;
		if (ptr->haspos)
		{
			dataoff = SHORTALIGN(dataoff);
			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
		}

		ptr++;
		ptr1++;
		i1--;
	}

	while (i2)
	{
		ptr->haspos = ptr2->haspos;
		ptr->len = ptr2->len;
		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
		ptr->pos = dataoff;
		dataoff += ptr2->len;
		if (ptr->haspos)
		{
			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);

			if (addlen == 0)
				ptr->haspos = 0;
			else
			{
				dataoff = SHORTALIGN(dataoff);
				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
			}
		}

		ptr++;
		ptr2++;
		i2--;
	}

	/*
	 * Instead of checking each offset individually, we check for overflow of
	 * pos fields once at the end.
	 */
	if (dataoff > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));

	/*
	 * Adjust sizes (asserting that we didn't overrun the original estimates)
	 * and collapse out any unused array entries.
	 */
	output_size = ptr - ARRPTR(out);
	Assert(output_size <= out->size);
	out->size = output_size;
	if (data != STRPTR(out))
		memmove(STRPTR(out), data, dataoff);
	output_bytes = CALCDATASIZE(out->size, dataoff);
	Assert(output_bytes <= VARSIZE(out));
	SET_VARSIZE(out, output_bytes);

	PG_FREE_IF_COPY(in1, 0);
	PG_FREE_IF_COPY(in2, 1);
	PG_RETURN_POINTER(out);
}
Example #19
0
Datum
tsvector_in(PG_FUNCTION_ARGS)
{
	char	   *buf = PG_GETARG_CSTRING(0);
	TI_IN_STATE state;
	WordEntryIN *arr;
	WordEntry  *inarr;
	int4		len = 0,
				totallen = 64;
	tsvector   *in;
	char	   *tmpbuf,
			   *cur;
	int4		i,
				buflen = 256;

	state.prsbuf = buf;
	state.len = 32;
	state.word = (char *) palloc(state.len);
	state.oprisdelim = false;

	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
	cur = tmpbuf = (char *) palloc(buflen);
	while (gettoken_tsvector(&state))
	{
		if (len >= totallen)
		{
			totallen *= 2;
			arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
		}
		while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
		{
			int4		dist = cur - tmpbuf;

			buflen *= 2;
			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
			cur = tmpbuf + dist;
		}
		if (state.curpos - state.word >= MAXSTRLEN)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("word is too long")));
		arr[len].entry.len = state.curpos - state.word;
		if (cur - tmpbuf > MAXSTRPOS)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("too long value")));
		arr[len].entry.pos = cur - tmpbuf;
		memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
		cur += arr[len].entry.len;
		if (state.alen)
		{
			arr[len].entry.haspos = 1;
			arr[len].pos = state.pos;
		}
		else
			arr[len].entry.haspos = 0;
		len++;
	}
	pfree(state.word);

	if (len > 0)
		len = uniqueentry(arr, len, tmpbuf, &buflen);
	else
		buflen=0;
	totallen = CALCDATASIZE(len, buflen);
	in = (tsvector *) palloc(totallen);
	memset(in, 0, totallen);
	in->len = totallen;
	in->size = len;
	cur = STRPTR(in);
	inarr = ARRPTR(in);
	for (i = 0; i < len; i++)
	{
		memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
		arr[i].entry.pos = cur - STRPTR(in);
		cur += SHORTALIGN(arr[i].entry.len);
		if (arr[i].entry.haspos)
		{
			memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
			cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
			pfree(arr[i].pos);
		}
		memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
	}
	pfree(tmpbuf);
	pfree(arr);
	PG_RETURN_POINTER(in);
}