Esempio n. 1
0
Datum
tsvectorin(PG_FUNCTION_ARGS)
{
	char	   *buf = PG_GETARG_CSTRING(0);
	TSVectorParseState state;
	WordEntryIN *arr;
	int			totallen;
	int			arrlen;			/* allocated size of arr */
	WordEntry  *inarr;
	int			len = 0;
	TSVector	in;
	int			i;
	char	   *token;
	int			toklen;
	WordEntryPos *pos;
	int			poslen;
	char	   *strbuf;
	int			stroff;

	/*
	 * Tokens are appended to tmpbuf, cur is a pointer to the end of used
	 * space in tmpbuf.
	 */
	char	   *tmpbuf;
	char	   *cur;
	int			buflen = 256;	/* allocated size of tmpbuf */

	state = init_tsvector_parser(buf, false, false);

	arrlen = 64;
	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
	cur = tmpbuf = (char *) palloc(buflen);

	while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
	{
		if (toklen >= MAXSTRLEN)
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long (%ld bytes, max %ld bytes)",
							(long) toklen,
							(long) (MAXSTRLEN - 1))));

		if (cur - tmpbuf > MAXSTRPOS)
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
							(long) (cur - tmpbuf), (long) MAXSTRPOS)));

		/*
		 * Enlarge buffers if needed
		 */
		if (len >= arrlen)
		{
			arrlen *= 2;
			arr = (WordEntryIN *)
				repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
		}
		while ((cur - tmpbuf) + toklen >= buflen)
		{
			int			dist = cur - tmpbuf;

			buflen *= 2;
			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
			cur = tmpbuf + dist;
		}
		arr[len].entry.len = toklen;
		arr[len].entry.pos = cur - tmpbuf;
		memcpy((void *) cur, (void *) token, toklen);
		cur += toklen;

		if (poslen != 0)
		{
			arr[len].entry.haspos = 1;
			arr[len].pos = pos;
			arr[len].poslen = poslen;
		}
		else
		{
			arr[len].entry.haspos = 0;
			arr[len].pos = NULL;
			arr[len].poslen = 0;
		}
		len++;
	}

	close_tsvector_parser(state);

	if (len > 0)
		len = uniqueentry(arr, len, tmpbuf, &buflen);
	else
		buflen = 0;

	if (buflen > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));

	totallen = CALCDATASIZE(len, buflen);
	in = (TSVector) palloc0(totallen);
	SET_VARSIZE(in, totallen);
	in->size = len;
	inarr = ARRPTR(in);
	strbuf = STRPTR(in);
	stroff = 0;
	for (i = 0; i < len; i++)
	{
		memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
		arr[i].entry.pos = stroff;
		stroff += arr[i].entry.len;
		if (arr[i].entry.haspos)
		{
			if (arr[i].poslen > 0xFFFF)
				elog(ERROR, "positions array too long");

			/* Copy number of positions */
			stroff = SHORTALIGN(stroff);
			*(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
			stroff += sizeof(uint16);

			/* Copy positions */
			memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
			stroff += arr[i].poslen * sizeof(WordEntryPos);

			pfree(arr[i].pos);
		}
		inarr[i] = arr[i].entry;
	}

	Assert((strbuf + stroff - (char *) in) == totallen);

	PG_RETURN_TSVECTOR(in);
}
Esempio n. 2
0
/*
 * Fetch the next token from the query string held in *state.
 *
 * The scanner alternates between two states:
 *
 *	WAITOPERAND	- expects '!', '(' or the start of a value.  A ':' here is
 *				  reported as a syntax error.  A value is scanned with
 *				  gettoken_tsvector(); on success *strval and *lenval
 *				  describe the word, get_weight() is applied to the text
 *				  following it (filling *weight), and VAL is returned.
 *	WAITOPERATOR - expects '&', '|', ')' or end of string.
 *
 * In both states a blank is skipped.  For OPR tokens *val receives the
 * operator character.  state->count tracks parenthesis nesting: OPEN
 * increments it, ')' decrements it, and ERR is returned on a ')' that drives
 * it negative or on end of string while it is nonzero.
 *
 * Returns one of OPR, OPEN, CLOSE, VAL, END or ERR.
 */
static int4
gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
{
	for (;;)
	{
		if (state->state == WAITOPERAND)
		{
			switch (*(state->buf))
			{
				case '!':
					(state->buf)++;
					*val = (int4) '!';
					return OPR;

				case '(':
					state->count++;
					(state->buf)++;
					return OPEN;

				case ':':
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("error at start of operand")));
					break;

				case ' ':
					/* blank: skipped by the increment at the loop bottom */
					break;

				default:
					/* anything else is taken as the start of a value */
					state->valstate.prsbuf = state->buf;
					state->state = WAITOPERATOR;
					if (!gettoken_tsvector(&(state->valstate)))
					{
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("no operand")));
					}
					else
					{
						*strval = state->valstate.word;
						*lenval = state->valstate.curpos - state->valstate.word;
						state->buf = get_weight(state->valstate.prsbuf, weight);
						return VAL;
					}
					break;
			}
		}
		else if (state->state == WAITOPERATOR)
		{
			switch (*(state->buf))
			{
				case '&':
				case '|':
					state->state = WAITOPERAND;
					*val = (int4) *(state->buf);
					(state->buf)++;
					return OPR;

				case ')':
					(state->buf)++;
					state->count--;
					if (state->count < 0)
						return ERR;
					return CLOSE;

				case '\0':
					/* end of input is only valid with all parens closed */
					if (state->count)
						return ERR;
					return END;

				case ' ':
					/* blank: skipped by the increment at the loop bottom */
					break;

				default:
					return ERR;
			}
		}
		else
		{
			/* unknown scanner state */
			return ERR;
		}

		/* step over the character that produced no token */
		(state->buf)++;
	}

	/* not reached: the loop above only exits via return */
	return END;
}
Esempio n. 3
0
/*
 * tsvector_in
 *		Build a tsvector from its C-string representation.
 *
 * Words are pulled from the input with gettoken_tsvector().  Each word's
 * bytes are appended to a growable scratch buffer (tmpbuf) and described by
 * a WordEntryIN appended to a growable array (arr).  When the parser state
 * reports a nonzero alen, the entry also takes over state.pos as its
 * position array.
 *
 * After parsing, uniqueentry() returns the entry count to use and writes
 * back, through its last argument, the size used for the result's string
 * area.  The result is then allocated zero-filled and each entry's text is
 * copied in at short-aligned intervals, followed by its position array when
 * it has one.
 *
 * Errors are raised via ereport for a word of MAXSTRLEN bytes or more and
 * for accumulated text whose offset exceeds MAXSTRPOS.
 */
Datum
tsvector_in(PG_FUNCTION_ARGS)
{
	char	   *buf = PG_GETARG_CSTRING(0);
	TI_IN_STATE state;
	WordEntryIN *arr;
	WordEntry  *inarr;
	tsvector   *in;
	char	   *tmpbuf;
	char	   *out;
	int4		len = 0;		/* entries of arr in use */
	int4		arrcap = 64;	/* allocated number of elements in arr */
	int4		buflen = 256;	/* allocated size of tmpbuf; after
								 * uniqueentry(), the string-area size */
	int4		used = 0;		/* bytes of tmpbuf in use */
	int4		datasize;
	int4		i;

	/* Set up the parser state with a small initial word buffer */
	state.prsbuf = buf;
	state.len = 32;
	state.word = (char *) palloc(state.len);
	state.oprisdelim = false;

	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrcap);
	tmpbuf = (char *) palloc(buflen);

	while (gettoken_tsvector(&state))
	{
		/* Double the entry array when it is full */
		if (len >= arrcap)
		{
			arrcap *= 2;
			arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrcap);
		}

		/* Double the scratch buffer until the current word fits */
		while (used + (state.curpos - state.word) >= buflen)
		{
			buflen *= 2;
			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
		}

		/* Enforce the per-word and total-length limits */
		if (state.curpos - state.word >= MAXSTRLEN)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("word is too long")));
		if (used > MAXSTRPOS)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("too long value")));

		/* Record the word: its length, its offset in tmpbuf, its bytes */
		arr[len].entry.len = state.curpos - state.word;
		arr[len].entry.pos = used;
		memcpy((void *) (tmpbuf + used), (void *) state.word, arr[len].entry.len);
		used += arr[len].entry.len;

		/* Take over the parser's position array only if it produced one */
		if (!state.alen)
			arr[len].entry.haspos = 0;
		else
		{
			arr[len].entry.haspos = 1;
			arr[len].pos = state.pos;
		}

		len++;
	}
	pfree(state.word);

	/*
	 * With no entries the string area is empty; otherwise let uniqueentry()
	 * settle the final entry count and string-area size.
	 */
	if (len <= 0)
		buflen = 0;
	else
		len = uniqueentry(arr, len, tmpbuf, &buflen);

	/* Allocate the result, zero it and fill in its header */
	datasize = CALCDATASIZE(len, buflen);
	in = (tsvector *) palloc(datasize);
	memset(in, 0, datasize);
	in->len = datasize;
	in->size = len;

	out = STRPTR(in);
	inarr = ARRPTR(in);
	for (i = 0; i < len; i++)
	{
		WordEntryIN *src = &arr[i];

		/* Copy the word text and repoint the entry at its new location */
		memcpy((void *) out, (void *) &tmpbuf[src->entry.pos], src->entry.len);
		src->entry.pos = out - STRPTR(in);
		out += SHORTALIGN(src->entry.len);

		if (src->entry.haspos)
		{
			/*
			 * The first uint16 of the position array is read as a count;
			 * count + 1 WordEntryPos-sized elements are copied (presumably
			 * the count slot itself plus the positions).
			 */
			size_t		posbytes = (*(uint16 *) src->pos + 1) * sizeof(WordEntryPos);

			memcpy(out, src->pos, posbytes);
			out += posbytes;
			pfree(src->pos);
		}

		memcpy(&(inarr[i]), &(src->entry), sizeof(WordEntry));
	}

	pfree(tmpbuf);
	pfree(arr);
	PG_RETURN_POINTER(in);
}