Esempio n. 1
0
void
hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
{
    int			type,
                lenlemm;
    char	   *lemm = NULL;
    WParserInfo *prsobj = findprs(cfg->prs_id);
    LexizeData	ldata;
    TSLexeme   *norms;
    ParsedLex  *lexs;

    prsobj->prs = (void *) DatumGetPointer(
                      FunctionCall2(
                          &(prsobj->start_info),
                          PointerGetDatum(buf),
                          Int32GetDatum(buflen)
                      )
                  );

    LexizeInit(&ldata, cfg);

    do
    {
        type = DatumGetInt32(FunctionCall3(
                                 &(prsobj->getlexeme_info),
                                 PointerGetDatum(prsobj->prs),
                                 PointerGetDatum(&lemm),
                                 PointerGetDatum(&lenlemm)));

        if (type > 0 && lenlemm >= MAXSTRLEN)
        {
#ifdef IGNORE_LONGLEXEME
            ereport(NOTICE,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("A word you are indexing is too long. It will be ignored.")));
            continue;
#else
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("A word you are indexing is too long")));
#endif
        }

        LexizeAddLemm(&ldata, type, lemm, lenlemm);

        do
        {
            if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
                addHLParsedLex(prs, query, lexs, norms);
            else
                addHLParsedLex(prs, query, lexs, NULL);
        } while (norms);

    } while (type > 0);

    FunctionCall1(
        &(prsobj->end_info),
        PointerGetDatum(prsobj->prs)
    );
}
Esempio n. 2
0
void
hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
{
	int			type,
				lenlemm;
	char	   *lemm = NULL;
	LexizeData	ldata;
	TSLexeme   *norms;
	ParsedLex  *lexs;
	TSConfigCacheEntry *cfg;
	TSParserCacheEntry *prsobj;
	void	   *prsdata;

	cfg = lookup_ts_config_cache(cfgId);
	prsobj = lookup_ts_parser_cache(cfg->prsId);

	prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
													 PointerGetDatum(buf),
													 Int32GetDatum(buflen)));

	LexizeInit(&ldata, cfg);

	do
	{
		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
										   PointerGetDatum(prsdata),
										   PointerGetDatum(&lemm),
										   PointerGetDatum(&lenlemm)));

		if (type > 0 && lenlemm >= MAXSTRLEN)
		{
#ifdef IGNORE_LONGLEXEME
			ereport(NOTICE,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
			continue;
#else
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
#endif
		}

		LexizeAddLemm(&ldata, type, lemm, lenlemm);

		do
		{
			if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
				addHLParsedLex(prs, query, lexs, norms);
			else
				addHLParsedLex(prs, query, lexs, NULL);
		} while (norms);

	} while (type > 0);

	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
Esempio n. 3
0
/*
 * Parse string and lexize words.
 *
 * prs will be filled in.
 */
void
parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
{
	int			type,
				lenlemm;
	char	   *lemm = NULL;
	LexizeData	ldata;
	TSLexeme   *norms;
	TSConfigCacheEntry *cfg;
	TSParserCacheEntry *prsobj;
	void	   *prsdata;

	cfg = lookup_ts_config_cache(cfgId);
	prsobj = lookup_ts_parser_cache(cfg->prsId);

	prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
													 PointerGetDatum(buf),
													 Int32GetDatum(buflen)));

	LexizeInit(&ldata, cfg);

	do
	{
		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
										   PointerGetDatum(prsdata),
										   PointerGetDatum(&lemm),
										   PointerGetDatum(&lenlemm)));

		if (type > 0 && lenlemm >= MAXSTRLEN)
		{
#ifdef IGNORE_LONGLEXEME
			ereport(NOTICE,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
			continue;
#else
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("word is too long to be indexed"),
					 errdetail("Words longer than %d characters are ignored.",
							   MAXSTRLEN)));
#endif
		}

		LexizeAddLemm(&ldata, type, lemm, lenlemm);

		while ((norms = LexizeExec(&ldata, NULL)) != NULL)
		{
			TSLexeme   *ptr = norms;

			prs->pos++;			/* set pos */

			while (ptr->lexeme)
			{
				if (prs->curwords == prs->lenwords)
				{
					prs->lenwords *= 2;
					prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
				}

				if (ptr->flags & TSL_ADDPOS)
					prs->pos++;
				prs->words[prs->curwords].len = strlen(ptr->lexeme);
				prs->words[prs->curwords].word = ptr->lexeme;
				prs->words[prs->curwords].nvariant = ptr->nvariant;
				prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
				prs->words[prs->curwords].alen = 0;
				prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
				ptr++;
				prs->curwords++;
			}
			pfree(norms);
		}
	} while (type > 0);

	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
Esempio n. 4
0
void
parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
{
    int			type,
                lenlemm;
    char	   *lemm = NULL;
    WParserInfo *prsobj = findprs(cfg->prs_id);
    LexizeData	ldata;
    TSLexeme   *norms;

    prsobj->prs = (void *) DatumGetPointer(
                      FunctionCall2(
                          &(prsobj->start_info),
                          PointerGetDatum(buf),
                          Int32GetDatum(buflen)
                      )
                  );

    LexizeInit(&ldata, cfg);

    do
    {
        type = DatumGetInt32(FunctionCall3(
                                 &(prsobj->getlexeme_info),
                                 PointerGetDatum(prsobj->prs),
                                 PointerGetDatum(&lemm),
                                 PointerGetDatum(&lenlemm)));

        if (type > 0 && lenlemm >= MAXSTRLEN)
        {
#ifdef IGNORE_LONGLEXEME
            ereport(NOTICE,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("A word you are indexing is too long. It will be ignored.")));
            continue;
#else
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("A word you are indexing is too long")));
#endif
        }

        LexizeAddLemm(&ldata, type, lemm, lenlemm);

        while ((norms = LexizeExec(&ldata, NULL)) != NULL)
        {
            TSLexeme   *ptr = norms;

            prs->pos++;			/* set pos */

            while (ptr->lexeme)
            {
                if (prs->curwords == prs->lenwords)
                {
                    prs->lenwords *= 2;
                    prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
                }

                if (ptr->flags & TSL_ADDPOS)
                    prs->pos++;
                prs->words[prs->curwords].len = strlen(ptr->lexeme);
                prs->words[prs->curwords].word = ptr->lexeme;
                prs->words[prs->curwords].nvariant = ptr->nvariant;
                prs->words[prs->curwords].alen = 0;
                prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
                ptr++;
                prs->curwords++;
            }
            pfree(norms);
        }
    } while (type > 0);

    FunctionCall1(
        &(prsobj->end_info),
        PointerGetDatum(prsobj->prs)
    );
}