Example #1
0
/*
 * Reads a stop-word file. Each word is run through 'wordop'
 * function, if given.	wordop may either modify the input in-place,
 * or palloc a new version.
 */
void
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
{
	char	  **stop = NULL;

	s->len = 0;
	if (fname && *fname)
	{
		char	   *filename = get_tsearch_config_filename(fname, "stop");
		tsearch_readline_state trst;
		char	   *line;
		int			reallen = 0;

		if (!tsearch_readline_begin(&trst, filename))
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("could not open stop-word file \"%s\": %m",
							filename)));

		while ((line = tsearch_readline(&trst)) != NULL)
		{
			char	   *pbuf = line;

			/* Trim trailing space */
			while (*pbuf && !t_isspace(pbuf))
				pbuf += pg_mblen(pbuf);
			*pbuf = '\0';

			/* Skip empty lines */
			if (*line == '\0')
			{
				pfree(line);
				continue;
			}

			if (s->len >= reallen)
			{
				if (reallen == 0)
				{
					reallen = 64;
					stop = (char **) palloc(sizeof(char *) * reallen);
				}
				else
				{
					reallen *= 2;
					stop = (char **) repalloc((void *) stop,
											  sizeof(char *) * reallen);
				}
			}

			if (wordop)
			{
				stop[s->len] = wordop(line);
				if (stop[s->len] != line)
					pfree(line);
			}
			else
				stop[s->len] = line;

			(s->len)++;
		}

		tsearch_readline_end(&trst);
		pfree(filename);
	}

	s->stop = stop;

	/* Sort to allow binary searching */
	if (s->stop && s->len > 0)
		qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
}
Example #2
0
static void
thesaurusRead(char *filename, DictThesaurus *d)
{
	tsearch_readline_state trst;
	uint16		idsubst = 0;
	bool		useasis = false;
	char	   *line;

	filename = get_tsearch_config_filename(filename, "ths");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open thesaurus file \"%s\": %m",
						filename)));

	while ((line = tsearch_readline(&trst)) != NULL)
	{
		char	   *ptr;
		int			state = TR_WAITLEX;
		char	   *beginwrd = NULL;
		uint16		posinsubst = 0;
		uint16		nwrd = 0;

		ptr = line;

		/* is it a comment? */
		while (*ptr && t_isspace(ptr))
			ptr += pg_mblen(ptr);

		if (t_iseq(ptr, '#') || *ptr == '\0' ||
			t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
		{
			pfree(line);
			continue;
		}

		while (*ptr)
		{
			if (state == TR_WAITLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					if (posinsubst == 0)
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected delimiter")));
					state = TR_WAITSUBS;
				}
				else if (!t_isspace(ptr))
				{
					beginwrd = ptr;
					state = TR_INLEX;
				}
			}
			else if (state == TR_INLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITSUBS;
				}
				else if (t_isspace(ptr))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITLEX;
				}
			}
			else if (state == TR_WAITSUBS)
			{
				if (t_iseq(ptr, '*'))
				{
					useasis = true;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (t_iseq(ptr, '\\'))
				{
					useasis = false;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (!t_isspace(ptr))
				{
					useasis = false;
					beginwrd = ptr;
					state = TR_INSUBS;
				}
			}
			else if (state == TR_INSUBS)
			{
				if (t_isspace(ptr))
				{
					if (ptr == beginwrd)
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected end of line or lexeme")));
					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
					state = TR_WAITSUBS;
				}
			}
			else
				elog(ERROR, "unrecognized thesaurus state: %d", state);

			ptr += pg_mblen(ptr);
		}

		if (state == TR_INSUBS)
		{
			if (ptr == beginwrd)
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("unexpected end of line or lexeme")));
			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
		}

		idsubst++;

		if (!(nwrd && posinsubst))
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("unexpected end of line")));

		pfree(line);
	}

	d->nsubst = idsubst;

	tsearch_readline_end(&trst);
}
Example #3
0
/*
 * get token from query string
 */
static int4
gettoken_query(QPRS_STATE *state, int4 *val, int4 *lenval, char **strval, uint16 *flag)
{
	int			charlen;

	for (;;)
	{
		charlen = pg_mblen(state->buf);

		switch (state->state)
		{
			case WAITOPERAND:
				if (charlen == 1 && t_iseq(state->buf, '!'))
				{
					(state->buf)++;
					*val = (int4) '!';
					return OPR;
				}
				else if (charlen == 1 && t_iseq(state->buf, '('))
				{
					state->count++;
					(state->buf)++;
					return OPEN;
				}
				else if (ISALNUM(state->buf))
				{
					state->state = INOPERAND;
					*strval = state->buf;
					*lenval = charlen;
					*flag = 0;
				}
				else if (!t_isspace(state->buf))
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("operand syntax error")));
				break;
			case INOPERAND:
				if (ISALNUM(state->buf))
				{
					if (*flag)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("modificators syntax error")));
					*lenval += charlen;
				}
				else if (charlen == 1 && t_iseq(state->buf, '%'))
					*flag |= LVAR_SUBLEXEME;
				else if (charlen == 1 && t_iseq(state->buf, '@'))
					*flag |= LVAR_INCASE;
				else if (charlen == 1 && t_iseq(state->buf, '*'))
					*flag |= LVAR_ANYEND;
				else
				{
					state->state = WAITOPERATOR;
					return VAL;
				}
				break;
			case WAITOPERATOR:
				if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
				{
					state->state = WAITOPERAND;
					*val = (int4) *(state->buf);
					(state->buf)++;
					return OPR;
				}
				else if (charlen == 1 && t_iseq(state->buf, ')'))
				{
					(state->buf)++;
					state->count--;
					return (state->count < 0) ? ERR : CLOSE;
				}
				else if (*(state->buf) == '\0')
					return (state->count) ? ERR : END;
				else if (charlen == 1 && !t_iseq(state->buf, ' '))
					return ERR;
				break;
			default:
				return ERR;
				break;
		}

		state->buf += charlen;
	}
	return END;
}
Example #4
0
/*
 * initTrie  - create trie from file.
 *
 * Function converts UTF8-encoded file into current encoding.
 */
static TrieChar *
initTrie(char *filename)
{
	TrieChar   *volatile rootTrie = NULL;
	MemoryContext ccxt = CurrentMemoryContext;
	tsearch_readline_state trst;
	volatile bool skip;

	filename = get_tsearch_config_filename(filename, "rules");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open unaccent file \"%s\": %m",
						filename)));

	do
	{
		/*
		 * pg_do_encoding_conversion() (called by tsearch_readline()) will
		 * emit exception if it finds untranslatable characters in current
		 * locale. We just skip such lines, continuing with the next.
		 */
		skip = true;

		PG_TRY();
		{
			char	   *line;

			while ((line = tsearch_readline(&trst)) != NULL)
			{
				/*----------
				 * The format of each line must be "src" or "src trg", where
				 * src and trg are sequences of one or more non-whitespace
				 * characters, separated by whitespace.  Whitespace at start
				 * or end of line is ignored.  If trg is omitted, an empty
				 * string is used as the replacement.
				 *
				 * We use a simple state machine, with states
				 *	0	initial (before src)
				 *	1	in src
				 *	2	in whitespace after src
				 *	3	in trg
				 *	4	in whitespace after trg
				 *	-1	syntax error detected
				 *----------
				 */
				int			state;
				char	   *ptr;
				char	   *src = NULL;
				char	   *trg = NULL;
				int			ptrlen;
				int			srclen = 0;
				int			trglen = 0;

				state = 0;
				for (ptr = line; *ptr; ptr += ptrlen)
				{
					ptrlen = pg_mblen(ptr);
					/* ignore whitespace, but end src or trg */
					if (t_isspace(ptr))
					{
						if (state == 1)
							state = 2;
						else if (state == 3)
							state = 4;
						continue;
					}
					switch (state)
					{
						case 0:
							/* start of src */
							src = ptr;
							srclen = ptrlen;
							state = 1;
							break;
						case 1:
							/* continue src */
							srclen += ptrlen;
							break;
						case 2:
							/* start of trg */
							trg = ptr;
							trglen = ptrlen;
							state = 3;
							break;
						case 3:
							/* continue trg */
							trglen += ptrlen;
							break;
						default:
							/* bogus line format */
							state = -1;
							break;
					}
				}

				if (state == 1 || state == 2)
				{
					/* trg was omitted, so use "" */
					trg = "";
					trglen = 0;
				}

				if (state > 0)
					rootTrie = placeChar(rootTrie,
										 (unsigned char *) src, srclen,
										 trg, trglen);
				else if (state < 0)
					ereport(WARNING,
							(errcode(ERRCODE_CONFIG_FILE_ERROR),
							 errmsg("invalid syntax: more than two strings in unaccent rule")));

				pfree(line);
			}
			skip = false;
		}
		PG_CATCH();
		{
			ErrorData  *errdata;
			MemoryContext ecxt;

			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();
			if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
			{
				FlushErrorState();
			}
			else
			{
				MemoryContextSwitchTo(ecxt);
				PG_RE_THROW();
			}
		}
		PG_END_TRY();
	}
	while (skip);

	tsearch_readline_end(&trst);

	return rootTrie;
}
Example #5
0
File: tok.c Project: BPaden/garglk
/* get the next token, removing it from the input stream */
int toknext(tokcxdef *ctx)
{
    char   *p;
    tokdef *tok = &ctx->tokcxcur;
    int     len;

    /* 
     *   Check for the special case that we pushed an open paren prior to
     *   a string containing an embedded expression.  If this is the case,
     *   immediately return the string we previously parsed. 
     */
    if ((ctx->tokcxflg & TOKCXF_EMBED_PAREN_PRE) != 0)
    {
        /* 
         *   convert the token to a string - note that the offset
         *   information for the string is already in the current token
         *   structure, since we set everything up for it on the previous
         *   call where we actually parsed the beginning of the string 
         */
        tok->toktyp = TOKTDSTRING;

        /* clear the special flag - we've now consumed the pushed string */
        ctx->tokcxflg &= ~TOKCXF_EMBED_PAREN_PRE;

        /* immediately return the string */
        return tok->toktyp;
    }

    /* set up at the current scanning position */
    p = ctx->tokcxptr;
    len = ctx->tokcxlen;

    /* scan off whitespace and comments until we find something */
    do
    {
    skipblanks:
        /* if there's nothing on this line, get the next one */
        if (len == 0)
        {
            /* if we're in a macro expansion, continue after it */
            if (ctx->tokcxmlvl)
            {
                ctx->tokcxmlvl--;
                p = ctx->tokcxmsav[ctx->tokcxmlvl];
                len = ctx->tokcxmsvl[ctx->tokcxmlvl];
            }
            else
            {
                if (tokgetlin(ctx, TRUE))
                {
                    tok->toktyp = TOKTEOF;
                    goto done;
                }
                p = ctx->tokcxptr;
                len = ctx->tokcxlen;
            }
        }
        while (len && t_isspace(*p)) ++p, --len;     /* scan off whitespace */
        
        /* check for comments, and remove if present */
        if (len >= 2 && *p == '/' && *(p+1) == '/')
            len = 0;
        else if (len >= 2 && *p == '/' && *(p+1) == '*')
        {
            while (len < 2 || *p != '*' || *(p+1) != '/')
            {
                if (len != 0)
                    ++p, --len;

                if (len == 0)
                {
                    if (ctx->tokcxmlvl != 0)
                    {
                        ctx->tokcxmlvl--;
                        p = ctx->tokcxmsav[ctx->tokcxmlvl];
                        len = ctx->tokcxmsvl[ctx->tokcxmlvl];
                    }
                    else
                    {
                        if (tokgetlin(ctx, FALSE))
                        {
                            ctx->tokcxptr = p;
                            tok->toktyp = TOKTEOF;
                            goto done;
                        }
                        p = ctx->tokcxptr;
                        len = ctx->tokcxlen;
                    }
                }
            }
            p += 2;
            len -= 2;
            goto skipblanks;
        }
    } while (len == 0);
    
nexttoken:
    if (isalpha((uchar)*p) || *p == '_' || *p == '$')
    {
        int       l;
        int       hash;
        char     *q;
        toktdef  *tab;
        int       found = FALSE;
        uchar     thischar;
        tokdfdef *df;
        
        for (hash = 0, l = 0, q = tok->toknam ;
             len != 0 && TOKISSYM(*p) && l < TOKNAMMAX ;
             (thischar = ((isupper((uchar)*p)
                           && (ctx->tokcxflg & TOKCXCASEFOLD))
                          ? tolower((uchar)*p) : *p)),
             (hash = ((hash + thischar) & (TOKHASHSIZE - 1))),
             (*q++ = thischar), ++p, --len, ++l) ;
        *q = '\0';
        if (len != 0 && TOKISSYM(*p))
        {
            while (len != 0 && TOKISSYM(*p)) ++p, --len;
            errlog1(ctx->tokcxerr, ERR_TRUNC, ERRTSTR,
                    errstr(ctx->tokcxerr, tok->toknam, tok->toklen));
        }
        tok->toklen = l;
        tok->tokhash = hash;

        /*
         *   check for the special defined() preprocessor operator 
         */
        if (l == 9 && !memcmp(tok->toknam,
                              ((ctx->tokcxflg & TOKCXCASEFOLD)
                               ? "__defined" : "__DEFINED"),
                              (size_t)9)
            && len > 2 && *p == '(' && TOKISSYM(*(p+1))
            && !isdigit((uchar)*(p+1)))
        {
            int symlen;
            char mysym[TOKNAMMAX];
            
            /* find the matching ')', allowing only symbolic characters */
            ++p, --len;
            for (symlen = 0, q = p ; len && *p != ')' && TOKISSYM(*p) ;
                 ++p, --len, ++symlen) ;

            /* make sure we found the closing paren */
            if (!len || *p != ')')
                errsig(ctx->tokcxerr, ERR_BADISDEF);
            ++p, --len;

            /* if we're folding case, convert the symbol to lower case */
            q = tok_casefold_defsym(ctx, mysym, q, symlen);

            /* check to see if it's defined */
            tok->toktyp = TOKTNUMBER;
            tok->tokval = (tok_find_define(ctx, q, symlen) != 0);
            goto done;
        }

        /* substitute the preprocessor #define, if any */
        if ((df = tok_find_define(ctx, tok->toknam, l)) != 0)
        {
            /* save the current parsing position */
            if (ctx->tokcxmlvl >= TOKMACNEST)
                errsig(ctx->tokcxerr, ERR_MACNEST);
            ctx->tokcxmsav[ctx->tokcxmlvl] = p;
            ctx->tokcxmsvl[ctx->tokcxmlvl] = len;
            ctx->tokcxmlvl++;

            /* point to the token's expansion and keep going */
            p = df->expan;
            len = df->explen;
            goto nexttoken;
        }
        
        /* look up in symbol table(s), if any */
        for (tab = ctx->tokcxstab ; tab ; tab = tab->toktnxt)
        {
            if ((found = (*tab->toktfsea)(tab, tok->toknam, l, hash,
                                          &tok->toksym)) != 0)
                break;
        }
        
        if (found && tok->toksym.tokstyp == TOKSTKW)
            tok->toktyp = tok->toksym.toksval;
        else
        {
            tok->toktyp = TOKTSYMBOL;
            if (!found) tok->toksym.tokstyp = TOKSTUNK;
        }
        goto done;
    }
    else if (isdigit((uchar)*p))
    {
        long acc = 0;
        
        /* check for octal/hex */
        if (*p == '0')
        {
            ++p, --len;
            if (len && (*p == 'x' || *p == 'X'))
            {
                /* hex */
                ++p, --len;
                while (len && TOKISHEX(*p))
                {
                    acc = (acc << 4) + TOKHEX2INT(*p);
                    ++p, --len;
                }
            }
            else
            {
                /* octal */
                while (len && TOKISOCT(*p))
                {
                    acc = (acc << 3) + TOKOCT2INT(*p);
                    ++p, --len;
                }
            }
        }
        else
        {
            /* decimal */
            while (len && isdigit((uchar)*p))
            {
                acc = (acc << 1) + (acc << 3) + TOKDEC2INT(*p);
                ++p, --len;
            }
        }
        tok->tokval = acc;
        tok->toktyp = TOKTNUMBER;
        goto done;
    }
    else if (*p == '"' || *p == '\'')
    {
        char  delim;                 /* closing delimiter we're looking for */
        char *strstart;                       /* pointer to start of string */
        int   warned;
        
        delim = *p;
        --len;
        strstart = ++p;

        if (delim == '"' && len >= 2 && *p == '<' && *(p+1) == '<')
        {
            /* save the current parsing position */
            if (ctx->tokcxmlvl >= TOKMACNEST)
                errsig(ctx->tokcxerr, ERR_MACNEST);
            ctx->tokcxmsav[ctx->tokcxmlvl] = p + 2;
            ctx->tokcxmsvl[ctx->tokcxmlvl] = len - 2;
            ctx->tokcxmlvl++;

            /* 
             *   read from the special "<<" expansion string - use the
             *   version for a "<<" at the very beginning of the string 
             */
            p = tokmac1s;
            len = strlen(p);
            ctx->tokcxflg |= TOKCXFINMAC;
            goto nexttoken;
        }
        tok->toktyp = (delim == '"' ? TOKTDSTRING : TOKTSSTRING);
        
        tok->tokofs = (*ctx->tokcxsst)(ctx->tokcxscx);  /* start the string */
        for (warned = FALSE ;; )
        {
            if (len >= 2 && *p == '\\')
            {
                if (*(p+1) == '"' || *(p+1) == '\'')
                {
                    (*ctx->tokcxsad)(ctx->tokcxscx, strstart,
                                     (ushort)(p - strstart));
                    strstart = p + 1;
                }
                p += 2;
                len -= 2;
            }
            else if (len == 0 || *p == delim ||
                     (delim == '"' && len >= 2 && *p == '<' && *(p+1) == '<'
                      && !(ctx->tokcxflg & TOKCXFINMAC)))
            {
                (*ctx->tokcxsad)(ctx->tokcxscx, strstart,
                                 (ushort)(p - strstart));
                if (len == 0)
                {
                    if (ctx->tokcxmlvl)
                    {
                        ctx->tokcxmlvl--;
                        p = ctx->tokcxmsav[ctx->tokcxmlvl];
                        len = ctx->tokcxmsvl[ctx->tokcxmlvl];
                    }
                    else
                        (*ctx->tokcxsad)(ctx->tokcxscx, " ", (ushort)1);
                    
                    while (len == 0)
                    {
                        if (tokgetlin(ctx, FALSE))
                            errsig(ctx->tokcxerr, ERR_STREOF);
                        p = ctx->tokcxptr;
                        len = ctx->tokcxlen;

                        /* warn if it looks like the end of an object */
                        if (!warned && len && (*p == ';' || *p == '}'))
                        {
                            errlog(ctx->tokcxerr, ERR_STREND);
                            warned = TRUE;     /* warn only once per string */
                        }

                        /* scan past whitespace at start of line */
                        while (len && t_isspace(*p)) ++p, --len;
                    }
                    strstart = p;
                }
                else break;
            }
            else
                ++p, --len;
        }

        /* end the string */
        (*ctx->tokcxsend)(ctx->tokcxscx);

        /* check to see how it ended */
        if (len != 0 && *p == delim)
        {
            /* 
             *   We ended with the matching delimiter.  Move past the
             *   closing delimiter. 
             */
            ++p;
            --len;

            /*
             *   If we have a pending close paren we need to put in
             *   because of an embedded expression that occurred earlier
             *   in the string, parse the macro to provide the paren.  
             */
            if ((ctx->tokcxflg & TOKCXF_EMBED_PAREN_AFT) != 0
                && !(ctx->tokcxflg & TOKCXFINMAC))
            {
                /* clear the flag */
                ctx->tokcxflg &= ~TOKCXF_EMBED_PAREN_AFT;

                /* push the current parsing position */
                if (ctx->tokcxmlvl >= TOKMACNEST)
                    errsig(ctx->tokcxerr, ERR_MACNEST);
                ctx->tokcxmsav[ctx->tokcxmlvl] = p;
                ctx->tokcxmsvl[ctx->tokcxmlvl] = len;
                ctx->tokcxmlvl++;

                /* parse the macro */
                p = tokmac4;
                len = strlen(p);
            }
        }
        else if (len != 0 && *p == '<')
        {
            /* save the current parsing position */
            if (ctx->tokcxmlvl >= TOKMACNEST)
                errsig(ctx->tokcxerr, ERR_MACNEST);
            ctx->tokcxmsav[ctx->tokcxmlvl] = p + 2;
            ctx->tokcxmsvl[ctx->tokcxmlvl] = len - 2;
            ctx->tokcxmlvl++;

            /* read from the "<<" expansion */
            p = tokmac1;
            len = strlen(p);
            ctx->tokcxflg |= TOKCXFINMAC;

            /* 
             *   Set the special push-a-paren flag: we'll return an open
             *   paren now, so that we have an open paren before the
             *   string, and then on the next call to toknext() we'll
             *   immediately return the string we've already parsed here.
             *   This will ensure that everything in the string is
             *   properly grouped together as a single indivisible
             *   expression.
             *   
             *   Note that we only need to do this for the first embedded
             *   expression in a string.  Once we have a close paren
             *   pending, we don't need more open parens.  
             */
            if (!(ctx->tokcxflg & TOKCXF_EMBED_PAREN_AFT))
            {
                ctx->tokcxflg |= TOKCXF_EMBED_PAREN_PRE;
                tok->toktyp = TOKTLPAR;
            }
        }
        goto done;
    }
    else if (len >= 2 && *p == '>' && *(p+1) == '>'
             && (ctx->tokcxflg & TOKCXFINMAC) != 0)
    {
        /* skip the ">>" */
        ctx->tokcxflg &= ~TOKCXFINMAC;
        p += 2;
        len -= 2;

        /* save the current parsing position */
        if (ctx->tokcxmlvl >= TOKMACNEST)
            errsig(ctx->tokcxerr, ERR_MACNEST);
        ctx->tokcxmsav[ctx->tokcxmlvl] = p;
        ctx->tokcxmsvl[ctx->tokcxmlvl] = len;
        ctx->tokcxmlvl++;

        if (*p == '"')
        {
            ++(ctx->tokcxmsav[ctx->tokcxmlvl - 1]);
            --(ctx->tokcxmsvl[ctx->tokcxmlvl - 1]);
            p = tokmac3;

            /* 
             *   we won't need an extra closing paren now, since tokmac3
             *   provides it 
             */
            ctx->tokcxflg &= ~TOKCXF_EMBED_PAREN_AFT;
        }
        else
        {
            /* 
             *   The string is continuing.  Set a flag to note that we
             *   need to provide a close paren after the end of the
             *   string, and parse the glue (tokmac2) that goes between
             *   the expression and the resumption of the string. 
             */
            ctx->tokcxflg |= TOKCXF_EMBED_PAREN_AFT;
            p = tokmac2;
        }

        len = strlen(p);
        goto nexttoken;
    }
    else
    {
        tokscdef *sc;
        
        for (sc = ctx->tokcxsc[ctx->tokcxinx[(uchar)*p]] ; sc ;
             sc = sc->tokscnxt)
        {
            if (toksceq(sc->tokscstr, p, sc->toksclen, len))
            {
                tok->toktyp = sc->toksctyp;
                p += sc->toksclen;
                len -= sc->toksclen;
                goto done;
            }
        }
        errsig(ctx->tokcxerr, ERR_INVTOK);
    }
    
done:
    ctx->tokcxptr = p;
    ctx->tokcxlen = len;
    return(tok->toktyp);
}
Example #6
0
/*
 * Get next token from string being parsed. Returns true if successful,
 * false if end of input string is reached.  On success, these output
 * parameters are filled in:
 *
 * *strval		pointer to token
 * *lenval		length of *strval
 * *pos_ptr		pointer to a palloc'd array of positions and weights
 *				associated with the token. If the caller is not interested
 *				in the information, NULL can be supplied. Otherwise
 *				the caller is responsible for pfreeing the array.
 * *poslen		number of elements in *pos_ptr
 * *endptr		scan resumption point
 *
 * Pass NULL for unwanted output parameters.
 */
bool
gettoken_tsvector(TSVectorParseState state,
				  char **strval, int *lenval,
				  WordEntryPos **pos_ptr, int *poslen,
				  char **endptr)
{
	int			oldstate = 0;
	char	   *curpos = state->word;
	int			statecode = WAITWORD;

	/*
	 * pos is for collecting the comma delimited list of positions followed by
	 * the actual token.
	 */
	WordEntryPos *pos = NULL;
	int			npos = 0;		/* elements of pos used */
	int			posalen = 0;	/* allocated size of pos */

	while (1)
	{
		if (statecode == WAITWORD)
		{
			if (*(state->prsbuf) == '\0')
				return false;
			else if (t_iseq(state->prsbuf, '\''))
				statecode = WAITENDCMPLX;
			else if (t_iseq(state->prsbuf, '\\'))
			{
				statecode = WAITNEXTCHAR;
				oldstate = WAITENDWORD;
			}
			else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
				PRSSYNTAXERROR;
			else if (!t_isspace(state->prsbuf))
			{
				COPYCHAR(curpos, state->prsbuf);
				curpos += pg_mblen(state->prsbuf);
				statecode = WAITENDWORD;
			}
		}
		else if (statecode == WAITNEXTCHAR)
		{
			if (*(state->prsbuf) == '\0')
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("there is no escaped character: \"%s\"",
								state->bufstart)));
			else
			{
				RESIZEPRSBUF;
				COPYCHAR(curpos, state->prsbuf);
				curpos += pg_mblen(state->prsbuf);
				Assert(oldstate != 0);
				statecode = oldstate;
			}
		}
		else if (statecode == WAITENDWORD)
		{
			if (t_iseq(state->prsbuf, '\\'))
			{
				statecode = WAITNEXTCHAR;
				oldstate = WAITENDWORD;
			}
			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
					 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
			{
				RESIZEPRSBUF;
				if (curpos == state->word)
					PRSSYNTAXERROR;
				*(curpos) = '\0';
				RETURN_TOKEN;
			}
			else if (t_iseq(state->prsbuf, ':'))
			{
				if (curpos == state->word)
					PRSSYNTAXERROR;
				*(curpos) = '\0';
				if (state->oprisdelim)
					RETURN_TOKEN;
				else
					statecode = INPOSINFO;
			}
			else
			{
				RESIZEPRSBUF;
				COPYCHAR(curpos, state->prsbuf);
				curpos += pg_mblen(state->prsbuf);
			}
		}
		else if (statecode == WAITENDCMPLX)
		{
			if (t_iseq(state->prsbuf, '\''))
			{
				statecode = WAITCHARCMPLX;
			}
			else if (t_iseq(state->prsbuf, '\\'))
			{
				statecode = WAITNEXTCHAR;
				oldstate = WAITENDCMPLX;
			}
			else if (*(state->prsbuf) == '\0')
				PRSSYNTAXERROR;
			else
			{
				RESIZEPRSBUF;
				COPYCHAR(curpos, state->prsbuf);
				curpos += pg_mblen(state->prsbuf);
			}
		}
		else if (statecode == WAITCHARCMPLX)
		{
			if (t_iseq(state->prsbuf, '\''))
			{
				RESIZEPRSBUF;
				COPYCHAR(curpos, state->prsbuf);
				curpos += pg_mblen(state->prsbuf);
				statecode = WAITENDCMPLX;
			}
			else
			{
				RESIZEPRSBUF;
				*(curpos) = '\0';
				if (curpos == state->word)
					PRSSYNTAXERROR;
				if (state->oprisdelim)
				{
					/* state->prsbuf+=pg_mblen(state->prsbuf); */
					RETURN_TOKEN;
				}
				else
					statecode = WAITPOSINFO;
				continue;		/* recheck current character */
			}
		}
		else if (statecode == WAITPOSINFO)
		{
			if (t_iseq(state->prsbuf, ':'))
				statecode = INPOSINFO;
			else
				RETURN_TOKEN;
		}
		else if (statecode == INPOSINFO)
		{
			if (t_isdigit(state->prsbuf))
			{
				if (posalen == 0)
				{
					posalen = 4;
					pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
					npos = 0;
				}
				else if (npos + 1 >= posalen)
				{
					posalen *= 2;
					pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
				}
				npos++;
				WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
				/* we cannot get here in tsquery, so no need for 2 errmsgs */
				if (WEP_GETPOS(pos[npos - 1]) == 0)
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("wrong position info in tsvector: \"%s\"",
									state->bufstart)));
				WEP_SETWEIGHT(pos[npos - 1], 0);
				statecode = WAITPOSDELIM;
			}
			else
				PRSSYNTAXERROR;
		}
		else if (statecode == WAITPOSDELIM)
		{
			if (t_iseq(state->prsbuf, ','))
				statecode = INPOSINFO;
			else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
			{
				if (WEP_GETWEIGHT(pos[npos - 1]))
					PRSSYNTAXERROR;
				WEP_SETWEIGHT(pos[npos - 1], 3);
			}
			else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
			{
				if (WEP_GETWEIGHT(pos[npos - 1]))
					PRSSYNTAXERROR;
				WEP_SETWEIGHT(pos[npos - 1], 2);
			}
			else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
			{
				if (WEP_GETWEIGHT(pos[npos - 1]))
					PRSSYNTAXERROR;
				WEP_SETWEIGHT(pos[npos - 1], 1);
			}
			else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
			{
				if (WEP_GETWEIGHT(pos[npos - 1]))
					PRSSYNTAXERROR;
				WEP_SETWEIGHT(pos[npos - 1], 0);
			}
			else if (t_isspace(state->prsbuf) ||
					 *(state->prsbuf) == '\0')
				RETURN_TOKEN;
			else if (!t_isdigit(state->prsbuf))
				PRSSYNTAXERROR;
		}
		else	/* internal error */
			elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
				 statecode);

		/* get next char */
		state->prsbuf += pg_mblen(state->prsbuf);
	}
}
Example #7
0
File: tok.c Project: BPaden/garglk
/* get a new line from line source, processing '#' directives */
static int tokgetlin(tokcxdef *ctx, int dopound)
{
    for (;;)
    {
        if (linget(ctx->tokcxlin))
        {
            /* at eof in current source; resume parent if there is one */
            if (ctx->tokcxlin->linpar)
            {
                lindef *parent;
                
                parent = ctx->tokcxlin->linpar;          /* remember parent */
                lincls(ctx->tokcxlin);               /* close included file */
                if (!ctx->tokcxdbg)               /* if no debug context... */
                    mchfre(ctx->tokcxlin);              /* free line source */
                ctx->tokcxlin = parent;      /* reset to parent line source */
                if (parent->linflg & LINFCMODE)
                    ctx->tokcxflg |= TOKCXFCMODE;
                else
                    ctx->tokcxflg &= ~TOKCXFCMODE;
                continue;                       /* back for another attempt */
            }
            else
            {
                /* check for outstanding #if/#ifdef */
                if (ctx->tokcxifcnt)
                    errlog(ctx->tokcxerr, ERR_NOENDIF);

                /* return end-of-file indication */
                return TRUE;
            }
        }
        
        /* if this is a multi-segment line, copy it into our own buffer */
        if (ctx->tokcxlin->linflg & LINFMORE)
        {
            char *p;
            uint  rem;
            int   done;
            
            if (!ctx->tokcxbuf)
            {
                /* allocate 1k as a default buffer */
                ctx->tokcxbuf = (char *)mchalo(ctx->tokcxerr, 1024,
                                               "tok");
                ctx->tokcxbsz = 1024;
            }
            ctx->tokcxlen = 0;
            
            for (done = FALSE, p = ctx->tokcxbuf, rem = ctx->tokcxbsz ;
                 !done ; )
            {
                size_t len = ctx->tokcxlin->linlen;

                /* add the current segment's length into line length */
                ctx->tokcxlen += len;
                
                /* we're done after this piece if the last fetch was all */
                done = !(ctx->tokcxlin->linflg & LINFMORE);
                if (len + 1 > rem)
                {
                    char *newp;

                    /* increase the size of the buffer */
                    if (ctx->tokcxbsz > (unsigned)0x8000)
                        errsig(ctx->tokcxerr, ERR_LONGLIN);
                    rem += 4096;
                    ctx->tokcxbsz += 4096;
                    
                    /* allocate a new buffer and copy line into it */
                    newp = (char *)mchalo(ctx->tokcxerr, ctx->tokcxbsz, "tok");
                    memcpy(newp, ctx->tokcxbuf, (size_t)(p - ctx->tokcxbuf));
                    
                    /* free the original buffer, and use the new one */
                    p = (p - ctx->tokcxbuf) + newp;
                    mchfre(ctx->tokcxbuf);
                    ctx->tokcxbuf = newp;
                }
                
                /* add the line to the buffer */
                memcpy(p, ctx->tokcxlin->linbuf, len);
                p += len;
                rem -= len;
                
                /* get the next piece of the line if there is one */
                if (!done)
                {
                    if (linget(ctx->tokcxlin)) break;
                }
            }
            
            /* null-terminate the buffer, and use it for input */
            *p = '\0';
            ctx->tokcxptr = ctx->tokcxbuf;
        }
        else
        {
            ctx->tokcxptr = ctx->tokcxlin->linbuf;
            ctx->tokcxlen = ctx->tokcxlin->linlen;
        }
        
        /* check for preprocessor directives */
        if (dopound && ctx->tokcxlen != 0 && ctx->tokcxptr[0] == '#'
            && !(ctx->tokcxlin->linflg & LINFNOINC))
        {
            char   *p;
            int     len;
            static  struct
            {
                char  *nm;
                int    len;
                int    ok_in_if;
                void (*fn)(tokcxdef *, char *, int);
            }
            *dirp, dir[] =
            {
                { "include", 7, FALSE, tokinclude },
                { "pragma",  6, FALSE, tokpragma },
                { "define",  6, FALSE, tokdefine },
                { "ifdef",   5, TRUE, tokifdef },
                { "ifndef",  6, TRUE, tokifndef },
                { "if",      2, TRUE, tokif },
                { "else",    4, TRUE, tokelse },
                { "elif",    4, TRUE, tokelif },
                { "endif",   5, TRUE, tokendif },
                { "undef",   5, FALSE, tokundef },
                { "error",   5, FALSE, tok_p_error }
            };
            int  i;

            /* scan off spaces between '#' and directive */
            for (len = ctx->tokcxlen - 1, p = &ctx->tokcxptr[1] ;
                 len && t_isspace(*p) ; --len, ++p) ;

            /* find and process the directive */
            for (dirp = dir, i = sizeof(dir)/sizeof(dir[0]) ; i ; --i, ++dirp)
            {
                /* compare this directive; if it wins, call its function */
                if (len >= dirp->len && !memcmp(p, dirp->nm, (size_t)dirp->len)
                    && (len == dirp->len || t_isspace(*(p + dirp->len))))
                {
                    int cnt;
                    int stat;
                    
                    /*
                     *   if we're not in a #if's false part, or if the
                     *   directive is processed even in #if false parts,
                     *   process the line, otherwise skip it 
                     */
                    cnt = ctx->tokcxifcnt;
                    if (dirp->ok_in_if || cnt == 0
                        || ((stat = ctx->tokcxifcur) == TOKIF_IF_YES
                            || stat == TOKIF_ELSE_YES))
                    {
                        /* skip whitespace following the directive */
                        for (p += dirp->len, len -= dirp->len ;
                             len && t_isspace(*p) ;
                             --len, ++p) ;

                        /* invoke the function to process this directive */
                        (*dirp->fn)(ctx, p, len);
                    }

                    /* there's no need to look at more directives */
                    break;
                }
            }

            /* if we didn't find anything, flag the error */
            if (i == 0)
                errlog(ctx->tokcxerr, ERR_PRPDIR);

            /* ignore this line */
            continue;
        }
        else
        {
            /*
             *   Check the #if level.  If we're in an #if, and we're to
             *   ignore lines (because of a false condition or an #else
             *   part for a true condition), skip this line. 
             */
            if (ctx->tokcxifcnt != 0)
            {
                switch(ctx->tokcxifcur)
                {
                case TOKIF_IF_NO:
                case TOKIF_ELSE_NO:
                    /* ignore this line */
                    continue;

                default:
                    /* we're in a true part - keep the line */
                    break;
                }
            }
            
            ctx->tokcxlin->linflg &= ~LINFDBG;       /* no debug record yet */
            return(FALSE);                      /* return the line we found */
        }
    }
}
Example #8
0
/*
 *   Text-mode os_input_dialog implementation 
 */
int tio_input_dialog(int icon_id, const char *prompt,
                     int standard_button_set,
                     const char **buttons, int button_count,
                     int default_index, int cancel_index)
{
    /* ignore the icon ID - we can't display an icon in text mode */
    VARUSED(icon_id);

    /* keep going until we get a valid response */
    for (;;)
    {
        int i;
        char buf[256];
        const char *p;
        const char *cur;
        char *resp;
        int match_cnt;
        int last_found;
        static const struct
        {
            const char *buttons[3];
            int button_count;
        } std_btns[] =
        {
            { { "&OK" },                    1 },
            { { "&OK", "&Cancel" },         2 },
            { { "&Yes", "&No" },            2 },
            { { "&Yes", "&No", "&Cancel" }, 3 }
        };

        /* 
         *   if we have a standard button set selected, get our button
         *   labels 
         */
        switch(standard_button_set)
        {
        case 0:
            /* use the explicit buttons provided */
            break;

        case OS_INDLG_OK:
            i = 0;

        use_std_btns:
            /* use the selected standard button set */
            buttons = (const char **)std_btns[i].buttons;
            button_count = std_btns[i].button_count;
            break;

        case OS_INDLG_OKCANCEL:
            i = 1;
            goto use_std_btns;

        case OS_INDLG_YESNO:
            i = 2;
            goto use_std_btns;

        case OS_INDLG_YESNOCANCEL:
            i = 3;
            goto use_std_btns;

        default:
            /* 
             *   we don't recognize other standard button sets - return an
             *   error 
             */
            return 0;
        }

        /* 
         *   if there are no buttons defined, they'll never be able to
         *   respond, so we'd just loop forever - rather than let that
         *   happen, return failure 
         */
        if (button_count == 0)
            return 0;

        /* display a newline and the prompt string */
        outformat("\\n");
        outformat((char *)prompt);
        outformat(" ");

        /* display the response */
        for (i = 0 ; i < button_count ; ++i)
        {
            /* 
             *   display a slash to separate responses, if this isn't the
             *   first one 
             */
            if (i != 0)
                outformat("/");

            /* get the current button */
            cur = buttons[i];

            /* 
             *   Look for a "&" in the response string.  If we find it,
             *   remove the "&" and enclose the shortcut key in parens.  
             */
            for (p = cur ; *p != '&' && *p != '\0' ; ++p) ;

            /* if we found the "&", put the next character in parens */
            if (*p == '&')
            {
                /* reformat the response string */
                sprintf(buf, "%.*s(%c)%s", (int)(p - cur), cur, *(p+1), p+2);

                /* display it */
                outformat(buf);
            }
            else
            {
                /* no '&' - just display the response string as-is */
                outformat((char *)cur);
            }
        }

        /* if we're in HTML mode, switch to input font */
        if (tio_is_html_mode())
            outformat("<font face='TADS-Input'>");

        /* read the response */
        getstring(" >", buf, sizeof(buf));

        /* if we're in HTML mode, close the input font tag */
        if (tio_is_html_mode())
            outformat("</font>");

        /* skip any leading spaces in the reply */
        for (resp = buf ; t_isspace(*resp) ; ++resp) ;

        /* if it's one character, check it against the shortcut keys */
        if (strlen(resp) == 1)
        {
            /* scan the responses */
            for (i = 0 ; i < button_count ; ++i)
            {
                /* look for a '&' in this button */
                for (p = buttons[i] ; *p != '&' && *p != '\0' ; ++p) ;

                /* if we found the '&', check the shortcut */
                if (*p == '&' && toupper(*(p+1)) == toupper(*resp))
                {
                    /* 
                     *   this is the one - return the current index
                     *   (bumping it by one to get a 1-based value) 
                     */
                    return i + 1;
                }
            }
        }

        /* 
         *   Either it's not a one-character reply, or it didn't match a
         *   short-cut - check it against the leading substrings of the
         *   responses.  If it matches exactly one of the responses in its
         *   leading substring, use that response.  
         */
        for (i = 0, match_cnt = 0 ; i < button_count ; ++i)
        {
            const char *p1;
            const char *p2;

            /* 
             *   compare this response to the user's response; skip any
             *   '&' in the button label 
             */
            for (p1 = resp, p2 = buttons[i] ; *p1 != '\0' && *p2 != '\0' ;
                 ++p1, ++p2)
            {
                /* if this is a '&' in the button label, skip it */
                if (*p2 == '&')
                    ++p2;

                /* if these characters don't match, it's no match */
                if (toupper(*p1) != toupper(*p2))
                    break;
            }

            /* 
             *   if we reached the end of the user's response, we have a
             *   match in the leading substring - count it and remember
             *   this as the last one, but keep looking, since we need to
             *   make sure we don't have any other matches 
             */
            if (*p1 == '\0')
            {
                ++match_cnt;
                last_found = i;
            }
        }
        
        /* 
         *   if we found exactly one match, return it (adjusting to a
         *   1-based index); if we found more or less than one match, it's
         *   not a valid response, so start over with a new prompt 
         */
        if (match_cnt == 1)
            return last_found + 1;
    }
}
int main(int argc, char **argv)
{
    int       curarg;
    osfildef *fpin;
    osfildef *fpout;
    char      tmpfile[OSFNMAX + 1];
    char      inbuf[OSFNMAX + 1];
    char     *p;
    char     *infile;
    char      buf[128];
    opdef    *oplist = (opdef *)0;
    opctxdef  opctx;
    int       do_create = FALSE;

    /* print main banner */
    rscptf("TADS Resource Manager version 2.2.4\n");
    rscptf("Copyright (c) 1992, 1999 by Michael J. Roberts.  ");
    rscptf("All Rights Reserved.\n");
    if (argc < 2) usage();

    /* set default parsing options */
    opctx.restype = RESTYPE_DFLT;
    opctx.flag = OPFADD | OPFDEL;
    opctx.doing_type = FALSE;

    /* scan file options (these come before the filename) */
    for (curarg = 1 ; curarg < argc ; ++curarg)
    {
        /* check if it's an option - if not, stop looking */
        if (argv[curarg][0] != '-')
            break;
        
        /* check the option */
        if (!stricmp(argv[curarg], "-create"))
        {
            /* note that we want to create the file */
            do_create = TRUE;
        }
        else
        {
            rscptf("unrecognized file option \"%s\"", argv[curarg]);
            errexit("", 1);
        }
    }
    
    /* get the file name */
    infile = argv[curarg++];
    strcpy(inbuf, infile);
    os_defext(inbuf, "gam");

    /* open the file for reading, unless we're creating a new file */
    if (do_create)
    {
        /* creating - we have no input file */
        fpin = 0;
    }
    else if ((fpin = osfoprb(inbuf, OSFTGAME)) == 0)
    {
        /* 
         *   not creating, so the file must already exist - it doesn't, so
         *   issue an error and quit 
         */
        errexit("unable to open resource file", 1);
    }

    /* 
     *   if no operations are desired, and we're not creating a new file,
     *   just list the existing file's contents and quit 
     */
    if (curarg == argc && fpin != 0)
    {
        rscproc(fpin, (osfildef *)0, 0);
        osfcls(fpin);
        os_term(OSEXSUCC);
    }

    /*
     *   Create an output file.  If we're creating a new file, create the
     *   file named on the command line; otherwise, create a temporary
     *   file that we'll write to while working and then rename to the
     *   original input filename after we've finished with the original
     *   input file.  
     */
    if (do_create)
    {
        /* create the new file */
        if ((fpout = osfopwb(inbuf, OSFTGAME)) == 0)
            errexit("unable to create file", 1);

        /* report the creation */
        rscptf("\nFile created.\n");
    }
    else
    {
        /* generate a temporary filename */
        strcpy(tmpfile, inbuf);
        for (p = tmpfile + strlen(tmpfile) ; p > tmpfile &&
                 *(p-1) != ':' && *(p-1) != '\\' && *(p-1) != '/' ; --p);
        strcpy(p, "$TADSRSC.TMP");

        /* open the temporary file */
        if ((fpout = osfopwb(tmpfile, OSFTGAME)) == 0)
            errexit("unable to create temporary file", 1);
    }

    /* see if we need to read a response file */
    if (curarg < argc && argv[curarg][0] == '@')
    {
        osfildef *argfp;
        int       l;
        char     *p;
        
        if (!(argfp = osfoprt(argv[curarg]+1, OSFTTEXT)))
            errexit("unable to open response file", 1);
        
        for (;;)
        {
            if (!osfgets(buf, sizeof(buf), argfp)) break;
            l = strlen(buf);
            if (l && buf[l-1] == '\n') buf[--l] = '\0';
            for (p = buf ; t_isspace(*p) ; ++p);
            if (!*p) continue;
            oplist = addop(oplist, p, &opctx);
        }
        osfcls(argfp);
    }
    else
    {
        for ( ; curarg < argc ; ++curarg)
            oplist = addop(oplist, argv[curarg], &opctx);
    }

    /* process the resources */
    oplist = rscproc(fpin, fpout, oplist);

    /* make sure they all got processed */
    for ( ; oplist != 0 ; oplist = oplist->opnxt)
    {
        if (!(oplist->opflag & OPFDONE))
            rscptf("warning: resource \"%s\" not found\n", oplist->opres);
    }
    
    /* close files */
    if (fpin != 0)
        osfcls(fpin);
    if (fpout != 0)
        osfcls(fpout);
    
    /* 
     *   if we didn't create a new file, remove the original input file
     *   and rename the temp file to the original file name 
     */
    if (!do_create)
    {
        /* remove the original input file */
        if (remove(inbuf))
            errexit("error deleting input file", 1);

        /* rename the temp file to the output file */
        if (rename(tmpfile, inbuf))
            errexit("error renaming temporary file", 1);
    }

    /* success */
    os_term(OSEXSUCC);
    return OSEXSUCC;
}
/*
 *   Add an operation from a command line argument 
 */
static opdef *addop(opdef *cur, char *nam, opctxdef *opctx)
{
    char  *p;
    opdef *newop;
    void  *search_ctx;
    char   fname[OSFNMAX];
    char   resname[OSFNMAX];
    int    isdir;

    /* see if we're parsing a -type argument */
    if (opctx->doing_type)
    {
        /* 
         *   parse the type name, and store it as the type for following
         *   resources
         */
        opctx->restype = parse_res_type(nam);
        
        /* we're done parsing the -type argument */
        opctx->doing_type = FALSE;

        /* 
         *   we're done parsing this argument - we haven't added any
         *   operations, so return old list head 
         */
        return cur;
    }

    /* see if we have an option */
    if (*nam == '-')
    {
        /* see what we have */
        if (!stricmp(nam, "-type"))
        {
            /* 
             *   note that we're doing a type, so we parse it on the next
             *   argument 
             */
            opctx->doing_type = TRUE;
        }
        else if (!stricmp(nam, "-replace"))
        {
            /* set current flags to replace */
            opctx->flag = OPFADD | OPFDEL;
        }
        else if (!stricmp(nam, "-delete"))
        {
            /* set current flags to delete */
            opctx->flag = OPFDEL;
        }
        else if (!stricmp(nam, "-add"))
        {
            /* set current flags to add */
            opctx->flag = OPFADD;
        }
        else
        {
            /* invalid argument */
            rscptf("invalid option: %s", nam);
            errexit("", 1);
        }

        /* 
         *   done parsing this option - we didn't add a new operation, so
         *   return the current list head 
         */
        return cur;
    }

    /* look for '=' */
    for (p = nam ; *p && *p != '=' ; ++p);
    if (*p == '=')
    {
        /* 
         *   We found an '=', so an explicit resource name follows - use
         *   the given string as the resource name rather than basing the
         *   resource name on the filename.  First, overwrite the '=' with
         *   a null byte so that the filename string is terminated.  
         */
        *p = '\0';

        /* skip the '=' (now the null byte, of course) */
        ++p;

        /* 
         *   skip any spaces after the '='; leave p pointing to the start
         *   of the resource name 
         */
        while (t_isspace(*p))
            ++p;
    }
    else
    {
        /* 
         *   A resource name is not specified - synthesize a resource name
         *   based on the filename by converting from the local file
         *   system naming syntax to a relative URL 
         */
        os_cvt_dir_url(resname, sizeof(resname), nam, FALSE);

        /* point p to the synthesized resource name */
        p = resname;
    }
    
    /*
     *   If we're adding a directory, rather than returning a single op
     *   for the directory, expand the directory into ops for for all of
     *   the files in the directory. 
     */
    search_ctx = os_find_first_file("", nam, fname, sizeof(fname),
                                    &isdir, 0, 0);
    if (search_ctx != 0)
    {
        /* cancel the search; we only needed the one matching file */
        os_find_close(search_ctx);
        
        /* we found the file; if it's a directory, process its contents */
        if (isdir)
            return addopdir(cur, nam, opctx);
    }
    
    /* allocate space and set up new op */
    newop = (opdef *)malloc(sizeof(opdef) + strlen(p) + strlen(nam) + 2);

    newop->opnxt  = cur;
    newop->opflag = opctx->flag;
    newop->opres  = (char *)(newop + 1);
    newop->oprestype = get_file_restype(opctx->restype, nam);
    strcpy(newop->opres, p);
    newop->opfile = newop->opres + strlen(newop->opres) + 1;
    strcpy(newop->opfile, nam);

    return(newop);
}
Example #11
0
/*
 * initSuffixTree  - create suffix tree from file. Function converts
 * UTF8-encoded file into current encoding.
 */
static SuffixChar *
initSuffixTree(char *filename)
{
	SuffixChar *volatile rootSuffixTree = NULL;
	MemoryContext ccxt = CurrentMemoryContext;
	tsearch_readline_state trst;
	volatile bool skip;

	filename = get_tsearch_config_filename(filename, "rules");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open unaccent file \"%s\": %m",
						filename)));

	do
	{
		/*
		 * pg_do_encoding_conversion() (called by tsearch_readline()) will
		 * emit exception if it finds untranslatable characters in current
		 * locale. We just skip such lines, continuing with the next.
		 */
		skip = true;

		PG_TRY();
		{
			char	   *line;

			while ((line = tsearch_readline(&trst)) != NULL)
			{
				/*
				 * The format of each line must be "src trg" where src and trg
				 * are sequences of one or more non-whitespace characters,
				 * separated by whitespace.  Whitespace at start or end of
				 * line is ignored.
				 */
				int			state;
				char	   *ptr;
				char	   *src = NULL;
				char	   *trg = NULL;
				int			ptrlen;
				int			srclen = 0;
				int			trglen = 0;

				state = 0;
				for (ptr = line; *ptr; ptr += ptrlen)
				{
					ptrlen = pg_mblen(ptr);
					/* ignore whitespace, but end src or trg */
					if (t_isspace(ptr))
					{
						if (state == 1)
							state = 2;
						else if (state == 3)
							state = 4;
						continue;
					}
					switch (state)
					{
						case 0:
							/* start of src */
							src = ptr;
							srclen = ptrlen;
							state = 1;
							break;
						case 1:
							/* continue src */
							srclen += ptrlen;
							break;
						case 2:
							/* start of trg */
							trg = ptr;
							trglen = ptrlen;
							state = 3;
							break;
						case 3:
							/* continue trg */
							trglen += ptrlen;
							break;
						default:
							/* bogus line format */
							state = -1;
							break;
					}
				}

				if (state >= 3)
					rootSuffixTree = placeChar(rootSuffixTree,
											   (unsigned char *) src, srclen,
											   trg, trglen);

				pfree(line);
			}
			skip = false;
		}
		PG_CATCH();
		{
			ErrorData  *errdata;
			MemoryContext ecxt;

			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();
			if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
			{
				FlushErrorState();
			}
			else
			{
				MemoryContextSwitchTo(ecxt);
				PG_RE_THROW();
			}
		}
		PG_END_TRY();
	}
	while (skip);

	tsearch_readline_end(&trst);

	return rootSuffixTree;
}