/*
 * Load a stop-word list from the tsearch configuration file named by
 * 'fname' (extension "stop") into 's'.
 *
 * Every line is cut at its first whitespace character; lines that are
 * empty after the cut are ignored.  When 'wordop' is supplied, each word
 * is passed through it; wordop may return its argument (possibly modified
 * in place) or a different string, in which case the original line buffer
 * is released here.
 *
 * A NULL or empty 'fname' yields an empty list (s->stop == NULL,
 * s->len == 0).  A file that cannot be opened raises an ERROR.  The
 * resulting array is sorted with pg_qsort_strcmp so that callers can
 * binary-search it.
 */
void
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
{
    char      **words = NULL;

    s->len = 0;

    if (fname != NULL && fname[0] != '\0')
    {
        tsearch_readline_state trst;
        char       *path = get_tsearch_config_filename(fname, "stop");
        int         capacity = 0;   /* allocated slots in words[] */
        char       *line;

        if (!tsearch_readline_begin(&trst, path))
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("could not open stop-word file \"%s\": %m",
                            path)));

        for (line = tsearch_readline(&trst);
             line != NULL;
             line = tsearch_readline(&trst))
        {
            char       *cut;
            char       *entry;

            /* terminate the word at the first whitespace character */
            for (cut = line; *cut != '\0' && !t_isspace(cut); cut += pg_mblen(cut))
                ;
            *cut = '\0';

            /* nothing left on this line? */
            if (line[0] == '\0')
            {
                pfree(line);
                continue;
            }

            /* make room for one more entry: start at 64, then double */
            if (s->len >= capacity)
            {
                if (capacity != 0)
                {
                    capacity *= 2;
                    words = (char **) repalloc((void *) words,
                                               sizeof(char *) * capacity);
                }
                else
                {
                    capacity = 64;
                    words = (char **) palloc(sizeof(char *) * capacity);
                }
            }

            /* normalize the word if requested, dropping a superseded buffer */
            entry = line;
            if (wordop)
            {
                entry = wordop(line);
                if (entry != line)
                    pfree(line);
            }

            words[s->len] = entry;
            s->len++;
        }

        tsearch_readline_end(&trst);
        pfree(path);
    }

    s->stop = words;

    /* keep the list ordered so lookups can use binary search */
    if (words != NULL && s->len > 0)
        qsort(words, s->len, sizeof(char *), pg_qsort_strcmp);
}
/*
 * Parse a thesaurus configuration file (extension "ths") into 'd'.
 *
 * Lines that are blank or whose first non-whitespace character is '#'
 * are skipped.  Every other line is scanned by a small state machine:
 *
 *   TR_WAITLEX   between words on the left-hand side of ':'
 *   TR_INLEX     inside a left-hand word; the word is handed to
 *                newLexeme() with its position within the line
 *   TR_WAITSUBS  between words on the right-hand side of ':'
 *   TR_INSUBS    inside a right-hand word; the word is handed to addWrd()
 *
 * On the right-hand side a leading '*' marks the word "use as is"
 * (useasis = true); a leading '\' starts a word without that marking.
 *
 * Each processed line gets the next substitution id; the total count is
 * stored in d->nsubst.  A ':' before any left-hand word, an empty
 * right-hand word, or a line lacking either side raises an ERROR with
 * ERRCODE_CONFIG_FILE_ERROR, as does failure to open the file.
 */
static void
thesaurusRead(char *filename, DictThesaurus *d)
{
    tsearch_readline_state trst;
    uint16      idsubst = 0;
    bool        useasis = false;
    char       *line;

    /* from here on, 'filename' is the palloc'd full path */
    filename = get_tsearch_config_filename(filename, "ths");
    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open thesaurus file \"%s\": %m",
                        filename)));

    while ((line = tsearch_readline(&trst)) != NULL)
    {
        char       *ptr;
        int         state = TR_WAITLEX;
        char       *beginwrd = NULL;
        uint16      posinsubst = 0;     /* left-hand words seen so far */
        uint16      nwrd = 0;           /* right-hand words seen so far */

        ptr = line;

        /* is it a comment? */
        while (*ptr && t_isspace(ptr))
            ptr += pg_mblen(ptr);

        if (t_iseq(ptr, '#') || *ptr == '\0' ||
            t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
        {
            pfree(line);
            continue;
        }

        while (*ptr)
        {
            if (state == TR_WAITLEX)
            {
                if (t_iseq(ptr, ':'))
                {
                    /* a delimiter with nothing before it is malformed */
                    if (posinsubst == 0)
                        ereport(ERROR,
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                 errmsg("unexpected delimiter")));
                    state = TR_WAITSUBS;
                }
                else if (!t_isspace(ptr))
                {
                    beginwrd = ptr;
                    state = TR_INLEX;
                }
            }
            else if (state == TR_INLEX)
            {
                if (t_iseq(ptr, ':'))
                {
                    newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
                    state = TR_WAITSUBS;
                }
                else if (t_isspace(ptr))
                {
                    newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
                    state = TR_WAITLEX;
                }
            }
            else if (state == TR_WAITSUBS)
            {
                if (t_iseq(ptr, '*'))
                {
                    /* word starts after the '*' marker */
                    useasis = true;
                    state = TR_INSUBS;
                    beginwrd = ptr + pg_mblen(ptr);
                }
                else if (t_iseq(ptr, '\\'))
                {
                    /* word starts after the backslash */
                    useasis = false;
                    state = TR_INSUBS;
                    beginwrd = ptr + pg_mblen(ptr);
                }
                else if (!t_isspace(ptr))
                {
                    useasis = false;
                    beginwrd = ptr;
                    state = TR_INSUBS;
                }
            }
            else if (state == TR_INSUBS)
            {
                if (t_isspace(ptr))
                {
                    /* a bare '*' or '\' followed by whitespace */
                    if (ptr == beginwrd)
                        ereport(ERROR,
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                 errmsg("unexpected end of line or lexeme")));
                    addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
                    state = TR_WAITSUBS;
                }
            }
            else
                elog(ERROR, "unrecognized thesaurus state: %d", state);

            ptr += pg_mblen(ptr);
        }

        /* flush a right-hand word that runs to the end of the line */
        if (state == TR_INSUBS)
        {
            if (ptr == beginwrd)
                ereport(ERROR,
                        (errcode(ERRCODE_CONFIG_FILE_ERROR),
                         errmsg("unexpected end of line or lexeme")));
            addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
        }

        idsubst++;

        /* both sides of the ':' must have contributed at least one word */
        if (!(nwrd && posinsubst))
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("unexpected end of line")));

        pfree(line);
    }

    d->nsubst = idsubst;

    tsearch_readline_end(&trst);

    /* release the path allocated by get_tsearch_config_filename() */
    pfree(filename);
}
/*
 * Fetch the next token of the query string held in 'state'.
 *
 * Returns OPR ('!', '&' or '|', stored in *val), OPEN, CLOSE, VAL (an
 * operand: *strval points at its first byte, *lenval is its byte length
 * and *flag holds any LVAR_* modifiers that followed it), END when the
 * input is exhausted with balanced parentheses, or ERR on a syntax
 * problem detected while waiting for an operator.  A malformed operand
 * or misplaced modifier raises an ERROR instead.
 */
static int4
gettoken_query(QPRS_STATE *state, int4 *val, int4 *lenval, char **strval, uint16 *flag)
{
    for (;;)
    {
        int         clen = pg_mblen(state->buf);
        int         onebyte = (clen == 1);

        if (state->state == WAITOPERAND)
        {
            if (onebyte && t_iseq(state->buf, '!'))
            {
                state->buf++;
                *val = (int4) '!';
                return OPR;
            }
            if (onebyte && t_iseq(state->buf, '('))
            {
                state->count++;
                state->buf++;
                return OPEN;
            }

            if (ISALNUM(state->buf))
            {
                /* first character of an operand */
                state->state = INOPERAND;
                *strval = state->buf;
                *lenval = clen;
                *flag = 0;
            }
            else if (!t_isspace(state->buf))
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("operand syntax error")));
        }
        else if (state->state == INOPERAND)
        {
            if (ISALNUM(state->buf))
            {
                /* operand text may not resume after a modifier */
                if (*flag)
                    ereport(ERROR,
                            (errcode(ERRCODE_SYNTAX_ERROR),
                             errmsg("modificators syntax error")));
                *lenval += clen;
            }
            else if (onebyte && t_iseq(state->buf, '%'))
                *flag |= LVAR_SUBLEXEME;
            else if (onebyte && t_iseq(state->buf, '@'))
                *flag |= LVAR_INCASE;
            else if (onebyte && t_iseq(state->buf, '*'))
                *flag |= LVAR_ANYEND;
            else
            {
                /* operand ended; leave the current character unconsumed */
                state->state = WAITOPERATOR;
                return VAL;
            }
        }
        else if (state->state == WAITOPERATOR)
        {
            if (onebyte &&
                (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
            {
                state->state = WAITOPERAND;
                *val = (int4) *(state->buf);
                state->buf++;
                return OPR;
            }
            if (onebyte && t_iseq(state->buf, ')'))
            {
                state->buf++;
                state->count--;
                return (state->count < 0) ? ERR : CLOSE;
            }
            if (*(state->buf) == '\0')
                return (state->count) ? ERR : END;
            if (onebyte && !t_iseq(state->buf, ' '))
                return ERR;
        }
        else
            return ERR;

        /* step over the character just examined */
        state->buf += clen;
    }

    /* not reached */
    return END;
}
/*
 * initTrie - create trie from file.
 *
 * Function converts UTF8-encoded file into current encoding.
 *
 * 'filename' is resolved through get_tsearch_config_filename() with the
 * "rules" extension.  Returns the root of the trie built by successive
 * placeChar() calls, or NULL if no rule was added.  Failure to open the
 * file raises an ERROR; a line containing more than two strings only
 * produces a WARNING and is not added.
 *
 * NOTE(review): the path returned by get_tsearch_config_filename() is not
 * pfree'd here, and the ErrorData copied in the PG_CATCH block is not
 * released on the "skip" path - confirm whether the surrounding memory
 * context makes that harmless.
 */
static TrieChar *
initTrie(char *filename)
{
    /* volatile: both are modified inside PG_TRY and read after it */
    TrieChar   *volatile rootTrie = NULL;
    MemoryContext ccxt = CurrentMemoryContext;
    tsearch_readline_state trst;
    volatile bool skip;

    filename = get_tsearch_config_filename(filename, "rules");
    if (!tsearch_readline_begin(&trst, filename))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open unaccent file \"%s\": %m",
                        filename)));

    do
    {
        /*
         * pg_do_encoding_conversion() (called by tsearch_readline()) will
         * emit exception if it finds untranslatable characters in current
         * locale. We just skip such lines, continuing with the next.
         *
         * 'skip' stays true unless the read loop below runs to end of
         * file, so a caught conversion error makes the outer loop go
         * around again.
         */
        skip = true;

        PG_TRY();
        {
            char       *line;

            while ((line = tsearch_readline(&trst)) != NULL)
            {
                /*----------
                 * The format of each line must be "src" or "src trg", where
                 * src and trg are sequences of one or more non-whitespace
                 * characters, separated by whitespace.  Whitespace at start
                 * or end of line is ignored.  If trg is omitted, an empty
                 * string is used as the replacement.
                 *
                 * We use a simple state machine, with states
                 *  0   initial (before src)
                 *  1   in src
                 *  2   in whitespace after src
                 *  3   in trg
                 *  4   in whitespace after trg
                 *  -1  syntax error detected
                 *----------
                 */
                int         state;
                char       *ptr;
                char       *src = NULL;
                char       *trg = NULL;
                int         ptrlen;
                int         srclen = 0;
                int         trglen = 0;

                state = 0;
                for (ptr = line; *ptr; ptr += ptrlen)
                {
                    /* byte length of the (possibly multibyte) character */
                    ptrlen = pg_mblen(ptr);
                    /* ignore whitespace, but end src or trg */
                    if (t_isspace(ptr))
                    {
                        if (state == 1)
                            state = 2;
                        else if (state == 3)
                            state = 4;
                        continue;
                    }
                    switch (state)
                    {
                        case 0:
                            /* start of src */
                            src = ptr;
                            srclen = ptrlen;
                            state = 1;
                            break;
                        case 1:
                            /* continue src */
                            srclen += ptrlen;
                            break;
                        case 2:
                            /* start of trg */
                            trg = ptr;
                            trglen = ptrlen;
                            state = 3;
                            break;
                        case 3:
                            /* continue trg */
                            trglen += ptrlen;
                            break;
                        default:
                            /*
                             * bogus line format: a third string started
                             * (state 4), or an error was already flagged
                             */
                            state = -1;
                            break;
                    }
                }

                if (state == 1 || state == 2)
                {
                    /* trg was omitted, so use "" */
                    trg = "";
                    trglen = 0;
                }

                /* state 0 (blank line) falls through both tests below */
                if (state > 0)
                    rootTrie = placeChar(rootTrie,
                                         (unsigned char *) src, srclen,
                                         trg, trglen);
                else if (state < 0)
                    ereport(WARNING,
                            (errcode(ERRCODE_CONFIG_FILE_ERROR),
                             errmsg("invalid syntax: more than two strings in unaccent rule")));

                pfree(line);
            }

            /* reached end of file without an exception */
            skip = false;
        }
        PG_CATCH();
        {
            ErrorData  *errdata;
            MemoryContext ecxt;

            /* switch back to the caller's context before copying the error */
            ecxt = MemoryContextSwitchTo(ccxt);
            errdata = CopyErrorData();
            if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
            {
                /* swallow the error; 'skip' is still true, so we loop */
                FlushErrorState();
            }
            else
            {
                /* any other error is propagated unchanged */
                MemoryContextSwitchTo(ecxt);
                PG_RE_THROW();
            }
        }
        PG_END_TRY();
    }
    while (skip);

    tsearch_readline_end(&trst);

    return rootTrie;
}
/*
 * Get the next token, removing it from the input stream.
 *
 * The token is stored in ctx->tokcxcur and its type (a TOKT* code) is
 * returned; TOKTEOF is returned when the line source is exhausted.  The
 * scan position (ctx->tokcxptr / ctx->tokcxlen) is updated on exit.
 *
 * Along the way this routine:
 *   - skips whitespace and both comment forms, pulling new lines with
 *     tokgetlin() as needed;
 *   - expands #define symbols by pushing the current position on the
 *     macro stack (tokcxmsav/tokcxmsvl) and scanning the expansion text;
 *   - evaluates the __defined(sym) / __DEFINED(sym) operator to a number;
 *   - parses octal, hex and decimal numbers;
 *   - parses quoted strings, feeding their text to the tokcxsst/tokcxsad/
 *     tokcxsend callbacks, and rewrites "<<expr>>" embedded expressions
 *     in double-quoted strings by scanning the tokmac* glue strings;
 *   - otherwise matches a special-character token from ctx->tokcxsc.
 *
 * Errors are signalled through ctx->tokcxerr (errsig for fatal
 * conditions such as ERR_MACNEST, ERR_STREOF, ERR_INVTOK; errlog for
 * warnings such as ERR_TRUNC and ERR_STREND).
 *
 * NOTE(review): the __defined() branch passes an unbounded symlen with a
 * TOKNAMMAX-byte scratch buffer to tok_casefold_defsym(); confirm that
 * helper bounds its copy.
 */
int toknext(tokcxdef *ctx)
{
    char   *p;
    tokdef *tok = &ctx->tokcxcur;
    int     len;

    /*
     *   Check for the special case that we pushed an open paren prior to
     *   a string containing an embedded expression.  If this is the case,
     *   immediately return the string we previously parsed.
     */
    if ((ctx->tokcxflg & TOKCXF_EMBED_PAREN_PRE) != 0)
    {
        /*
         *   convert the token to a string - note that the offset
         *   information for the string is already in the current token
         *   structure, since we set everything up for it on the previous
         *   call where we actually parsed the beginning of the string
         */
        tok->toktyp = TOKTDSTRING;

        /* clear the special flag - we've now consumed the pushed string */
        ctx->tokcxflg &= ~TOKCXF_EMBED_PAREN_PRE;

        /* immediately return the string */
        return tok->toktyp;
    }

    /* set up at the current scanning position */
    p = ctx->tokcxptr;
    len = ctx->tokcxlen;

    /* scan off whitespace and comments until we find something */
    do
    {
    skipblanks:
        /* if there's nothing on this line, get the next one */
        if (len == 0)
        {
            /* if we're in a macro expansion, continue after it */
            if (ctx->tokcxmlvl)
            {
                ctx->tokcxmlvl--;
                p = ctx->tokcxmsav[ctx->tokcxmlvl];
                len = ctx->tokcxmsvl[ctx->tokcxmlvl];
            }
            else
            {
                if (tokgetlin(ctx, TRUE))
                {
                    tok->toktyp = TOKTEOF;
                    goto done;
                }
                p = ctx->tokcxptr;
                len = ctx->tokcxlen;
            }
        }
        while (len && t_isspace(*p)) ++p, --len;   /* scan off whitespace */

        /* check for comments, and remove if present */
        if (len >= 2 && *p == '/' && *(p+1) == '/')
            len = 0;
        else if (len >= 2 && *p == '/' && *(p+1) == '*')
        {
            while (len < 2 || *p != '*' || *(p+1) != '/')
            {
                if (len != 0)
                    ++p, --len;

                if (len == 0)
                {
                    if (ctx->tokcxmlvl != 0)
                    {
                        ctx->tokcxmlvl--;
                        p = ctx->tokcxmsav[ctx->tokcxmlvl];
                        len = ctx->tokcxmsvl[ctx->tokcxmlvl];
                    }
                    else
                    {
                        if (tokgetlin(ctx, FALSE))
                        {
                            ctx->tokcxptr = p;
                            tok->toktyp = TOKTEOF;
                            goto done;
                        }
                        p = ctx->tokcxptr;
                        len = ctx->tokcxlen;
                    }
                }
            }
            p += 2;
            len -= 2;
            goto skipblanks;
        }
    } while (len == 0);

nexttoken:
    if (isalpha((uchar)*p) || *p == '_' || *p == '$')
    {
        int       l;
        int       hash;
        char     *q;
        toktdef  *tab;
        int       found = FALSE;
        uchar     thischar;
        tokdfdef *df;

        /*
         *   copy the symbol (case-folded if requested) into the token
         *   name buffer, accumulating its hash, up to TOKNAMMAX chars
         */
        for (hash = 0, l = 0, q = tok->toknam ;
             len != 0 && TOKISSYM(*p) && l < TOKNAMMAX ;
             (thischar = ((isupper((uchar)*p)
                           && (ctx->tokcxflg & TOKCXCASEFOLD))
                          ? tolower((uchar)*p)
                          : *p)),
             (hash = ((hash + thischar) & (TOKHASHSIZE - 1))),
             (*q++ = thischar), ++p, --len, ++l) ;
        *q = '\0';
        if (len != 0 && TOKISSYM(*p))
        {
            /* symbol is too long - skip the excess and warn */
            while (len != 0 && TOKISSYM(*p)) ++p, --len;

            /*
             *   report the truncated name using the length just scanned;
             *   tok->toklen has not been set for this token yet and still
             *   holds the previous token's length
             */
            errlog1(ctx->tokcxerr, ERR_TRUNC, ERRTSTR,
                    errstr(ctx->tokcxerr, tok->toknam, l));
        }
        tok->toklen = l;
        tok->tokhash = hash;

        /*
         *   check for the special defined() preprocessor operator
         */
        if (l == 9 && !memcmp(tok->toknam,
                              ((ctx->tokcxflg & TOKCXCASEFOLD)
                               ? "__defined" : "__DEFINED"),
                              (size_t)9)
            && len > 2 && *p == '(' && TOKISSYM(*(p+1))
            && !isdigit((uchar)*(p+1)))
        {
            int  symlen;
            char mysym[TOKNAMMAX];

            /* find the matching ')', allowing only symbolic characters */
            ++p, --len;
            for (symlen = 0, q = p ; len && *p != ')' && TOKISSYM(*p) ;
                 ++p, --len, ++symlen) ;

            /* make sure we found the closing paren */
            if (!len || *p != ')')
                errsig(ctx->tokcxerr, ERR_BADISDEF);
            ++p, --len;

            /* if we're folding case, convert the symbol to lower case */
            q = tok_casefold_defsym(ctx, mysym, q, symlen);

            /* check to see if it's defined */
            tok->toktyp = TOKTNUMBER;
            tok->tokval = (tok_find_define(ctx, q, symlen) != 0);
            goto done;
        }

        /* substitute the preprocessor #define, if any */
        if ((df = tok_find_define(ctx, tok->toknam, l)) != 0)
        {
            /* save the current parsing position */
            if (ctx->tokcxmlvl >= TOKMACNEST)
                errsig(ctx->tokcxerr, ERR_MACNEST);
            ctx->tokcxmsav[ctx->tokcxmlvl] = p;
            ctx->tokcxmsvl[ctx->tokcxmlvl] = len;
            ctx->tokcxmlvl++;

            /* point to the token's expansion and keep going */
            p = df->expan;
            len = df->explen;
            goto nexttoken;
        }

        /* look up in symbol table(s), if any */
        for (tab = ctx->tokcxstab ; tab ; tab = tab->toktnxt)
        {
            if ((found = (*tab->toktfsea)(tab, tok->toknam, l, hash,
                                          &tok->toksym)) != 0)
                break;
        }

        if (found && tok->toksym.tokstyp == TOKSTKW)
            tok->toktyp = tok->toksym.toksval;
        else
        {
            tok->toktyp = TOKTSYMBOL;
            if (!found)
                tok->toksym.tokstyp = TOKSTUNK;
        }
        goto done;
    }
    else if (isdigit((uchar)*p))
    {
        long acc = 0;

        /* check for octal/hex */
        if (*p == '0')
        {
            ++p, --len;
            if (len && (*p == 'x' || *p == 'X'))
            {
                /* hex */
                ++p, --len;
                while (len && TOKISHEX(*p))
                {
                    acc = (acc << 4) + TOKHEX2INT(*p);
                    ++p, --len;
                }
            }
            else
            {
                /* octal */
                while (len && TOKISOCT(*p))
                {
                    acc = (acc << 3) + TOKOCT2INT(*p);
                    ++p, --len;
                }
            }
        }
        else
        {
            /* decimal */
            while (len && isdigit((uchar)*p))
            {
                acc = (acc << 1) + (acc << 3) + TOKDEC2INT(*p);
                ++p, --len;
            }
        }
        tok->tokval = acc;
        tok->toktyp = TOKTNUMBER;
        goto done;
    }
    else if (*p == '"' || *p == '\'')
    {
        char  delim;                 /* closing delimiter we're looking for */
        char *strstart;                      /* pointer to start of string */
        int   warned;

        delim = *p;
        --len;
        strstart = ++p;

        if (delim == '"' && len >= 2 && *p == '<' && *(p+1) == '<')
        {
            /* save the current parsing position */
            if (ctx->tokcxmlvl >= TOKMACNEST)
                errsig(ctx->tokcxerr, ERR_MACNEST);
            ctx->tokcxmsav[ctx->tokcxmlvl] = p + 2;
            ctx->tokcxmsvl[ctx->tokcxmlvl] = len - 2;
            ctx->tokcxmlvl++;

            /*
             *   read from the special "<<" expansion string - use the
             *   version for a "<<" at the very beginning of the string
             */
            p = tokmac1s;
            len = strlen(p);
            ctx->tokcxflg |= TOKCXFINMAC;
            goto nexttoken;
        }
        tok->toktyp = (delim == '"' ? TOKTDSTRING : TOKTSSTRING);

        tok->tokofs = (*ctx->tokcxsst)(ctx->tokcxscx);  /* start the string */
        for (warned = FALSE ;; )
        {
            if (len >= 2 && *p == '\\')
            {
                /*
                 *   for an escaped quote, flush the text so far and
                 *   restart the segment at the quote itself, dropping
                 *   the backslash
                 */
                if (*(p+1) == '"' || *(p+1) == '\'')
                {
                    (*ctx->tokcxsad)(ctx->tokcxscx, strstart,
                                     (ushort)(p - strstart));
                    strstart = p + 1;
                }
                p += 2;
                len -= 2;
            }
            else if (len == 0 || *p == delim ||
                     (delim == '"' && len >= 2 && *p == '<'
                      && *(p+1) == '<' && !(ctx->tokcxflg & TOKCXFINMAC)))
            {
                (*ctx->tokcxsad)(ctx->tokcxscx, strstart,
                                 (ushort)(p - strstart));
                if (len == 0)
                {
                    if (ctx->tokcxmlvl)
                    {
                        ctx->tokcxmlvl--;
                        p = ctx->tokcxmsav[ctx->tokcxmlvl];
                        len = ctx->tokcxmsvl[ctx->tokcxmlvl];
                    }
                    else
                        (*ctx->tokcxsad)(ctx->tokcxscx, " ", (ushort)1);

                    while (len == 0)
                    {
                        if (tokgetlin(ctx, FALSE))
                            errsig(ctx->tokcxerr, ERR_STREOF);
                        p = ctx->tokcxptr;
                        len = ctx->tokcxlen;

                        /* warn if it looks like the end of an object */
                        if (!warned && len && (*p == ';' || *p == '}'))
                        {
                            errlog(ctx->tokcxerr, ERR_STREND);
                            warned = TRUE;     /* warn only once per string */
                        }

                        /* scan past whitespace at start of line */
                        while (len && t_isspace(*p)) ++p, --len;
                    }
                    strstart = p;
                }
                else break;
            }
            else
                ++p, --len;
        }

        /* end the string */
        (*ctx->tokcxsend)(ctx->tokcxscx);

        /* check to see how it ended */
        if (len != 0 && *p == delim)
        {
            /*
             *   We ended with the matching delimiter.  Move past the
             *   closing delimiter.
             */
            ++p;
            --len;

            /*
             *   If we have a pending close paren we need to put in
             *   because of an embedded expression that occurred earlier
             *   in the string, parse the macro to provide the paren.
             */
            if ((ctx->tokcxflg & TOKCXF_EMBED_PAREN_AFT) != 0
                && !(ctx->tokcxflg & TOKCXFINMAC))
            {
                /* clear the flag */
                ctx->tokcxflg &= ~TOKCXF_EMBED_PAREN_AFT;

                /* push the current parsing position */
                if (ctx->tokcxmlvl >= TOKMACNEST)
                    errsig(ctx->tokcxerr, ERR_MACNEST);
                ctx->tokcxmsav[ctx->tokcxmlvl] = p;
                ctx->tokcxmsvl[ctx->tokcxmlvl] = len;
                ctx->tokcxmlvl++;

                /* parse the macro */
                p = tokmac4;
                len = strlen(p);
            }
        }
        else if (len != 0 && *p == '<')
        {
            /* save the current parsing position */
            if (ctx->tokcxmlvl >= TOKMACNEST)
                errsig(ctx->tokcxerr, ERR_MACNEST);
            ctx->tokcxmsav[ctx->tokcxmlvl] = p + 2;
            ctx->tokcxmsvl[ctx->tokcxmlvl] = len - 2;
            ctx->tokcxmlvl++;

            /* read from the "<<" expansion */
            p = tokmac1;
            len = strlen(p);
            ctx->tokcxflg |= TOKCXFINMAC;

            /*
             *   Set the special push-a-paren flag: we'll return an open
             *   paren now, so that we have an open paren before the
             *   string, and then on the next call to toknext() we'll
             *   immediately return the string we've already parsed here.
             *   This will ensure that everything in the string is
             *   properly grouped together as a single indivisible
             *   expression.
             *
             *   Note that we only need to do this for the first embedded
             *   expression in a string.  Once we have a close paren
             *   pending, we don't need more open parens.
             */
            if (!(ctx->tokcxflg & TOKCXF_EMBED_PAREN_AFT))
            {
                ctx->tokcxflg |= TOKCXF_EMBED_PAREN_PRE;
                tok->toktyp = TOKTLPAR;
            }
        }
        goto done;
    }
    else if (len >= 2 && *p == '>' && *(p+1) == '>'
             && (ctx->tokcxflg & TOKCXFINMAC) != 0)
    {
        /* skip the ">>" */
        ctx->tokcxflg &= ~TOKCXFINMAC;
        p += 2;
        len -= 2;

        /* save the current parsing position */
        if (ctx->tokcxmlvl >= TOKMACNEST)
            errsig(ctx->tokcxerr, ERR_MACNEST);
        ctx->tokcxmsav[ctx->tokcxmlvl] = p;
        ctx->tokcxmsvl[ctx->tokcxmlvl] = len;
        ctx->tokcxmlvl++;

        if (*p == '"')
        {
            /* the string ends right here - step the saved position past
               the closing quote */
            ++(ctx->tokcxmsav[ctx->tokcxmlvl - 1]);
            --(ctx->tokcxmsvl[ctx->tokcxmlvl - 1]);
            p = tokmac3;

            /*
             *   we won't need an extra closing paren now, since tokmac3
             *   provides it
             */
            ctx->tokcxflg &= ~TOKCXF_EMBED_PAREN_AFT;
        }
        else
        {
            /*
             *   The string is continuing.  Set a flag to note that we
             *   need to provide a close paren after the end of the
             *   string, and parse the glue (tokmac2) that goes between
             *   the expression and the resumption of the string.
             */
            ctx->tokcxflg |= TOKCXF_EMBED_PAREN_AFT;
            p = tokmac2;
        }

        len = strlen(p);
        goto nexttoken;
    }
    else
    {
        tokscdef *sc;

        /* try each special-character token filed under this first char */
        for (sc = ctx->tokcxsc[ctx->tokcxinx[(uchar)*p]] ; sc ;
             sc = sc->tokscnxt)
        {
            if (toksceq(sc->tokscstr, p, sc->toksclen, len))
            {
                tok->toktyp = sc->toksctyp;
                p += sc->toksclen;
                len -= sc->toksclen;
                goto done;
            }
        }
        errsig(ctx->tokcxerr, ERR_INVTOK);
    }

done:
    /* remember where the next scan resumes */
    ctx->tokcxptr = p;
    ctx->tokcxlen = len;
    return(tok->toktyp);
}
/*
 * Get next token from string being parsed. Returns true if successful,
 * false if end of input string is reached.  On success, these output
 * parameters are filled in:
 *
 * *strval      pointer to token
 * *lenval      length of *strval
 * *pos_ptr     pointer to a palloc'd array of positions and weights
 *              associated with the token. If the caller is not interested
 *              in the information, NULL can be supplied. Otherwise
 *              the caller is responsible for pfreeing the array.
 * *poslen      number of elements in *pos_ptr
 * *endptr      scan resumption point
 *
 * Pass NULL for unwanted output parameters.
 *
 * The scanner is a character-at-a-time state machine over state->prsbuf:
 *
 *   WAITWORD       skipping whitespace before a token
 *   WAITENDWORD    inside an unquoted token
 *   WAITENDCMPLX   inside a single-quoted token
 *   WAITCHARCMPLX  just saw a quote inside a quoted token (a second
 *                  quote means a literal quote, anything else ends it)
 *   WAITNEXTCHAR   just saw a backslash; the next character is copied
 *                  literally and the machine returns to 'oldstate'
 *   WAITPOSINFO    after a quoted token, looking for ':'
 *   INPOSINFO      expecting a position number
 *   WAITPOSDELIM   after a position number: ',' or a weight letter
 *
 * NOTE(review): RESIZEPRSBUF, COPYCHAR, RETURN_TOKEN and PRSSYNTAXERROR
 * are macros defined outside this function; they take no arguments for
 * the locals they need, so presumably they refer to 'state', 'curpos',
 * 'pos', 'npos' and the output parameters by name - confirm before
 * renaming anything here.
 */
bool
gettoken_tsvector(TSVectorParseState state,
                  char **strval, int *lenval,
                  WordEntryPos **pos_ptr, int *poslen,
                  char **endptr)
{
    int         oldstate = 0;       /* state to resume after an escape */
    char       *curpos = state->word;   /* write cursor in output buffer */
    int         statecode = WAITWORD;

    /*
     * pos is for collecting the comma delimited list of positions followed by
     * the actual token.
     */
    WordEntryPos *pos = NULL;
    int         npos = 0;           /* elements of pos used */
    int         posalen = 0;        /* allocated size of pos */

    while (1)
    {
        if (statecode == WAITWORD)
        {
            if (*(state->prsbuf) == '\0')
                return false;
            else if (t_iseq(state->prsbuf, '\''))
                statecode = WAITENDCMPLX;
            else if (t_iseq(state->prsbuf, '\\'))
            {
                statecode = WAITNEXTCHAR;
                oldstate = WAITENDWORD;
            }
            else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
                PRSSYNTAXERROR;
            else if (!t_isspace(state->prsbuf))
            {
                /* first character of an unquoted token */
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
                statecode = WAITENDWORD;
            }
        }
        else if (statecode == WAITNEXTCHAR)
        {
            /* a backslash must be followed by something */
            if (*(state->prsbuf) == '\0')
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("there is no escaped character: \"%s\"",
                                state->bufstart)));
            else
            {
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
                Assert(oldstate != 0);
                statecode = oldstate;
            }
        }
        else if (statecode == WAITENDWORD)
        {
            if (t_iseq(state->prsbuf, '\\'))
            {
                statecode = WAITNEXTCHAR;
                oldstate = WAITENDWORD;
            }
            else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
                     (state->oprisdelim && ISOPERATOR(state->prsbuf)))
            {
                /* end of an unquoted token; it must not be empty */
                RESIZEPRSBUF;
                if (curpos == state->word)
                    PRSSYNTAXERROR;
                *(curpos) = '\0';
                RETURN_TOKEN;
            }
            else if (t_iseq(state->prsbuf, ':'))
            {
                /* ':' ends the token and, unless oprisdelim, starts positions */
                if (curpos == state->word)
                    PRSSYNTAXERROR;
                *(curpos) = '\0';
                if (state->oprisdelim)
                    RETURN_TOKEN;
                else
                    statecode = INPOSINFO;
            }
            else
            {
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
            }
        }
        else if (statecode == WAITENDCMPLX)
        {
            if (t_iseq(state->prsbuf, '\''))
            {
                statecode = WAITCHARCMPLX;
            }
            else if (t_iseq(state->prsbuf, '\\'))
            {
                statecode = WAITNEXTCHAR;
                oldstate = WAITENDCMPLX;
            }
            else if (*(state->prsbuf) == '\0')
                PRSSYNTAXERROR;     /* unterminated quoted token */
            else
            {
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
            }
        }
        else if (statecode == WAITCHARCMPLX)
        {
            if (t_iseq(state->prsbuf, '\''))
            {
                /* doubled quote: copy one quote and stay inside the token */
                RESIZEPRSBUF;
                COPYCHAR(curpos, state->prsbuf);
                curpos += pg_mblen(state->prsbuf);
                statecode = WAITENDCMPLX;
            }
            else
            {
                /* the previous quote closed the token */
                RESIZEPRSBUF;
                *(curpos) = '\0';
                if (curpos == state->word)
                    PRSSYNTAXERROR;
                if (state->oprisdelim)
                {
                    /* state->prsbuf+=pg_mblen(state->prsbuf); */
                    RETURN_TOKEN;
                }
                else
                    statecode = WAITPOSINFO;
                continue;       /* recheck current character */
            }
        }
        else if (statecode == WAITPOSINFO)
        {
            if (t_iseq(state->prsbuf, ':'))
                statecode = INPOSINFO;
            else
                RETURN_TOKEN;
        }
        else if (statecode == INPOSINFO)
        {
            if (t_isdigit(state->prsbuf))
            {
                /* grow the position array: start at 4 entries, then double */
                if (posalen == 0)
                {
                    posalen = 4;
                    pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
                    npos = 0;
                }
                else if (npos + 1 >= posalen)
                {
                    posalen *= 2;
                    pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
                }
                npos++;
                /*
                 * atoi() converts the whole digit run starting here; the
                 * remaining digits are stepped over in WAITPOSDELIM
                 */
                WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
                /* we cannot get here in tsquery, so no need for 2 errmsgs */
                if (WEP_GETPOS(pos[npos - 1]) == 0)
                    ereport(ERROR,
                            (errcode(ERRCODE_SYNTAX_ERROR),
                             errmsg("wrong position info in tsvector: \"%s\"",
                                    state->bufstart)));
                WEP_SETWEIGHT(pos[npos - 1], 0);
                statecode = WAITPOSDELIM;
            }
            else
                PRSSYNTAXERROR;
        }
        else if (statecode == WAITPOSDELIM)
        {
            /*
             * Weight letters map to A/a/'*' = 3, B/b = 2, C/c = 1, D/d = 0;
             * a weight letter is rejected if a non-zero weight is already
             * set for this position.
             */
            if (t_iseq(state->prsbuf, ','))
                statecode = INPOSINFO;
            else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 3);
            }
            else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 2);
            }
            else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 1);
            }
            else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
            {
                if (WEP_GETWEIGHT(pos[npos - 1]))
                    PRSSYNTAXERROR;
                WEP_SETWEIGHT(pos[npos - 1], 0);
            }
            else if (t_isspace(state->prsbuf) ||
                     *(state->prsbuf) == '\0')
                RETURN_TOKEN;
            else if (!t_isdigit(state->prsbuf))
                PRSSYNTAXERROR;
        }
        else    /* internal error */
            elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
                 statecode);

        /* get next char */
        state->prsbuf += pg_mblen(state->prsbuf);
    }
}
/*
 * Get a new line from the line source, processing '#' directives.
 *
 * ctx      tokenizer context; on success ctx->tokcxptr / ctx->tokcxlen
 *          describe the new line
 * dopound  when non-zero, a line whose first character is '#' is treated
 *          as a preprocessor directive (unless the line source has
 *          LINFNOINC set) and is consumed here rather than returned
 *
 * Returns TRUE at end of input (after logging ERR_NOENDIF if an #if is
 * still open), FALSE when a line has been fetched.
 *
 * When the current source hits end of file and has a parent (an
 * including file), the source is closed and reading resumes in the
 * parent.  A line delivered in several segments (LINFMORE) is assembled
 * into ctx->tokcxbuf, which is grown in 4k steps as required; a line
 * that would push the buffer past the size limit signals ERR_LONGLIN.
 * Lines inside the false part of an #if are skipped.
 */
static int tokgetlin(tokcxdef *ctx, int dopound)
{
    for (;;)
    {
        if (linget(ctx->tokcxlin))
        {
            /* at eof in current source; resume parent if there is one */
            if (ctx->tokcxlin->linpar)
            {
                lindef *parent;

                parent = ctx->tokcxlin->linpar;          /* remember parent */
                lincls(ctx->tokcxlin);               /* close included file */
                if (!ctx->tokcxdbg)                 /* if no debug context... */
                    mchfre(ctx->tokcxlin);              /* free line source */
                ctx->tokcxlin = parent;      /* reset to parent line source */
                if (parent->linflg & LINFCMODE)
                    ctx->tokcxflg |= TOKCXFCMODE;
                else
                    ctx->tokcxflg &= ~TOKCXFCMODE;
                continue;                       /* back for another attempt */
            }
            else
            {
                /* check for outstanding #if/#ifdef */
                if (ctx->tokcxifcnt)
                    errlog(ctx->tokcxerr, ERR_NOENDIF);

                /* return end-of-file indication */
                return TRUE;
            }
        }

        /* if this is a multi-segment line, copy it into our own buffer */
        if (ctx->tokcxlin->linflg & LINFMORE)
        {
            char *p;
            uint  rem;
            int   done;

            if (!ctx->tokcxbuf)
            {
                /* allocate 1k as a default buffer */
                ctx->tokcxbuf = (char *)mchalo(ctx->tokcxerr, 1024, "tok");
                ctx->tokcxbsz = 1024;
            }
            ctx->tokcxlen = 0;

            for (done = FALSE, p = ctx->tokcxbuf, rem = ctx->tokcxbsz ;
                 !done ; )
            {
                size_t len = ctx->tokcxlin->linlen;

                /* add the current segment's length into line length */
                ctx->tokcxlen += len;

                /* we're done after this piece if the last fetch was all */
                done = !(ctx->tokcxlin->linflg & LINFMORE);
                if (len + 1 > rem)
                {
                    char *newp;
                    uint  newrem = rem;
                    uint  newbsz = ctx->tokcxbsz;

                    /*
                     *   Work out the new size: grow in 4k steps until the
                     *   segment plus the terminating null fits.  (A
                     *   single step is not enough when the segment is
                     *   more than 4k larger than the space remaining.)
                     *   The context is left untouched until the new
                     *   buffer actually exists, so an error signalled
                     *   here cannot leave tokcxbsz out of step with the
                     *   real allocation.
                     */
                    do
                    {
                        if (newbsz > (unsigned)0x8000)
                            errsig(ctx->tokcxerr, ERR_LONGLIN);
                        newrem += 4096;
                        newbsz += 4096;
                    } while (len + 1 > newrem);

                    /* allocate a new buffer and copy line into it */
                    newp = (char *)mchalo(ctx->tokcxerr, newbsz, "tok");
                    memcpy(newp, ctx->tokcxbuf, (size_t)(p - ctx->tokcxbuf));

                    /* free the original buffer, and use the new one */
                    p = (p - ctx->tokcxbuf) + newp;
                    mchfre(ctx->tokcxbuf);
                    ctx->tokcxbuf = newp;
                    ctx->tokcxbsz = newbsz;
                    rem = newrem;
                }

                /* add the line to the buffer */
                memcpy(p, ctx->tokcxlin->linbuf, len);
                p += len;
                rem -= len;

                /* get the next piece of the line if there is one */
                if (!done)
                {
                    if (linget(ctx->tokcxlin)) break;
                }
            }

            /* null-terminate the buffer, and use it for input */
            *p = '\0';
            ctx->tokcxptr = ctx->tokcxbuf;
        }
        else
        {
            ctx->tokcxptr = ctx->tokcxlin->linbuf;
            ctx->tokcxlen = ctx->tokcxlin->linlen;
        }

        /* check for preprocessor directives */
        if (dopound && ctx->tokcxlen != 0 && ctx->tokcxptr[0] == '#'
            && !(ctx->tokcxlin->linflg & LINFNOINC))
        {
            char   *p;
            int     len;
            static struct
            {
                char  *nm;                              /* directive name */
                int    len;                             /* length of name */
                int    ok_in_if;    /* processed even inside a false #if? */
                void (*fn)(tokcxdef *, char *, int);           /* handler */
            }
            *dirp, dir[] =
            {
                { "include", 7, FALSE, tokinclude },
                { "pragma",  6, FALSE, tokpragma },
                { "define",  6, FALSE, tokdefine },
                { "ifdef",   5, TRUE, tokifdef },
                { "ifndef",  6, TRUE, tokifndef },
                { "if",      2, TRUE, tokif },
                { "else",    4, TRUE, tokelse },
                { "elif",    4, TRUE, tokelif },
                { "endif",   5, TRUE, tokendif },
                { "undef",   5, FALSE, tokundef },
                { "error",   5, FALSE, tok_p_error }
            };
            int  i;

            /* scan off spaces between '#' and directive */
            for (len = ctx->tokcxlen - 1, p = &ctx->tokcxptr[1] ;
                 len && t_isspace(*p) ; --len, ++p) ;

            /* find and process the directive */
            for (dirp = dir, i = sizeof(dir)/sizeof(dir[0]) ; i ; --i, ++dirp)
            {
                /* compare this directive; if it wins, call its function */
                if (len >= dirp->len
                    && !memcmp(p, dirp->nm, (size_t)dirp->len)
                    && (len == dirp->len || t_isspace(*(p + dirp->len))))
                {
                    int cnt;
                    int stat;

                    /*
                     *   if we're not in a #if's false part, or if the
                     *   directive is processed even in #if false parts,
                     *   process the line, otherwise skip it
                     */
                    cnt = ctx->tokcxifcnt;
                    if (dirp->ok_in_if || cnt == 0
                        || ((stat = ctx->tokcxifcur) == TOKIF_IF_YES
                            || stat == TOKIF_ELSE_YES))
                    {
                        /* skip whitespace following the directive */
                        for (p += dirp->len, len -= dirp->len ;
                             len && t_isspace(*p) ;
                             --len, ++p) ;

                        /* invoke the function to process this directive */
                        (*dirp->fn)(ctx, p, len);
                    }

                    /* there's no need to look at more directives */
                    break;
                }
            }

            /* if we didn't find anything, flag the error */
            if (i == 0)
                errlog(ctx->tokcxerr, ERR_PRPDIR);

            /* ignore this line */
            continue;
        }
        else
        {
            /*
             *   Check the #if level.  If we're in an #if, and we're to
             *   ignore lines (because of a false condition or an #else
             *   part for a true condition), skip this line.
             */
            if (ctx->tokcxifcnt != 0)
            {
                switch(ctx->tokcxifcur)
                {
                case TOKIF_IF_NO:
                case TOKIF_ELSE_NO:
                    /* ignore this line */
                    continue;

                default:
                    /* we're in a true part - keep the line */
                    break;
                }
            }

            ctx->tokcxlin->linflg &= ~LINFDBG;       /* no debug record yet */
            return(FALSE);                      /* return the line we found */
        }
    }
}
/* * Text-mode os_input_dialog implementation */ int tio_input_dialog(int icon_id, const char *prompt, int standard_button_set, const char **buttons, int button_count, int default_index, int cancel_index) { /* ignore the icon ID - we can't display an icon in text mode */ VARUSED(icon_id); /* keep going until we get a valid response */ for (;;) { int i; char buf[256]; const char *p; const char *cur; char *resp; int match_cnt; int last_found; static const struct { const char *buttons[3]; int button_count; } std_btns[] = { { { "&OK" }, 1 }, { { "&OK", "&Cancel" }, 2 }, { { "&Yes", "&No" }, 2 }, { { "&Yes", "&No", "&Cancel" }, 3 } }; /* * if we have a standard button set selected, get our button * labels */ switch(standard_button_set) { case 0: /* use the explicit buttons provided */ break; case OS_INDLG_OK: i = 0; use_std_btns: /* use the selected standard button set */ buttons = (const char **)std_btns[i].buttons; button_count = std_btns[i].button_count; break; case OS_INDLG_OKCANCEL: i = 1; goto use_std_btns; case OS_INDLG_YESNO: i = 2; goto use_std_btns; case OS_INDLG_YESNOCANCEL: i = 3; goto use_std_btns; default: /* * we don't recognize other standard button sets - return an * error */ return 0; } /* * if there are no buttons defined, they'll never be able to * respond, so we'd just loop forever - rather than let that * happen, return failure */ if (button_count == 0) return 0; /* display a newline and the prompt string */ outformat("\\n"); outformat((char *)prompt); outformat(" "); /* display the response */ for (i = 0 ; i < button_count ; ++i) { /* * display a slash to separate responses, if this isn't the * first one */ if (i != 0) outformat("/"); /* get the current button */ cur = buttons[i]; /* * Look for a "&" in the response string. If we find it, * remove the "&" and enclose the shortcut key in parens. 
*/ for (p = cur ; *p != '&' && *p != '\0' ; ++p) ; /* if we found the "&", put the next character in parens */ if (*p == '&') { /* reformat the response string */ sprintf(buf, "%.*s(%c)%s", (int)(p - cur), cur, *(p+1), p+2); /* display it */ outformat(buf); } else { /* no '&' - just display the response string as-is */ outformat((char *)cur); } } /* if we're in HTML mode, switch to input font */ if (tio_is_html_mode()) outformat("<font face='TADS-Input'>"); /* read the response */ getstring(" >", buf, sizeof(buf)); /* if we're in HTML mode, close the input font tag */ if (tio_is_html_mode()) outformat("</font>"); /* skip any leading spaces in the reply */ for (resp = buf ; t_isspace(*resp) ; ++resp) ; /* if it's one character, check it against the shortcut keys */ if (strlen(resp) == 1) { /* scan the responses */ for (i = 0 ; i < button_count ; ++i) { /* look for a '&' in this button */ for (p = buttons[i] ; *p != '&' && *p != '\0' ; ++p) ; /* if we found the '&', check the shortcut */ if (*p == '&' && toupper(*(p+1)) == toupper(*resp)) { /* * this is the one - return the current index * (bumping it by one to get a 1-based value) */ return i + 1; } } } /* * Either it's not a one-character reply, or it didn't match a * short-cut - check it against the leading substrings of the * responses. If it matches exactly one of the responses in its * leading substring, use that response. 
*/ for (i = 0, match_cnt = 0 ; i < button_count ; ++i) { const char *p1; const char *p2; /* * compare this response to the user's response; skip any * '&' in the button label */ for (p1 = resp, p2 = buttons[i] ; *p1 != '\0' && *p2 != '\0' ; ++p1, ++p2) { /* if this is a '&' in the button label, skip it */ if (*p2 == '&') ++p2; /* if these characters don't match, it's no match */ if (toupper(*p1) != toupper(*p2)) break; } /* * if we reached the end of the user's response, we have a * match in the leading substring - count it and remember * this as the last one, but keep looking, since we need to * make sure we don't have any other matches */ if (*p1 == '\0') { ++match_cnt; last_found = i; } } /* * if we found exactly one match, return it (adjusting to a * 1-based index); if we found more or less than one match, it's * not a valid response, so start over with a new prompt */ if (match_cnt == 1) return last_found + 1; } }
int main(int argc, char **argv) { int curarg; osfildef *fpin; osfildef *fpout; char tmpfile[OSFNMAX + 1]; char inbuf[OSFNMAX + 1]; char *p; char *infile; char buf[128]; opdef *oplist = (opdef *)0; opctxdef opctx; int do_create = FALSE; /* print main banner */ rscptf("TADS Resource Manager version 2.2.4\n"); rscptf("Copyright (c) 1992, 1999 by Michael J. Roberts. "); rscptf("All Rights Reserved.\n"); if (argc < 2) usage(); /* set default parsing options */ opctx.restype = RESTYPE_DFLT; opctx.flag = OPFADD | OPFDEL; opctx.doing_type = FALSE; /* scan file options (these come before the filename) */ for (curarg = 1 ; curarg < argc ; ++curarg) { /* check if it's an option - if not, stop looking */ if (argv[curarg][0] != '-') break; /* check the option */ if (!stricmp(argv[curarg], "-create")) { /* note that we want to create the file */ do_create = TRUE; } else { rscptf("unrecognized file option \"%s\"", argv[curarg]); errexit("", 1); } } /* get the file name */ infile = argv[curarg++]; strcpy(inbuf, infile); os_defext(inbuf, "gam"); /* open the file for reading, unless we're creating a new file */ if (do_create) { /* creating - we have no input file */ fpin = 0; } else if ((fpin = osfoprb(inbuf, OSFTGAME)) == 0) { /* * not creating, so the file must already exist - it doesn't, so * issue an error and quit */ errexit("unable to open resource file", 1); } /* * if no operations are desired, and we're not creating a new file, * just list the existing file's contents and quit */ if (curarg == argc && fpin != 0) { rscproc(fpin, (osfildef *)0, 0); osfcls(fpin); os_term(OSEXSUCC); } /* * Create an output file. If we're creating a new file, create the * file named on the command line; otherwise, create a temporary * file that we'll write to while working and then rename to the * original input filename after we've finished with the original * input file. 
*/ if (do_create) { /* create the new file */ if ((fpout = osfopwb(inbuf, OSFTGAME)) == 0) errexit("unable to create file", 1); /* report the creation */ rscptf("\nFile created.\n"); } else { /* generate a temporary filename */ strcpy(tmpfile, inbuf); for (p = tmpfile + strlen(tmpfile) ; p > tmpfile && *(p-1) != ':' && *(p-1) != '\\' && *(p-1) != '/' ; --p); strcpy(p, "$TADSRSC.TMP"); /* open the temporary file */ if ((fpout = osfopwb(tmpfile, OSFTGAME)) == 0) errexit("unable to create temporary file", 1); } /* see if we need to read a response file */ if (curarg < argc && argv[curarg][0] == '@') { osfildef *argfp; int l; char *p; if (!(argfp = osfoprt(argv[curarg]+1, OSFTTEXT))) errexit("unable to open response file", 1); for (;;) { if (!osfgets(buf, sizeof(buf), argfp)) break; l = strlen(buf); if (l && buf[l-1] == '\n') buf[--l] = '\0'; for (p = buf ; t_isspace(*p) ; ++p); if (!*p) continue; oplist = addop(oplist, p, &opctx); } osfcls(argfp); } else { for ( ; curarg < argc ; ++curarg) oplist = addop(oplist, argv[curarg], &opctx); } /* process the resources */ oplist = rscproc(fpin, fpout, oplist); /* make sure they all got processed */ for ( ; oplist != 0 ; oplist = oplist->opnxt) { if (!(oplist->opflag & OPFDONE)) rscptf("warning: resource \"%s\" not found\n", oplist->opres); } /* close files */ if (fpin != 0) osfcls(fpin); if (fpout != 0) osfcls(fpout); /* * if we didn't create a new file, remove the original input file * and rename the temp file to the original file name */ if (!do_create) { /* remove the original input file */ if (remove(inbuf)) errexit("error deleting input file", 1); /* rename the temp file to the output file */ if (rename(tmpfile, inbuf)) errexit("error renaming temporary file", 1); } /* success */ os_term(OSEXSUCC); return OSEXSUCC; }
/*
 * Add an operation from a command line argument.
 *
 *   cur    - current head of the operation list
 *   nam    - the argument text; modified in place when it has the form
 *            "file=resname" (the '=' is overwritten with a null byte)
 *   opctx  - parsing context carrying the current resource type, the
 *            current add/delete flags, and whether the previous argument
 *            was "-type"
 *
 * Option arguments ("-type", "-replace", "-delete", "-add") and the type
 * name following "-type" only update the context; in those cases the
 * existing list head is returned unchanged.  A directory argument is
 * expanded through addopdir().  Otherwise a new opdef is pushed on the
 * front of the list and returned as the new head.
 *
 * The opdef, its resource name and its file name live in one malloc'd
 * chunk, so releasing the opdef pointer releases all three.
 */
static opdef *addop(opdef *cur, char *nam, opctxdef *opctx)
{
    char *p;
    opdef *newop;
    void *search_ctx;
    char fname[OSFNMAX];
    char resname[OSFNMAX];
    int isdir;

    /* see if we're parsing a -type argument */
    if (opctx->doing_type)
    {
        /*
         *   parse the type name, and store it as the type for following
         *   resources
         */
        opctx->restype = parse_res_type(nam);

        /* we're done parsing the -type argument */
        opctx->doing_type = FALSE;

        /*
         *   we're done parsing this argument - we haven't added any
         *   operations, so return old list head
         */
        return cur;
    }

    /* see if we have an option */
    if (*nam == '-')
    {
        /* see what we have */
        if (!stricmp(nam, "-type"))
        {
            /*
             *   note that we're doing a type, so we parse it on the next
             *   argument
             */
            opctx->doing_type = TRUE;
        }
        else if (!stricmp(nam, "-replace"))
        {
            /* set current flags to replace */
            opctx->flag = OPFADD | OPFDEL;
        }
        else if (!stricmp(nam, "-delete"))
        {
            /* set current flags to delete */
            opctx->flag = OPFDEL;
        }
        else if (!stricmp(nam, "-add"))
        {
            /* set current flags to add */
            opctx->flag = OPFADD;
        }
        else
        {
            /* invalid argument */
            rscptf("invalid option: %s", nam);
            errexit("", 1);
        }

        /*
         *   done parsing this option - we didn't add a new operation, so
         *   return the current list head
         */
        return cur;
    }

    /* look for '=' */
    for (p = nam ; *p && *p != '=' ; ++p)
        ;
    if (*p == '=')
    {
        /*
         *   We found an '=', so an explicit resource name follows - use
         *   the given string as the resource name rather than basing the
         *   resource name on the filename.  First, overwrite the '=' with
         *   a null byte so that the filename string is terminated.
         */
        *p = '\0';

        /* skip the '=' (now the null byte, of course) */
        ++p;

        /*
         *   skip any spaces after the '='; leave p pointing to the start
         *   of the resource name
         */
        while (t_isspace(*p))
            ++p;
    }
    else
    {
        /*
         *   A resource name is not specified - synthesize a resource name
         *   based on the filename by converting from the local file
         *   system naming syntax to a relative URL
         */
        os_cvt_dir_url(resname, sizeof(resname), nam, FALSE);

        /* point p to the synthesized resource name */
        p = resname;
    }

    /*
     *   If we're adding a directory, rather than returning a single op
     *   for the directory, expand the directory into ops for all of
     *   the files in the directory.
     */
    search_ctx = os_find_first_file("", nam, fname, sizeof(fname),
                                    &isdir, 0, 0);
    if (search_ctx != 0)
    {
        /* cancel the search; we only needed the one matching file */
        os_find_close(search_ctx);

        /* we found the file; if it's a directory, process its contents */
        if (isdir)
            return addopdir(cur, nam, opctx);
    }

    /*
     * Allocate the op together with room for both strings (resource name
     * and file name, each with its null terminator).
     */
    newop = (opdef *)malloc(sizeof(opdef) + strlen(p) + strlen(nam) + 2);
    if (newop == 0)
    {
        /* never write through a failed allocation */
        errexit("out of memory", 1);

        /*
         * Not reached if errexit() terminates the program; otherwise
         * leave the existing list untouched.
         */
        return cur;
    }

    /* set up the new op and link it in at the head of the list */
    newop->opnxt = cur;
    newop->opflag = opctx->flag;
    newop->opres = (char *)(newop + 1);
    newop->oprestype = get_file_restype(opctx->restype, nam);
    strcpy(newop->opres, p);

    /* the file name is stored immediately after the resource name */
    newop->opfile = newop->opres + strlen(newop->opres) + 1;
    strcpy(newop->opfile, nam);

    return newop;
}
/* * initSuffixTree - create suffix tree from file. Function converts * UTF8-encoded file into current encoding. */ static SuffixChar * initSuffixTree(char *filename) { SuffixChar *volatile rootSuffixTree = NULL; MemoryContext ccxt = CurrentMemoryContext; tsearch_readline_state trst; volatile bool skip; filename = get_tsearch_config_filename(filename, "rules"); if (!tsearch_readline_begin(&trst, filename)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("could not open unaccent file \"%s\": %m", filename))); do { /* * pg_do_encoding_conversion() (called by tsearch_readline()) will * emit exception if it finds untranslatable characters in current * locale. We just skip such lines, continuing with the next. */ skip = true; PG_TRY(); { char *line; while ((line = tsearch_readline(&trst)) != NULL) { /* * The format of each line must be "src trg" where src and trg * are sequences of one or more non-whitespace characters, * separated by whitespace. Whitespace at start or end of * line is ignored. 
*/ int state; char *ptr; char *src = NULL; char *trg = NULL; int ptrlen; int srclen = 0; int trglen = 0; state = 0; for (ptr = line; *ptr; ptr += ptrlen) { ptrlen = pg_mblen(ptr); /* ignore whitespace, but end src or trg */ if (t_isspace(ptr)) { if (state == 1) state = 2; else if (state == 3) state = 4; continue; } switch (state) { case 0: /* start of src */ src = ptr; srclen = ptrlen; state = 1; break; case 1: /* continue src */ srclen += ptrlen; break; case 2: /* start of trg */ trg = ptr; trglen = ptrlen; state = 3; break; case 3: /* continue trg */ trglen += ptrlen; break; default: /* bogus line format */ state = -1; break; } } if (state >= 3) rootSuffixTree = placeChar(rootSuffixTree, (unsigned char *) src, srclen, trg, trglen); pfree(line); } skip = false; } PG_CATCH(); { ErrorData *errdata; MemoryContext ecxt; ecxt = MemoryContextSwitchTo(ccxt); errdata = CopyErrorData(); if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER) { FlushErrorState(); } else { MemoryContextSwitchTo(ecxt); PG_RE_THROW(); } } PG_END_TRY(); } while (skip); tsearch_readline_end(&trst); return rootSuffixTree; }