/* * r_reg -- display a regular file in reverse order by line. */ static int r_reg(FILE *fp, enum STYLE style, off_t off, struct stat *sbp) { off_t start, pos, end; int ch; end = sbp->st_size; if (end == 0) return (0); /* Position before char, ignore last char whether newline or not */ pos = end-2; ch = EOF; start = 0; if (style == RBYTES && off < end) start = end - off; for (; pos >= start; pos--) { /* A seek per char isn't a problem with a smart stdio */ if (fseeko(fp, pos, SEEK_SET) != 0) { ierr(); return (0); } if ((ch = getc(fp)) == '\n') { while (--end > pos) COPYCHAR(fp, ch); end++; if (style == RLINES && --off == 0) break; } else if (ch == EOF) { ierr(); return (0); } } if (pos < start) { if (ch != EOF && ungetc(ch, fp) == EOF) { ierr(); return (0); } while (--end >= start) COPYCHAR(fp, ch); } return (0); }
void RS_compile(Regis *r, bool issuffix, const char *str) { int len = strlen(str); int state = RS_IN_WAIT; const char *c = str; RegisNode *ptr = NULL; memset(r, 0, sizeof(Regis)); r->issuffix = (issuffix) ? 1 : 0; while (*c) { if (state == RS_IN_WAIT) { if (t_isalpha(c)) { if (ptr) ptr = newRegisNode(ptr, len); else ptr = r->node = newRegisNode(NULL, len); COPYCHAR(ptr->data, c); ptr->type = RSF_ONEOF; ptr->len = pg_mblen(c); } else if (t_iseq(c, '[')) { if (ptr) ptr = newRegisNode(ptr, len); else ptr = r->node = newRegisNode(NULL, len); ptr->type = RSF_ONEOF; state = RS_IN_ONEOF; } else /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); } else if (state == RS_IN_ONEOF) { if (t_iseq(c, '^')) { ptr->type = RSF_NONEOF; state = RS_IN_NONEOF; } else if (t_isalpha(c)) { COPYCHAR(ptr->data, c); ptr->len = pg_mblen(c); state = RS_IN_ONEOF_IN; } else /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); } else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) { if (t_isalpha(c)) { COPYCHAR(ptr->data + ptr->len, c); ptr->len += pg_mblen(c); } else if (t_iseq(c, ']')) state = RS_IN_WAIT; else /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); } else elog(ERROR, "internal error in RS_compile: state %d", state); c += pg_mblen(c); } if (state != RS_IN_WAIT) /* shouldn't get here */ elog(ERROR, "invalid regis pattern: \"%s\"", str); ptr = r->node; while (ptr) { r->nchar++; ptr = ptr->next; } }
/* * Get next token from string being parsed. Returns true if successful, * false if end of input string is reached. On success, these output * parameters are filled in: * * *strval pointer to token * *lenval length of *strval * *pos_ptr pointer to a palloc'd array of positions and weights * associated with the token. If the caller is not interested * in the information, NULL can be supplied. Otherwise * the caller is responsible for pfreeing the array. * *poslen number of elements in *pos_ptr * *endptr scan resumption point * * Pass NULL for unwanted output parameters. */ bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr) { int oldstate = 0; char *curpos = state->word; int statecode = WAITWORD; /* * pos is for collecting the comma delimited list of positions followed by * the actual token. */ WordEntryPos *pos = NULL; int npos = 0; /* elements of pos used */ int posalen = 0; /* allocated size of pos */ while (1) { if (statecode == WAITWORD) { if (*(state->prsbuf) == '\0') return false; else if (t_iseq(state->prsbuf, '\'')) statecode = WAITENDCMPLX; else if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) PRSSYNTAXERROR; else if (!t_isspace(state->prsbuf)) { COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); statecode = WAITENDWORD; } } else if (statecode == WAITNEXTCHAR) { if (*(state->prsbuf) == '\0') ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("there is no escaped character: \"%s\"", state->bufstart))); else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); Assert(oldstate != 0); statecode = oldstate; } } else if (statecode == WAITENDWORD) { if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || (state->oprisdelim && ISOPERATOR(state->prsbuf))) { RESIZEPRSBUF; if (curpos == state->word) PRSSYNTAXERROR; *(curpos) = '\0'; RETURN_TOKEN; } else if (t_iseq(state->prsbuf, ':')) { if (curpos == state->word) PRSSYNTAXERROR; *(curpos) = '\0'; if (state->oprisdelim) RETURN_TOKEN; else statecode = INPOSINFO; } else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); } } else if (statecode == WAITENDCMPLX) { if (t_iseq(state->prsbuf, '\'')) { statecode = WAITCHARCMPLX; } else if (t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDCMPLX; } else if (*(state->prsbuf) == '\0') PRSSYNTAXERROR; else { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); } } else if (statecode == WAITCHARCMPLX) { if (t_iseq(state->prsbuf, '\'')) { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); curpos += pg_mblen(state->prsbuf); statecode = WAITENDCMPLX; } else { RESIZEPRSBUF; *(curpos) = '\0'; if (curpos == state->word) PRSSYNTAXERROR; if (state->oprisdelim) { /* state->prsbuf+=pg_mblen(state->prsbuf); */ RETURN_TOKEN; } else statecode = WAITPOSINFO; continue; /* recheck current character */ } } else if (statecode == WAITPOSINFO) { if (t_iseq(state->prsbuf, ':')) statecode = INPOSINFO; else RETURN_TOKEN; } else if (statecode == INPOSINFO) { if (t_isdigit(state->prsbuf)) { if (posalen == 0) { posalen = 4; pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen); npos = 0; } else if (npos + 1 >= posalen) { posalen *= 2; pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen); } npos++; WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf))); /* we cannot get here in tsquery, so no need for 2 errmsgs */ if (WEP_GETPOS(pos[npos - 1]) == 0) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("wrong position info in tsvector: \"%s\"", state->bufstart))); WEP_SETWEIGHT(pos[npos - 1], 0); statecode = WAITPOSDELIM; } else PRSSYNTAXERROR; } else if (statecode == WAITPOSDELIM) { if (t_iseq(state->prsbuf, ',')) statecode = INPOSINFO; else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 3); } else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 2); } else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 1); } else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) { if (WEP_GETWEIGHT(pos[npos - 1])) PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 0); } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0') RETURN_TOKEN; else if (!t_isdigit(state->prsbuf)) PRSSYNTAXERROR; } else /* internal error */ elog(ERROR, "unrecognized state in gettoken_tsvector: %d", statecode); /* get next char */ state->prsbuf += pg_mblen(state->prsbuf); } }