/*
 * Return TRUE if dot is at beginning of a word or at beginning
 * of line, else FALSE.
 *
 * Dot sitting at (or past) the end of the line's used text has no
 * character under it, so it can never be the start of a word; that case
 * is rejected before l_text is indexed, which also keeps the read inside
 * the l_used bytes of the line.
 */
int
atbow(void)
{
	/* Column 0 always counts as a word start. */
	if (curwp->w_doto == 0)
		return (TRUE);
	/* Nothing under dot: do not read l_text[w_doto] past the used text. */
	if (curwp->w_doto >= curwp->w_dotp->l_used)
		return (FALSE);
	/* A word character preceded by a non-word character. */
	if (ISWORD(curwp->w_dotp->l_text[curwp->w_doto]) &&
	    !ISWORD(curwp->w_dotp->l_text[curwp->w_doto - 1]))
		return (TRUE);
	return (FALSE);
}
/*
 * Tell whether the character under dot belongs to a word.
 * Yields 1 when dot is not at the end of the line and the character
 * at dot satisfies ISWORD, otherwise 0. The set of word characters is
 * whatever ISWORD hard codes; it should be settable.
 */
int
inword(void)
{
	/* Dot at end of line: there is no character to classify. */
	if (curwp->w_doto == llength(curwp->w_dotp))
		return (0);

	/* can't use lgetc in ISWORD due to bug in OSK cpp */
	return (ISWORD(curwp->w_dotp->l_text[curwp->w_doto]) ? 1 : 0);
}
/*
 * "lp1" and "lp2" are list entries and "cpos" is a column within their
 * names. Count how many characters, starting at "cpos", the two names
 * have in common; that is the text that can be auto-completed here.
 * The two entries may be the same one, which is normal. When "wflag" is
 * set the count stops right after the first non-word character that was
 * accepted.
 */
int
getxtra(struct list *lp1, struct list *lp2, int cpos, int wflag)
{
	int	pos = cpos;

	/* Advance while both names agree and neither has ended. */
	while (lp1->l_name[pos] == lp2->l_name[pos] &&
	    lp1->l_name[pos] != '\0') {
		pos++;
		/* Word-wise completion: stop once a non-word char is taken. */
		if (wflag && !ISWORD(lp1->l_name[pos - 1]))
			break;
	}
	return (pos - cpos);
}
/*
 - slow - step through the string more deliberately
 == static const char *slow(struct match *m, const char *start, \
 ==	const char *stop, sopno startst, sopno stopst);
 *
 * Walks the input from "start" towards "stop", feeding each character
 * (and any BOL/EOL/BOW/EOW pseudo-characters found between two
 * characters) to step(). Returns the last position at which "stopst"
 * was present in the state set, or NULL if that never happened.
 *
 * The compile/exec flag tests use REG_NEWLINE, REG_NOTBOL and
 * REG_NOTEOL; the "&REG_" sequences had been mangled into a single
 * "(R)" glyph and are restored here.
 */
static const char *		/* where it ended */
slow(
	struct match *m,
	const char *start,
	const char *stop,
	sopno startst,
	sopno stopst)
{
	states st = m->st;
	states empty = m->empty;
	states tmp = m->tmp;
	const char *p = start;
	wint_t c;
	wint_t lastc;		/* previous c */
	wint_t flagch;
	int i;
	const char *matchp;	/* last p at which a match ended */
	size_t clen;

	AT("slow", start, stop, startst, stopst);
	CLEAR(st);
	SET1(st, startst);
	SP("sstart", st, *p);
	st = step(m->g, startst, stopst, st, NOTHING, st);
	matchp = NULL;
	if (start == m->beginp)
		c = OUT;
	else {
		/*
		 * XXX Wrong if the previous character was multi-byte.
		 * Newline never is (in supported encodings),
		 * so this only breaks the ISWORD tests below.
		 */
		c = (uch)*(start - 1);
	}
	for (;;) {
		/* next character */
		lastc = c;
		if (p == m->endp) {
			c = OUT;
			clen = 0;
		} else
			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);

		/* is there an EOL and/or BOL between lastc and c? */
		flagch = '\0';
		i = 0;
		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
			flagch = BOL;
			i = m->g->nbol;
		}
		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
			flagch = (flagch == BOL) ? BOLEOL : EOL;
			i += m->g->neol;
		}
		if (i != 0) {
			/* one step per BOL/EOL operator in the pattern */
			for (; i > 0; i--)
				st = step(m->g, startst, stopst, st, flagch, st);
			SP("sboleol", st, c);
		}

		/* how about a word boundary? */
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
					(c != OUT && ISWORD(c)) ) {
			flagch = BOW;
		}
		if ( (lastc != OUT && ISWORD(lastc)) &&
				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
			flagch = EOW;
		}
		if (flagch == BOW || flagch == EOW) {
			st = step(m->g, startst, stopst, st, flagch, st);
			SP("sboweow", st, c);
		}

		/* are we done? */
		if (ISSET(st, stopst))
			matchp = p;
		if (EQ(st, empty) || p == stop || clen > stop - p)
			break;		/* NOTE BREAK OUT */

		/* no, we must deal with this character */
		ASSIGN(tmp, st);
		ASSIGN(st, empty);
		assert(c != OUT);
		st = step(m->g, startst, stopst, tmp, c, st);
		SP("saft", st, c);
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
		p += clen;
	}

	return(matchp);
}
/*
 - backref - figure out what matched what, figuring in back references
 == static const char *backref(struct match *m, const char *start, \
 ==	const char *stop, sopno startst, sopno stopst, sopno lev);
 *
 * Tries to match strip[startst..stopst) against exactly start..stop.
 * Operators that need no choice are consumed in a loop; the first one
 * that does need a choice is handled by recursing. "lev" is the PLUS
 * nesting level used to index m->lastpos, "rec" counts empty
 * back-reference expansions so they can be cut off at MAX_RECURSION.
 * Sub-match offsets stored in m->pmatch are undone when the remainder
 * of the match fails.
 *
 * Returns "stop" on success and NULL on failure.
 *
 * Two things differ from a literal transcription of the old text:
 *  - the flag tests spell REG_NEWLINE / REG_NOTBOL / REG_NOTEOL again
 *    (the "&REG_" sequences had been mangled into a single glyph);
 *  - the "previous character is a newline" tests in OBOL and OBOW only
 *    look at *(sp-1) when sp is beyond m->offp, so no byte in front of
 *    the subject string is read when REG_NOTBOL is in effect.
 */
static const char *		/* == stop (success) or NULL (failure) */
backref(struct match *m,
	const char *start,
	const char *stop,
	sopno startst,
	sopno stopst,
	sopno lev,		/* PLUS nesting level */
	int rec)
{
	int i;
	sopno ss;		/* start sop of current subRE */
	const char *sp;		/* start of string matched by it */
	sopno ssub;		/* start sop of subsubRE */
	sopno esub;		/* end sop of subsubRE */
	const char *ssp;	/* start of string matched by subsubRE */
	const char *dp;
	size_t len;
	int hard;
	sop s;
	regoff_t offsave;
	cset *cs;
	wint_t wc;

	AT("back", start, stop, startst, stopst);
	sp = start;

	/* get as far as we can with easy stuff */
	hard = 0;
	for (ss = startst; !hard && ss < stopst; ss++)
		switch (OP(s = m->g->strip[ss])) {
		case OCHAR:
			if (sp == stop)
				return(NULL);
			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
			if (wc != OPND(s))
				return(NULL);
			break;
		case OANY:
			if (sp == stop)
				return(NULL);
			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
			if (wc == BADCHAR)
				return (NULL);
			break;
		case OANYOF:
			if (sp == stop)
				return (NULL);
			cs = &m->g->sets[OPND(s)];
			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
			if (wc == BADCHAR || !CHIN(cs, wc))
				return(NULL);
			break;
		case OBOL:
			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
					(sp > m->offp && sp < m->endp &&
					*(sp-1) == '\n' &&
					(m->g->cflags&REG_NEWLINE)) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case OEOL:
			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
					(sp < m->endp && *sp == '\n' &&
					(m->g->cflags&REG_NEWLINE)) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case OBOW:
			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
					(sp > m->offp && sp < m->endp &&
					*(sp-1) == '\n' &&
					(m->g->cflags&REG_NEWLINE)) ||
					(sp > m->beginp &&
							!ISWORD(*(sp-1))) ) &&
					(sp < m->endp && ISWORD(*sp)) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case OEOW:
			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
					(sp < m->endp && *sp == '\n' &&
					(m->g->cflags&REG_NEWLINE)) ||
					(sp < m->endp && !ISWORD(*sp)) ) &&
					(sp > m->beginp && ISWORD(*(sp-1))) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case O_QUEST:
			break;
		case OOR1:	/* matches null but needs to skip */
			ss++;
			s = m->g->strip[ss];
			do {
				assert(OP(s) == OOR2);
				ss += OPND(s);
			} while (OP(s = m->g->strip[ss]) != O_CH);
			/* note that the ss++ gets us past the O_CH */
			break;
		default:	/* have to make a choice */
			hard = 1;
			break;
		}
	if (!hard) {		/* that was it! */
		if (sp != stop)
			return(NULL);
		return(sp);
	}
	ss--;			/* adjust for the for's final increment */

	/* the hard stuff */
	AT("hard", sp, stop, ss, stopst);
	s = m->g->strip[ss];
	switch (OP(s)) {
	case OBACK_:		/* the vilest depths */
		i = OPND(s);
		assert(0 < i && i <= m->g->nsub);
		if (m->pmatch[i].rm_eo == -1)
			return(NULL);
		assert(m->pmatch[i].rm_so != -1);
		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
		/* bound the number of empty back-reference expansions */
		if (len == 0 && rec++ > MAX_RECURSION)
			return(NULL);
		assert(stop - m->beginp >= len);
		if (sp > stop - len)
			return(NULL);	/* not enough left to match */
		ssp = m->offp + m->pmatch[i].rm_so;
		if (memcmp(sp, ssp, len) != 0)
			return(NULL);
		while (m->g->strip[ss] != SOP(O_BACK, i))
			ss++;
		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
		break;
	case OQUEST_:		/* to null or not */
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
		if (dp != NULL)
			return(dp);	/* not */
		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
		break;
	case OPLUS_:
		assert(m->lastpos != NULL);
		assert(lev+1 <= m->g->nplus);
		m->lastpos[lev+1] = sp;
		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
		break;
	case O_PLUS:
		if (sp == m->lastpos[lev])	/* last pass matched null */
			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
		/* try another pass */
		m->lastpos[lev] = sp;
		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
		if (dp == NULL)
			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
		else
			return(dp);
		break;
	case OCH_:		/* find the right one, if any */
		ssub = ss + 1;
		esub = ss + OPND(s) - 1;
		assert(OP(m->g->strip[esub]) == OOR1);
		for (;;) {	/* find first matching branch */
			dp = backref(m, sp, stop, ssub, esub, lev, rec);
			if (dp != NULL)
				return(dp);
			/* that one missed, try next one */
			if (OP(m->g->strip[esub]) == O_CH)
				return(NULL);	/* there is none */
			esub++;
			assert(OP(m->g->strip[esub]) == OOR2);
			ssub = esub + 1;
			esub += OPND(m->g->strip[esub]);
			if (OP(m->g->strip[esub]) == OOR2)
				esub--;
			else
				assert(OP(m->g->strip[esub]) == O_CH);
		}
		break;
	case OLPAREN:		/* must undo assignment if rest fails */
		i = OPND(s);
		assert(0 < i && i <= m->g->nsub);
		offsave = m->pmatch[i].rm_so;
		m->pmatch[i].rm_so = sp - m->offp;
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
		if (dp != NULL)
			return(dp);
		m->pmatch[i].rm_so = offsave;
		return(NULL);
		break;
	case ORPAREN:		/* must undo assignment if rest fails */
		i = OPND(s);
		assert(0 < i && i <= m->g->nsub);
		offsave = m->pmatch[i].rm_eo;
		m->pmatch[i].rm_eo = sp - m->offp;
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
		if (dp != NULL)
			return(dp);
		m->pmatch[i].rm_eo = offsave;
		return(NULL);
		break;
	default:		/* uh oh */
		assert(nope);
		break;
	}

	/* "can't happen" */
	assert(nope);
	/* NOTREACHED */
	return "shut up gcc";
}
static char * veread(const char *fp, char *buf, size_t nbuf, int flag, va_list ap) { int dynbuf = (buf == NULL); int cpos, epos; /* cursor, end position in buf */ int c, i, y; int cplflag = FALSE; /* display completion list */ int cwin = FALSE; /* completion list created */ int mr = 0; /* match left arrow */ int ml = 0; /* match right arrow */ int esc = 0; /* position in esc pattern */ struct buffer *bp; /* completion list buffer */ struct mgwin *wp; /* window for compl list */ int match; /* esc match found */ int cc, rr; /* saved ttcol, ttrow */ char *ret; /* return value */ static char emptyval[] = ""; /* XXX hackish way to return err msg*/ if (inmacro) { if (dynbuf) { if ((buf = malloc(maclcur->l_used + 1)) == NULL) return (NULL); } else if (maclcur->l_used >= nbuf) return (NULL); bcopy(maclcur->l_text, buf, maclcur->l_used); buf[maclcur->l_used] = '\0'; maclcur = maclcur->l_fp; return (buf); } epos = cpos = 0; ml = mr = esc = 0; cplflag = FALSE; if ((flag & EFNEW) != 0 || ttrow != nrow - 1) { ttcolor(CTEXT); ttmove(nrow - 1, 0); epresf = TRUE; } else eputc(' '); eformat(fp, ap); if ((flag & EFDEF) != 0) { if (buf == NULL) return (NULL); eputs(buf); epos = cpos += strlen(buf); } tteeol(); ttflush(); for (;;) { c = getkey(FALSE); if ((flag & EFAUTO) != 0 && c == CCHR('I')) { if (cplflag == TRUE) { complt_list(flag, buf, cpos); cwin = TRUE; } else if (complt(flag, c, buf, nbuf, epos, &i) == TRUE) { cplflag = TRUE; epos += i; cpos = epos; } continue; } cplflag = FALSE; if (esc > 0) { /* ESC sequence started */ match = 0; if (ml == esc && key_left[ml] && c == key_left[ml]) { match++; if (key_left[++ml] == '\0') { c = CCHR('B'); esc = 0; } } if (mr == esc && key_right[mr] && c == key_right[mr]) { match++; if (key_right[++mr] == '\0') { c = CCHR('F'); esc = 0; } } if (match == 0) { esc = 0; continue; /* hack. how do we know esc pattern is done? 
*/ } if (esc > 0) { esc++; continue; } } switch (c) { case CCHR('A'): /* start of line */ while (cpos > 0) { if (ISCTRL(buf[--cpos]) != FALSE) { ttputc('\b'); --ttcol; } ttputc('\b'); --ttcol; } ttflush(); break; case CCHR('D'): if (cpos != epos) { tteeol(); epos--; rr = ttrow; cc = ttcol; for (i = cpos; i < epos; i++) { buf[i] = buf[i + 1]; eputc(buf[i]); } ttmove(rr, cc); ttflush(); } break; case CCHR('E'): /* end of line */ while (cpos < epos) { eputc(buf[cpos++]); } ttflush(); break; case CCHR('B'): /* back */ if (cpos > 0) { if (ISCTRL(buf[--cpos]) != FALSE) { ttputc('\b'); --ttcol; } ttputc('\b'); --ttcol; ttflush(); } break; case CCHR('F'): /* forw */ if (cpos < epos) { eputc(buf[cpos++]); ttflush(); } break; case CCHR('Y'): /* yank from kill buffer */ i = 0; while ((y = kremove(i++)) >= 0 && y != '\n') { int t; if (dynbuf && epos + 1 >= nbuf) { void *newp; size_t newsize = epos + epos + 16; if ((newp = realloc(buf, newsize)) == NULL) goto memfail; buf = newp; nbuf = newsize; } if (!dynbuf && epos + 1 >= nbuf) { ewprintf("Line too long"); return (emptyval); } for (t = epos; t > cpos; t--) buf[t] = buf[t - 1]; buf[cpos++] = (char)y; epos++; eputc((char)y); cc = ttcol; rr = ttrow; for (t = cpos; t < epos; t++) eputc(buf[t]); ttmove(rr, cc); } ttflush(); break; case CCHR('K'): /* copy here-EOL to kill buffer */ kdelete(); for (i = cpos; i < epos; i++) kinsert(buf[i], KFORW); tteeol(); epos = cpos; ttflush(); break; case CCHR('['): ml = mr = esc = 1; break; case CCHR('J'): c = CCHR('M'); /* FALLTHROUGH */ case CCHR('M'): /* return, done */ /* if there's nothing in the minibuffer, abort */ if (epos == 0 && !(flag & EFNUL)) { (void)ctrlg(FFRAND, 0); ttflush(); return (NULL); } if ((flag & EFFUNC) != 0) { if (complt(flag, c, buf, nbuf, epos, &i) == FALSE) continue; if (i > 0) epos += i; } buf[epos] = '\0'; if ((flag & EFCR) != 0) { ttputc(CCHR('M')); ttflush(); } if (macrodef) { struct line *lp; if ((lp = lalloc(cpos)) == NULL) goto memfail; lp->l_fp = 
maclcur->l_fp; maclcur->l_fp = lp; lp->l_bp = maclcur; maclcur = lp; bcopy(buf, lp->l_text, cpos); } ret = buf; goto done; case CCHR('G'): /* bell, abort */ eputc(CCHR('G')); (void)ctrlg(FFRAND, 0); ttflush(); ret = NULL; goto done; case CCHR('H'): /* rubout, erase */ case CCHR('?'): if (cpos != 0) { y = buf[--cpos]; epos--; ttputc('\b'); ttcol--; if (ISCTRL(y) != FALSE) { ttputc('\b'); ttcol--; } rr = ttrow; cc = ttcol; for (i = cpos; i < epos; i++) { buf[i] = buf[i + 1]; eputc(buf[i]); } ttputc(' '); if (ISCTRL(y) != FALSE) { ttputc(' '); ttputc('\b'); } ttputc('\b'); ttmove(rr, cc); ttflush(); } break; case CCHR('X'): /* kill line */ case CCHR('U'): while (cpos != 0) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; if (ISCTRL(buf[--cpos]) != FALSE) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; } epos--; } ttflush(); break; case CCHR('W'): /* kill to beginning of word */ while ((cpos > 0) && !ISWORD(buf[cpos - 1])) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; if (ISCTRL(buf[--cpos]) != FALSE) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; } epos--; } while ((cpos > 0) && ISWORD(buf[cpos - 1])) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; if (ISCTRL(buf[--cpos]) != FALSE) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; } epos--; } ttflush(); break; case CCHR('\\'): case CCHR('Q'): /* quote next */ c = getkey(FALSE); /* FALLTHROUGH */ default: if (dynbuf && epos + 1 >= nbuf) { void *newp; size_t newsize = epos + epos + 16; if ((newp = realloc(buf, newsize)) == NULL) goto memfail; buf = newp; nbuf = newsize; } if (!dynbuf && epos + 1 >= nbuf) { ewprintf("Line too long"); return (emptyval); } for (i = epos; i > cpos; i--) buf[i] = buf[i - 1]; buf[cpos++] = (char)c; epos++; eputc((char)c); cc = ttcol; rr = ttrow; for (i = cpos; i < epos; i++) eputc(buf[i]); ttmove(rr, cc); ttflush(); } } done: if (cwin == TRUE) { /* blow away cpltion window */ bp = bfind("*Completions*", TRUE); if ((wp = popbuf(bp, WEPHEM)) != NULL) { if (wp->w_flag & 
WEPHEM) { curwp = wp; delwind(FFRAND, 1); } else { killbuffer(bp); } } } return (ret); memfail: if (dynbuf && buf) free(buf); ewprintf("Out of memory"); return (emptyval); }
/*
** Perform Re-Pair on a file
**
** The input file is consumed through a local buffer of
** INPUT_BUFFER_SIZE symbols.  Symbols are read as 1-, 2- or 4-byte
** items depending on prog_struct -> base_datatype and widened to
** R_UINT.  The outer loop builds and encodes one block per iteration
** until the carried-over "input stack", the local buffer and the file
** are all drained.
**
** With the HEUR_WA heuristic, and only while more input remains, the
** tail of a block is rolled back using the ISWORD scan below and saved
** in block_struct -> input_stack; the primitive counts are undone for
** the symbols that were moved.
**
** All failures are fatal (message on stderr, exit (EXIT_FAILURE)):
** read errors, symbols outside prims_array, an unsupported
** base_datatype, and a refill that yields no symbol at all.  The last
** two used to fall through and process an unfilled buffer slot.
*/
void executeRepair_File (PROG_INFO *prog_struct, BLOCK_INFO *block_struct) {
  R_UINT curr_seq_buf_len = 0;
  R_UINT items_read = 0;
  R_UINT k = 0;
  R_UINT m = 0;
  R_UINT i = 0;

  /*  Declare various buffers, depending on which data type is used as input  */
  R_UINT input_buffer[INPUT_BUFFER_SIZE];
  /*  input_buffer_end points just off array  */
  R_UINT *input_buffer_end = input_buffer + INPUT_BUFFER_SIZE;
  /*  Starting at the end marks the buffer as empty, forcing a first refill  */
  R_UINT *input_buffer_p = input_buffer_end;
  R_UCHAR *input_buffer_c = NULL;
  R_USHRT *input_buffer_s = NULL;

  if (prog_struct -> base_datatype == (R_UINT) sizeof (R_UCHAR)) {
    input_buffer_c = wmalloc (sizeof (R_UCHAR) * INPUT_BUFFER_SIZE);
  }
  else if (prog_struct -> base_datatype == (R_UINT) sizeof (R_USHRT)) {
    input_buffer_s = wmalloc (sizeof (R_USHRT) * INPUT_BUFFER_SIZE);
  }

  /*  Fill one block  */
  while ((block_struct -> input_stack_size != 0) || (input_buffer_p < input_buffer_end) || (ftell (prog_struct -> in_file) < (R_L_INT) prog_struct -> in_file_size)) {
    initRepair_OneBlock (prog_struct, block_struct);

    /*  Symbols carried over from the previous block are already in seq_buf  */
    curr_seq_buf_len = 0;
    curr_seq_buf_len += block_struct -> input_stack_size;
    block_struct -> input_stack_size = 0;

    /*  Fill one sequence  */
    while (curr_seq_buf_len < block_struct -> seq_buf_len && ((ftell (prog_struct -> in_file) < (R_L_INT) prog_struct -> in_file_size) || (input_buffer_p < input_buffer_end))) {
      if (input_buffer_p == input_buffer_end) {
        /*  Local buffer drained: refill it from the file  */
        switch (prog_struct -> base_datatype) {
          case 1:
            items_read = (R_UINT) fread (input_buffer_c, sizeof (R_UCHAR), (size_t) INPUT_BUFFER_SIZE, prog_struct -> in_file);
            for (i = 0; i < items_read; i++) {
              input_buffer[i] = (R_UINT) input_buffer_c[i];
            }
            break;
          case 2:
            items_read = (R_UINT) fread (input_buffer_s, sizeof (R_USHRT), (size_t) INPUT_BUFFER_SIZE, prog_struct -> in_file);
            for (i = 0; i < items_read; i++) {
              input_buffer[i] = (R_UINT) input_buffer_s[i];
            }
            break;
          case 4:
            items_read = (R_UINT) fread (input_buffer, sizeof (R_UINT), (size_t) INPUT_BUFFER_SIZE, prog_struct -> in_file);
            break;
          default:
            /*  Nothing would be read and a stale count would be used  */
            fprintf (stderr, "Unsupported base datatype size %u in %s, line %u.\n", prog_struct -> base_datatype, __FILE__, (unsigned int) __LINE__);
            exit (EXIT_FAILURE);
        }

        /*  The error flag can only have changed in the fread above  */
        if (ferror (prog_struct -> in_file) != R_FALSE) {
          fprintf (stderr, "Fatal error in reading from input file!\n");
          exit (EXIT_FAILURE);
        }

        /*
        **  No complete symbol was available although the loop
        **  condition expected more input; the buffer slot below would
        **  be read without ever having been filled.
        */
        if (items_read == 0) {
          fprintf (stderr, "Fatal error: unexpected end of input file!\n");
          exit (EXIT_FAILURE);
        }

        input_buffer_p = input_buffer;
        input_buffer_end = input_buffer + items_read;
      }

      if ((*input_buffer_p & NO_FLAGS) >= block_struct -> prims_array_size) {
        fprintf (stderr, "Symbol %u encountered.\n", *input_buffer_p);
        fprintf (stderr, "Symbol out of range in input buffer in %s, line %u.\n", __FILE__, (unsigned int) __LINE__);
        exit (EXIT_FAILURE);
      }

      /*  New primitive found  */
      if (block_struct -> prims_array[(*input_buffer_p & NO_FLAGS)] == UNINITIALIZED_GENERATION) {
        block_struct -> num_prims += 1;
        block_struct -> prims_array[(*input_buffer_p & NO_FLAGS)] = 0;
      }

      /*  Do not increment if maximum number of primitives is reached;
      **  basically prevents counter from overflowing back to 0.  */
      if (block_struct -> prims_array[(*input_buffer_p & NO_FLAGS)] != UNINITIALIZED_GENERATION - 1) {
        block_struct -> prims_array[(*input_buffer_p & NO_FLAGS)] += 1;
      }

      initSeqNode ((R_UINT) *input_buffer_p, &(block_struct -> seq_buf[curr_seq_buf_len]));
      curr_seq_buf_len++;
      input_buffer_p++;
    }

    /*  Input ran out before the block was full: shrink the block  */
    if (curr_seq_buf_len < block_struct -> seq_buf_len) {
      block_struct -> seq_buf_len = curr_seq_buf_len;
      block_struct -> seq_buf_end = block_struct -> seq_buf + (block_struct -> seq_buf_len - 1);
    }

    /*  Rollback sequence  */
    if ((prog_struct -> apply_heuristics == HEUR_WA) && ((ftell (prog_struct -> in_file) < (R_L_INT) prog_struct -> in_file_size) || (input_buffer_p < input_buffer_end))) {
      k = block_struct -> seq_buf_len - 1;
      /*  Skip trailing non-word symbols, then the word in front of them  */
      while ((k > 0) && (!ISWORD (block_struct -> seq_buf[k].value))) {
        k--;
      }
      while ((k > 0) && (ISWORD (block_struct -> seq_buf[k].value))) {
        k--;
      }

      /*
      **  At this point, k will point to the last SEQ_NODE of the shortened
      **  block_struct -> seq_buf.
      */
      if (k != 1) {
        /*
        **  m is used to iterate through the end of the array to copy
        **  the values to an "input_stack".
        */
        m = k + 1;
        block_struct -> input_stack = wmalloc (((block_struct -> seq_buf_len - m) * sizeof (SEQ_NODE)));
        for (k = 0; k < block_struct -> seq_buf_len - m; k++) {
          initSeqNode ((R_UINT) block_struct -> seq_buf[m + k].value, &block_struct -> input_stack[k]);
          /*  Undo the count taken when the symbol was first read  */
          block_struct -> prims_array[block_struct -> seq_buf[m + k].value]--;
          if (block_struct -> prims_array[block_struct -> seq_buf[m + k].value] == 0) {
            block_struct -> num_prims--;
            block_struct -> prims_array[block_struct -> seq_buf[m + k].value] = UNINITIALIZED_GENERATION;
          }
        }

        /*  Decrease sequence from block_struct -> seq_buf_len by the number
        **  of characters copied  */
        block_struct -> seq_buf_len -= k;
        block_struct -> seq_buf_end = block_struct -> seq_buf + (block_struct -> seq_buf_len - 1);
        block_struct -> input_stack_size = k;
      }
      else {
        /*  Roll back sequence to the beginning  */
      }
    }

    (block_struct -> sizelist) = initSListNode (block_struct -> num_prims);

    executeRepair_OneBlock (prog_struct, block_struct);
    encodeHierarchy_OneBlock (prog_struct, block_struct);
    encodeSequence_OneBlock (prog_struct, block_struct);
    displayStats_OneBlock (prog_struct, block_struct);
    uninitRepair_OneBlock (prog_struct, block_struct);
  }

  if (input_buffer_c != NULL) {
    wfree (input_buffer_c);
  }
  else if (input_buffer_s != NULL) {
    wfree (input_buffer_s);
  }

  return;
}
/*
** Prepare block_struct for a new block.
**
** Calls uninitRepair_OneBlock first, then allocates the sequence
** buffer, the primitives array, the temp_phrases array and the
** tent_phrases table, sized from prog_struct and TENTPHRASE_SIZE.
** Every temp_phrases entry is set up as a primitive whose unit is its
** own index.  Symbols saved on block_struct -> input_stack are moved
** to the front of the new sequence buffer and counted, after which the
** stack is freed.  prog_struct -> total_blocks is incremented.
*/
static void initRepair_OneBlock (PROG_INFO *prog_struct, BLOCK_INFO *block_struct) {
  R_UINT idx;
  R_UINT symbol;
  PHRASE *phrase;

  uninitRepair_OneBlock (prog_struct, block_struct);

  /*  Sequence buffer; seq_buf_end addresses its last element  */
  block_struct -> seq_buf_len = prog_struct -> max_buffer_size;
  block_struct -> seq_buf = wmalloc ((block_struct -> seq_buf_len) * sizeof (SEQ_NODE));
  block_struct -> seq_buf_end = (block_struct -> seq_buf) + (block_struct -> seq_buf_len - 1);

  /*  Primitives array and the temporary phrase array, both max_prims long  */
  block_struct -> prims_array_size = prog_struct -> max_prims;
  block_struct -> prims_array = wmalloc (block_struct -> prims_array_size * sizeof (R_UINT));

  block_struct -> temp_phrases_size = prog_struct -> max_prims;
  block_struct -> temp_phrases = wmalloc ((block_struct -> temp_phrases_size) * (sizeof (PHRASE)));

  /*  Each primitive is a phrase of length 1 standing for its own index  */
  for (idx = 0; idx < block_struct -> prims_array_size; idx++) {
    phrase = &(block_struct -> temp_phrases[idx]);

    phrase -> left = idx;                /*  idx is the ASCII value  */
    phrase -> left_chiastic = 0;
    phrase -> right = idx;
    phrase -> right_chiastic = 0;
    phrase -> unit = idx;                /*  Unit = ASCII value for primitives only  */
    phrase -> generation = 0;            /*  Generation of 0  */
    phrase -> length = 1;
    phrase -> temp_index = idx;
    /*  The word / non-word type is only assigned under the HEUR_WA heuristic  */
    if (prog_struct -> apply_heuristics == HEUR_WA) {
      phrase -> mytype = ISWORD (idx) ? PT_WORD : PT_NON_WORD;
    }
    phrase -> myside = SIDE_NONE;
  }

  /*  Either count every primitive up front, or mark all as not yet seen  */
  for (idx = 0; idx < block_struct -> prims_array_size; idx++) {
    if (prog_struct -> add_prims == R_TRUE) {
      block_struct -> num_prims += 1;
      block_struct -> prims_array[idx] = 1;
    }
    else {
      block_struct -> prims_array[idx] = UNINITIALIZED_GENERATION;
    }
  }

  /*  Ensure that the zero-length word is always accounted for  */
  if (prog_struct -> base_datatype != (R_UINT) sizeof (R_UCHAR)) {
    block_struct -> num_prims += 1;
    block_struct -> prims_array[0] = 1;
  }

  /*
  **  Copy the end of the last buffer to this one
  */
  if (block_struct -> input_stack_size != 0) {
    idx = 0;
    while (idx < block_struct -> input_stack_size) {
      initSeqNode ((R_UINT) block_struct -> input_stack[idx].value, &block_struct -> seq_buf[idx]);
      symbol = (R_UINT) block_struct -> input_stack[idx].value;
      /*  First sighting of this symbol in the new block  */
      if (block_struct -> prims_array[symbol] == UNINITIALIZED_GENERATION) {
        block_struct -> prims_array[symbol] = 0;
        block_struct -> num_prims++;
      }
      block_struct -> prims_array[symbol]++;
      idx++;
    }
    wfree (block_struct -> input_stack);
  }
  block_struct -> input_stack = NULL;

  /*  tent_phrases hash table starts out with every slot empty  */
  block_struct -> tent_phrases_size = (R_UINT) TENTPHRASE_SIZE;
  block_struct -> tent_phrases = wmalloc (block_struct -> tent_phrases_size * sizeof (TPHRASE*));
  for (idx = 0; idx < block_struct -> tent_phrases_size; idx++) {
    block_struct -> tent_phrases[idx] = NULL;
  }

  prog_struct -> total_blocks++;

  return;
}
/*
 - slow - step through the string more deliberately
 *
 * Feeds the characters from "start" up to "stop" to step(), inserting
 * BOL/EOL and BOW/EOW pseudo-characters where they occur between two
 * characters. Yields the last position at which "stopst" was in the
 * state set, or NULL when it never was.
 */
static char *			/* where it ended */
slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
{
	states st = m->st;
	states empty = m->empty;
	states tmp = m->tmp;
	char *pos = start;
	int cur;		/* character at pos, or OUT */
	int prev;		/* previous value of cur */
	int flag;		/* pseudo-character to feed, if any */
	int reps;
	char *last_hit;		/* last pos at which a match ended */

	/* character in front of the start, OUT at the very beginning */
	if (start == m->beginp)
		cur = OUT;
	else
		cur = *(start-1);

	AT("slow", start, stop, startst, stopst);
	CLEAR(st);
	SET1(st, startst);
	SP("sstart", st, *pos);
	st = step(m->g, startst, stopst, st, NOTHING, st);
	last_hit = NULL;
	while (1) {
		/* next character */
		prev = cur;
		if (pos == m->endp)
			cur = OUT;
		else
			cur = *pos;

		/* is there an EOL and/or BOL between prev and cur? */
		flag = '\0';
		reps = 0;
		if ((prev == '\n' && m->g->cflags&R_REGEX_NEWLINE) ||
		    (prev == OUT && !(m->eflags&R_REGEX_NOTBOL))) {
			flag = BOL;
			reps = m->g->nbol;
		}
		if ((cur == '\n' && m->g->cflags&R_REGEX_NEWLINE) ||
		    (cur == OUT && !(m->eflags&R_REGEX_NOTEOL))) {
			if (flag == BOL)
				flag = BOLEOL;
			else
				flag = EOL;
			reps += m->g->neol;
		}
		if (reps != 0) {
			/* one step per BOL/EOL operator counted above */
			while (reps > 0) {
				st = step(m->g, startst, stopst, st, flag, st);
				reps--;
			}
			SP("sboleol", st, cur);
		}

		/* how about a word boundary? */
		if (flag == BOL || (prev != OUT && !ISWORD(prev))) {
			if (cur != OUT && ISWORD(cur))
				flag = BOW;
		}
		if (prev != OUT && ISWORD(prev)) {
			if (flag == EOL || (cur != OUT && !ISWORD(cur)))
				flag = EOW;
		}
		if (flag == BOW || flag == EOW) {
			st = step(m->g, startst, stopst, st, flag, st);
			SP("sboweow", st, cur);
		}

		/* are we done? */
		if (ISSET(st, stopst))
			last_hit = pos;
		if (EQ(st, empty) || pos == stop)
			break;		/* NOTE BREAK OUT */

		/* no, we must deal with this character */
		ASSIGN(tmp, st);
		ASSIGN(st, empty);
		assert(cur != OUT);
		st = step(m->g, startst, stopst, tmp, cur, st);
		SP("saft", st, cur);
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
		pos++;
	}

	return(last_hit);
}
/*
 - fast - step through the string at top speed
 *
 * Runs the state machine from "start" towards "stop", restarting from
 * the "fresh" state set at every character, and stops as soon as
 * "stopst" is reached or "stop" is hit.
 *
 * Side effect: m->coldp is set to the last position at which the state
 * set was equal to the fresh set, i.e. after which no match was
 * underway.
 *
 * Returns p+1 for the position p at which the loop stopped when
 * "stopst" is in the final state set, NULL otherwise.
 *
 * The state sets are compared with EQ(), like every other comparison
 * in this function and in slow(); "states" is an opaque type here and
 * a raw == is not guaranteed to compare set contents.
 */
static char *			/* where tentative match ended, or NULL */
fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
{
	states st = m->st;
	states fresh = m->fresh;
	states tmp = m->tmp;
	char *p = start;
	int c = (start == m->beginp) ? OUT : *(start-1);
	int lastc;		/* previous c */
	int flagch;
	int i;
	char *coldp;		/* last p after which no match was underway */

	CLEAR(st);
	SET1(st, startst);
	st = step(m->g, startst, stopst, st, NOTHING, st);
	ASSIGN(fresh, st);
	SP("start", st, *p);
	coldp = NULL;
	for (;;) {
		/* next character */
		lastc = c;
		c = (p == m->endp) ? OUT : *p;
		if (EQ(st, fresh))
			coldp = p;

		/* is there an EOL and/or BOL between lastc and c? */
		flagch = '\0';
		i = 0;
		if ( (lastc == '\n' && m->g->cflags&R_REGEX_NEWLINE) ||
				(lastc == OUT && !(m->eflags&R_REGEX_NOTBOL)) ) {
			flagch = BOL;
			i = m->g->nbol;
		}
		if ( (c == '\n' && m->g->cflags&R_REGEX_NEWLINE) ||
				(c == OUT && !(m->eflags&R_REGEX_NOTEOL)) ) {
			flagch = (flagch == BOL) ? BOLEOL : EOL;
			i += m->g->neol;
		}
		if (i != 0) {
			/* one step per BOL/EOL operator counted above */
			for (; i > 0; i--)
				st = step(m->g, startst, stopst, st, flagch, st);
			SP("boleol", st, c);
		}

		/* how about a word boundary? */
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
					(c != OUT && ISWORD(c)) ) {
			flagch = BOW;
		}
		if ( (lastc != OUT && ISWORD(lastc)) &&
				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
			flagch = EOW;
		}
		if (flagch == BOW || flagch == EOW) {
			st = step(m->g, startst, stopst, st, flagch, st);
			SP("boweow", st, c);
		}

		/* are we done? */
		if (ISSET(st, stopst) || p == stop)
			break;		/* NOTE BREAK OUT */

		/* no, we must deal with this character */
		ASSIGN(tmp, st);
		ASSIGN(st, fresh);
		assert(c != OUT);
		st = step(m->g, startst, stopst, tmp, c, st);
		SP("aft", st, c);
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
		p++;
	}

	assert(coldp != NULL);
	m->coldp = coldp;
	if (ISSET(st, stopst))
		return(p+1);
	return NULL;
}