/// Feeds the `len` elements of `string` to matchc() one at a time, in order.
/// Returns false as soon as matchc() rejects an element (the remaining elements
/// are not attempted); returns true when every element was accepted, which
/// includes the case len == 0.
bool Lexer::matchStr(T const * string, size_t len)
{
    T const * const stop = string + len;
    for (T const * cursor = string; cursor != stop; ++cursor)
    {
        if (matchc(*cursor))
            continue;
        return false;
    }
    return true;
}
void bx_boollit_tail(triple *t, boolean_t jmp_type_one, boolean_t jmp_to_next, boolean_t sense, oprtype *addr)
/* search the Boolean in t (recursively) for literal leaves; the logic is similar to bx_tail
 * the rest of the arguments parallel those in bx_boolop and are used primarily in handling basic Boolean operations
 * (OR, NOR, AND, NAND) to get the jump target and sense right for the left-hand operand of the operation
 * jmp_type_one gives the sense of the jump associated with the first operand
 * jmp_to_next gives whether we need a second jump to complete the operation
 * sense gives the sense of the requested operation
 * *addr points to the operand for the jump and is eventually used by logic back in the invocation stack to fill in a
 * target location
 *
 * Outcome, as visible in this routine:
 * - if any operand does not bottom out (beneath its unary operators) in an OC_LIT, OC_JMPTRUE or OC_JMPFALSE triple,
 *   the routine returns without folding t; clean-up already applied to an earlier operand is not undone
 * - otherwise the operation is evaluated at compile time, the operand triples become OC_NOOP, and t itself is
 *   rewritten as OC_JMPTRUE or OC_JMPFALSE with operand[0] an indirect reference to addr
 * NOTE(review): the recursive step calls bx_boollit with three arguments (triple, sense, address); that routine is
 * not visible here, so how jmp_type_one / jmp_to_next are derived for the nested level should be confirmed there
 */
{
	boolean_t sin[ARRAYSIZE(t->operand)], tv[ARRAYSIZE(t->operand)];
	int com, comval, dummy, j, neg, num, tvr;
	mval *mv, *v[ARRAYSIZE(t->operand)];
	opctype c;
	oprtype *i, *p;
	triple *cob[ARRAYSIZE(t->operand)], *ref0, *tl[ARRAYSIZE(t->operand)];

	assert(OCT_BOOL & oc_tab[t->opcode].octype);
	assert(TRIP_REF == t->operand[0].oprclass);
	assert((OC_COBOOL != t->opcode) && (OC_COM != t->opcode) || (TRIP_REF == t->operand[1].oprclass));
	/* Phase 1: visit each operand slot of t (i walks the slots, j is the matching index into the per-operand arrays) */
	for (i = t->operand, j = 0; i < ARRAYTOP(t->operand); i++, j++)
	{	/* checkout an operand to see if we can simplify it */
		p = i;
		com = 0;
		for (tl[j] = i->oprval.tref; OCT_UNARY & oc_tab[(c = tl[j]->opcode)].octype; tl[j] = p->oprval.tref)
		{	/* find the real object of affection; WARNING assignment above */
			assert((TRIP_REF == tl[j]->operand[0].oprclass) && (NO_REF == tl[j]->operand[1].oprclass));
			com ^= (OC_COM == c);	/* if we make a recursive call below, COM matters, but NEG and FORCENUM don't */
			p = &tl[j]->operand[0];
		}
		/* here tl[j] is the first non-unary triple under the operand, c is its opcode and p is the operand that refers to it */
		if (OCT_ARITH & oc_tab[c].octype)
			ex_tail(p);					/* chained arithmetic */
		else if (OCT_BOOL & oc_tab[c].octype)
		{	/* recursively check an operand */
			sin[j] = sense;
			p = addr;
			if (!j && !(OCT_REL & oc_tab[t->opcode].octype))
			{	/* left hand operand of parent */
				sin[j] = jmp_type_one;
				if (jmp_to_next)
				{	/* left operands need extra attention to decide between jump next or to the end */
					p = (oprtype *)mcalloc(SIZEOF(oprtype));
					*p = put_tjmp(t);
				}
			}
			bx_boollit(tl[j], sin[j] ^ com, p);
		}
		if ((OC_JMPTRUE != tl[j]->opcode) && (OC_JMPFALSE != tl[j]->opcode) && (OC_LIT != tl[j]->opcode))
			return;						/* this operation doesn't qualify */
		com = comval = neg = num = 0;
		cob[j] = NULL;
		/* second walk over the same unary chain, this time pruning it: com and num count OC_COM / OC_FORCENUM,
		 * neg tracks the parity of OC_NEG, comval and cob[j] remember whether an OC_COMVAL / OC_COBOOL was seen
		 */
		for (ref0 = i->oprval.tref; OCT_UNARY & oc_tab[(c = ref0->opcode)].octype; ref0 = ref0->operand[0].oprval.tref)
		{	/* we may be able to clean up this operand; WARNING assignment above */
			assert((TRIP_REF == ref0->operand[0].oprclass) && (NO_REF == ref0->operand[1].oprclass));
			num += (OC_FORCENUM == c);
			com += (OC_COM == c);
			if (!com)					/* "outside" com renders neg moot */
				neg ^= (OC_NEG == c);
			if (!comval && (NULL == cob[j]))
			{
				if (comval = (OC_COMVAL == c))		/* WARNING assignment */
				{
					if (ref0 != t->operand[j].oprval.tref)
						dqdel(t->operand[j].oprval.tref, exorder);
					t->operand[j].oprval.tref = tl[j];	/* need mval: no COBOOL needed */
				} else if (OC_COBOOL == c)
				{	/* the operand needs a COBOOL in case its operator remains unresolved */
					cob[j] = t->operand[j].oprval.tref;
					if (ref0 == cob[j])
						continue;		/* already where it belongs */
					cob[j]->opcode = OC_COBOOL;
					cob[j]->operand[0].oprval.tref = tl[j];
				} else if (ref0 == t->operand[j].oprval.tref)
					continue;			/* skips the dqdel below for the triple the operand slot refers to */
			}
			dqdel(ref0, exorder);
		}
		assert(ref0 == tl[j]);
		if (!comval && (NULL == cob[j]) && (tl[j] != t->operand[j].oprval.tref))
		{	/* left room for a COBOOL, but there's no need */
			dqdel(t->operand[j].oprval.tref, exorder);
			t->operand[j].oprval.tref = tl[j];
		}
		if ((OC_JMPTRUE == ref0->opcode) || (OC_JMPFALSE == ref0->opcode))
		{	/* switch to a literal representation of TRUE / FALSE */
			/* NOTE(review): sin[j] is only assigned on the OCT_BOOL path above; presumably a JMPTRUE / JMPFALSE
			 * can only show up here as the product of that recursive fold - confirm
			 */
			assert(INDR_REF == ref0->operand[0].oprclass);
			ref0->operand[1] = ref0->operand[0];		/* track info as we switch opcode */
			PUT_LITERAL_TRUTH((sin[j] ? OC_JMPFALSE : OC_JMPTRUE) == ref0->opcode, ref0);
			ref0->opcode = OC_LIT;
			com = 0;					/* already accounted for by sin */
		}
		assert((OC_LIT == ref0->opcode) && (MLIT_REF == ref0->operand[0].oprclass));
		v[j] = &ref0->operand[0].oprval.mlit->v;
		if (com)
		{	/* any complement reduces the literal value to [unsigned] 1 or 0 */
			unuse_literal(v[j]);
			tv[j] = (0 == v[j]->m[1]);
			assert(ref0 == tl[j]);
			PUT_LITERAL_TRUTH(tv[j], ref0);
			v[j] = &ref0->operand[0].oprval.mlit->v;	/* re-fetch: PUT_LITERAL_TRUTH replaced the operand */
			num = 0;					/* any complement trumps num */
		}
		if (neg || num)
		{	/* get literal into uniform state */
			unuse_literal(v[j]);
			mv = (mval *)mcalloc(SIZEOF(mval));
			*mv = *v[j];					/* work on a private copy of the literal value */
			if (neg)
			{
				if (MV_INT & mv->mvtype)
				{
					if (0 != mv->m[1])
						mv->m[1] = -mv->m[1];
					else
						mv->sgn = 0;
				} else if (MV_NM & mv->mvtype)
					mv->sgn = !mv->sgn;
			} else
				s2n(mv);
			n2s(mv);
			v[j] = mv;
			assert(ref0 == tl[j]);
			put_lit_s(v[j], ref0);
		}
	}
	assert(tl[0] != tl[1]);					/* start processing a live one */
	/* Phase 2: tvr temporarily holds the operand count reached by phase 1 while j is reused as the loop index */
	for (tvr = j, j = 0; j < tvr; j++)
	{	/* both arguments are literals, so do the operation at compile time */
		if (NULL != cob[j])
			dqdel(cob[j], exorder);
		v[j] = &tl[j]->operand[0].oprval.mlit->v;
		tv[j] = (0 != v[j]->m[1]);
		unuse_literal(v[j]);
		tl[j]->opcode = OC_NOOP;
		tl[j]->operand[0].oprclass = NO_REF;
	}
	t->operand[1].oprclass = NO_REF;
	/* Phase 3: compute the truth value; each negated opcode shares the computation of its positive counterpart here
	 * NOTE(review): presumably the negation is carried by sense rather than by the opcode - confirm with the caller
	 */
	switch (c = t->opcode)					/* WARNING assignment */
	{	/* optimize the Boolean operations here */
		case OC_NAND:
		case OC_AND:
			tvr = (tv[0] && tv[1]);
			break;
		case OC_NOR:
		case OC_OR:
			tvr = (tv[0] || tv[1]);
			break;
		case OC_NCONTAIN:
		case OC_CONTAIN:
			/* look for one occurrence of v[1] in v[0]; tvr goes in as 1 and its low bit is flipped afterwards
			 * NOTE(review): relies on matchc leaving 0 in its last argument when the occurrence is found - confirm
			 */
			tvr = 1;
			(void)matchc(v[1]->str.len, (unsigned char *)v[1]->str.addr, v[0]->str.len, (unsigned char *)v[0]->str.addr, &dummy, &tvr);
			tvr ^= 1;
			break;
		case OC_NEQU:
		case OC_EQU:
			tvr = is_equ(v[0], v[1]);
			break;
		case OC_NFOLLOW:
		case OC_FOLLOW:
			tvr = 0 < memvcmp(v[0]->str.addr, v[0]->str.len, v[1]->str.addr, v[1]->str.len);
			break;
		case OC_NGT:
		case OC_GT:
			tvr = 0 < numcmp(v[0], v[1]);
			break;
		case OC_NLT:
		case OC_LT:
			tvr = 0 > numcmp(v[0], v[1]);
			break;
		case OC_NPATTERN:
		case OC_PATTERN:
			/* the first uint4 of the pattern string selects the matcher: zero means do_pattern, nonzero do_patfixed */
			tvr = !(*(uint4 *)v[1]->str.addr) ? do_pattern(v[0], v[1]) : do_patfixed(v[0], v[1]);
			break;
		case OC_NSORTS_AFTER:
		case OC_SORTS_AFTER:
			tvr = 0 < sorts_after(v[0], v[1]);
			break;
		default:
			assertpro(FALSE);
	}
	tvr ^= !sense;						/* fold the requested sense into the result */
	t->operand[0] = put_indr(addr);
	t->opcode = tvr ? OC_JMPFALSE : OC_JMPTRUE;
	return;
}
/*
 * ----------------------------------------------------------
 * Set piece procedure (unicode flavor).
 * Builds, in dst, a copy of src in which pieces "first" through
 * "last" (pieces being separated by del) are replaced by expr.
 *
 * Arguments:
 *	src	- source mval
 *	del	- delimiter string mval
 *	expr	- expression string mval (the replacement)
 *	first	- starting index in source mval to be set
 *	last	- last index
 *	dst	- destination mval where the result is saved.
 *
 * Notes:
 *	- With an empty delimiter the result is expr alone, unless "first" is beyond
 *	  piece 1 and src->mvtype is non-zero, in which case op_cat builds src followed by expr.
 *	- When a single piece is being set and MV_FORCE_LEN(del) is 1, the work is handed to op_setp1.
 *	- If src has fewer pieces than needed, the missing delimiters are appended before expr.
 *	- The result is assembled at stringpool.free; ERR_MAXSTRLEN is raised if it would exceed MAX_STRLEN.
 *
 * Return:
 *	none
 * ----------------------------------------------------------
 */
void op_setpiece(mval *src, mval *del, mval *expr, int4 first, int4 last, mval *dst)
{
	size_t		result_len, pad_cnt;
	int		found, tail_len, scan_len, head_end, tail_start, piece_cnt;
	unsigned char	*hit, *scan, *out, *tail;
	delimfmt	delim_char;

	assert(gtm_utf8_mode);
	first--;				/* from here on "first" counts the pieces to skip */
	if (0 > first)
		first = 0;
	tail_start = last - first;		/* for now: the number of pieces being replaced */
	MV_FORCE_STR(del);
	if (0 == del->str.len)
	{	/* Null delimiter */
		if (!first || !src->mvtype)
		{	/* setting piece 1, or src carries no type bits: the result is expr by itself */
			MV_FORCE_STR(expr);
			*dst = *expr;
		} else
		{	/* otherwise the result is src with expr appended */
			op_cat(VARLSTCNT(3) dst, src, expr);
		}
		return;
	}
	MV_FORCE_STR(expr);
	if (MV_DEFINED(src))
	{
		if ((1 == tail_start) && (1 == MV_FORCE_LEN(del)))
		{	/* One piece, one-character delimiter (valid single chars as well as single invalid bytes):
			 * hand off to op_setp1, which wants the delimiter packed into a delimfmt value.
			 */
			delim_char.unichar_val = 0;
			assert(SIZEOF(delim_char.unibytes_val) >= del->str.len);
			memcpy(delim_char.unibytes_val, del->str.addr, del->str.len);
			op_setp1(src, delim_char.unichar_val, expr, last, dst);	/* "last" is still the caller's value */
			return;
		}
		/* General case: src is defined and has to be scanned */
		MV_FORCE_STR(src);
		scan = (unsigned char *)src->str.addr;
		scan_len = src->str.len;
		if (0 == first)
		{	/* nothing to skip: the replaced region starts at the beginning of src */
			hit = scan;
			found = 1;
		} else
		{	/* step over the pieces that precede the region being set; matchc works on an "int",
			 * so shuttle the int4 "first" through piece_cnt. On return piece_cnt holds the number
			 * of pieces that could not be matched, which becomes the new "first".
			 */
			piece_cnt = first;
			hit = matchc(del->str.len, (uchar_ptr_t)del->str.addr, scan_len, scan, &found, &piece_cnt);
			first = piece_cnt;
		}
		head_end = INTCAST(hit - (unsigned char *)src->str.addr);
		if (0 != found)
		{	/* Now step over the delimiters belonging to the pieces being replaced, e.g. when setting
			 * pieces 2 - 4, all three are replaced by the one piece expr.
			 */
			scan_len -= INTCAST(hit - scan);
			scan = hit;
			hit = matchc(del->str.len, (uchar_ptr_t)del->str.addr, scan_len, scan, &found, &tail_start);
			if (0 != found)
				tail_start = INTCAST(hit - (unsigned char *)src->str.addr - del->str.len);
			else
				tail_start = -1;
		} else
			tail_start = -1;	/* ran out of src before reaching the region: no trailing part */
	} else
	{	/* undefined src: nothing in front, nothing behind */
		head_end = 0;
		tail_start = -1;
	}
	pad_cnt = (size_t)first;		/* delimiters that must be supplied ahead of expr */
	/* Work out the total length of the result, piece by piece */
	result_len = (size_t)head_end;
	result_len += (size_t)del->str.len * pad_cnt;
	result_len += (size_t)expr->str.len;
	if (0 <= tail_start)			/* plus whatever follows the replaced region */
		result_len += (size_t)(src->str.len - tail_start);
	if (MAX_STRLEN < result_len)
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
		return;
	}
	ENSURE_STP_FREE_SPACE((int)result_len);
	out = stringpool.free;
	/* leading part of src */
	if (0 != head_end)
	{
		memcpy(out, src->str.addr, head_end);
		out += head_end;
	}
	/* padding delimiters */
	if (!gtm_utf8_mode || (1 >= del->str.len))
	{	/* single-byte delimiter: one memset does it */
		memset(out, (char)*del->str.addr, pad_cnt);
		out += pad_cnt;
	} else
	{	/* multi-byte delimiter: lay the copies down one after another */
		for ( ; 0 != pad_cnt; pad_cnt--)
		{
			memcpy(out, del->str.addr, del->str.len);
			out += del->str.len;
		}
	}
	/* the replacement value */
	memcpy(out, expr->str.addr, expr->str.len);
	out += expr->str.len;
	/* trailing part of src */
	if (0 <= tail_start)
	{
		tail = (unsigned char *)src->str.addr + tail_start;
		tail_len = src->str.len - tail_start;
		memcpy(out, tail, tail_len);
		out += tail_len;
	}
	assert(IS_AT_END_OF_STRINGPOOL(out, -result_len));
	/* publish the result and claim the space in the stringpool */
	dst->mvtype = MV_STR;
	dst->str.addr = (char *)stringpool.free;
	dst->str.len = INTCAST(out - stringpool.free);
	stringpool.free = out;
	return;
}