int f_piece(oprtype *a, opctype op) { mval *delim_mval; triple *delimiter, *first, *last, *r; oprtype x; delimfmt unichar; error_def(ERR_COMMA); r = maketriple(op); if (!strexpr(&(r->operand[0]))) return FALSE; if (window_token != TK_COMMA) { stx_error(ERR_COMMA); return FALSE; } advancewindow(); delimiter = newtriple(OC_PARAMETER); r->operand[1] = put_tref(delimiter); first = newtriple(OC_PARAMETER); delimiter->operand[1] = put_tref(first); if (!strexpr(&x)) return FALSE; if (window_token != TK_COMMA) first->operand[0] = put_ilit(1); else { advancewindow(); if (!intexpr(&(first->operand[0]))) return FALSE; } assert(x.oprclass == TRIP_REF); if (window_token != TK_COMMA && x.oprval.tref->opcode == OC_LIT && (1 == ((gtm_utf8_mode && OC_FNZPIECE != op) ? MV_FORCE_LEN(&x.oprval.tref->operand[0].oprval.mlit->v) : x.oprval.tref->operand[0].oprval.mlit->v.str.len))) { /* Potential shortcut to op_fnzp1 or op_fnp1. Make some further checks */ delim_mval = &x.oprval.tref->operand[0].oprval.mlit->v; /* Both valid chars of char_len 1 and invalid chars of byte length 1 get the fast path */ unichar.unichar_val = 0; if (!gtm_utf8_mode || OC_FNZPIECE == op) { /* Single byte delimiter */ r->opcode = OC_FNZP1; unichar.unibytes_val[0] = *delim_mval->str.addr; } else { /* Potentially multiple bytes in one int */ r->opcode = OC_FNP1; assert(SIZEOF(int) >= delim_mval->str.len); memcpy(unichar.unibytes_val, delim_mval->str.addr, delim_mval->str.len); } delimiter->operand[0] = put_ilit(unichar.unichar_val); ins_triple(r); *a = put_tref(r); return TRUE; }
int m_set(void) { /* Some comment on "parse_warn". It is set to TRUE whenever the parse encounters an invalid setleft target. * Note that even if "parse_warn" is TRUE, we should not return FALSE right away but need to continue the parse * until the end of the current SET command. This way any remaining commands in the current parse line will be * parsed and triples generated for them. This is necessary just in case the currently parsed invalid SET command * does not get executed at runtime (due to postconditionals etc.) * * Some comment on the need for "first_setleft_invalid". This variable is needed only in the * case we encounter an invalid-SVN/invalid-FCN/unsettable-SVN as a target of the SET. We need to evaluate the * right-hand-side of the SET command only if at least one valid setleft target is parsed before an invalid setleft * target is encountered. This is because we still need to execute the valid setlefts at runtime before triggering * a runtime error for the invalid setleft. If the first setleft target is an invalid one, then there is no need * to evaluate the right-hand-side. In fact, in this case, adding triples (corresponding to the right hand side) * to the execution chain could cause problems with emit_code later in the compilation as the destination * for the right hand side triples could now be undefined (for example a valid SVN on the left side of the * SET would have generated an OC_SVPUT triple with one of its operands holding the result of the right * hand side evaluation, but an invalid SVN on the left side which would have instead caused an OC_RTERROR triple * to have been generated leaving no triple to receive the result of the right hand side evaluation thus causing * emit_code to be confused and GTMASSERT). Therefore discard all triples generated by the right hand side in this case. * By the same reasoning, discard all triples generated by setleft targets AFTER this invalid one as well. * "first_setleft_invalid" is set to TRUE if the first setleft target is invalid and set to FALSE if the first setleft * target is valid. It is initialized to -1 before the start of the parse. */ int index, setop, delimlen; int first_val_lit, last_val_lit, nakedzalias; boolean_t first_is_lit, last_is_lit, got_lparen, delim1char, is_extract, valid_char; boolean_t alias_processing, have_lh_alias; opctype put_oc; oprtype v, delimval, firstval, lastval, *result, resptr; triple *curtargchain, *delimiter, discardcurtchain, *first, *get, *jmptrp1, *jmptrp2, *last, *obp, *put; triple *s, *s0, *s1, save_targchain, *save_curtchain, *save_curtchain1, *sub, targchain, *tmp; mint delimlit; mval *delim_mval; mvar *mvarptr; boolean_t parse_warn; /* set to TRUE in case of an invalid SVN etc. */ boolean_t curtchain_switched; /* set to TRUE if a setcurtchain was done */ int first_setleft_invalid; /* set to TRUE if the first setleft target is invalid */ boolean_t temp_subs_was_FALSE; union { uint4 unichar_val; unsigned char unibytes_val[4]; } unichar; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; TREF(temp_subs) = FALSE; dqinit(&targchain, exorder); result = (oprtype *)mcalloc(SIZEOF(oprtype)); resptr = put_indr(result); delimiter = sub = last = NULL; /* A SET clause must be entirely alias related or a normal set. Parenthized multiple sets of aliases are not allowed * and will trigger an error. This is because the source and targets of aliases require different values and references * than normal sets do and thus cannot be mixed. */ if (alias_processing = (TK_ASTERISK == window_token)) advancewindow(); if (got_lparen = (TK_LPAREN == window_token)) { if (alias_processing) stx_error(ERR_NOALIASLIST); advancewindow(); TREF(temp_subs) = TRUE; } /* Some explanation: The triples from the left hand side of the SET expression that are * expressly associated with fetching (in case of set $piece/$extract) and/or storing of * the target value are removed from curtchain and placed on the targchain. Later, these * triples will be added to the end of curtchain to do the finishing store of the target * after the righthand side has been evaluated. This is per the M standard. * * Note that SET $PIECE/$EXTRACT have special conditions in which the first argument is not referenced at all. * (e.g. set $piece(^a," ",3,2) in this case 3 > 2 so this should not evaluate ^a and therefore should not * modify the naked indicator). That is, the triples that do these conditional checks need to be inserted * ahead of the OC_GVNAME of ^a, all of which need to be inserted on the targchain. But the conditionalization * can be done only after parsing the first argument of the SET $PIECE and examining the remaining arguments. * Therefore we maintain the "curtargchain" variable which stores the value of the "targchain" at the beginning * of the iteration (at the start of the $PIECE parsing) and all the conditionalization will be inserted right * here which is guaranteed to be ahead of where the OC_GVNAME gets inserted. * * For example, SET $PIECE(^A(x,y),delim,first,last)=RHS will generate a final triple chain as follows * * A - Triples to evaluate subscripts (x,y) of the global ^A * A - Triples to evaluate delim * A - Triples to evaluate first * A - Triples to evaluate last * B - Triples to evaluate RHS * C - Triples to do conditional check (e.g. first > last etc.) * C - Triples to branch around if the checks indicate this is a null operation SET $PIECE * D - Triple that does OC_GVNAME of ^A * D - Triple that does OC_SETPIECE to determine the new value * D - Triple that does OC_GVPUT of the new value into ^A(x,y) * This is the point where the conditional check triples will branch around to if they chose to. * * A - triples that evaluates the arguments/subscripts in the left-hand-side of the SET command * These triples are built in "curtchain" * B - triples that evaluates the arguments/subscripts in the right-hand-side of the SET command * These triples are built in "curtchain" * C - triples that do conditional check for any $PIECE/$EXTRACT in the left side of the SET command. * These triples are built in "curtargchain" * D - triples that generate the reference to the target of the SET and the store into the target. * These triples are built in "targchain" * * Note alias processing does not support the SET *(...)=.. type syntax because the type of argument * created for RHS processing is dependent on the LHS receiver type and we do not support more than one * type of source argument in a single SET. */ first_setleft_invalid = FIRST_SETLEFT_NOTSEEN; curtchain_switched = FALSE; nakedzalias = have_lh_alias = FALSE; save_curtchain = NULL; assert(FIRST_SETLEFT_NOTSEEN != TRUE); assert(FIRST_SETLEFT_NOTSEEN != FALSE); for (parse_warn = FALSE; ; parse_warn = FALSE) { curtargchain = targchain.exorder.bl; jmptrp1 = jmptrp2 = NULL; delim1char = is_extract = FALSE; allow_dzwrtac_as_mident(); /* Allows $ZWRTACxxx as target to be treated as an mident */ switch (window_token) { case TK_IDENT: /* A slight diversion first. If this is a $ZWRTAC set (indication of $ in first char * is currently enough to signify that), then we need to check a few conditions first. * If this is a "naked $ZWRTAC", meaning no numeric suffix, then this is a flag that * all the $ZWRTAC vars in the local variable tree need to be kill *'d which will not * be generating a SET instruction. First we need to verify that fact and make sure * we are not in PARENs and not doing alias processing. Note *any* value can be * specified as the source but while it will be evaluated, it is NOT stored anywhere. */ if ('$' == *window_ident.addr) { /* We have a $ZWRTAC<xx> target */ if (got_lparen) /* We don't allow $ZWRTACxxx to be specified in a parenthesized list. * Verify that first */ SYNTAX_ERROR(ERR_DZWRNOPAREN); if (STR_LIT_LEN(DOLLAR_ZWRTAC) == window_ident.len) { /* Ok, this is a naked $ZWRTAC targeted set */ if (alias_processing) SYNTAX_ERROR(ERR_DZWRNOALIAS); nakedzalias = TRUE; /* This opcode doesn't really need args but it is easier to fit in with the rest * of m_set processing to pass it the result arg, which there may actually be * a use for someday.. */ put = maketriple(OC_CLRALSVARS); put->operand[0] = resptr; dqins(targchain.exorder.bl, exorder, put); advancewindow(); break; } } /* If we are doing alias processing, there are two possibilities: * 1) LHS is unsubscripted - it is an alias variable being created or replaced. Need to parse * the varname as if this were a regular set. * 2) LHS is subscripted - it is an alias container variable being created or replaced. The * processing here is to pass the base variable index to the store routine so bypass the * lvn() call. */ if (!alias_processing || TK_LPAREN == director_token) { /* Normal variable processing or we have a lh alias container */ if (!lvn(&v, OC_PUTINDX, 0)) SYNTAX_ERROR_NOREPORT_HERE; if (OC_PUTINDX == v.oprval.tref->opcode) { dqdel(v.oprval.tref, exorder); dqins(targchain.exorder.bl, exorder, v.oprval.tref); sub = v.oprval.tref; put_oc = OC_PUTINDX; if (TREF(temp_subs)) m_set_create_temporaries(sub, put_oc); } } else { /* Have alias variable. Argument is index into var table rather than pointer to var */ have_lh_alias = TRUE; /* We only want the variable index in this case. Since the entire hash structure to which * this variable is going to be pointing to is changing, doing anything that calls fetch() * is somewhat pointless so we avoid it by just accessing the variable information * directly. */ mvarptr = get_mvaddr(&window_ident); v = put_ilit(mvarptr->mvidx); advancewindow(); } /* Determine correct storing triple */ put = maketriple((!alias_processing ? OC_STO : (have_lh_alias ? OC_SETALS2ALS : OC_SETALSIN2ALSCT))); put->operand[0] = v; put->operand[1] = resptr; dqins(targchain.exorder.bl, exorder, put); break; case TK_CIRCUMFLEX: if (alias_processing) SYNTAX_ERROR(ERR_ALIASEXPECTED); s1 = curtchain->exorder.bl; if (!gvn()) SYNTAX_ERROR_NOREPORT_HERE; for (sub = curtchain->exorder.bl; sub != s1; sub = sub->exorder.bl) { put_oc = sub->opcode; if (OC_GVNAME == put_oc || OC_GVNAKED == put_oc || OC_GVEXTNAM == put_oc) break; } assert(OC_GVNAME == put_oc || OC_GVNAKED == put_oc || OC_GVEXTNAM == put_oc); dqdel(sub, exorder); dqins(targchain.exorder.bl, exorder, sub); if (TREF(temp_subs)) m_set_create_temporaries(sub, put_oc); put = maketriple(OC_GVPUT); put->operand[0] = resptr; dqins(targchain.exorder.bl, exorder, put); break; case TK_ATSIGN: if (alias_processing) SYNTAX_ERROR(ERR_ALIASEXPECTED); if (!indirection(&v)) SYNTAX_ERROR_NOREPORT_HERE; if (!got_lparen && TK_EQUAL != window_token) { assert(!curtchain_switched); put = newtriple(OC_COMMARG); put->operand[0] = v; put->operand[1] = put_ilit(indir_set); return TRUE; } put = maketriple(OC_INDSET); put->operand[0] = v; put->operand[1] = resptr; dqins(targchain.exorder.bl, exorder, put); break; case TK_DOLLAR: if (alias_processing) SYNTAX_ERROR(ERR_ALIASEXPECTED); advancewindow(); if (TK_IDENT != window_token) SYNTAX_ERROR(ERR_VAREXPECTED); if (TK_LPAREN != director_token) { /* Look for intrinsic special variables */ s1 = curtchain->exorder.bl; if (0 > (index = namelook(svn_index, svn_names, window_ident.addr, window_ident.len))) { STX_ERROR_WARN(ERR_INVSVN); /* sets "parse_warn" to TRUE */ } else if (!svn_data[index].can_set) { STX_ERROR_WARN(ERR_SVNOSET); /* sets "parse_warn" to TRUE */ } advancewindow(); if (!parse_warn) { if (SV_ETRAP != svn_data[index].opcode && SV_ZTRAP != svn_data[index].opcode) { /* Setting of $ZTRAP or $ETRAP must go through opp_svput because they * may affect the stack pointer. All others directly to op_svput(). */ put = maketriple(OC_SVPUT); } else put = maketriple(OC_PSVPUT); put->operand[0] = put_ilit(svn_data[index].opcode); put->operand[1] = resptr; dqins(targchain.exorder.bl, exorder, put); } else { /* OC_RTERROR triple would have been inserted in curtchain by ins_errtriple * (invoked by stx_error). To maintain consistency with the "if" portion of * this code, we need to move this triple to the "targchain". */ tmp = curtchain->exorder.bl; /* corresponds to put_ilit(FALSE) in ins_errtriple */ tmp = tmp->exorder.bl; /* corresponds to put_ilit(in_error) in ins_errtriple */ tmp = tmp->exorder.bl; /* corresponds to newtriple(OC_RTERROR) in ins_errtriple */ assert(OC_RTERROR == tmp->opcode); dqdel(tmp, exorder); dqins(targchain.exorder.bl, exorder, tmp); CHKTCHAIN(&targchain); } break; } /* Only 4 function names allowed on left side: $[Z]Piece and $[Z]Extract */ index = namelook(fun_index, fun_names, window_ident.addr, window_ident.len); if (0 > index) { STX_ERROR_WARN(ERR_INVFCN); /* sets "parse_warn" to TRUE */ /* OC_RTERROR triple would have been inserted in "curtchain" by ins_errtriple * (invoked by stx_error). We need to switch it to "targchain" to be consistent * with every other codepath in this module. */ tmp = curtchain->exorder.bl; /* corresponds to put_ilit(FALSE) in ins_errtriple */ tmp = tmp->exorder.bl; /* corresponds to put_ilit(in_error) in ins_errtriple */ tmp = tmp->exorder.bl; /* corresponds to newtriple(OC_RTERROR) in ins_errtriple */ assert(OC_RTERROR == tmp->opcode); dqdel(tmp, exorder); dqins(targchain.exorder.bl, exorder, tmp); CHKTCHAIN(&targchain); advancewindow(); /* skip past the function name */ advancewindow(); /* skip past the left paren */ /* Parse the remaining arguments until corresponding RIGHT-PAREN/SPACE/EOL is reached */ if (!parse_until_rparen_or_space()) SYNTAX_ERROR_NOREPORT_HERE; } else { switch(fun_data[index].opcode) { case OC_FNPIECE: setop = OC_SETPIECE; break; case OC_FNEXTRACT: is_extract = TRUE; setop = OC_SETEXTRACT; break; case OC_FNZPIECE: setop = OC_SETZPIECE; break; case OC_FNZEXTRACT: is_extract = TRUE; setop = OC_SETZEXTRACT; break; default: SYNTAX_ERROR(ERR_VAREXPECTED); } advancewindow(); advancewindow(); /* Although we see the get (target) variable first, we need to save it's processing * on another chain -- the targchain -- because the retrieval of the target is bypassed * and the naked indicator is not reset if the first/last parameters are not set in a * logical manner (must be > 0 and first <= last). So the evaluation order is * delimiter (if $piece), first, last, RHS of the set and then the target if applicable. * Set up primary action triple now since it is ref'd by the put triples generated below. */ s = maketriple(setop); /* Even for SET[Z]PIECE and SET[Z]EXTRACT, the SETxxxxx opcodes * do not do the final store, they only create the final value TO be * stored so generate the triples that will actually do the store now. * Note we are still building triples on the original curtchain. */ switch (window_token) { case TK_IDENT: if (!lvn(&v, OC_PUTINDX, 0)) SYNTAX_ERROR(ERR_VAREXPECTED); if (OC_PUTINDX == v.oprval.tref->opcode) { dqdel(v.oprval.tref, exorder); dqins(targchain.exorder.bl, exorder, v.oprval.tref); sub = v.oprval.tref; put_oc = OC_PUTINDX; if (TREF(temp_subs)) m_set_create_temporaries(sub, put_oc); } get = maketriple(OC_FNGET); get->operand[0] = v; put = maketriple(OC_STO); put->operand[0] = v; put->operand[1] = put_tref(s); break; case TK_ATSIGN: if (!indirection(&v)) SYNTAX_ERROR(ERR_VAREXPECTED); get = maketriple(OC_INDGET); get->operand[0] = v; get->operand[1] = put_str(0, 0); put = maketriple(OC_INDSET); put->operand[0] = v; put->operand[1] = put_tref(s); break; case TK_CIRCUMFLEX: s1 = curtchain->exorder.bl; if (!gvn()) SYNTAX_ERROR_NOREPORT_HERE; for (sub = curtchain->exorder.bl; sub != s1 ; sub = sub->exorder.bl) { put_oc = sub->opcode; if ((OC_GVNAME == put_oc) || (OC_GVNAKED == put_oc) || (OC_GVEXTNAM == put_oc)) break; } assert((OC_GVNAME == put_oc) || (OC_GVNAKED == put_oc) || (OC_GVEXTNAM == put_oc)); dqdel(sub, exorder); dqins(targchain.exorder.bl, exorder, sub); if (TREF(temp_subs)) m_set_create_temporaries(sub, put_oc); get = maketriple(OC_FNGVGET); get->operand[0] = put_str(0, 0); put = maketriple(OC_GVPUT); put->operand[0] = put_tref(s); break; default: SYNTAX_ERROR(ERR_VAREXPECTED); } s->operand[0] = put_tref(get); /* Code to fetch args for target triple are on targchain. Put get there now too. */ dqins(targchain.exorder.bl, exorder, get); CHKTCHAIN(&targchain); if (!is_extract) { /* Set $[z]piece */ delimiter = newtriple(OC_PARAMETER); s->operand[1] = put_tref(delimiter); first = newtriple(OC_PARAMETER); delimiter->operand[1] = put_tref(first); /* Process delimiter string ($[z]piece only) */ if (TK_COMMA != window_token) SYNTAX_ERROR(ERR_COMMA); advancewindow(); if (!strexpr(&delimval)) SYNTAX_ERROR_NOREPORT_HERE; assert(TRIP_REF == delimval.oprclass); } else { /* Set $[Z]Extract */ first = newtriple(OC_PARAMETER); s->operand[1] = put_tref(first); } /* Process first integer value */ if (window_token != TK_COMMA) firstval = put_ilit(1); else { advancewindow(); if (!intexpr(&firstval)) SYNTAX_ERROR(ERR_COMMA); assert(firstval.oprclass == TRIP_REF); } first->operand[0] = firstval; if (first_is_lit = (OC_ILIT == firstval.oprval.tref->opcode)) { assert(ILIT_REF ==firstval.oprval.tref->operand[0].oprclass); first_val_lit = firstval.oprval.tref->operand[0].oprval.ilit; } if (TK_COMMA != window_token) { /* There is no "last" value. Only if 1 char literal delimiter and * no "last" value can we generate shortcut code to op_set[z]p1 entry * instead of op_set[z]piece. Note if UTF8 mode is in effect, then this * optimization applies if the literal is one unicode char which may in * fact be up to 4 bytes but will still be passed as a single unsigned * integer. */ if (!is_extract) { delim_mval = &delimval.oprval.tref->operand[0].oprval.mlit->v; valid_char = TRUE; /* Basic assumption unles proven otherwise */ if (delimval.oprval.tref->opcode == OC_LIT && (1 == (gtm_utf8_mode ? MV_FORCE_LEN(delim_mval) : delim_mval->str.len))) { /* Single char delimiter for set $piece */ UNICODE_ONLY( if (gtm_utf8_mode) { /* We have a supposed single char delimiter but it * must be a valid utf8 char to be used by * op_setp1() and MV_FORCE_LEN won't tell us that. */ valid_char = UTF8_VALID(delim_mval->str.addr, (delim_mval->str.addr + delim_mval->str.len), delimlen); if (!valid_char && !badchar_inhibit) UTF8_BADCHAR(0, delim_mval->str.addr, (delim_mval->str.addr + delim_mval->str.len), 0, NULL); } ); if (valid_char || 1 == delim_mval->str.len) { /* This reference to a one character literal or a single * byte invalid utf8 character that needs to be turned into * an explict formated integer literal instead */ unichar.unichar_val = 0; if (!gtm_utf8_mode) { /* Single byte delimiter */ assert(1 == delim_mval->str.len); UNIX_ONLY(s->opcode = OC_SETZP1); VMS_ONLY(s->opcode = OC_SETP1); unichar.unibytes_val[0] = *delim_mval->str.addr; } UNICODE_ONLY( else { /* Potentially multiple bytes in one int */ assert(SIZEOF(int) >= delim_mval->str.len); memcpy(unichar.unibytes_val, delim_mval->str.addr, delim_mval->str.len); s->opcode = OC_SETP1; } ); delimlit = (mint)unichar.unichar_val; delimiter->operand[0] = put_ilit(delimlit); delim1char = TRUE; } } }
void op_fnlength(mval *src, mval *dest) { MV_FORCE_STR(src); MV_FORCE_LEN(src); MV_FORCE_MVAL(dest, (int)src->str.char_len); }
/* * ---------------------------------------------------------- * Set piece procedure (unicode flavor). * Set pieces first through last to expr. * * Arguments: * src - source mval * del - delimiter string mval * expr - expression string mval * first - starting index in source mval to be set * last - last index * dst - destination mval where the result is saved. * * Return: * none * ---------------------------------------------------------- */ void op_setpiece(mval *src, mval *del, mval *expr, int4 first, int4 last, mval *dst) { size_t str_len, delim_cnt; int match_res, len, src_len, first_src_ind, second_src_ind, numpcs; unsigned char *match_ptr, *src_str, *str_addr, *tmp_str; delimfmt unichar; /* --- code start --- */ assert(gtm_utf8_mode); if (--first < 0) first = 0; second_src_ind = last - first; MV_FORCE_STR(del); /* Null delimiter */ if (0 == del->str.len) { if (first && src->mvtype) { /* concat src & expr to dst */ op_cat(VARLSTCNT(3) dst, src, expr); return; } MV_FORCE_STR(expr); *dst = *expr; return; } MV_FORCE_STR(expr); if (!MV_DEFINED(src)) { first_src_ind = 0; second_src_ind = -1; } else { /* Valid delimiter - See if we can take a short cut to op_fnp1. If so, delimiter value needs to be reformated */ if ((1 == second_src_ind) && (1 == MV_FORCE_LEN(del))) { /* Both valid chars of char_len=1 and single byte invalid chars get the fast path */ unichar.unichar_val = 0; assert(SIZEOF(unichar.unibytes_val) >= del->str.len); memcpy(unichar.unibytes_val, del->str.addr, del->str.len); op_setp1(src, unichar.unichar_val, expr, last, dst); /* Use last since it has not been changed */ return; } /* We have a valid src with something in it */ MV_FORCE_STR(src); src_str = (unsigned char *)src->str.addr; src_len = src->str.len; /* skip all pieces until start one */ if (first) { numpcs = first; /* copy int4 type "first" into "int" type numpcs for passing to matchc */ match_ptr = matchc(del->str.len, (uchar_ptr_t)del->str.addr, src_len, src_str, &match_res, &numpcs); /* Note: "numpcs" is modified above by the function "matchc" to reflect the # of unmatched pieces */ first = numpcs; /* copy updated "numpcs" value back into "first" */ } else { match_ptr = src_str; match_res = 1; } first_src_ind = INTCAST(match_ptr - (unsigned char *)src->str.addr); if (0 == match_res) /* if match not found */ second_src_ind = -1; else { src_len -= INTCAST(match_ptr - src_str); src_str = match_ptr; /* skip # delimiters this piece will replace, e.g. if we are setting * pieces 2 - 4, then the pieces 2-4 will be replaced by one piece - expr. */ match_ptr = matchc(del->str.len, (uchar_ptr_t)del->str.addr, src_len, src_str, &match_res, &second_src_ind); second_src_ind = (0 == match_res) ? -1 : INTCAST(match_ptr - (unsigned char *)src->str.addr - del->str.len); } } delim_cnt = (size_t)first; /* Calculate total string len. */ str_len = (size_t)expr->str.len + ((size_t)first_src_ind + ((size_t)del->str.len * delim_cnt)); /* add len. of trailing chars past insertion point */ if (0 <= second_src_ind) str_len += (size_t)(src->str.len - second_src_ind); if (MAX_STRLEN < str_len) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN); return; } ENSURE_STP_FREE_SPACE((int)str_len); str_addr = stringpool.free; /* copy prefix */ if (first_src_ind) { memcpy(str_addr, src->str.addr, first_src_ind); str_addr += first_src_ind; } /* copy delimiters */ if (gtm_utf8_mode && (1 < del->str.len)) { /* In this mode, delimiters can exceed 1 character so copy them this way */ while (0 < delim_cnt--) { memcpy(str_addr, del->str.addr, del->str.len); str_addr += del->str.len; } } else { /* If delimiters are 1 byte (M mode always and perhaps UTF8 mode), use this simpler/faster method */ memset(str_addr, (char)*del->str.addr, delim_cnt); str_addr += delim_cnt; } /* copy expression */ memcpy(str_addr, expr->str.addr, expr->str.len); str_addr += expr->str.len; /* copy trailing pieces */ if (0 <= second_src_ind) { len = src->str.len - second_src_ind; tmp_str = (unsigned char *)src->str.addr + second_src_ind; memcpy(str_addr, tmp_str, len); str_addr += len; } assert(IS_AT_END_OF_STRINGPOOL(str_addr, -str_len)); dst->mvtype = MV_STR; dst->str.len = INTCAST(str_addr - stringpool.free); dst->str.addr = (char *)stringpool.free; stringpool.free = str_addr; return; }
/* * ---------------------------------------------------------- * Set version of $extract * * Arguments: * src - source mval * expr - expression string mval to be inserted into source * schar - starting character index to be replaced * echar - ending character index to be replaced * dst - destination mval where the result is saved. * * Return: * none * ---------------------------------------------------------- */ void op_setextract(mval *src, mval *expr, int schar, int echar, mval *dst) { int srclen, padlen, pfxlen, sfxoff, sfxlen, dstlen, bytelen, skip, char_len; unsigned char *srcptr, *srcbase, *srctop, *straddr; error_def(ERR_MAXSTRLEN); padlen = pfxlen = sfxlen = 0; MV_FORCE_STR(expr); /* Expression to put into piece place */ if (MV_DEFINED(src)) { MV_FORCE_STR(src); /* Make sure is string prior to length check */ srclen = src->str.len; } else /* Source is not defined -- treat as a null string */ srclen = 0; schar = MAX(schar, 1); /* schar starts at 1 at a minimum */ /* There are four cases in the spec: 1) schar > echar or echar < 1 -- glvn and naked indicator are not changed. This is handled by generated code in m_set 2) echar >= schar-1 > $L(src) -- dst = src_$J("",schar-1-$L(src))_expr 3) schar-1 <= $L(src) < echar -- dst = $E(src,1,schar-1)_expr 4) All others -- dst = $E(src,1,schar-1)_expr_$E(src,echar+1,$L(src)) */ srcbase = (unsigned char *)src->str.addr; srctop = srcbase + srclen; for (srcptr = srcbase, skip = schar - 1; (skip > 0 && srcptr < srctop); --skip) { /* skip the first schar - 1 characters */ if (!UTF8_VALID(srcptr, srctop, bytelen) && !badchar_inhibit) utf8_badchar(0, srcptr, srctop, 0, NULL); srcptr += bytelen; } pfxlen = (int)(srcptr - srcbase); if (skip > 0) { /* Case #2: schar is past the string length. echar test handled in generated code. Should be padded with as many spaces as characters remained to be skipped */ padlen = skip; } for (skip = echar - schar + 1; (skip > 0 && srcptr < srctop); --skip) { /* skip up to the character position echar */ if (!UTF8_VALID(srcptr, srctop, bytelen) && !badchar_inhibit) utf8_badchar(0, srcptr, srctop, 0, NULL); srcptr += bytelen; } char_len = 0; if (skip <= 0) { /* Case #4: echar is within the string length, suffix to be added */ sfxoff = (int)(srcptr - srcbase); sfxlen = (int)(srctop - srcptr); if (!badchar_inhibit && sfxlen > 0) { /* validate the suffix, and we can compute char_len as well */ for (; (srcptr < srctop); ++char_len) { if (!UTF8_VALID(srcptr, srctop, bytelen)) utf8_badchar(0, srcptr, srctop, 0, NULL); srcptr += bytelen; } MV_FORCE_LEN(expr); char_len += schar - 1 + expr->str.char_len; } } /* Calculate total string len */ dstlen = pfxlen + padlen + expr->str.len + sfxlen; if (dstlen > MAX_STRLEN) rts_error(VARLSTCNT(1) ERR_MAXSTRLEN); ENSURE_STP_FREE_SPACE(dstlen); srcbase = (unsigned char *)src->str.addr; straddr = stringpool.free; if (0 < pfxlen) { /* copy prefix */ memcpy(straddr, srcbase, pfxlen); straddr += pfxlen; } if (0 < padlen) { /* insert padding */ memset(straddr, ' ', padlen); straddr += padlen; } if (0 < expr->str.len) { /* copy expression */ memcpy(straddr, expr->str.addr, expr->str.len); straddr += expr->str.len; } if (0 < sfxlen) { /* copy suffix */ memcpy(straddr, srcbase + sfxoff, sfxlen); straddr += sfxlen; } assert(straddr - stringpool.free == dstlen); MV_INIT_STRING(dst, straddr - stringpool.free, (char *)stringpool.free); if (0 < char_len) { dst->mvtype |= MV_UTF_LEN; dst->str.char_len = char_len; } stringpool.free = straddr; }