Пример #1
0
int f_piece(oprtype *a, opctype op)
{
	mval		*delim_mval;
	triple		*delimiter, *first, *last, *r;
	oprtype		x;
	delimfmt	unichar;

	error_def(ERR_COMMA);

	r = maketriple(op);
	if (!strexpr(&(r->operand[0])))
		return FALSE;
	if (window_token != TK_COMMA)
	{
		stx_error(ERR_COMMA);
		return FALSE;
	}
	advancewindow();
	delimiter = newtriple(OC_PARAMETER);
	r->operand[1] = put_tref(delimiter);
	first = newtriple(OC_PARAMETER);
	delimiter->operand[1] = put_tref(first);
	if (!strexpr(&x))
		return FALSE;
	if (window_token != TK_COMMA)
		first->operand[0] = put_ilit(1);
	else
	{
		advancewindow();
		if (!intexpr(&(first->operand[0])))
			return FALSE;
	}
	assert(x.oprclass == TRIP_REF);
	if (window_token != TK_COMMA && x.oprval.tref->opcode == OC_LIT &&
	    (1 == ((gtm_utf8_mode && OC_FNZPIECE != op) ?  MV_FORCE_LEN(&x.oprval.tref->operand[0].oprval.mlit->v) :
		   x.oprval.tref->operand[0].oprval.mlit->v.str.len)))
	{	/* Potential shortcut to op_fnzp1 or op_fnp1. Make some further checks */
		delim_mval = &x.oprval.tref->operand[0].oprval.mlit->v;
		/* Both valid chars of char_len 1 and invalid chars of byte length 1 get the fast path */
		unichar.unichar_val = 0;
		if (!gtm_utf8_mode || OC_FNZPIECE == op)
		{       /* Single byte delimiter */
			r->opcode = OC_FNZP1;
			unichar.unibytes_val[0] = *delim_mval->str.addr;
		} else
		{       /* Potentially multiple bytes in one int */
			r->opcode = OC_FNP1;
			assert(SIZEOF(int) >= delim_mval->str.len);
			memcpy(unichar.unibytes_val, delim_mval->str.addr, delim_mval->str.len);
		}
		delimiter->operand[0] = put_ilit(unichar.unichar_val);
		ins_triple(r);
		*a = put_tref(r);
		return TRUE;
	}
Пример #2
0
int m_set(void)
{
	/* Some comment on "parse_warn". It is set to TRUE whenever the parse encounters an
	   invalid setleft target.

	   * Note that even if "parse_warn" is TRUE, we should not return FALSE right away but need to continue the parse
	   * until the end of the current SET command. This way any remaining commands in the current parse line will be
	   * parsed and triples generated for them. This is necessary just in case the currently parsed invalid SET command
	   * does not get executed at runtime (due to postconditionals etc.)
	   *
	   * Some comment on the need for "first_setleft_invalid". This variable is needed only in the
	   * case we encounter an invalid-SVN/invalid-FCN/unsettable-SVN as a target of the SET. We need to evaluate the
	   * right-hand-side of the SET command only if at least one valid setleft target is parsed before an invalid setleft
	   * target is encountered. This is because we still need to execute the valid setlefts at runtime before triggering
	   * a runtime error for the invalid setleft. If the first setleft target is an invalid one, then there is no need
	   * to evaluate the right-hand-side. In fact, in this case, adding triples (corresponding to the right hand side)
	   * to the execution chain could cause problems with emit_code later in the compilation as the destination
	   * for the right hand side triples could now be undefined (for example a valid SVN on the left side of the
	   * SET would have generated an OC_SVPUT triple with one of its operands holding the result of the right
	   * hand side evaluation, but an invalid SVN on the left side which would have instead caused an OC_RTERROR triple
	   * to have been generated leaving no triple to receive the result of the right hand side evaluation thus causing
	   * emit_code to be confused and GTMASSERT). Therefore discard all triples generated by the right hand side in this case.
	   * By the same reasoning, discard all triples generated by setleft targets AFTER this invalid one as well.
	   * "first_setleft_invalid" is set to TRUE if the first setleft target is invalid and set to FALSE if the first setleft
	   * target is valid. It is initialized to -1 before the start of the parse.
	   */

	int		index, setop, delimlen;
	int		first_val_lit, last_val_lit, nakedzalias;
	boolean_t	first_is_lit, last_is_lit, got_lparen, delim1char, is_extract, valid_char;
	boolean_t 	alias_processing, have_lh_alias;
	opctype		put_oc;
	oprtype		v, delimval, firstval, lastval, *result, resptr;
	triple		*curtargchain, *delimiter, discardcurtchain, *first, *get, *jmptrp1, *jmptrp2, *last, *obp, *put;
	triple		*s, *s0, *s1, save_targchain, *save_curtchain, *save_curtchain1, *sub, targchain, *tmp;
	mint		delimlit;
	mval		*delim_mval;
	mvar		*mvarptr;
	boolean_t	parse_warn;	/* set to TRUE in case of an invalid SVN etc. */
	boolean_t	curtchain_switched;	/* set to TRUE if a setcurtchain was done */
	int		first_setleft_invalid;	/* set to TRUE if the first setleft target is invalid */
	boolean_t	temp_subs_was_FALSE;
	union
	{
		uint4		unichar_val;
		unsigned char	unibytes_val[4];
	} unichar;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	TREF(temp_subs) = FALSE;
	dqinit(&targchain, exorder);
	result = (oprtype *)mcalloc(SIZEOF(oprtype));
	resptr = put_indr(result);
	delimiter = sub = last = NULL;
	/* A SET clause must be entirely alias related or a normal set. Parenthized multiple sets of aliases are not allowed
	 * and will trigger an error. This is because the source and targets of aliases require different values and references
	 * than normal sets do and thus cannot be mixed.
	 */
	if (alias_processing = (TK_ASTERISK == window_token))
		advancewindow();
	if (got_lparen = (TK_LPAREN == window_token))
	{
		if (alias_processing)
			stx_error(ERR_NOALIASLIST);
		advancewindow();
		TREF(temp_subs) = TRUE;
	}
	/* Some explanation: The triples from the left hand side of the SET expression that are
	 * expressly associated with fetching (in case of set $piece/$extract) and/or storing of
	 * the target value are removed from curtchain and placed on the targchain. Later, these
	 * triples will be added to the end of curtchain to do the finishing store of the target
	 * after the righthand side has been evaluated. This is per the M standard.
	 *
	 * Note that SET $PIECE/$EXTRACT have special conditions in which the first argument is not referenced at all.
	 * (e.g. set $piece(^a," ",3,2) in this case 3 > 2 so this should not evaluate ^a and therefore should not
	 * modify the naked indicator). That is, the triples that do these conditional checks need to be inserted
	 * ahead of the OC_GVNAME of ^a, all of which need to be inserted on the targchain. But the conditionalization
	 * can be done only after parsing the first argument of the SET $PIECE and examining the remaining arguments.
	 * Therefore we maintain the "curtargchain" variable which stores the value of the "targchain" at the beginning
	 * of the iteration (at the start of the $PIECE parsing) and all the conditionalization will be inserted right
	 * here which is guaranteed to be ahead of where the OC_GVNAME gets inserted.
	 *
	 * For example, SET $PIECE(^A(x,y),delim,first,last)=RHS will generate a final triple chain as follows
	 *
	 *	A - Triples to evaluate subscripts (x,y) of the global ^A
	 *	A - Triples to evaluate delim
	 *	A - Triples to evaluate first
	 *	A - Triples to evaluate last
	 *	B - Triples to evaluate RHS
	 *	C - Triples to do conditional check (e.g. first > last etc.)
	 *	C - Triples to branch around if the checks indicate this is a null operation SET $PIECE
	 *	D - Triple that does OC_GVNAME of ^A
	 *	D - Triple that does OC_SETPIECE to determine the new value
	 *	D - Triple that does OC_GVPUT of the new value into ^A(x,y)
	 *	This is the point where the conditional check triples will branch around to if they chose to.
	 *
	 *	A - triples that evaluates the arguments/subscripts in the left-hand-side of the SET command
	 *		These triples are built in "curtchain"
	 *	B - triples that evaluates the arguments/subscripts in the right-hand-side of the SET command
	 *		These triples are built in "curtchain"
	 *	C - triples that do conditional check for any $PIECE/$EXTRACT in the left side of the SET command.
	 *		These triples are built in "curtargchain"
	 *	D - triples that generate the reference to the target of the SET and the store into the target.
	 *		These triples are built in "targchain"
	 *
	 * Note alias processing does not support the SET *(...)=.. type syntax because the type of argument
	 * created for RHS processing is dependent on the LHS receiver type and we do not support more than one
	 * type of source argument in a single SET.
	 */
	first_setleft_invalid = FIRST_SETLEFT_NOTSEEN;
	curtchain_switched = FALSE;
	nakedzalias = have_lh_alias = FALSE;
	save_curtchain = NULL;
	assert(FIRST_SETLEFT_NOTSEEN != TRUE);
	assert(FIRST_SETLEFT_NOTSEEN != FALSE);
	for (parse_warn = FALSE; ; parse_warn = FALSE)
	{
		curtargchain = targchain.exorder.bl;
		jmptrp1 = jmptrp2 = NULL;
		delim1char = is_extract = FALSE;
		allow_dzwrtac_as_mident();	/* Allows $ZWRTACxxx as target to be treated as an mident */
		switch (window_token)
		{
			case TK_IDENT:
				/* A slight diversion first. If this is a $ZWRTAC set (indication of $ in first char
				 * is currently enough to signify that), then we need to check a few conditions first.
				 * If this is a "naked $ZWRTAC", meaning no numeric suffix, then this is a flag that
				 * all the $ZWRTAC vars in the local variable tree need to be kill *'d which will not
				 * be generating a SET instruction. First we need to verify that fact and make sure
				 * we are not in PARENs and not doing alias processing. Note *any* value can be
				 * specified as the source but while it will be evaluated, it is NOT stored anywhere.
				 */
				if ('$' == *window_ident.addr)
				{	/* We have a $ZWRTAC<xx> target */
					if (got_lparen)
						/* We don't allow $ZWRTACxxx to be specified in a parenthesized list.
						 * Verify that first
						 */
						SYNTAX_ERROR(ERR_DZWRNOPAREN);
					if (STR_LIT_LEN(DOLLAR_ZWRTAC) == window_ident.len)
					{	/* Ok, this is a naked $ZWRTAC targeted set */
						if (alias_processing)
							SYNTAX_ERROR(ERR_DZWRNOALIAS);
						nakedzalias = TRUE;
						/* This opcode doesn't really need args but it is easier to fit in with the rest
						 * of m_set processing to pass it the result arg, which there may actually be
						 * a use for someday..
						 */
						put = maketriple(OC_CLRALSVARS);
						put->operand[0] = resptr;
						dqins(targchain.exorder.bl, exorder, put);
						advancewindow();
						break;
					}
				}
				/* If we are doing alias processing, there are two possibilities:
				 *  1) LHS is unsubscripted - it is an alias variable being created or replaced. Need to parse
				 *  the varname as if this were a regular set.
				 *  2) LHS is subscripted - it is an alias container variable being created or replaced. The
				 *  processing here is to pass the base variable index to the store routine so bypass the
				 *  lvn() call.
				 */
				if (!alias_processing || TK_LPAREN == director_token)
				{	/* Normal variable processing or we have a lh alias container */
					if (!lvn(&v, OC_PUTINDX, 0))
						SYNTAX_ERROR_NOREPORT_HERE;
					if (OC_PUTINDX == v.oprval.tref->opcode)
					{
						dqdel(v.oprval.tref, exorder);
						dqins(targchain.exorder.bl, exorder, v.oprval.tref);
						sub = v.oprval.tref;
						put_oc = OC_PUTINDX;
						if (TREF(temp_subs))
							m_set_create_temporaries(sub, put_oc);
					}
				} else
				{	/* Have alias variable. Argument is index into var table rather than pointer to var */
					have_lh_alias = TRUE;
					/* We only want the variable index in this case. Since the entire hash structure to which
					 * this variable is going to be pointing to is changing, doing anything that calls fetch()
					 * is somewhat pointless so we avoid it by just accessing the variable information
					 * directly.
					 */
					mvarptr = get_mvaddr(&window_ident);
					v = put_ilit(mvarptr->mvidx);
					advancewindow();
				}
				/* Determine correct storing triple */
				put = maketriple((!alias_processing ? OC_STO :
						  (have_lh_alias ? OC_SETALS2ALS : OC_SETALSIN2ALSCT)));
				put->operand[0] = v;
				put->operand[1] = resptr;
				dqins(targchain.exorder.bl, exorder, put);
				break;
			case TK_CIRCUMFLEX:
				if (alias_processing)
					SYNTAX_ERROR(ERR_ALIASEXPECTED);
				s1 = curtchain->exorder.bl;
				if (!gvn())
					SYNTAX_ERROR_NOREPORT_HERE;
				for (sub = curtchain->exorder.bl; sub != s1; sub = sub->exorder.bl)
				{
					put_oc = sub->opcode;
					if (OC_GVNAME == put_oc || OC_GVNAKED == put_oc || OC_GVEXTNAM == put_oc)
						break;
				}
				assert(OC_GVNAME == put_oc || OC_GVNAKED == put_oc || OC_GVEXTNAM == put_oc);
				dqdel(sub, exorder);
				dqins(targchain.exorder.bl, exorder, sub);
				if (TREF(temp_subs))
					m_set_create_temporaries(sub, put_oc);
				put = maketriple(OC_GVPUT);
				put->operand[0] = resptr;
				dqins(targchain.exorder.bl, exorder, put);
				break;
			case TK_ATSIGN:
				if (alias_processing)
					SYNTAX_ERROR(ERR_ALIASEXPECTED);
				if (!indirection(&v))
					SYNTAX_ERROR_NOREPORT_HERE;
				if (!got_lparen && TK_EQUAL != window_token)
				{
					assert(!curtchain_switched);
					put = newtriple(OC_COMMARG);
					put->operand[0] = v;
					put->operand[1] = put_ilit(indir_set);
					return TRUE;
				}
				put = maketriple(OC_INDSET);
				put->operand[0] = v;
				put->operand[1] = resptr;
				dqins(targchain.exorder.bl, exorder, put);
				break;
			case TK_DOLLAR:
				if (alias_processing)
					SYNTAX_ERROR(ERR_ALIASEXPECTED);
				advancewindow();
				if (TK_IDENT != window_token)
					SYNTAX_ERROR(ERR_VAREXPECTED);
				if (TK_LPAREN != director_token)
				{	/* Look for intrinsic special variables */
					s1 = curtchain->exorder.bl;
					if (0 > (index = namelook(svn_index, svn_names, window_ident.addr, window_ident.len)))
					{
						STX_ERROR_WARN(ERR_INVSVN);	/* sets "parse_warn" to TRUE */
					} else if (!svn_data[index].can_set)
					{
						STX_ERROR_WARN(ERR_SVNOSET);	/* sets "parse_warn" to TRUE */
					}
					advancewindow();
					if (!parse_warn)
					{
						if (SV_ETRAP != svn_data[index].opcode && SV_ZTRAP != svn_data[index].opcode)
						{	/* Setting of $ZTRAP or $ETRAP must go through opp_svput because they
							 * may affect the stack pointer. All others directly to op_svput().
							 */
							put = maketriple(OC_SVPUT);
						} else
							put = maketriple(OC_PSVPUT);
						put->operand[0] = put_ilit(svn_data[index].opcode);
						put->operand[1] = resptr;
						dqins(targchain.exorder.bl, exorder, put);
					} else
					{	/* OC_RTERROR triple would have been inserted in curtchain by ins_errtriple
						 * (invoked by stx_error). To maintain consistency with the "if" portion of
						 * this code, we need to move this triple to the "targchain".
						 */
						tmp = curtchain->exorder.bl; /* corresponds to put_ilit(FALSE) in ins_errtriple */
						tmp = tmp->exorder.bl;	/* corresponds to put_ilit(in_error) in ins_errtriple */
						tmp = tmp->exorder.bl;	/* corresponds to newtriple(OC_RTERROR) in ins_errtriple */
						assert(OC_RTERROR == tmp->opcode);
						dqdel(tmp, exorder);
						dqins(targchain.exorder.bl, exorder, tmp);
						CHKTCHAIN(&targchain);
					}
					break;
				}
				/* Only 4 function names allowed on left side: $[Z]Piece and $[Z]Extract */
				index = namelook(fun_index, fun_names, window_ident.addr, window_ident.len);
				if (0 > index)
				{
					STX_ERROR_WARN(ERR_INVFCN);	/* sets "parse_warn" to TRUE */
					/* OC_RTERROR triple would have been inserted in "curtchain" by ins_errtriple
					 * (invoked by stx_error). We need to switch it to "targchain" to be consistent
					 * with every other codepath in this module.
					 */
					tmp = curtchain->exorder.bl; /* corresponds to put_ilit(FALSE) in ins_errtriple */
					tmp = tmp->exorder.bl;	/* corresponds to put_ilit(in_error) in ins_errtriple */
					tmp = tmp->exorder.bl;	/* corresponds to newtriple(OC_RTERROR) in ins_errtriple */
					assert(OC_RTERROR == tmp->opcode);
					dqdel(tmp, exorder);
					dqins(targchain.exorder.bl, exorder, tmp);
					CHKTCHAIN(&targchain);
					advancewindow();	/* skip past the function name */
					advancewindow();	/* skip past the left paren */
					/* Parse the remaining arguments until corresponding RIGHT-PAREN/SPACE/EOL is reached */
					if (!parse_until_rparen_or_space())
						SYNTAX_ERROR_NOREPORT_HERE;
				} else
				{
					switch(fun_data[index].opcode)
					{
						case OC_FNPIECE:
							setop = OC_SETPIECE;
							break;
						case OC_FNEXTRACT:
							is_extract = TRUE;
							setop = OC_SETEXTRACT;
							break;
						case OC_FNZPIECE:
							setop = OC_SETZPIECE;
							break;
						case OC_FNZEXTRACT:
							is_extract = TRUE;
							setop = OC_SETZEXTRACT;
							break;
						default:
							SYNTAX_ERROR(ERR_VAREXPECTED);
					}
					advancewindow();
					advancewindow();
					/* Although we see the get (target) variable first, we need to save it's processing
					 * on another chain -- the targchain -- because the retrieval of the target is bypassed
					 * and the naked indicator is not reset if the first/last parameters are not set in a
					 * logical manner (must be > 0 and first <= last). So the evaluation order is
					 * delimiter (if $piece), first, last, RHS of the set and then the target if applicable.
					 * Set up primary action triple now since it is ref'd by the put triples generated below.
					 */
					s = maketriple(setop);
					/* Even for SET[Z]PIECE and SET[Z]EXTRACT, the SETxxxxx opcodes
					 * do not do the final store, they only create the final value TO be
					 * stored so generate the triples that will actually do the store now.
					 * Note we are still building triples on the original curtchain.
					 */
					switch (window_token)
					{
						case TK_IDENT:
							if (!lvn(&v, OC_PUTINDX, 0))
								SYNTAX_ERROR(ERR_VAREXPECTED);
							if (OC_PUTINDX == v.oprval.tref->opcode)
							{
								dqdel(v.oprval.tref, exorder);
								dqins(targchain.exorder.bl, exorder, v.oprval.tref);
								sub = v.oprval.tref;
								put_oc = OC_PUTINDX;
								if (TREF(temp_subs))
									m_set_create_temporaries(sub, put_oc);
							}
							get = maketriple(OC_FNGET);
							get->operand[0] = v;
							put = maketriple(OC_STO);
							put->operand[0] = v;
							put->operand[1] = put_tref(s);
							break;
						case TK_ATSIGN:
							if (!indirection(&v))
								SYNTAX_ERROR(ERR_VAREXPECTED);
							get = maketriple(OC_INDGET);
							get->operand[0] = v;
							get->operand[1] = put_str(0, 0);
							put = maketriple(OC_INDSET);
							put->operand[0] = v;
							put->operand[1] = put_tref(s);
							break;
						case TK_CIRCUMFLEX:
							s1 = curtchain->exorder.bl;
							if (!gvn())
								SYNTAX_ERROR_NOREPORT_HERE;
							for (sub = curtchain->exorder.bl; sub != s1 ; sub = sub->exorder.bl)
							{
								put_oc = sub->opcode;
								if ((OC_GVNAME == put_oc) || (OC_GVNAKED == put_oc)
								    || (OC_GVEXTNAM == put_oc))
									break;
							}
							assert((OC_GVNAME == put_oc) || (OC_GVNAKED == put_oc)
							       || (OC_GVEXTNAM == put_oc));
							dqdel(sub, exorder);
							dqins(targchain.exorder.bl, exorder, sub);
							if (TREF(temp_subs))
								m_set_create_temporaries(sub, put_oc);
							get = maketriple(OC_FNGVGET);
							get->operand[0] = put_str(0, 0);
							put = maketriple(OC_GVPUT);
							put->operand[0] = put_tref(s);
							break;
						default:
							SYNTAX_ERROR(ERR_VAREXPECTED);
					}
					s->operand[0] = put_tref(get);
					/* Code to fetch args for target triple are on targchain. Put get there now too. */
					dqins(targchain.exorder.bl, exorder, get);
					CHKTCHAIN(&targchain);
					if (!is_extract)
					{	/* Set $[z]piece */
						delimiter = newtriple(OC_PARAMETER);
						s->operand[1] = put_tref(delimiter);
						first = newtriple(OC_PARAMETER);
						delimiter->operand[1] = put_tref(first);
						/* Process delimiter string ($[z]piece only) */
						if (TK_COMMA != window_token)
							SYNTAX_ERROR(ERR_COMMA);
						advancewindow();
						if (!strexpr(&delimval))
							SYNTAX_ERROR_NOREPORT_HERE;
						assert(TRIP_REF == delimval.oprclass);
					} else
					{	/* Set $[Z]Extract */
						first = newtriple(OC_PARAMETER);
						s->operand[1] = put_tref(first);
					}
					/* Process first integer value */
					if (window_token != TK_COMMA)
						firstval = put_ilit(1);
					else
					{
						advancewindow();
						if (!intexpr(&firstval))
							SYNTAX_ERROR(ERR_COMMA);
						assert(firstval.oprclass == TRIP_REF);
					}
					first->operand[0] = firstval;
					if (first_is_lit = (OC_ILIT == firstval.oprval.tref->opcode))
					{
						assert(ILIT_REF ==firstval.oprval.tref->operand[0].oprclass);
						first_val_lit = firstval.oprval.tref->operand[0].oprval.ilit;
					}
					if (TK_COMMA != window_token)
					{	/* There is no "last" value. Only if 1 char literal delimiter and
						 * no "last" value can we generate shortcut code to op_set[z]p1 entry
						 * instead of op_set[z]piece. Note if UTF8 mode is in effect, then this
						 * optimization applies if the literal is one unicode char which may in
						 * fact be up to 4 bytes but will still be passed as a single unsigned
						 * integer.
						 */
						if (!is_extract)
						{
							delim_mval = &delimval.oprval.tref->operand[0].oprval.mlit->v;
							valid_char = TRUE;	/* Basic assumption unles proven otherwise */
							if (delimval.oprval.tref->opcode == OC_LIT &&
							    (1 == (gtm_utf8_mode ?
								   MV_FORCE_LEN(delim_mval) : delim_mval->str.len)))
							{	/* Single char delimiter for set $piece */
								UNICODE_ONLY(
									if (gtm_utf8_mode)
									{	/*  We have a supposed single char delimiter but it
										 *  must be a valid utf8 char to be used by
										 *  op_setp1() and MV_FORCE_LEN won't tell us that.
										 */
										valid_char = UTF8_VALID(delim_mval->str.addr,
													(delim_mval->str.addr
													 + delim_mval->str.len),
													delimlen);
										if (!valid_char && !badchar_inhibit)
											UTF8_BADCHAR(0, delim_mval->str.addr,
												     (delim_mval->str.addr
												      + delim_mval->str.len),
												     0, NULL);
									}
									     );
								if (valid_char || 1 == delim_mval->str.len)
								{	/* This reference to a one character literal or a single
									 * byte invalid utf8 character that needs to be turned into
									 * an explict formated integer literal instead
									 */
									unichar.unichar_val = 0;
									if (!gtm_utf8_mode)
									{	/* Single byte delimiter */
										assert(1 == delim_mval->str.len);
										UNIX_ONLY(s->opcode = OC_SETZP1);
										VMS_ONLY(s->opcode = OC_SETP1);
										unichar.unibytes_val[0] = *delim_mval->str.addr;
									}
									UNICODE_ONLY(
								        else
									{	/* Potentially multiple bytes in one int */
										assert(SIZEOF(int) >= delim_mval->str.len);
										memcpy(unichar.unibytes_val,
										       delim_mval->str.addr,
										       delim_mval->str.len);
										s->opcode = OC_SETP1;
									}
										     );
									delimlit = (mint)unichar.unichar_val;
									delimiter->operand[0] = put_ilit(delimlit);
									delim1char = TRUE;
								}
							}
						}
Пример #3
0
void op_fnlength(mval *src, mval *dest)
{
	MV_FORCE_STR(src);
	MV_FORCE_LEN(src);
	MV_FORCE_MVAL(dest, (int)src->str.char_len);
}
Пример #4
0
/*
 * ----------------------------------------------------------
 * Set piece procedure (unicode flavor).
 * Set pieces first through last to expr.
 *
 * Arguments:
 *	src	- source mval
 *	del	- delimiter string mval
 *	expr	- expression string mval
 *	first	- starting index in source mval to be set
 *	last	- last index
 *	dst	- destination mval where the result is saved.
 *
 * Return:
 *	none
 * ----------------------------------------------------------
 */
void op_setpiece(mval *src, mval *del, mval *expr, int4 first, int4 last, mval *dst)
{
	size_t		str_len, delim_cnt;
	int 		match_res, len, src_len, first_src_ind, second_src_ind, numpcs;
	unsigned char 	*match_ptr, *src_str, *str_addr, *tmp_str;
	delimfmt	unichar;

	/* --- code start --- */
	assert(gtm_utf8_mode);
	if (--first < 0)
		first = 0;
	second_src_ind = last - first;
	MV_FORCE_STR(del);
	/* Null delimiter */
	if (0 == del->str.len)
	{
		if (first && src->mvtype)
		{	/* concat src & expr to dst */
			op_cat(VARLSTCNT(3) dst, src, expr);
			return;
		}
		MV_FORCE_STR(expr);
		*dst = *expr;
		return;
	}
	MV_FORCE_STR(expr);
	if (!MV_DEFINED(src))
	{
		first_src_ind = 0;
		second_src_ind = -1;
	} else
	{
		/* Valid delimiter -  See if we can take a short cut to op_fnp1. If so, delimiter value needs to be reformated */
		if ((1 == second_src_ind) && (1 == MV_FORCE_LEN(del)))
		{	/* Both valid chars of char_len=1 and single byte invalid chars get the fast path */
			unichar.unichar_val = 0;
			assert(SIZEOF(unichar.unibytes_val) >= del->str.len);
			memcpy(unichar.unibytes_val, del->str.addr, del->str.len);
			op_setp1(src, unichar.unichar_val, expr, last, dst); /* Use last since it has not been changed */
			return;
		}
		/* We have a valid src with something in it */
		MV_FORCE_STR(src);
		src_str = (unsigned char *)src->str.addr;
		src_len = src->str.len;
		/* skip all pieces until start one */
		if (first)
		{
			numpcs = first;	/* copy int4 type "first" into "int" type numpcs for passing to matchc */
			match_ptr = matchc(del->str.len, (uchar_ptr_t)del->str.addr, src_len, src_str, &match_res, &numpcs);
			/* Note: "numpcs" is modified above by the function "matchc" to reflect the # of unmatched pieces */
			first = numpcs;	/* copy updated "numpcs" value back into "first" */
		} else
		{
			match_ptr = src_str;
			match_res = 1;
		}
		first_src_ind = INTCAST(match_ptr - (unsigned char *)src->str.addr);
		if (0 == match_res) /* if match not found */
			second_src_ind = -1;
		else
		{
			src_len -= INTCAST(match_ptr - src_str);
			src_str = match_ptr;
			/* skip # delimiters this piece will replace, e.g. if we are setting
			 * pieces 2 - 4, then the pieces 2-4 will be replaced by one piece - expr.
			 */
			match_ptr = matchc(del->str.len, (uchar_ptr_t)del->str.addr, src_len, src_str, &match_res, &second_src_ind);
			second_src_ind = (0 == match_res) ? -1 : INTCAST(match_ptr - (unsigned char *)src->str.addr - del->str.len);
		}
	}
	delim_cnt = (size_t)first;
	/* Calculate total string len. */
	str_len = (size_t)expr->str.len + ((size_t)first_src_ind + ((size_t)del->str.len * delim_cnt));
	/* add len. of trailing chars past insertion point */
	if (0 <= second_src_ind)
		str_len += (size_t)(src->str.len - second_src_ind);
	if (MAX_STRLEN < str_len)
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
		return;
	}
	ENSURE_STP_FREE_SPACE((int)str_len);
	str_addr = stringpool.free;
	/* copy prefix */
	if (first_src_ind)
	{
		memcpy(str_addr, src->str.addr, first_src_ind);
		str_addr += first_src_ind;
	}
	/* copy delimiters */
	if (gtm_utf8_mode && (1 < del->str.len))
	{	/* In this mode, delimiters can exceed 1 character so copy them this way */
		while (0 < delim_cnt--)
		{
			memcpy(str_addr, del->str.addr, del->str.len);
			str_addr += del->str.len;
		}
	} else
	{	/* If delimiters are 1 byte (M mode always and perhaps UTF8 mode), use this simpler/faster method */
		memset(str_addr, (char)*del->str.addr, delim_cnt);
		str_addr += delim_cnt;
	}
	/* copy expression */
	memcpy(str_addr, expr->str.addr, expr->str.len);
	str_addr += expr->str.len;
	/* copy trailing pieces */
	if (0 <= second_src_ind)
	{
		len = src->str.len - second_src_ind;
		tmp_str = (unsigned char *)src->str.addr + second_src_ind;
		memcpy(str_addr, tmp_str, len);
		str_addr += len;
	}
	assert(IS_AT_END_OF_STRINGPOOL(str_addr, -str_len));
	dst->mvtype = MV_STR;
	dst->str.len = INTCAST(str_addr - stringpool.free);
	dst->str.addr = (char *)stringpool.free;
	stringpool.free = str_addr;
	return;
}
Пример #5
0
/*
 * ----------------------------------------------------------
 * Set version of $extract
 *
 * Arguments:
 *	src	- source mval
 *	expr	- expression string mval to be inserted into source
 *	schar	- starting character index to be replaced
 *	echar	- ending character index to be replaced
 *	dst	- destination mval where the result is saved.
 *
 * Return:
 *	none
 * ----------------------------------------------------------
 */
void op_setextract(mval *src, mval *expr, int schar, int echar, mval *dst)
{
	int		srclen, padlen, pfxlen, sfxoff, sfxlen, dstlen, bytelen, skip, char_len;
	unsigned char	*srcptr, *srcbase, *srctop, *straddr;

	error_def(ERR_MAXSTRLEN);

	padlen = pfxlen = sfxlen = 0;
	MV_FORCE_STR(expr);	/* Expression to put into piece place */
	if (MV_DEFINED(src))
	{
		MV_FORCE_STR(src);	/* Make sure is string prior to length check */
		srclen = src->str.len;
	} else	/* Source is not defined -- treat as a null string */
		srclen = 0;
	schar = MAX(schar, 1);	/* schar starts at 1 at a minimum */

	/* There are four cases in the spec:
	   1) schar > echar or echar < 1 -- glvn and naked indicator are not changed. This is
	                                    handled by generated code in m_set
	   2) echar >= schar-1 > $L(src) -- dst = src_$J("",schar-1-$L(src))_expr
	   3) schar-1 <= $L(src) < echar -- dst = $E(src,1,schar-1)_expr
	   4) All others                 -- dst = $E(src,1,schar-1)_expr_$E(src,echar+1,$L(src))
	*/
	srcbase = (unsigned char *)src->str.addr;
	srctop = srcbase + srclen;
	for (srcptr = srcbase, skip = schar - 1; (skip > 0 && srcptr < srctop); --skip)
	{ /* skip the first schar - 1 characters */
		if (!UTF8_VALID(srcptr, srctop, bytelen) && !badchar_inhibit)
			utf8_badchar(0, srcptr, srctop, 0, NULL);
		srcptr += bytelen;
	}
	pfxlen = (int)(srcptr - srcbase);
	if (skip > 0)
	{ /* Case #2: schar is past the string length. echar test handled in generated code.
	     Should be padded with as many spaces as characters remained to be skipped */
		padlen = skip;
	}
	for (skip = echar - schar + 1; (skip > 0 && srcptr < srctop); --skip)
	{ /* skip up to the character position echar */
		if (!UTF8_VALID(srcptr, srctop, bytelen) && !badchar_inhibit)
			utf8_badchar(0, srcptr, srctop, 0, NULL);
		srcptr += bytelen;
	}
	char_len = 0;
	if (skip <= 0)
	{ /* Case #4: echar is within the string length, suffix to be added */
		sfxoff = (int)(srcptr - srcbase);
		sfxlen = (int)(srctop - srcptr);
		if (!badchar_inhibit && sfxlen > 0)
		{ /* validate the suffix, and we can compute char_len as well */
			for (; (srcptr < srctop); ++char_len)
			{
				if (!UTF8_VALID(srcptr, srctop, bytelen))
					utf8_badchar(0, srcptr, srctop, 0, NULL);
				srcptr += bytelen;
			}
			MV_FORCE_LEN(expr);
			char_len += schar - 1 + expr->str.char_len;
		}
	}

	/* Calculate total string len */
	dstlen = pfxlen + padlen + expr->str.len + sfxlen;
	if (dstlen > MAX_STRLEN)
		rts_error(VARLSTCNT(1) ERR_MAXSTRLEN);
	ENSURE_STP_FREE_SPACE(dstlen);

	srcbase = (unsigned char *)src->str.addr;
	straddr = stringpool.free;

	if (0 < pfxlen)
	{ /* copy prefix */
		memcpy(straddr, srcbase, pfxlen);
		straddr += pfxlen;
	}
	if (0 < padlen)
	{ /* insert padding */
		memset(straddr, ' ', padlen);
		straddr += padlen;
	}
	if (0 < expr->str.len)
	{ /* copy expression */
		memcpy(straddr, expr->str.addr, expr->str.len);
		straddr += expr->str.len;
	}
	if (0 < sfxlen)
	{ /* copy suffix */
		memcpy(straddr, srcbase + sfxoff, sfxlen);
		straddr += sfxlen;
	}
	assert(straddr - stringpool.free == dstlen);
	MV_INIT_STRING(dst, straddr - stringpool.free, (char *)stringpool.free);
	if (0 < char_len)
	{
		dst->mvtype |= MV_UTF_LEN;
		dst->str.char_len = char_len;
	}
	stringpool.free = straddr;
}