示例#1
0
文件: gtm_conv.c 项目: mihawk/fis-gtm
int gtm_conv(UConverter* from, UConverter* to, mstr *src, char* dstbuff, int* bufflen)
{
	char		*dstptr, *dstbase, *srcptr;
	const char	*ichset;
	int		dstlen, src_charlen, srclen;
	UErrorCode	status, status1;

	if (0 == src->len)
		return 0;
	if (NULL == dstbuff)
	{
		/* Compute the stringpool buffer space needed for conversion given that source
		 * is encoded in the ichset representation.  The ICU functions ucnv_getMinCharSize()
		 * and ucnv_getMaxCharSize() are used to compute the minimum and maximum number of
		 * bytes required per UChar if converted from/to ichset/ochset respectively
		 */
		src_charlen = (src->len / ucnv_getMinCharSize(from)) + 1; /* number of UChar's from ichset */
		dstlen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
		dstlen = (dstlen > MAX_STRLEN) ? MAX_STRLEN : dstlen;
		ENSURE_STP_FREE_SPACE(dstlen);
		dstbase = (char *)stringpool.free;
	} else
	{
		dstbase = dstbuff;
		dstlen = *bufflen;
	}
	srcptr = src->addr;
	srclen = (int)src->len;
	dstptr = dstbase;
	status = U_ZERO_ERROR; /* initialization to "success" is required by ICU */
	ucnv_convertEx(to, from, &dstptr, dstptr + dstlen, (const char**)&srcptr, srcptr + srclen,
		NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
	if (U_FAILURE(status))
	{
		if (U_BUFFER_OVERFLOW_ERROR == status)
		{	/* translation requires more space than the maximum allowed GT.M string size */
			if (NULL == dstbuff)
				rts_error_csa(NULL, VARLSTCNT(1) ERR_MAXSTRLEN);
			else
			{
				/* Insufficient buffer passed. Return the required buffer length */
				src_charlen = (srclen / ucnv_getMinCharSize(from)) + 1;
				*bufflen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
				return -1;
			}
		}
		status1 = U_ZERO_ERROR;
		ichset = ucnv_getName(from, &status1);
		assert(U_SUCCESS(status1));
		UTF8_BADCHAR(1,(unsigned char *) (srcptr - 1), NULL,STRLEN(ichset), ichset);
	}
	return (int) (dstptr - dstbase);
}
示例#2
0
文件: gtm_utf8.c 项目: mihawk/fis-gtm
/* Similar to utf8_len() except it operates on a given string instead of an mval and does not observe badchar_inhibit.
 * String must be valid or error is raised.
 */
int utf8_len_strict(unsigned char* ptr, int len)
{
	int		charlen, bytelen;
	unsigned char	*ptrtop;

	ptrtop = ptr + len;
	for (charlen = 0; ptr < ptrtop; charlen++, ptr += bytelen)
	{
		if (!UTF8_VALID(ptr, ptrtop, bytelen))
			UTF8_BADCHAR(0, ptr, ptrtop, 0, NULL);
	}
	assert(ptr == ptrtop);
	return charlen;
}
示例#3
0
文件: gtm_utf8.c 项目: mihawk/fis-gtm
/* The routine that does the actual work of determining the length and responding appropriately in the event an invalid
 * UTF8 character is detected.
 */
STATICFNDEF int utf8_len_real(utf8_err_type err_type, mstr* str)
{
	int		charlen, bytelen;
	char		*ptrtop, *ptr;
	boolean_t	err_raised;

	assert(gtm_utf8_mode);
	ptr = str->addr;
	ptrtop = ptr + str->len;
	charlen = 0;
	err_raised = FALSE;
	if (!badchar_inhibit)
	{
		for (; ptr < ptrtop; charlen++, ptr += bytelen)
		{
			if (!UTF8_VALID(ptr, ptrtop, bytelen))
			{
				switch(err_type)
				{
					case err_rts:
						UTF8_BADCHAR(0, ptr, ptrtop, 0, NULL);
						break;			/* Never get here but keeps compiler happy */
					case err_stx:
						UTF8_BADCHAR_STX(0, ptr, ptrtop, 0, NULL);
						return -1;
					case err_dec:
						if (!err_raised)
						{
							UTF8_BADCHAR_DEC(0, ptr, ptrtop, 0, NULL);
							err_raised = TRUE;
						}
						bytelen = 1;		/* Assume only one char is broken */
						break;
					default:
						assertpro(FALSE /* Invalid error type */);
				}
			}
		}
	} else
	{
		for (; ptr < ptrtop; charlen++)
			ptr = (char *)UTF8_MBNEXT(ptr, ptrtop);
	}
	assert(ptr == ptrtop);
	str->char_len = charlen;
	return charlen;
}
示例#4
0
文件: gtm_utf8.c 项目: mihawk/fis-gtm
/* Returns the total display column width of a UTF-8 string given its address and byte length.
 * The third parameter (strict) is used to specify how both illegal characters should be handled.
 * The fourth parameter (nonprintwidth) is to specify what width to give for unprintable characters.
 *	It is currently 0 if coming through $ZWIDTH and 1 if coming through util_output (for historical reasons).
 * If strict is TRUE, this routine
 * 	- triggers BADCHAR error if it encounters any illegal characters irrespective of VIEW BADCHAR setting.
 * If strict is FALSE, this routine
 * 	- does NOT do BADCHAR check.
 *	- treats illegal characters as unprintable characters (for width).
 */
int gtm_wcswidth(unsigned char* ptr, int len, boolean_t strict, int nonprintwidth)
{
	int		strwidth, cwidth;
	uint4		ch;
	unsigned char	*ptrtop, *ptrnext;

	assert(gtm_utf8_mode);
	ptrtop = ptr + len;
	for (strwidth = 0; ptr < ptrtop; ptr = ptrnext)
	{
		ptrnext = UTF8_MBTOWC(ptr, ptrtop, ch);
		if (WEOF != ch && -1 != (cwidth = UTF8_WCWIDTH(ch)))
			strwidth += cwidth;
		else if (strict && (WEOF == ch))
			UTF8_BADCHAR(0, ptr, ptrtop, 0, NULL);
		else
			strwidth += nonprintwidth;
	}
	assert(ptr == ptrtop);
	return strwidth;
}
示例#5
0
int m_set(void)
{
	/* Some comment on "parse_warn". It is set to TRUE whenever the parse encounters an
	   invalid setleft target.

	   * Note that even if "parse_warn" is TRUE, we should not return FALSE right away but need to continue the parse
	   * until the end of the current SET command. This way any remaining commands in the current parse line will be
	   * parsed and triples generated for them. This is necessary just in case the currently parsed invalid SET command
	   * does not get executed at runtime (due to postconditionals etc.)
	   *
	   * Some comment on the need for "first_setleft_invalid". This variable is needed only in the
	   * case we encounter an invalid-SVN/invalid-FCN/unsettable-SVN as a target of the SET. We need to evaluate the
	   * right-hand-side of the SET command only if at least one valid setleft target is parsed before an invalid setleft
	   * target is encountered. This is because we still need to execute the valid setlefts at runtime before triggering
	   * a runtime error for the invalid setleft. If the first setleft target is an invalid one, then there is no need
	   * to evaluate the right-hand-side. In fact, in this case, adding triples (corresponding to the right hand side)
	   * to the execution chain could cause problems with emit_code later in the compilation as the destination
	   * for the right hand side triples could now be undefined (for example a valid SVN on the left side of the
	   * SET would have generated an OC_SVPUT triple with one of its operands holding the result of the right
	   * hand side evaluation, but an invalid SVN on the left side which would have instead caused an OC_RTERROR triple
	   * to have been generated leaving no triple to receive the result of the right hand side evaluation thus causing
	   * emit_code to be confused and GTMASSERT). Therefore discard all triples generated by the right hand side in this case.
	   * By the same reasoning, discard all triples generated by setleft targets AFTER this invalid one as well.
	   * "first_setleft_invalid" is set to TRUE if the first setleft target is invalid and set to FALSE if the first setleft
	   * target is valid. It is initialized to -1 before the start of the parse.
	   */

	int		index, setop, delimlen;
	int		first_val_lit, last_val_lit, nakedzalias;
	boolean_t	first_is_lit, last_is_lit, got_lparen, delim1char, is_extract, valid_char;
	boolean_t 	alias_processing, have_lh_alias;
	opctype		put_oc;
	oprtype		v, delimval, firstval, lastval, *result, resptr;
	triple		*curtargchain, *delimiter, discardcurtchain, *first, *get, *jmptrp1, *jmptrp2, *last, *obp, *put;
	triple		*s, *s0, *s1, save_targchain, *save_curtchain, *save_curtchain1, *sub, targchain, *tmp;
	mint		delimlit;
	mval		*delim_mval;
	mvar		*mvarptr;
	boolean_t	parse_warn;	/* set to TRUE in case of an invalid SVN etc. */
	boolean_t	curtchain_switched;	/* set to TRUE if a setcurtchain was done */
	int		first_setleft_invalid;	/* set to TRUE if the first setleft target is invalid */
	boolean_t	temp_subs_was_FALSE;
	union
	{
		uint4		unichar_val;
		unsigned char	unibytes_val[4];
	} unichar;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	TREF(temp_subs) = FALSE;
	dqinit(&targchain, exorder);
	result = (oprtype *)mcalloc(SIZEOF(oprtype));
	resptr = put_indr(result);
	delimiter = sub = last = NULL;
	/* A SET clause must be entirely alias related or a normal set. Parenthized multiple sets of aliases are not allowed
	 * and will trigger an error. This is because the source and targets of aliases require different values and references
	 * than normal sets do and thus cannot be mixed.
	 */
	if (alias_processing = (TK_ASTERISK == window_token))
		advancewindow();
	if (got_lparen = (TK_LPAREN == window_token))
	{
		if (alias_processing)
			stx_error(ERR_NOALIASLIST);
		advancewindow();
		TREF(temp_subs) = TRUE;
	}
	/* Some explanation: The triples from the left hand side of the SET expression that are
	 * expressly associated with fetching (in case of set $piece/$extract) and/or storing of
	 * the target value are removed from curtchain and placed on the targchain. Later, these
	 * triples will be added to the end of curtchain to do the finishing store of the target
	 * after the righthand side has been evaluated. This is per the M standard.
	 *
	 * Note that SET $PIECE/$EXTRACT have special conditions in which the first argument is not referenced at all.
	 * (e.g. set $piece(^a," ",3,2) in this case 3 > 2 so this should not evaluate ^a and therefore should not
	 * modify the naked indicator). That is, the triples that do these conditional checks need to be inserted
	 * ahead of the OC_GVNAME of ^a, all of which need to be inserted on the targchain. But the conditionalization
	 * can be done only after parsing the first argument of the SET $PIECE and examining the remaining arguments.
	 * Therefore we maintain the "curtargchain" variable which stores the value of the "targchain" at the beginning
	 * of the iteration (at the start of the $PIECE parsing) and all the conditionalization will be inserted right
	 * here which is guaranteed to be ahead of where the OC_GVNAME gets inserted.
	 *
	 * For example, SET $PIECE(^A(x,y),delim,first,last)=RHS will generate a final triple chain as follows
	 *
	 *	A - Triples to evaluate subscripts (x,y) of the global ^A
	 *	A - Triples to evaluate delim
	 *	A - Triples to evaluate first
	 *	A - Triples to evaluate last
	 *	B - Triples to evaluate RHS
	 *	C - Triples to do conditional check (e.g. first > last etc.)
	 *	C - Triples to branch around if the checks indicate this is a null operation SET $PIECE
	 *	D - Triple that does OC_GVNAME of ^A
	 *	D - Triple that does OC_SETPIECE to determine the new value
	 *	D - Triple that does OC_GVPUT of the new value into ^A(x,y)
	 *	This is the point where the conditional check triples will branch around to if they chose to.
	 *
	 *	A - triples that evaluates the arguments/subscripts in the left-hand-side of the SET command
	 *		These triples are built in "curtchain"
	 *	B - triples that evaluates the arguments/subscripts in the right-hand-side of the SET command
	 *		These triples are built in "curtchain"
	 *	C - triples that do conditional check for any $PIECE/$EXTRACT in the left side of the SET command.
	 *		These triples are built in "curtargchain"
	 *	D - triples that generate the reference to the target of the SET and the store into the target.
	 *		These triples are built in "targchain"
	 *
	 * Note alias processing does not support the SET *(...)=.. type syntax because the type of argument
	 * created for RHS processing is dependent on the LHS receiver type and we do not support more than one
	 * type of source argument in a single SET.
	 */
	first_setleft_invalid = FIRST_SETLEFT_NOTSEEN;
	curtchain_switched = FALSE;
	nakedzalias = have_lh_alias = FALSE;
	save_curtchain = NULL;
	assert(FIRST_SETLEFT_NOTSEEN != TRUE);
	assert(FIRST_SETLEFT_NOTSEEN != FALSE);
	for (parse_warn = FALSE; ; parse_warn = FALSE)
	{
		curtargchain = targchain.exorder.bl;
		jmptrp1 = jmptrp2 = NULL;
		delim1char = is_extract = FALSE;
		allow_dzwrtac_as_mident();	/* Allows $ZWRTACxxx as target to be treated as an mident */
		switch (window_token)
		{
			case TK_IDENT:
				/* A slight diversion first. If this is a $ZWRTAC set (indication of $ in first char
				 * is currently enough to signify that), then we need to check a few conditions first.
				 * If this is a "naked $ZWRTAC", meaning no numeric suffix, then this is a flag that
				 * all the $ZWRTAC vars in the local variable tree need to be kill *'d which will not
				 * be generating a SET instruction. First we need to verify that fact and make sure
				 * we are not in PARENs and not doing alias processing. Note *any* value can be
				 * specified as the source but while it will be evaluated, it is NOT stored anywhere.
				 */
				if ('$' == *window_ident.addr)
				{	/* We have a $ZWRTAC<xx> target */
					if (got_lparen)
						/* We don't allow $ZWRTACxxx to be specified in a parenthesized list.
						 * Verify that first
						 */
						SYNTAX_ERROR(ERR_DZWRNOPAREN);
					if (STR_LIT_LEN(DOLLAR_ZWRTAC) == window_ident.len)
					{	/* Ok, this is a naked $ZWRTAC targeted set */
						if (alias_processing)
							SYNTAX_ERROR(ERR_DZWRNOALIAS);
						nakedzalias = TRUE;
						/* This opcode doesn't really need args but it is easier to fit in with the rest
						 * of m_set processing to pass it the result arg, which there may actually be
						 * a use for someday..
						 */
						put = maketriple(OC_CLRALSVARS);
						put->operand[0] = resptr;
						dqins(targchain.exorder.bl, exorder, put);
						advancewindow();
						break;
					}
				}
				/* If we are doing alias processing, there are two possibilities:
				 *  1) LHS is unsubscripted - it is an alias variable being created or replaced. Need to parse
				 *  the varname as if this were a regular set.
				 *  2) LHS is subscripted - it is an alias container variable being created or replaced. The
				 *  processing here is to pass the base variable index to the store routine so bypass the
				 *  lvn() call.
				 */
				if (!alias_processing || TK_LPAREN == director_token)
				{	/* Normal variable processing or we have a lh alias container */
					if (!lvn(&v, OC_PUTINDX, 0))
						SYNTAX_ERROR_NOREPORT_HERE;
					if (OC_PUTINDX == v.oprval.tref->opcode)
					{
						dqdel(v.oprval.tref, exorder);
						dqins(targchain.exorder.bl, exorder, v.oprval.tref);
						sub = v.oprval.tref;
						put_oc = OC_PUTINDX;
						if (TREF(temp_subs))
							m_set_create_temporaries(sub, put_oc);
					}
				} else
				{	/* Have alias variable. Argument is index into var table rather than pointer to var */
					have_lh_alias = TRUE;
					/* We only want the variable index in this case. Since the entire hash structure to which
					 * this variable is going to be pointing to is changing, doing anything that calls fetch()
					 * is somewhat pointless so we avoid it by just accessing the variable information
					 * directly.
					 */
					mvarptr = get_mvaddr(&window_ident);
					v = put_ilit(mvarptr->mvidx);
					advancewindow();
				}
				/* Determine correct storing triple */
				put = maketriple((!alias_processing ? OC_STO :
						  (have_lh_alias ? OC_SETALS2ALS : OC_SETALSIN2ALSCT)));
				put->operand[0] = v;
				put->operand[1] = resptr;
				dqins(targchain.exorder.bl, exorder, put);
				break;
			case TK_CIRCUMFLEX:
				if (alias_processing)
					SYNTAX_ERROR(ERR_ALIASEXPECTED);
				s1 = curtchain->exorder.bl;
				if (!gvn())
					SYNTAX_ERROR_NOREPORT_HERE;
				for (sub = curtchain->exorder.bl; sub != s1; sub = sub->exorder.bl)
				{
					put_oc = sub->opcode;
					if (OC_GVNAME == put_oc || OC_GVNAKED == put_oc || OC_GVEXTNAM == put_oc)
						break;
				}
				assert(OC_GVNAME == put_oc || OC_GVNAKED == put_oc || OC_GVEXTNAM == put_oc);
				dqdel(sub, exorder);
				dqins(targchain.exorder.bl, exorder, sub);
				if (TREF(temp_subs))
					m_set_create_temporaries(sub, put_oc);
				put = maketriple(OC_GVPUT);
				put->operand[0] = resptr;
				dqins(targchain.exorder.bl, exorder, put);
				break;
			case TK_ATSIGN:
				if (alias_processing)
					SYNTAX_ERROR(ERR_ALIASEXPECTED);
				if (!indirection(&v))
					SYNTAX_ERROR_NOREPORT_HERE;
				if (!got_lparen && TK_EQUAL != window_token)
				{
					assert(!curtchain_switched);
					put = newtriple(OC_COMMARG);
					put->operand[0] = v;
					put->operand[1] = put_ilit(indir_set);
					return TRUE;
				}
				put = maketriple(OC_INDSET);
				put->operand[0] = v;
				put->operand[1] = resptr;
				dqins(targchain.exorder.bl, exorder, put);
				break;
			case TK_DOLLAR:
				if (alias_processing)
					SYNTAX_ERROR(ERR_ALIASEXPECTED);
				advancewindow();
				if (TK_IDENT != window_token)
					SYNTAX_ERROR(ERR_VAREXPECTED);
				if (TK_LPAREN != director_token)
				{	/* Look for intrinsic special variables */
					s1 = curtchain->exorder.bl;
					if (0 > (index = namelook(svn_index, svn_names, window_ident.addr, window_ident.len)))
					{
						STX_ERROR_WARN(ERR_INVSVN);	/* sets "parse_warn" to TRUE */
					} else if (!svn_data[index].can_set)
					{
						STX_ERROR_WARN(ERR_SVNOSET);	/* sets "parse_warn" to TRUE */
					}
					advancewindow();
					if (!parse_warn)
					{
						if (SV_ETRAP != svn_data[index].opcode && SV_ZTRAP != svn_data[index].opcode)
						{	/* Setting of $ZTRAP or $ETRAP must go through opp_svput because they
							 * may affect the stack pointer. All others directly to op_svput().
							 */
							put = maketriple(OC_SVPUT);
						} else
							put = maketriple(OC_PSVPUT);
						put->operand[0] = put_ilit(svn_data[index].opcode);
						put->operand[1] = resptr;
						dqins(targchain.exorder.bl, exorder, put);
					} else
					{	/* OC_RTERROR triple would have been inserted in curtchain by ins_errtriple
						 * (invoked by stx_error). To maintain consistency with the "if" portion of
						 * this code, we need to move this triple to the "targchain".
						 */
						tmp = curtchain->exorder.bl; /* corresponds to put_ilit(FALSE) in ins_errtriple */
						tmp = tmp->exorder.bl;	/* corresponds to put_ilit(in_error) in ins_errtriple */
						tmp = tmp->exorder.bl;	/* corresponds to newtriple(OC_RTERROR) in ins_errtriple */
						assert(OC_RTERROR == tmp->opcode);
						dqdel(tmp, exorder);
						dqins(targchain.exorder.bl, exorder, tmp);
						CHKTCHAIN(&targchain);
					}
					break;
				}
				/* Only 4 function names allowed on left side: $[Z]Piece and $[Z]Extract */
				index = namelook(fun_index, fun_names, window_ident.addr, window_ident.len);
				if (0 > index)
				{
					STX_ERROR_WARN(ERR_INVFCN);	/* sets "parse_warn" to TRUE */
					/* OC_RTERROR triple would have been inserted in "curtchain" by ins_errtriple
					 * (invoked by stx_error). We need to switch it to "targchain" to be consistent
					 * with every other codepath in this module.
					 */
					tmp = curtchain->exorder.bl; /* corresponds to put_ilit(FALSE) in ins_errtriple */
					tmp = tmp->exorder.bl;	/* corresponds to put_ilit(in_error) in ins_errtriple */
					tmp = tmp->exorder.bl;	/* corresponds to newtriple(OC_RTERROR) in ins_errtriple */
					assert(OC_RTERROR == tmp->opcode);
					dqdel(tmp, exorder);
					dqins(targchain.exorder.bl, exorder, tmp);
					CHKTCHAIN(&targchain);
					advancewindow();	/* skip past the function name */
					advancewindow();	/* skip past the left paren */
					/* Parse the remaining arguments until corresponding RIGHT-PAREN/SPACE/EOL is reached */
					if (!parse_until_rparen_or_space())
						SYNTAX_ERROR_NOREPORT_HERE;
				} else
				{
					switch(fun_data[index].opcode)
					{
						case OC_FNPIECE:
							setop = OC_SETPIECE;
							break;
						case OC_FNEXTRACT:
							is_extract = TRUE;
							setop = OC_SETEXTRACT;
							break;
						case OC_FNZPIECE:
							setop = OC_SETZPIECE;
							break;
						case OC_FNZEXTRACT:
							is_extract = TRUE;
							setop = OC_SETZEXTRACT;
							break;
						default:
							SYNTAX_ERROR(ERR_VAREXPECTED);
					}
					advancewindow();
					advancewindow();
					/* Although we see the get (target) variable first, we need to save it's processing
					 * on another chain -- the targchain -- because the retrieval of the target is bypassed
					 * and the naked indicator is not reset if the first/last parameters are not set in a
					 * logical manner (must be > 0 and first <= last). So the evaluation order is
					 * delimiter (if $piece), first, last, RHS of the set and then the target if applicable.
					 * Set up primary action triple now since it is ref'd by the put triples generated below.
					 */
					s = maketriple(setop);
					/* Even for SET[Z]PIECE and SET[Z]EXTRACT, the SETxxxxx opcodes
					 * do not do the final store, they only create the final value TO be
					 * stored so generate the triples that will actually do the store now.
					 * Note we are still building triples on the original curtchain.
					 */
					switch (window_token)
					{
						case TK_IDENT:
							if (!lvn(&v, OC_PUTINDX, 0))
								SYNTAX_ERROR(ERR_VAREXPECTED);
							if (OC_PUTINDX == v.oprval.tref->opcode)
							{
								dqdel(v.oprval.tref, exorder);
								dqins(targchain.exorder.bl, exorder, v.oprval.tref);
								sub = v.oprval.tref;
								put_oc = OC_PUTINDX;
								if (TREF(temp_subs))
									m_set_create_temporaries(sub, put_oc);
							}
							get = maketriple(OC_FNGET);
							get->operand[0] = v;
							put = maketriple(OC_STO);
							put->operand[0] = v;
							put->operand[1] = put_tref(s);
							break;
						case TK_ATSIGN:
							if (!indirection(&v))
								SYNTAX_ERROR(ERR_VAREXPECTED);
							get = maketriple(OC_INDGET);
							get->operand[0] = v;
							get->operand[1] = put_str(0, 0);
							put = maketriple(OC_INDSET);
							put->operand[0] = v;
							put->operand[1] = put_tref(s);
							break;
						case TK_CIRCUMFLEX:
							s1 = curtchain->exorder.bl;
							if (!gvn())
								SYNTAX_ERROR_NOREPORT_HERE;
							for (sub = curtchain->exorder.bl; sub != s1 ; sub = sub->exorder.bl)
							{
								put_oc = sub->opcode;
								if ((OC_GVNAME == put_oc) || (OC_GVNAKED == put_oc)
								    || (OC_GVEXTNAM == put_oc))
									break;
							}
							assert((OC_GVNAME == put_oc) || (OC_GVNAKED == put_oc)
							       || (OC_GVEXTNAM == put_oc));
							dqdel(sub, exorder);
							dqins(targchain.exorder.bl, exorder, sub);
							if (TREF(temp_subs))
								m_set_create_temporaries(sub, put_oc);
							get = maketriple(OC_FNGVGET);
							get->operand[0] = put_str(0, 0);
							put = maketriple(OC_GVPUT);
							put->operand[0] = put_tref(s);
							break;
						default:
							SYNTAX_ERROR(ERR_VAREXPECTED);
					}
					s->operand[0] = put_tref(get);
					/* Code to fetch args for target triple are on targchain. Put get there now too. */
					dqins(targchain.exorder.bl, exorder, get);
					CHKTCHAIN(&targchain);
					if (!is_extract)
					{	/* Set $[z]piece */
						delimiter = newtriple(OC_PARAMETER);
						s->operand[1] = put_tref(delimiter);
						first = newtriple(OC_PARAMETER);
						delimiter->operand[1] = put_tref(first);
						/* Process delimiter string ($[z]piece only) */
						if (TK_COMMA != window_token)
							SYNTAX_ERROR(ERR_COMMA);
						advancewindow();
						if (!strexpr(&delimval))
							SYNTAX_ERROR_NOREPORT_HERE;
						assert(TRIP_REF == delimval.oprclass);
					} else
					{	/* Set $[Z]Extract */
						first = newtriple(OC_PARAMETER);
						s->operand[1] = put_tref(first);
					}
					/* Process first integer value */
					if (window_token != TK_COMMA)
						firstval = put_ilit(1);
					else
					{
						advancewindow();
						if (!intexpr(&firstval))
							SYNTAX_ERROR(ERR_COMMA);
						assert(firstval.oprclass == TRIP_REF);
					}
					first->operand[0] = firstval;
					if (first_is_lit = (OC_ILIT == firstval.oprval.tref->opcode))
					{
						assert(ILIT_REF ==firstval.oprval.tref->operand[0].oprclass);
						first_val_lit = firstval.oprval.tref->operand[0].oprval.ilit;
					}
					if (TK_COMMA != window_token)
					{	/* There is no "last" value. Only if 1 char literal delimiter and
						 * no "last" value can we generate shortcut code to op_set[z]p1 entry
						 * instead of op_set[z]piece. Note if UTF8 mode is in effect, then this
						 * optimization applies if the literal is one unicode char which may in
						 * fact be up to 4 bytes but will still be passed as a single unsigned
						 * integer.
						 */
						if (!is_extract)
						{
							delim_mval = &delimval.oprval.tref->operand[0].oprval.mlit->v;
							valid_char = TRUE;	/* Basic assumption unles proven otherwise */
							if (delimval.oprval.tref->opcode == OC_LIT &&
							    (1 == (gtm_utf8_mode ?
								   MV_FORCE_LEN(delim_mval) : delim_mval->str.len)))
							{	/* Single char delimiter for set $piece */
								UNICODE_ONLY(
									if (gtm_utf8_mode)
									{	/*  We have a supposed single char delimiter but it
										 *  must be a valid utf8 char to be used by
										 *  op_setp1() and MV_FORCE_LEN won't tell us that.
										 */
										valid_char = UTF8_VALID(delim_mval->str.addr,
													(delim_mval->str.addr
													 + delim_mval->str.len),
													delimlen);
										if (!valid_char && !badchar_inhibit)
											UTF8_BADCHAR(0, delim_mval->str.addr,
												     (delim_mval->str.addr
												      + delim_mval->str.len),
												     0, NULL);
									}
									     );
								if (valid_char || 1 == delim_mval->str.len)
								{	/* This reference to a one character literal or a single
									 * byte invalid utf8 character that needs to be turned into
									 * an explict formated integer literal instead
									 */
									unichar.unichar_val = 0;
									if (!gtm_utf8_mode)
									{	/* Single byte delimiter */
										assert(1 == delim_mval->str.len);
										UNIX_ONLY(s->opcode = OC_SETZP1);
										VMS_ONLY(s->opcode = OC_SETP1);
										unichar.unibytes_val[0] = *delim_mval->str.addr;
									}
									UNICODE_ONLY(
								        else
									{	/* Potentially multiple bytes in one int */
										assert(SIZEOF(int) >= delim_mval->str.len);
										memcpy(unichar.unibytes_val,
										       delim_mval->str.addr,
										       delim_mval->str.len);
										s->opcode = OC_SETP1;
									}
										     );
									delimlit = (mint)unichar.unichar_val;
									delimiter->operand[0] = put_ilit(delimlit);
									delim1char = TRUE;
								}
							}
						}
示例#6
0
/* write ASCII characters converting to UTF16 if needed
 * 	returns bytes written
*/
int  iorm_write_utf_ascii(io_desc *iod, char *string, int len)
{
	int		outlen, mblen, status;
	wint_t		utf_code;
	unsigned char	*outstart, *out, *top, *outptr, *nextoutptr, *outptrtop, *nextmb;
	d_rm_struct	*rm_ptr;
	boolean_t	ch_set;

	ESTABLISH_RET_GTMIO_CH(&iod->pair, -1, ch_set);
	rm_ptr = (d_rm_struct *)iod->dev_sp;
	assert(NULL != rm_ptr);
	if (CHSET_UTF8 != iod->ochset)
	{
		outstart = outptr = &rm_ptr->outbuf[rm_ptr->out_bytes];
		outptrtop = rm_ptr->outbuf + rm_ptr->recordsize;	/* buffer is larger than recordsize to allow for EOL */
		assert(len <= (&rm_ptr->outbuf[rm_ptr->outbufsize] - outstart));
		for (out = (unsigned char*)string, top = out + len, outlen = 0; out < top && outptr <= outptrtop;
				out = nextmb, outptr = nextoutptr)
		{
			nextmb = UTF8_MBTOWC(out, top, utf_code);
			assert(nextmb == (out + 1));
			if (WEOF == utf_code)
			{
				iod->dollar.za = 9;
				UTF8_BADCHAR((int)(nextmb - out), out, top, 0, NULL);
			}
			if (CHSET_UTF16BE == iod->ochset)
				nextoutptr = UTF16BE_WCTOMB(utf_code, outptr);
			else
				nextoutptr = UTF16LE_WCTOMB(utf_code, outptr);
			if (nextoutptr == outptr)
			{	/* invalid codepoint */
				iod->dollar.za = 9;
				UTF8_BADCHAR((int)(nextmb - out), out, top, chset_names[iod->ochset].len,
						chset_names[iod->ochset].addr);
			}
			mblen = (int)(nextoutptr - outptr);
			outlen += mblen;
		}
	} else
	{
		outstart = (unsigned char *)string;
		outlen = len;
	}
	if (0 < outlen)
	{
		if (rm_ptr->output_encrypted)
		{
			REALLOC_CRYPTBUF_IF_NEEDED(outlen);
			WRITE_ENCRYPTED_DATA(rm_ptr, iod->trans_name, outstart, outlen, pvt_crypt_buf.addr);
			outptr = (unsigned char *)pvt_crypt_buf.addr;
		} else
			outptr = outstart;
		DOWRITERC_RM(rm_ptr, outptr, outlen, status);
		ISSUE_NOPRINCIO_IF_NEEDED_RM(status, ==, iod);
		rm_ptr->write_occurred = TRUE;
		rm_ptr->out_bytes += outlen;
	}
	REVERT_GTMIO_CH(&iod->pair, ch_set);
	return outlen;
}
示例#7
0
文件: op_setp1.c 项目: mihawk/fis-gtm
/*
 * ----------------------------------------------------------
 * Fast path setpiece when delimiter is one (lit) char replacing
 * a single piece (last is same as first). Unicode flavor.
 *
 * Arguments:
 *	src	- source mval
 *	delim	- delimiter char
 *	expr	- expression string mval
 *	ind	- index in source mval to be set
 *	dst	- destination mval where the result is saved.
 *
 * Return:
 *	none
 * ----------------------------------------------------------
 */
void op_setp1(mval *src, int delim, mval *expr, int ind, mval *dst)
{
	size_t		str_len, delim_cnt;
	int		len, pfx_str_len, sfx_start_offset, sfx_str_len, rep_str_len, pfx_scan_offset;
	int		dlmlen, cpy_cache_lines, mblen;
	unsigned char	*start_sfx, *str_addr, *end_pfx, *end_src, *start_pfx;
	boolean_t	do_scan, delim_last_scan, valid_char;
	mval		dummymval;	/* It's value is not used but is part of the call to op_fnp1() */
	fnpc		*cfnpc, *pfnpc;
	delimfmt	ldelim;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gtm_utf8_mode);
	do_scan = FALSE;
	cpy_cache_lines = -1;
	ldelim.unichar_val = delim;
        if (!UTF8_VALID(ldelim.unibytes_val, (ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val)), dlmlen)
	    && !badchar_inhibit)
	{	/* The delimiter is a bad character so error out if badchar not inhibited */
		UTF8_BADCHAR(0, ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), 0, NULL);
	}
	MV_FORCE_STR(expr);	/* Expression to put into piece place */
	if (MV_DEFINED(src))
	{
		/* We have 3 possible scenarios:
		 * 1) The source string is null. Nothing to do but proceed to building output.
		 * 2) If the requested piece is larger than can be cached by op_fnp1, call fnp1
		 *    for the maximum piece possible, use the cache info to "prime the pump" and
		 *    then process the rest of the string ourselves.
		 * 3) If the requested piece can be obtained from the cache, call op_fnp1 to validate
		 *    and rebuild the cache if necessary and then retrieve the necessary info from
		 *    the fnpc cache.
		 */
		MV_FORCE_STR(src);	/* Make sure is string prior to length check */
		if (0 == src->str.len)
		{	/* We have a null source string */
			pfx_str_len = sfx_str_len = sfx_start_offset = 0;
			delim_cnt = (0 < ind) ? (size_t)ind - 1 : 0;
		} else if (FNPC_ELEM_MAX >= ind)
		{	/* 3) Best of all possible cases. The op_fnp1 can do most of our work for us
			 *    and we can preload the cache on the new string to help its subsequent
			 *    uses along as well.
			 */
			SETWON;
			op_fnp1(src, delim, ind, &dummymval);
			SETWOFF;
			cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
			assert(cfnpc->last_str.addr == src->str.addr);
			assert(cfnpc->last_str.len == src->str.len);
			assert(cfnpc->delim == delim);
			assert(0 < cfnpc->npcs);
			/* Three more scenarios: #1 piece all in cache, #2 piece would be in cache but ran
			 * out of text or #3 piece is beyond what can be cached
			 */
			if (cfnpc->npcs >= ind)
			{	/* #1 The piece we want is totally within the cache which is good news */
				pfx_str_len = cfnpc->pstart[ind - 1];
				delim_cnt = 0;
				sfx_start_offset = cfnpc->pstart[ind] - dlmlen;				/* Include delimiter */
				rep_str_len = cfnpc->pstart[ind] - cfnpc->pstart[ind - 1] - dlmlen;	/* Replace string length */
				sfx_str_len = src->str.len - pfx_str_len - rep_str_len;
				cpy_cache_lines = ind - 1;
			} else
			{	/* #2 The string was too short so the cache does not contain our string. This means
				 * that the prefix becomes any text that IS in the cache and we set the delim_cnt
				 * to be the number of missing pieces so the delimiters can be put in as part of the
				 * prefix when we build the new string.
				 */
				pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
				delim_cnt = (size_t)(ind - cfnpc->npcs);
				sfx_start_offset = 0;
				sfx_str_len = 0;
				cpy_cache_lines = cfnpc->npcs;
			}
		} else
		{	/* 2) We have a element that would not be able to be in the fnpc cache. Go ahead
			 *    and call op_fnp1 to get cache info up to the maximum and then we will continue
			 *    the scan on our own.
			 */
			SETWON;
			op_fnp1(src, delim, FNPC_ELEM_MAX, &dummymval);
			SETWOFF;
			cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
			assert(cfnpc->last_str.addr == src->str.addr);
			assert(cfnpc->last_str.len == src->str.len);
			assert(cfnpc->delim == delim);
			assert(0 < cfnpc->npcs);
			if (FNPC_ELEM_MAX > cfnpc->npcs)
			{	/* We ran out of text so the scan is complete. This is basically the same
				 * as case #2 above.
				 */
				pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
				delim_cnt = (size_t)(ind - cfnpc->npcs);
				sfx_start_offset = 0;
				sfx_str_len = 0;
				cpy_cache_lines = cfnpc->npcs;
			} else
			{	/* We have a case where the piece we want cannot be kept in cache. In the special
				 * case where there is no more text to handle, we don't need to scan further. Otherwise
				 * we prime the pump and continue the scan where the cache left off.
				 */
				if ((pfx_scan_offset = cfnpc->pstart[FNPC_ELEM_MAX]) < src->str.len)	/* Note assignment */
					/* Normal case where we prime the pump */
					do_scan = TRUE;
				else
				{	/* Special case -- no more text to scan */
					pfx_str_len = cfnpc->pstart[FNPC_ELEM_MAX] - dlmlen;
					sfx_start_offset = 0;
					sfx_str_len = 0;
				}
				delim_cnt = (size_t)ind - FNPC_ELEM_MAX;
				cpy_cache_lines = FNPC_ELEM_MAX;
			}
		}
	} else
	{	/* Source is not defined -- treat as a null string */
		pfx_str_len = sfx_str_len = sfx_start_offset = 0;
		delim_cnt = (size_t)ind - 1;
	}
	/* If we have been forced to do our own scan, do that here. Note the variable pfx_scan_offset has been
	 * set to where the scan should begin in the src string and delim_cnt has been set to how many delimiters
	 * still need to be processed.
	 */
	if (do_scan)
	{	/* Scan the line isolating prefix piece, and end of the
		 * piece being replaced
		 */
		COUNT_EVENT(small);
		end_pfx = start_sfx = (unsigned char *)src->str.addr + pfx_scan_offset;
		end_src = (unsigned char *)src->str.addr + src->str.len;
		/* The compiler would unroll this loop this way anyway but we want to
		 * adjust the start_sfx pointer after the loop but only if we have gone
		 * into it at least once.
		 */
		if ((0 < delim_cnt) && (start_sfx < end_src))
		{
			do
			{
				end_pfx = start_sfx;
				delim_last_scan = FALSE;		/* Whether delimiter is last character scanned */
				while (start_sfx < end_src)
				{
					valid_char = UTF8_VALID(start_sfx, end_src, mblen); /* Length of next char */
					if (!valid_char)
					{	/* Next character is not valid unicode. If badchar error is not inhibited,
						 * signal it now. If it is inhibited, just treat the character as a single
						 * character and continue.
						 */
						if (!badchar_inhibit)
							utf8_badchar(0, start_sfx, end_src, 0, NULL);
						assert(1 == mblen);
					}
					/* Getting mblen first allows us to do quick length compare before the
					 * heavier weight memcmp call.
					 */
					assert(0 < mblen);
					if (mblen == dlmlen && 0 == memcmp(start_sfx, ldelim.unibytes_val, dlmlen))
					{
						delim_last_scan = TRUE;
						break;
					}
					/* Increment ptrs by size of found char */
					start_sfx += mblen;
				}
				start_sfx += dlmlen;
				delim_cnt--;
			} while ((0 < delim_cnt) && (start_sfx < end_src));
			/* We have to backup up the suffix start pointer except under the condition
			 * that the last character in the buffer is the last delimiter we were looking
			 * for.
			 */
			if ((0 == delim_cnt) || (start_sfx < end_src) || !delim_last_scan)
				start_sfx -= dlmlen;			/* Back up suffix to include delimiter char */
			/* If we scanned to the end (no text left) and still have delimiters to
			 * find, the entire src text should be part of the prefix
			 */
			if ((start_sfx >= end_src) && (0 < delim_cnt))
			{
				end_pfx = start_sfx;
				if (delim_last_scan)			/* if last char was delim, reduce delim cnt */
					--delim_cnt;
			}
		} else
		{
			/* If not doing any token finding, then this count becomes the number
			 * of tokens to output. Adjust accordingly.
			 */
			if (0 < delim_cnt)
				--delim_cnt;
		}
		INCR_COUNT(small_pcs, (int)((size_t)ind - delim_cnt));
		/* Now having the following situation:
		 * end_pfx	-> end of the prefix piece including delimiter
		 * start_sfx	-> start of suffix piece (with delimiter) or = end_pfx/src->str.addr if none
		 */
		pfx_str_len = (int)(end_pfx - (unsigned char *)src->str.addr);
		if (0 > pfx_str_len)
			pfx_str_len = 0;
		sfx_start_offset = (int)(start_sfx - (unsigned char *)src->str.addr);
		sfx_str_len = src->str.len - sfx_start_offset;
		if (0 > sfx_str_len)
			sfx_str_len = 0;
	}
	/* Calculate total string len. delim_cnt has needed padding delimiters for null fields */
	str_len = (size_t)expr->str.len + (size_t)pfx_str_len + (delim_cnt * (size_t)dlmlen) + (size_t)sfx_str_len;
	if (MAX_STRLEN < str_len)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
	ENSURE_STP_FREE_SPACE((int)str_len);
	str_addr = stringpool.free;
	start_pfx = (unsigned char *)src->str.addr;
	/* copy prefix */
	if (0 < pfx_str_len)
	{
		memcpy(str_addr, src->str.addr, pfx_str_len);
		str_addr += pfx_str_len;
	}
	/* copy delimiters */
	while (0 < delim_cnt--)
	{
		memcpy(str_addr, ldelim.unibytes_val, dlmlen);
		str_addr += dlmlen;
	}
	/* copy expression */
	if (0 < expr->str.len)
	{
		memcpy(str_addr, expr->str.addr, expr->str.len);
		str_addr += expr->str.len;
	}
	/* copy suffix */
	if (0 < sfx_str_len)
	{
		memcpy(str_addr, start_pfx + sfx_start_offset, sfx_str_len);
		str_addr += sfx_str_len;
	}
	assert(IS_AT_END_OF_STRINGPOOL(str_addr, -str_len));
	dst->mvtype = MV_STR;
	dst->str.len = INTCAST(str_addr - stringpool.free);
	dst->str.addr = (char *)stringpool.free;
	stringpool.free = str_addr;
	/* If available, update the cache information for this newly created mval to hopefully
	 * give it a head start on its next usage. Note that we can only copy over the cache info
	 * for the prefix. We cannot include information for the 'expression' except where it starts
	 * because the expression could itself contain delimiters that would be found on a rescan.
	 */
	if (0 < cpy_cache_lines)
	{
		pfnpc = cfnpc;				/* pointer for src mval's cache */
		do
		{
			cfnpc = (TREF(fnpca)).fnpcsteal;	/* Next cache element to steal */
			if ((TREF(fnpca)).fnpcmax < cfnpc)
				cfnpc = &(TREF(fnpca)).fnpcs[0];
			(TREF(fnpca)).fnpcsteal = cfnpc + 1;	/* -> next element to steal */
		} while (cfnpc == pfnpc);		/* Make sure we don't step on ourselves */
		cfnpc->last_str = dst->str;		/* Save validation info */
		cfnpc->delim = delim;
		cfnpc->npcs = cpy_cache_lines;
		dst->fnpc_indx = cfnpc->indx + 1;	/* Save where we are putting this element
							 * (1 based index in mval so 0 isn't so common)
							 */
		memcpy(&cfnpc->pstart[0], &pfnpc->pstart[0], (cfnpc->npcs + 1) * SIZEOF(unsigned int));
	} else
示例#8
0
void op_fnp1(mval *src, int delim, int trgpcidx,  mval *dst)
{
	unsigned char	*first, *last, *start, *end;
	unsigned int	*pcoff, *pcoffmax, fnpc_indx, slen;
	int		trgpc, cpcidx, spcidx, mblen, dlmlen;
	boolean_t       valid_char;
	mval		ldst;		/* Local copy since &dst == &src .. move to dst at return */
	fnpc   		*cfnpc;
	delimfmt	ldelim;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gtm_utf8_mode);
	MV_FORCE_STR(src);
	ldelim.unichar_val = delim;
	if (!UTF8_VALID(ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), dlmlen) &&
			!badchar_inhibit)
	{ /* The delimiter is a bad character so error out if badchar not inhibited */
		UTF8_BADCHAR(0, ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), 0, NULL);
	}

	ldst.mvtype = MV_STR;
	start = first = last = (unsigned char *)src->str.addr;
	slen = src->str.len;
	end = start + slen;

	/* Detect annoyance cases and deal with quickly so we don't muck up the
	   logic below trying to handle it properly */
	if (0 >= trgpcidx || 0 == slen)
	{
		ldst.str.addr = (char *)start;
		ldst.str.len  = 0;
		*dst = ldst;
		return;
	}

	/* Test mval for valid cache: index ok, mval addr same, delim same. One additional test
	 * is if the cache entry is byte_oriented, then this cache entry was created by $ZPIECE
	 * (using bytes) and since its results are not same as $PIECE(), we must ignore the cache
	 * and rebuild it for this mval. */
	fnpc_indx = src->fnpc_indx - 1;
	cfnpc = &(TREF(fnpca)).fnpcs[fnpc_indx];
	if (FNPC_MAX > fnpc_indx && cfnpc->last_str.addr == (char *)first &&
	    cfnpc->last_str.len == slen && cfnpc->delim == ldelim.unichar_val &&
	    !cfnpc->byte_oriented) /* cannot use the cache created by an earlier $ZPIECE() */
	{
		/* Have valid cache. See if piece we want already in cache */
		COUNT_EVENT(hit);
		INCR_COUNT(pskip, cfnpc->npcs);

		if (trgpcidx <= cfnpc->npcs)
		{
			/* Piece is totally in cache no scan needed */
			ldst.str.addr = (char *)first + cfnpc->pstart[trgpcidx - 1];
			ldst.str.len = cfnpc->pstart[trgpcidx] - cfnpc->pstart[trgpcidx - 1] - dlmlen;
			assert(ldst.str.len >= 0 && ldst.str.len <= src->str.len);
			*dst = ldst;
			return;
		} else
		{
			/* Not in cache but pick up scan where we left off */
			cpcidx = cfnpc->npcs;
			first = last = start + cfnpc->pstart[cpcidx];	/* First byte of next pc */
			pcoff = &cfnpc->pstart[cpcidx];
			if (pcoff == cfnpc->pcoffmax)
				++pcoff; 		/* No further updates to pstart array */
			++cpcidx;			/* Now past last piece and on to next one */
			COUNT_EVENT(parscan);
		}
	} else
	{
		/* The piece cache index or mval validation was incorrect.
		   Start from the beginning */

		COUNT_EVENT(miss);

		/* Need to steal a new piece cache, get "least recently reused" */
		cfnpc = (TREF(fnpca)).fnpcsteal;	/* Get next element to steal */
		if ((TREF(fnpca)).fnpcmax < cfnpc)
			cfnpc = &(TREF(fnpca)).fnpcs[0];
		(TREF(fnpca)).fnpcsteal = cfnpc + 1;	/* -> next element to steal */

		cfnpc->last_str = src->str;		/* Save validation info */
		cfnpc->delim = ldelim.unichar_val;
		cfnpc->npcs = 0;
		cfnpc->byte_oriented = FALSE;
		src->fnpc_indx = cfnpc->indx + 1;	/* Save where we are putting this element
							   (1 based index in mval so 0 isn't so common) */
		pcoff = &cfnpc->pstart[0];
		cpcidx = 1;				/* current piece index */
	}

	/* Do scan filling in offsets of pieces if they fit in the cache */
	spcidx = cpcidx;				/* Starting value for search */
	pcoffmax = cfnpc->pcoffmax;			/* Local end of array value */
	while (cpcidx <= trgpcidx && last < end)
	{
		/* Once through for each piece we pass, last time through to find length of piece we want */
		first = last;				/* first char of current piece */
		while (last < end)
		{
			valid_char = UTF8_VALID(last, end, mblen);	/* Length of next char */
			if (!valid_char)
			{	/* Next character is not valid unicode. If badchar error is not inhibited,
				   signal it now. If it is inhibited, just treat the character as a single
				   character and continue.
				*/
				if (!badchar_inhibit)
					utf8_badchar(0, last, end, 0, NULL);
				assert(1 == mblen);
			}
			/* Getting mblen first allows us to do quick length compare before the
			   heavier weight memcmp call.
			*/
			assert(0 < mblen);
			if (mblen == dlmlen)
			{
				if (1 == dlmlen)
				{
					if (*last == ldelim.unibytes_val[0])			/* Shortcut - test single byte */
						break;
				} else if (0 == memcmp(last, ldelim.unibytes_val, dlmlen))	/* Longcut - for multibyte chk */
					break;
			}
			last += mblen;  	/* Find delim signaling end of piece */
		}
		last += dlmlen;				/* Bump past delim to first byte of next piece. The length of
							   the delimiter is assumed in the pcoff array and is removed
							   when piece length is calculated so even if we hit the end of
							   the scanned source, we bump the pointer so this extra length
							   is reflected in the pcoff array consistently.
							*/
		++cpcidx;				/* Next piece */
		if (pcoff < pcoffmax)
			*pcoff++ =(unsigned int)(first - start);	/* offset to this piece */
		if (pcoff == pcoffmax)
			*pcoff++ = (unsigned int)(last - start);	/* store start of first piece beyond what is in cache */
	}

	ldst.str.addr = (char *)first;

	/* If we scanned some chars, adjust end pointer and save end of final piece */
	if (spcidx != cpcidx)
	{
		if (pcoff < pcoffmax)
			*pcoff = (unsigned int)(last - start);		/* If not at end of cache, save start of "next" piece */

		last -= dlmlen;				/* Undo bump past last delim (existing or not)
							   of piece for accurate string len */
		/* Update count of pieces in cache */
		cfnpc->npcs = MIN((cfnpc->npcs + cpcidx - spcidx), FNPC_ELEM_MAX);
		assert(cfnpc->npcs <= FNPC_ELEM_MAX);
		assert(cfnpc->npcs > 0);

		/* If we the above loop ended prematurely because we ran out of text, we return null string */
		if (trgpcidx < cpcidx)
			ldst.str.len = INTCAST(last - first);	/* Length of piece we located */
		else
			ldst.str.len = 0;

		INCR_COUNT(pscan, cpcidx - spcidx);	/* Pieces scanned */
	} else
		ldst.str.len  = 0;

	assert(cfnpc->npcs > 0);
	assert(ldst.str.len >= 0 && ldst.str.len <= src->str.len);
	*dst = ldst;
	return;
}