Ejemplo n.º 1
0
/*
 * ----------------------------------------------------------
 * Fast path setpiece when delimiter is one (lit) char replacing
 * a single piece (last is same as first).
 *
 * The result is assembled at stringpool.free as
 *	<prefix of src><padding delimiters><expr><suffix of src>
 * where the prefix is everything in src before the target piece, the
 * padding delimiters create any pieces src does not yet have, and the
 * suffix starts at the delimiter that follows the replaced piece.
 * Where the source string had piece-cache (fnpc) information, the part of
 * it describing the prefix is copied to a cache element for dst.
 *
 * Arguments:
 *	src	- source mval (an undefined mval is treated as a null string)
 *	delim	- delimiter char
 *	expr	- expression string mval
 *	ind	- piece number (1 based) in source mval to be set
 *	dst	- destination mval where the result is saved.
 *
 * Return:
 *	none
 *
 * Errors:
 *	ERR_MAXSTRLEN is raised if the resulting string would exceed MAX_STRLEN.
 * ----------------------------------------------------------
 */
void op_setp1(mval *src, int delim, mval *expr, int ind, mval *dst)
{
    int		pfx_str_len, sfx_start_offset, sfx_str_len, rep_str_len, str_len, delim_cnt, pfx_scan_offset;
    int		cpy_cache_lines;
    unsigned char	ldelim, lc, *start_sfx, *str_addr, *end_pfx, *end_src, *start_pfx;
    boolean_t	do_scan;
    mval		dummymval;	/* Its value is not used but is part of the call to op_fnp1() */
    fnpc		*cfnpc, *pfnpc;

    error_def(ERR_MAXSTRLEN);

    ldelim = delim;		/* Local copy (in unsigned char format) */
    do_scan = FALSE;
    cpy_cache_lines = -1;	/* Stays negative when there is no cache info to hand on to dst */

    MV_FORCE_STR(expr);	/* Expression to put into piece place */
    if (MV_DEFINED(src))
    {
        /* We have 3 possible scenarios:

           1) If length of src is too small to cause cacheing by op_fnp1, then just do
           the work ourselves with no cacheing.
           2) If the requested piece is larger than can be cached by op_fnp1, call fnp1
           for the maximum piece possible, use the cache info to "prime the pump" and
           then process the rest of the string ourselves.
           3) If the requested piece can be obtained from the cache, call op_fnp1 to validate
           and rebuild the cache if necessary and then retrieve the necessary info from
           the fnpc cache.
        */
        MV_FORCE_STR(src);	/* Make sure is string prior to length check */
        if (FNPC_STRLEN_MIN < src->str.len && FNPC_ELEM_MAX >= ind)
        {   /* 3) Best of all possible cases. The op_fnp1 can do most of our work for us
               and we can preload the cache on the new string to help its subsequent
               uses along as well.
               NOTE(review): the cache lookups below index pstart[ind - 1], which is out of
               bounds for ind < 1. Presumably callers never pass such an index -- confirm.
            */
            SETWON;
            op_fnp1(src, delim, ind, &dummymval, FALSE);
            SETWOFF;
            cfnpc = &fnpca.fnpcs[src->fnpc_indx - 1];
            assert(cfnpc->last_str.addr == src->str.addr);
            assert(cfnpc->last_str.len == src->str.len);
            assert(cfnpc->delim == delim);
            assert(0 < cfnpc->npcs);
            /* Two scenarios remain here (ind fits in the cache): #1 piece all in cache or
               #2 piece would be in cache but ran out of text.
            */
            if (cfnpc->npcs >= ind)
            {   /* #1 The piece we want is totally within the cache which is good news */
                pfx_str_len = cfnpc->pstart[ind - 1];
                delim_cnt = 0;
                sfx_start_offset = cfnpc->pstart[ind] - 1;			/* Include delimiter */
                rep_str_len = cfnpc->pstart[ind] - cfnpc->pstart[ind - 1] - 1;	/* Replace string length */
                sfx_str_len = src->str.len - pfx_str_len - rep_str_len;
                cpy_cache_lines = ind - 1;
            } else
            {   /* #2 The string was too short so the cache does not contain our string. This means
                   that the prefix becomes any text that IS in the cache and we set the delim_cnt
                   to be the number of missing pieces so the delimiters can be put in as part of the
                   prefix when we build the new string.
                */
                pfx_str_len = cfnpc->pstart[cfnpc->npcs] - 1;
                delim_cnt = ind - cfnpc->npcs;
                sfx_start_offset = 0;
                sfx_str_len = 0;
                cpy_cache_lines = cfnpc->npcs;
            }
        } else if (FNPC_STRLEN_MIN < src->str.len)
        {   /* 2) We have a element that would not be able to be in the fnpc cache. Go ahead
               and call op_fnp1 to get cache info up to the maximum and then we will continue
               the scan on our own.
            */
            SETWON;
            op_fnp1(src, delim, FNPC_ELEM_MAX, &dummymval, FALSE);
            SETWOFF;
            cfnpc = &fnpca.fnpcs[src->fnpc_indx - 1];
            assert(cfnpc->last_str.addr == src->str.addr);
            assert(cfnpc->last_str.len == src->str.len);
            assert(cfnpc->delim == delim);
            assert(0 < cfnpc->npcs);
            if (FNPC_ELEM_MAX > cfnpc->npcs)
            {   /* We ran out of text so the scan is complete. This is basically the same
                   as case #2 above.
                */
                pfx_str_len = cfnpc->pstart[cfnpc->npcs] - 1;
                delim_cnt = ind - cfnpc->npcs;
                sfx_start_offset = 0;
                sfx_str_len = 0;
                cpy_cache_lines = cfnpc->npcs;
            } else
            {   /* We have a case where the piece we want cannot be kept in cache. In the special
                   case where there is no more text to handle, we don't need to scan further. Otherwise
                   we prime the pump and continue the scan where the cache left off.
                */
                if ((pfx_scan_offset = cfnpc->pstart[FNPC_ELEM_MAX]) < src->str.len)	/* Note assignment */
                {   /* Normal case where we prime the pump */
                    do_scan = TRUE;
                } else
                {   /* Special case -- no more text to scan */
                    pfx_str_len = cfnpc->pstart[FNPC_ELEM_MAX] - 1;
                    sfx_start_offset = 0;
                    sfx_str_len = 0;
                }
                delim_cnt = ind - FNPC_ELEM_MAX;
                cpy_cache_lines = FNPC_ELEM_MAX;
            }

        } else
        {   /* 1) We have a short string where no cacheing happens. Do the scanning work ourselves.
               The src mval was already forced to a string above.
            */
            do_scan = TRUE;
            pfx_scan_offset = 0;
            delim_cnt = ind;
        }
    } else
    {   /* Source is not defined -- treat as a null string */
        pfx_str_len = sfx_str_len = sfx_start_offset = 0;
        delim_cnt = ind - 1;
        /* A piece number below 1 must not yield a negative padding count: it would make the
           str_len computed below smaller than the number of bytes actually copied.
        */
        if (0 > delim_cnt)
            delim_cnt = 0;
    }

    /* If we have been forced to do our own scan, do that here. Note the variable pfx_scan_offset has been
       set to where the scan should begin in the src string and delim_cnt has been set to how many delimiters
       still need to be processed.
    */
    if (do_scan)
    {   /* Scan the line isolating prefix piece, and end of the
           piece being replaced
        */
        COUNT_EVENT(cs_small);
        end_pfx = start_sfx = (unsigned char *)src->str.addr + pfx_scan_offset;
        end_src = (unsigned char *)src->str.addr + src->str.len;

        /* The compiler would unroll this loop this way anyway but we want to
           adjust the start_sfx pointer after the loop but only if we have gone
           into it at least once.
        */
        if (0 < delim_cnt && start_sfx < end_src)
        {
            do
            {
                end_pfx = start_sfx;		/* Start of the piece now being scanned */
                while (start_sfx < end_src && (lc = *start_sfx) != ldelim) start_sfx++;
                start_sfx++;			/* Step over the delimiter (or one past end of text) */
                delim_cnt--;
            } while (0 < delim_cnt && start_sfx < end_src);

            /* We have to backup up the suffix start pointer except under the condition
               that the last character in the buffer is the last delimiter we were looking
               for.
            */
            if (0 == delim_cnt || start_sfx < end_src || lc != ldelim)
                --start_sfx;				/* Back up suffix to include delimiter char */

            /* If we scanned to the end (no text left) and still have delimiters to
               find, the entire src text should be part of the prefix */
            if (start_sfx >= end_src && 0 < delim_cnt)
            {
                end_pfx = start_sfx;
                if (lc == ldelim)			/* if last char was delim, reduce delim cnt */
                    --delim_cnt;
            }

        } else
        {
            /* If not doing any token finding, then this count becomes the number
               of tokens to output. Adjust accordingly.
            */
            if (0 > --delim_cnt)
                delim_cnt = 0;
        }
        INCR_COUNT(cs_small_pcs, ind - delim_cnt);

        /* Now having the following situation:
           end_pfx	-> end of the prefix piece including delimiter
           start_sfx	-> start of suffix piece (with delimiter) or = end_pfx/src->str.addr if none
        */
        pfx_str_len = end_pfx - (unsigned char *)src->str.addr;
        if (0 > pfx_str_len)
            pfx_str_len = 0;
        sfx_start_offset = start_sfx - (unsigned char *)src->str.addr;
        sfx_str_len = src->str.len - sfx_start_offset;
        if (0 > sfx_str_len)
            sfx_str_len = 0;
    }

    /* Calculate total string len. delim_cnt has needed padding delimiters for null fields */
    str_len = expr->str.len + pfx_str_len + delim_cnt + sfx_str_len;
    if (str_len > MAX_STRLEN)
        rts_error(VARLSTCNT(1) ERR_MAXSTRLEN);
    if (str_len > (stringpool.top - stringpool.free))
        stp_gcol(str_len);
    /* The source address is picked up only after the possible stp_gcol() call above */
    str_addr = stringpool.free;
    start_pfx = (unsigned char *)src->str.addr;

    /* copy prefix */
    if (0 < pfx_str_len)
    {
        memcpy(str_addr, src->str.addr, pfx_str_len);
        str_addr += pfx_str_len;
    }

    /* copy delimiters */
    while (delim_cnt-- > 0)
        *str_addr++ = ldelim;

    /* copy expression */
    if (0 < expr->str.len)
    {
        memcpy(str_addr, expr->str.addr, expr->str.len);
        str_addr += expr->str.len;
    }

    /* copy suffix */
    if (0 < sfx_str_len)
    {
        memcpy(str_addr, start_pfx + sfx_start_offset, sfx_str_len);
        str_addr += sfx_str_len;
    }

    assert(str_addr - stringpool.free == str_len);
    dst->mvtype = MV_STR;
    dst->str.len = str_addr - stringpool.free;
    dst->str.addr = (char *)stringpool.free;
    stringpool.free = str_addr;

    /* If available, update the cache information for this newly created mval to hopefully
       give it a head start on its next usage. Note that we can only copy over the cache info
       for the prefix. We cannot include information for the 'expression' except where it starts
       because the expression could itself contain delimiters that would be found on a rescan.
    */
    if (0 < cpy_cache_lines)
    {
        pfnpc = cfnpc;				/* pointer for src mval's cache */
        do
        {
            cfnpc = fnpca.fnpcsteal;	/* Next cache element to steal */
            if (fnpca.fnpcmax < cfnpc)
                cfnpc = &fnpca.fnpcs[0];	/* Wrap around to the first element */
            fnpca.fnpcsteal = cfnpc + 1;	/* -> next element to steal */
        } while (cfnpc == pfnpc);		/* Make sure we don't step on ourselves */

        cfnpc->last_str = dst->str;		/* Save validation info */
        cfnpc->delim = delim;
        cfnpc->npcs = cpy_cache_lines;
        dst->fnpc_indx = cfnpc->indx + 1;	/* Save where we are putting this element
							   (1 based index in mval so 0 isn't so common) */
        memcpy(&cfnpc->pstart[0], &pfnpc->pstart[0], (cfnpc->npcs + 1) * sizeof(unsigned int));
    } else
    {   /* No cache available -- just reset index pointer to get fastest cache validation failure */
        dst->fnpc_indx = -1;
    }
}
Ejemplo n.º 2
0
/*
 * Format a date (and optional time) value according to a format string.
 *
 * Arguments:
 *	src	- value to format: a day number, optionally followed by ',' and a
 *		  time-of-day in seconds (the part after the comma is only examined when
 *		  src has a string form flagged MV_NUM_APPROX)
 *	fmt	- format string; empty or "1" selects DEFAULT1 or DEFAULT3 (depending on
 *		  zdate_form and the year), "2" selects DEFAULT2, anything else is used as is
 *	mo_str	- comma separated list of month names used for MON; if empty the built-in
 *		  three character abbreviations are used
 *	day_str	- comma separated list of day names used for DAY; if empty the built-in
 *		  three character abbreviations are used
 *	dst	- destination mval; receives the formatted string, built at stringpool.free
 *
 * Format tokens recognized: MM MON DD DAY YY(Y...) YEAR 12 24 60 SS AM and the literal
 * separator characters / : . , - space * + ;
 *
 * Errors (raised through rts_error, which is assumed not to return -- the code that follows
 * each call relies on that):
 *	ERR_ZDATEBADTIME - time portion is negative or larger than MAX_TIME
 *	ERR_ZDATEBADDATE - day number is outside MIN_DATE..MAX_DATE
 *	ERR_ZDATEFMT	 - unknown or truncated format token, or output would reach ZDATE_MAX_LEN
 */
void op_fnzdate(mval *src, mval *fmt, mval *mo_str, mval *day_str, mval *dst)
{
	unsigned char 	ch, *fmtptr, *fmttop, *i, *outptr, *outtop, *outpt1;
	int 		cent, day, dow, month, nlen, outlen, time, year;
	int		src_had_str;	/* whether src carried a string form on entry */
	unsigned int	n;
	mval 		temp_mval;

	static readonly unsigned char montab[] = {31,28,31,30,31,30,31,31,30,31,30,31};
	static readonly unsigned char default1[] = DEFAULT1;
	static readonly unsigned char default2[] = DEFAULT2;
	static readonly unsigned char default3[] = DEFAULT3;
	static readonly unsigned char defmonlst[] = "JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDEC";
	static readonly unsigned char defdaylst[] = "SUNMONTUEWEDTHUFRISAT";
#if defined(BIGENDIAN)
	static readonly int  comma = (((int)',') << 24);
#else
	static readonly int  comma = ',';
#endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	MV_FORCE_NUM(src);
	MV_FORCE_STR(fmt);
	MV_FORCE_STR(mo_str);
	MV_FORCE_STR(day_str);
	ENSURE_STP_FREE_SPACE(ZDATE_MAX_LEN);
	time = 0;
	/* Only trust src->str when the mval actually has a string form */
	src_had_str = (0 != (src->mvtype & MV_STR));
	outlen = src_had_str ? src->str.len : 0;
	if (src_had_str && (src->mvtype & MV_NUM_APPROX))
	{	/* Look for a ",time" portion following the day number */
		for (outptr = (unsigned char *)src->str.addr, outtop = outptr + outlen; outptr < outtop; )
		{
			if (',' == *outptr++)
			{
				/* Length of the date portion only (used if the date turns out to be bad) */
				outlen = outptr - (unsigned char *)src->str.addr - 1;
				temp_mval.mvtype = MV_STR;
				temp_mval.str.addr = (char *)outptr;
				temp_mval.str.len = INTCAST(outtop - outptr);
				s2n(&temp_mval);
				time = MV_FORCE_INTD(&temp_mval);
				if ((0 > time) || (MAX_TIME < time))
					rts_error(VARLSTCNT(4) ERR_ZDATEBADTIME, 2, temp_mval.str.len, temp_mval.str.addr);
				break;
			}
		}
	}
	day = (int)MV_FORCE_INTD(src);
	if ((MAX_DATE < day) || (MIN_DATE > day))
	{
		MV_FORCE_STR(src);
		if (!src_had_str)
			outlen = src->str.len;	/* string form was only just created; use its real length */
		rts_error(VARLSTCNT(4) ERR_ZDATEBADDATE, 2, outlen, src->str.addr);
	}
	/* Convert the day number into day-of-week, year, month and day-of-month */
	day += DAYS_MOST_YEARS;
	dow = ((day + ADJUST_TO_1900) % DAYS_IN_WEEK) + 1;
	for (cent = DAYS_BASE_TO_1900, n = ADJUST_TO_1900; cent < day; cent += DAYS_IN_CENTURY, n++)
			day += (0 < (n % COMMON_LEAP_CYCLE));
	year = day / DAYS_IN_FOUR_YEARS;
	day = day - (year * DAYS_IN_FOUR_YEARS);
	year = (year * COMMON_LEAP_CYCLE) + BASE_YEAR;
	if (DAYS_BEFORE_LEAP == day)
	{	/* The leap day itself */
		day = MIN_DAYS_IN_MONTH + 1;
		month = 2;
	} else
	{
		if (DAYS_BEFORE_LEAP < day)
			day--;
		month = day / DAYS_MOST_YEARS;
		year += month;
		day -= (month * DAYS_MOST_YEARS);
		for (i = montab; day >= *i; day -= *i++)
			;
		month = (int)((i - montab)) + 1;
		day++;
		assert((0 < month) && (MONTHS_IN_YEAR >= month));
	}
	/* Select the format to use */
	if ((0 == fmt->str.len) || ((1 == fmt->str.len) && ('1' == *fmt->str.addr)))
	{
		if (!TREF(zdate_form) || ((1 == TREF(zdate_form)) && (PIVOT_MILLENIUM > year)))
		{
			fmtptr = default1;
			fmttop = fmtptr + STR_LIT_LEN(DEFAULT1);
		} else
		{
			fmtptr = default3;
			fmttop = fmtptr + STR_LIT_LEN(DEFAULT3);
		}
	} else if ((1 == fmt->str.len) && ('2' == *fmt->str.addr))
	{
		fmtptr = default2;
		fmttop = fmtptr + STR_LIT_LEN(DEFAULT2);
	} else
	{
		fmtptr = (unsigned char *)fmt->str.addr;
		fmttop = fmtptr + fmt->str.len;
	}
	/* outlen now tracks the expected output length; it starts as the format length and is
	 * adjusted when a user supplied month/day name is not three characters long.
	 */
	outlen = (int)(fmttop - fmtptr);
	if (outlen >= ZDATE_MAX_LEN)
		rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
	outptr = stringpool.free;
	outtop = outptr + ZDATE_MAX_LEN;
	temp_mval.mvtype = MV_STR;
	assert(0 <= time);
	nlen = 0;
	/* Each token either emits literal text directly (continue) or sets nlen and breaks out of the
	 * switch: nlen > 0 means emit n as an nlen-digit zero padded number, nlen <= 0 means copy -nlen
	 * bytes from temp_mval. Every read of a follow-on format character is bounds checked against
	 * fmttop so a format that ends in the middle of a token is rejected without reading past it.
	 */
	while (fmtptr < fmttop)
	{
		switch (ch = *fmtptr++)		/* NOTE assignment */
		{
		case '/':
		case ':':
		case '.':
		case ',':
		case '-':
		case ' ':
		case '*':
		case '+':
		case ';':
			*outptr++ = ch;
			continue;
		case 'M':
			if (fmtptr >= fmttop)
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			ch = *fmtptr++;
			if ('M' == ch)
			{
				n = month;
				nlen = 2;
				break;
			}
			if (('O' != ch) || (fmtptr >= fmttop) || ('N' != *fmtptr++))
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			if (0 == mo_str->str.len)
			{
				temp_mval.str.addr = (char *)&defmonlst[(month - 1) * LEN_OF_3_CHAR_ABBREV];
				temp_mval.str.len = LEN_OF_3_CHAR_ABBREV;
				nlen = -LEN_OF_3_CHAR_ABBREV;
			} else
			{
				UNICODE_ONLY(gtm_utf8_mode ? op_fnp1(mo_str, comma, month, &temp_mval) :
					                     op_fnzp1(mo_str, comma, month, &temp_mval));
				VMS_ONLY(op_fnzp1(mo_str, comma, month, &temp_mval, TRUE));
				nlen = -temp_mval.str.len;
				outlen += - LEN_OF_3_CHAR_ABBREV - nlen;
				if (outlen >= ZDATE_MAX_LEN)
					rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			}
			break;
		case 'D':
			if (fmtptr >= fmttop)
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			ch = *fmtptr++;
			if ('D' == ch)
			{
				n = day;
				nlen = 2;
				break;
			}
			if (('A' != ch) || (fmtptr >= fmttop) || ('Y' != *fmtptr++))
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			if (0 == day_str->str.len)
			{
				temp_mval.str.addr = (char *)&defdaylst[(dow - 1) * LEN_OF_3_CHAR_ABBREV];
				temp_mval.str.len = LEN_OF_3_CHAR_ABBREV;
				nlen = -LEN_OF_3_CHAR_ABBREV;
			} else
			{
				UNICODE_ONLY(gtm_utf8_mode ? op_fnp1(day_str, comma, dow, &temp_mval)
							   : op_fnzp1(day_str, comma, dow, &temp_mval));
				VMS_ONLY(op_fnzp1(day_str, comma, dow, &temp_mval, TRUE));
				nlen = -temp_mval.str.len;
				outlen += - LEN_OF_3_CHAR_ABBREV - nlen;
				if (outlen >= ZDATE_MAX_LEN)
					rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			}
			break;
		case 'Y':
			if (fmtptr >= fmttop)
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			ch = *fmtptr++;
			n = year;
			if ('Y' == ch)
			{	/* A run of Y's: the number of digits emitted equals the number of Y's consumed */
				for (nlen = 2; (MAX_YEAR_DIGITS >=nlen) && fmtptr < fmttop; ++nlen, fmtptr++)
					if ('Y' != *fmtptr)
						break;
			} else
			{
				if (('E' != ch) || (fmtptr >= fmttop) || ('A' != *fmtptr++)
						|| (fmtptr >= fmttop) || ('R' != *fmtptr++))
					rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
				nlen = 4;
			}
			break;
		case '1':
			if ((fmtptr >= fmttop) || ('2' != *fmtptr++))
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			nlen = 2;
			n = time / SECONDS_PER_HOUR;
			n = ((n + HOURS_PER_AM_OR_PM - 1) % HOURS_PER_AM_OR_PM) + 1;
			break;
		case '2':
			if ((fmtptr >= fmttop) || ('4' != *fmtptr++))
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			nlen = 2;
			n = time / SECONDS_PER_HOUR;
			break;
		case '6':
			if ((fmtptr >= fmttop) || ('0' != *fmtptr++))
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			nlen = 2;
			n = time;
			n /= MINUTES_PER_HOUR;
			n %= MINUTES_PER_HOUR;
			break;
		case 'S':
			if ((fmtptr >= fmttop) || ('S' != *fmtptr++))
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			nlen = 2;
			n = time % SECONDS_PER_MINUTE;
			break;
		case 'A':
			if ((fmtptr >= fmttop) || ('M' != *fmtptr++))
				rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
			*outptr++ = (time < (HOURS_PER_AM_OR_PM * SECONDS_PER_HOUR)) ? 'A' : 'P';
			*outptr++ = 'M';
			continue;
		default:
			rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
		}
		if (nlen > 0)
		{	/* Emit n as exactly nlen decimal digits, filling from the right */
			outptr += nlen;
			outpt1 = outptr;
			while (nlen-- > 0)
			{
				*--outpt1 = '0' + (n % 10);
				n /= 10;
			}
		} else
		{	/* Copy the -nlen bytes of the month/day name */
			outpt1 = (unsigned char *)temp_mval.str.addr;
			while (nlen++ < 0)
				*outptr++ = *outpt1++;
		}
	}
	/* Safety net: the bounds checks above should make an overrun impossible */
	if (fmtptr > fmttop)
		rts_error(VARLSTCNT(1) ERR_ZDATEFMT);
	dst->mvtype = MV_STR;
	dst->str.addr = (char *)stringpool.free;
	dst->str.len = INTCAST((char *)outptr - dst->str.addr);
	stringpool.free = outptr;
	return;
}
Ejemplo n.º 3
0
/*
 * Given an input (op) indicating whether we are using $ZPIECE or $PIECE, create the appropriate triple for runtime execution
 *	or run $[Z]PIECE if all inputs are literals. There is also a possibility of an OC_FNZP1 being generated if appropriate.
 * @param[out] a A pointer that will be set to the result of the expression; in some cases a triple to be evaluated, or
 *	the string literal representing the result of the $PIECE function
 * @returns An integer flag: TRUE if the function completed successfully, or FALSE if there was an error
 * @par Side effects
 *  - Calls advance window multiple times, and consumes tokens accordingly
 *  - Calls expr multiple times, which (most notably) adds literals to a hash table
 *  - Calls ins_triple, which adds triples to the execution chain
 *  - Calls st2pool, which inserts strings into the string pool
 */
int f_piece(oprtype *a, opctype op)
{
	delimfmt	unichar;
	mval		*delim_mval, tmp_mval;
	oprtype		x, *newop;
	triple		*delimiter, *first, *last, *r;
	static mstr	scratch_space = {0, 0, 0};

	DCL_THREADGBL_ACCESS;
	SETUP_THREADGBL_ACCESS;
	r = maketriple(op);
	if (EXPR_FAIL == expr(&(r->operand[0]), MUMPS_STR))
		return FALSE;
	if (TK_COMMA != TREF(window_token))
	{
		stx_error(ERR_COMMA);
		return FALSE;
	}
	advancewindow();
	delimiter = newtriple(OC_PARAMETER);
	r->operand[1] = put_tref(delimiter);
	first = newtriple(OC_PARAMETER);
	delimiter->operand[1] = put_tref(first);
	if (EXPR_FAIL == expr(&x, MUMPS_STR))
		return FALSE;
	if (TK_COMMA != TREF(window_token))
		first->operand[0] = put_ilit(1);
	else
	{
		advancewindow();
		if (EXPR_FAIL == expr(&(first->operand[0]), MUMPS_INT))
			return FALSE;
	}
	assert(TRIP_REF == x.oprclass);
	if ((TK_COMMA != TREF(window_token)) && (OC_LIT == x.oprval.tref->opcode)
	    && (1 == ((gtm_utf8_mode && (OC_FNZPIECE != op)) ? MV_FORCE_LEN_DEC(&x.oprval.tref->operand[0].oprval.mlit->v)
		      : x.oprval.tref->operand[0].oprval.mlit->v.str.len)))
	{	/* Potential shortcut to op_fnzp1 or op_fnp1. Make some further checks */
		delim_mval = &x.oprval.tref->operand[0].oprval.mlit->v;
		/* Both valid chars of char_len 1 and invalid chars of byte length 1 get the fast path */
		unichar.unichar_val = 0;
		if (!gtm_utf8_mode || OC_FNZPIECE == op)
		{       /* Single byte delimiter */
			r->opcode = OC_FNZP1;
			unichar.unibytes_val[0] = *delim_mval->str.addr;
		} else
		{       /* Potentially multiple bytes in one int */
			r->opcode = OC_FNP1;
			assert(SIZEOF(int) >= delim_mval->str.len);
			memcpy(unichar.unibytes_val, delim_mval->str.addr, delim_mval->str.len);
		}
		delimiter->operand[0] = put_ilit(unichar.unichar_val);
		/* If we have all literals, run at compile time and return the result. To maintain backwards compatibility,
		 * we should emit a warning if there is an invalid UTF8 character, but continue compilation anyaway.
		 */
		if ((OC_LIT == r->operand[0].oprval.tref->opcode)
			&& (OC_ILIT == delimiter->operand[0].oprval.tref->opcode)
			&& (OC_ILIT == first->operand[0].oprval.tref->opcode)
			&& (!gtm_utf8_mode || (valid_utf_string(&r->operand[0].oprval.tref->operand[0].oprval.mlit->v.str)
				&& valid_utf_string(&x.oprval.tref->operand[0].oprval.mlit->v.str))))
		{	/* We don't know how much space we will use; but we know it will be <= the size of the current string */
			if (scratch_space.len < r->operand[0].oprval.tref->operand[0].oprval.mlit->v.str.len)
			{
				if (NULL != scratch_space.addr)
					free(scratch_space.addr);
				scratch_space.addr = malloc(r->operand[0].oprval.tref->operand[0].oprval.mlit->v.str.len);
				scratch_space.len = r->operand[0].oprval.tref->operand[0].oprval.mlit->v.str.len;
			}
			tmp_mval.str.addr = scratch_space.addr;
			if (OC_FNZP1 == r->opcode)
			{
				op_fnzp1(&r->operand[0].oprval.tref->operand[0].oprval.mlit->v, /* First string */
					delimiter->operand[0].oprval.tref->operand[0].oprval.ilit,
					first->operand[0].oprval.tref->operand[0].oprval.ilit,
					&tmp_mval);
			} else
			{
				op_fnp1(&r->operand[0].oprval.tref->operand[0].oprval.mlit->v, /* First string */
					delimiter->operand[0].oprval.tref->operand[0].oprval.ilit,
					first->operand[0].oprval.tref->operand[0].oprval.ilit,
					&tmp_mval);
			}
			s2pool(&tmp_mval.str);
			newop = (oprtype *)mcalloc(SIZEOF(oprtype));
			*newop = put_lit(&tmp_mval);				/* Copies mval so stack var tmp_mval not an issue */
			assert(TRIP_REF == newop->oprclass);
			newop->oprval.tref->src = r->src;
			*a = put_tref(newop->oprval.tref);
			return TRUE;

		}
		ins_triple(r);
		*a = put_tref(r);
		return TRUE;
	}
Ejemplo n.º 4
0
/*
 * ----------------------------------------------------------
 * Fast path setpiece when delimiter is one (lit) char replacing
 * a single piece (last is same as first). Unicode flavor.
 *
 * Arguments:
 *	src	- source mval
 *	delim	- delimiter char
 *	expr	- expression string mval
 *	ind	- index in source mval to be set
 *	dst	- destination mval where the result is saved.
 *
 * Return:
 *	none
 * ----------------------------------------------------------
 */
void op_setp1(mval *src, int delim, mval *expr, int ind, mval *dst)
{
	size_t		str_len, delim_cnt;
	int		len, pfx_str_len, sfx_start_offset, sfx_str_len, rep_str_len, pfx_scan_offset;
	int		dlmlen, cpy_cache_lines, mblen;
	unsigned char	*start_sfx, *str_addr, *end_pfx, *end_src, *start_pfx;
	boolean_t	do_scan, delim_last_scan, valid_char;
	mval		dummymval;	/* It's value is not used but is part of the call to op_fnp1() */
	fnpc		*cfnpc, *pfnpc;
	delimfmt	ldelim;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gtm_utf8_mode);
	do_scan = FALSE;
	cpy_cache_lines = -1;
	ldelim.unichar_val = delim;
        if (!UTF8_VALID(ldelim.unibytes_val, (ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val)), dlmlen)
	    && !badchar_inhibit)
	{	/* The delimiter is a bad character so error out if badchar not inhibited */
		UTF8_BADCHAR(0, ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), 0, NULL);
	}
	MV_FORCE_STR(expr);	/* Expression to put into piece place */
	if (MV_DEFINED(src))
	{
		/* We have 3 possible scenarios:
		 * 1) The source string is null. Nothing to do but proceed to building output.
		 * 2) If the requested piece is larger than can be cached by op_fnp1, call fnp1
		 *    for the maximum piece possible, use the cache info to "prime the pump" and
		 *    then process the rest of the string ourselves.
		 * 3) If the requested piece can be obtained from the cache, call op_fnp1 to validate
		 *    and rebuild the cache if necessary and then retrieve the necessary info from
		 *    the fnpc cache.
		 */
		MV_FORCE_STR(src);	/* Make sure is string prior to length check */
		if (0 == src->str.len)
		{	/* We have a null source string */
			pfx_str_len = sfx_str_len = sfx_start_offset = 0;
			delim_cnt = (0 < ind) ? (size_t)ind - 1 : 0;
		} else if (FNPC_ELEM_MAX >= ind)
		{	/* 3) Best of all possible cases. The op_fnp1 can do most of our work for us
			 *    and we can preload the cache on the new string to help its subsequent
			 *    uses along as well.
			 */
			SETWON;
			op_fnp1(src, delim, ind, &dummymval);
			SETWOFF;
			cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
			assert(cfnpc->last_str.addr == src->str.addr);
			assert(cfnpc->last_str.len == src->str.len);
			assert(cfnpc->delim == delim);
			assert(0 < cfnpc->npcs);
			/* Three more scenarios: #1 piece all in cache, #2 piece would be in cache but ran
			 * out of text or #3 piece is beyond what can be cached
			 */
			if (cfnpc->npcs >= ind)
			{	/* #1 The piece we want is totally within the cache which is good news */
				pfx_str_len = cfnpc->pstart[ind - 1];
				delim_cnt = 0;
				sfx_start_offset = cfnpc->pstart[ind] - dlmlen;				/* Include delimiter */
				rep_str_len = cfnpc->pstart[ind] - cfnpc->pstart[ind - 1] - dlmlen;	/* Replace string length */
				sfx_str_len = src->str.len - pfx_str_len - rep_str_len;
				cpy_cache_lines = ind - 1;
			} else
			{	/* #2 The string was too short so the cache does not contain our string. This means
				 * that the prefix becomes any text that IS in the cache and we set the delim_cnt
				 * to be the number of missing pieces so the delimiters can be put in as part of the
				 * prefix when we build the new string.
				 */
				pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
				delim_cnt = (size_t)(ind - cfnpc->npcs);
				sfx_start_offset = 0;
				sfx_str_len = 0;
				cpy_cache_lines = cfnpc->npcs;
			}
		} else
		{	/* 2) We have a element that would not be able to be in the fnpc cache. Go ahead
			 *    and call op_fnp1 to get cache info up to the maximum and then we will continue
			 *    the scan on our own.
			 */
			SETWON;
			op_fnp1(src, delim, FNPC_ELEM_MAX, &dummymval);
			SETWOFF;
			cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
			assert(cfnpc->last_str.addr == src->str.addr);
			assert(cfnpc->last_str.len == src->str.len);
			assert(cfnpc->delim == delim);
			assert(0 < cfnpc->npcs);
			if (FNPC_ELEM_MAX > cfnpc->npcs)
			{	/* We ran out of text so the scan is complete. This is basically the same
				 * as case #2 above.
				 */
				pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
				delim_cnt = (size_t)(ind - cfnpc->npcs);
				sfx_start_offset = 0;
				sfx_str_len = 0;
				cpy_cache_lines = cfnpc->npcs;
			} else
			{	/* We have a case where the piece we want cannot be kept in cache. In the special
				 * case where there is no more text to handle, we don't need to scan further. Otherwise
				 * we prime the pump and continue the scan where the cache left off.
				 */
				if ((pfx_scan_offset = cfnpc->pstart[FNPC_ELEM_MAX]) < src->str.len)	/* Note assignment */
					/* Normal case where we prime the pump */
					do_scan = TRUE;
				else
				{	/* Special case -- no more text to scan */
					pfx_str_len = cfnpc->pstart[FNPC_ELEM_MAX] - dlmlen;
					sfx_start_offset = 0;
					sfx_str_len = 0;
				}
				delim_cnt = (size_t)ind - FNPC_ELEM_MAX;
				cpy_cache_lines = FNPC_ELEM_MAX;
			}
		}
	} else
	{	/* Source is not defined -- treat as a null string */
		pfx_str_len = sfx_str_len = sfx_start_offset = 0;
		delim_cnt = (size_t)ind - 1;
	}
	/* If we have been forced to do our own scan, do that here. Note the variable pfx_scan_offset has been
	 * set to where the scan should begin in the src string and delim_cnt has been set to how many delimiters
	 * still need to be processed.
	 */
	if (do_scan)
	{	/* Scan the line isolating prefix piece, and end of the
		 * piece being replaced
		 */
		COUNT_EVENT(small);
		end_pfx = start_sfx = (unsigned char *)src->str.addr + pfx_scan_offset;
		end_src = (unsigned char *)src->str.addr + src->str.len;
		/* The compiler would unroll this loop this way anyway but we want to
		 * adjust the start_sfx pointer after the loop but only if we have gone
		 * into it at least once.
		 */
		if ((0 < delim_cnt) && (start_sfx < end_src))
		{
			do
			{
				end_pfx = start_sfx;
				delim_last_scan = FALSE;		/* Whether delimiter is last character scanned */
				while (start_sfx < end_src)
				{
					valid_char = UTF8_VALID(start_sfx, end_src, mblen); /* Length of next char */
					if (!valid_char)
					{	/* Next character is not valid unicode. If badchar error is not inhibited,
						 * signal it now. If it is inhibited, just treat the character as a single
						 * character and continue.
						 */
						if (!badchar_inhibit)
							utf8_badchar(0, start_sfx, end_src, 0, NULL);
						assert(1 == mblen);
					}
					/* Getting mblen first allows us to do quick length compare before the
					 * heavier weight memcmp call.
					 */
					assert(0 < mblen);
					if (mblen == dlmlen && 0 == memcmp(start_sfx, ldelim.unibytes_val, dlmlen))
					{
						delim_last_scan = TRUE;
						break;
					}
					/* Increment ptrs by size of found char */
					start_sfx += mblen;
				}
				start_sfx += dlmlen;
				delim_cnt--;
			} while ((0 < delim_cnt) && (start_sfx < end_src));
			/* We have to backup up the suffix start pointer except under the condition
			 * that the last character in the buffer is the last delimiter we were looking
			 * for.
			 */
			if ((0 == delim_cnt) || (start_sfx < end_src) || !delim_last_scan)
				start_sfx -= dlmlen;			/* Back up suffix to include delimiter char */
			/* If we scanned to the end (no text left) and still have delimiters to
			 * find, the entire src text should be part of the prefix
			 */
			if ((start_sfx >= end_src) && (0 < delim_cnt))
			{
				end_pfx = start_sfx;
				if (delim_last_scan)			/* if last char was delim, reduce delim cnt */
					--delim_cnt;
			}
		} else
		{
			/* If not doing any token finding, then this count becomes the number
			 * of tokens to output. Adjust accordingly.
			 */
			if (0 < delim_cnt)
				--delim_cnt;
		}
		INCR_COUNT(small_pcs, (int)((size_t)ind - delim_cnt));
		/* Now having the following situation:
		 * end_pfx	-> end of the prefix piece including delimiter
		 * start_sfx	-> start of suffix piece (with delimiter) or = end_pfx/src->str.addr if none
		 */
		pfx_str_len = (int)(end_pfx - (unsigned char *)src->str.addr);
		if (0 > pfx_str_len)
			pfx_str_len = 0;
		sfx_start_offset = (int)(start_sfx - (unsigned char *)src->str.addr);
		sfx_str_len = src->str.len - sfx_start_offset;
		if (0 > sfx_str_len)
			sfx_str_len = 0;
	}
	/* Calculate total string len. delim_cnt has needed padding delimiters for null fields */
	str_len = (size_t)expr->str.len + (size_t)pfx_str_len + (delim_cnt * (size_t)dlmlen) + (size_t)sfx_str_len;
	if (MAX_STRLEN < str_len)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN);
	ENSURE_STP_FREE_SPACE((int)str_len);
	str_addr = stringpool.free;
	start_pfx = (unsigned char *)src->str.addr;
	/* copy prefix */
	if (0 < pfx_str_len)
	{
		memcpy(str_addr, src->str.addr, pfx_str_len);
		str_addr += pfx_str_len;
	}
	/* copy delimiters */
	while (0 < delim_cnt--)
	{
		memcpy(str_addr, ldelim.unibytes_val, dlmlen);
		str_addr += dlmlen;
	}
	/* copy expression */
	if (0 < expr->str.len)
	{
		memcpy(str_addr, expr->str.addr, expr->str.len);
		str_addr += expr->str.len;
	}
	/* copy suffix */
	if (0 < sfx_str_len)
	{
		memcpy(str_addr, start_pfx + sfx_start_offset, sfx_str_len);
		str_addr += sfx_str_len;
	}
	assert(IS_AT_END_OF_STRINGPOOL(str_addr, -str_len));
	dst->mvtype = MV_STR;
	dst->str.len = INTCAST(str_addr - stringpool.free);
	dst->str.addr = (char *)stringpool.free;
	stringpool.free = str_addr;
	/* If available, update the cache information for this newly created mval to hopefully
	 * give it a head start on its next usage. Note that we can only copy over the cache info
	 * for the prefix. We cannot include information for the 'expression' except where it starts
	 * because the expression could itself contain delimiters that would be found on a rescan.
	 */
	if (0 < cpy_cache_lines)
	{
		pfnpc = cfnpc;				/* pointer for src mval's cache */
		do
		{
			cfnpc = (TREF(fnpca)).fnpcsteal;	/* Next cache element to steal */
			if ((TREF(fnpca)).fnpcmax < cfnpc)
				cfnpc = &(TREF(fnpca)).fnpcs[0];
			(TREF(fnpca)).fnpcsteal = cfnpc + 1;	/* -> next element to steal */
		} while (cfnpc == pfnpc);		/* Make sure we don't step on ourselves */
		cfnpc->last_str = dst->str;		/* Save validation info */
		cfnpc->delim = delim;
		cfnpc->npcs = cpy_cache_lines;
		dst->fnpc_indx = cfnpc->indx + 1;	/* Save where we are putting this element
							 * (1 based index in mval so 0 isn't so common)
							 */
		memcpy(&cfnpc->pstart[0], &pfnpc->pstart[0], (cfnpc->npcs + 1) * SIZEOF(unsigned int));
	} else