static xmlChar *
xmlPatScanNCName(xmlPatParserContextPtr ctxt) {
    const xmlChar *q, *cur;
    xmlChar *ret = NULL;
    int val, len;

    SKIP_BLANKS;

    cur = q = CUR_PTR;
    val = xmlStringCurrentChar(NULL, cur, &len);
    if (!IS_LETTER(val) && (val != '_'))
	return(NULL);

    while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
           (val == '.') || (val == '-') ||
	   (val == '_') ||
	   (IS_COMBINING(val)) ||
	   (IS_EXTENDER(val))) {
	cur += len;
	val = xmlStringCurrentChar(NULL, cur, &len);
    }
    ret = xmlStrndup(q, cur - q);
    CUR_PTR = cur;
    return(ret);
}
Exemple #2
0
void
doinsert(ZLE_STRING_T zstr, int len)
{
    ZLE_STRING_T s;
    ZLE_CHAR_T c1 = *zstr;	     /* first character */
    int neg = zmult < 0;             /* insert *after* the cursor? */
    int m = neg ? -zmult : zmult;    /* number of copies to insert */
    int count;

    UNMETACHECK();

    iremovesuffix(c1, 0);
    invalidatelist();

    /* In overwrite mode, don't replace newlines. */
    if (insmode || zleline[zlecs] == ZWC('\n'))
	spaceinline(m * len);
    else
    {
	int pos = zlecs, diff, i;
#ifdef MULTIBYTE_SUPPORT
	/*
	 * Calculate the number of character positions we are
	 * going to be using.  The algorithm is that
	 * anything that shows up as a logical single character
	 * (i.e. even if control, or double width, or with combining
	 * characters) is treated as 1 for the purpose of replacing
	 * what's there already.
	 *
	 * This can cause inserting of a combining character in
	 * places where it should overwrite, such as the start
	 * of a line.  However, combining characters aren't
	 * useful there anyway and this doesn't cause any
	 * particular harm.
	 */
	for (i = 0, count = 0; i < len * m; i++) {
	    if (!IS_COMBINING(zstr[i]))
		count++;
	}
#else
	count = len * m;
#endif
	/*
	 * Ensure we replace a complete combining characterfor each
	 * character we overwrite. Switch to inserting at first newline.
	 */
	for (i = count; pos < zlell && zleline[pos] != ZWC('\n') && i--; ) {
	    INCPOS(pos);
	}
	/*
	 * Calculate how many raw line places we need.
	 * pos - zlecs is the raw line distance we're replacing,
	 * m * len the number we're inserting.
	 */
	diff = pos - zlecs - m * len;
	if (diff < 0) {
	    spaceinline(-diff);
	} else if (diff > 0) {
	    /*
	     * We use shiftchars() here because we don't
	     * want combining char alignment fixed up: we
	     * are going to write over any that remain.
	     */
	    shiftchars(zlecs, diff);
	}
    }
    while (m--)
	for (s = zstr, count = len; count; s++, count--)
	    zleline[zlecs++] = *s;
    if (neg)
	zlecs += zmult * len;
    /* if we ended up on a combining character, skip over it */
    CCRIGHT();
}
Exemple #3
0
static int32_t
doWriteReverse(const UChar *src, int32_t srcLength,
               UChar *dest, int32_t destSize,
               uint16_t options,
               UErrorCode *pErrorCode) {
    /*
     * RTL run -
     *
     * RTL runs need to be copied to the destination in reverse order
     * of code points, not code units, to keep Unicode characters intact.
     *
     * The general strategy for this is to read the source text
     * in backward order, collect all code units for a code point
     * (and optionally following combining characters, see below),
     * and copy all these code units in ascending order
     * to the destination for this run.
     *
     * Several options request whether combining characters
     * should be kept after their base characters,
     * whether BiDi control characters should be removed, and
     * whether characters should be replaced by their mirror-image
     * equivalent Unicode characters.
     */
    int32_t i, j;
    UChar32 c;

    /* optimize for several combinations of options */
    switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
    case 0:
        /*
         * With none of the "complicated" options set, the destination
         * run will have the same length as the source run,
         * and there is no mirroring and no keeping combining characters
         * with their base characters.
         */
        if(destSize<srcLength) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        destSize=srcLength;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* collect code units for one base character */
            UTF_BACK_1(src, 0, srcLength);

            /* copy this base character */
            j=srcLength;
            do {
                *dest++=src[j++];
            } while(j<i);
        } while(srcLength>0);
        break;
    case UBIDI_KEEP_BASE_COMBINING:
        /*
         * Here, too, the destination
         * run will have the same length as the source run,
         * and there is no mirroring.
         * We do need to keep combining characters with their base characters.
         */
        if(destSize<srcLength) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        destSize=srcLength;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* collect code units and modifier letters for one base character */
            do {
                UTF_PREV_CHAR(src, 0, srcLength, c);
            } while(srcLength>0 && IS_COMBINING(u_charType(c)));

            /* copy this "user character" */
            j=srcLength;
            do {
                *dest++=src[j++];
            } while(j<i);
        } while(srcLength>0);
        break;
    default:
        /*
         * With several "complicated" options set, this is the most
         * general and the slowest copying of an RTL run.
         * We will do mirroring, remove BiDi controls, and
         * keep combining characters with their base characters
         * as requested.
         */
        if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
            i=srcLength;
        } else {
            /* we need to find out the destination length of the run,
               which will not include the BiDi control characters */
            int32_t length=srcLength;
            UChar ch;

            i=0;
            do {
                ch=*src++;
                if(!IS_BIDI_CONTROL_CHAR(ch)) {
                    ++i;
                }
            } while(--length>0);
            src-=srcLength;
        }

        if(destSize<i) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return i;
        }
        destSize=i;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* collect code units for one base character */
            UTF_PREV_CHAR(src, 0, srcLength, c);
            if(options&UBIDI_KEEP_BASE_COMBINING) {
                /* collect modifier letters for this base character */
                while(srcLength>0 && IS_COMBINING(u_charType(c))) {
                    UTF_PREV_CHAR(src, 0, srcLength, c);
                }
            }

            if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
                /* do not copy this BiDi control character */
                continue;
            }

            /* copy this "user character" */
            j=srcLength;
            if(options&UBIDI_DO_MIRRORING) {
                /* mirror only the base character */
                int32_t k=0;
                c=u_charMirror(c);
                UTF_APPEND_CHAR_UNSAFE(dest, k, c);
                dest+=k;
                j+=k;
            }
            while(j<i) {
                *dest++=src[j++];
            }
        } while(srcLength>0);
        break;
    } /* end of switch */

    return destSize;
}