Ejemplo n.º 1
0
/*
 * EL - return a writable byte buffer backed by the given scalar.
 *
 * Croaks if sv is read-only.  If sv is a reference, the referent is used
 * as-is (needlen is not applied to it).  Otherwise sv is upgraded to a string
 * scalar, grown to hold needlen bytes plus a trailing NUL, and its current
 * length is set to needlen.
 *
 * Returns the string pointer obtained by SvPV_force() on the chosen scalar.
 */
GLvoid * EL(SV * sv, int needlen)
{
    STRLEN forced_len = 0;  /* length reported by SvPV_force(); not used */

    if (SvREADONLY(sv))
        croak("Readonly value for buffer");

    if (!SvROK(sv)) {
#ifdef USE_STRICT_UNGLOB
        /* A fake glob has to be turned back into a plain scalar first. */
        if (SvFAKE(sv) && SvTYPE(sv) == SVt_PVGV)
            sv_unglob(sv);
#endif

        /* Make sv a string of exactly needlen bytes, NUL-terminated. */
        SvUPGRADE(sv, SVt_PV);
        SvGROW(sv, (unsigned int)(needlen + 1));
        SvPOK_on(sv);
        SvCUR_set(sv, needlen);
        *SvEND(sv) = '\0';
    }
    else {
        /* Caller passed a reference: operate on what it points to. */
        sv = SvRV(sv);
    }

    return SvPV_force(sv, forced_len);
}
Ejemplo n.º 2
0
/*
 * PJS_ConvertUC - obtain the bytes of a Perl scalar for handing to the
 * JavaScript side.
 *
 * sv is first forced to a string.  If it is not flagged UTF-8, or it can be
 * downgraded in place, the scalar's own buffer is returned and *len is set to
 * its byte length.
 *
 * Otherwise Encode::encode(PJS_STR_ENCODING, <copy of sv>) is called and the
 * buffer of a copy of its result is returned; that copy is registered with
 * SAVEMORTALIZESV so it is mortalized when the local scope is left.  In this
 * case *len is set to the negated half of the encoded byte count
 * (NOTE(review): presumably callers read a negative length as "this many
 * two-byte units" -- confirm against the call sites).
 */
char *
PJS_ConvertUC(
    pTHX_
    SV *sv,
    STRLEN *len
) {
    dSP;
    char *bytes;
    STRLEN nbytes;
    SV *encoded;

    SvPV_force(sv, nbytes);

    /* Fast path: plain bytes, or UTF-8 that downgrades cleanly. */
    if (!SvUTF8(sv) || sv_utf8_downgrade(sv, 1)) {
        bytes = SvPV(sv, nbytes);
        *len = nbytes;
        return bytes;
    }

    /* Slow path: let Perl's Encode module do the conversion. */
    ENTER;
    SAVETMPS;

    PUSHMARK(SP);
    XPUSHs(sv_2mortal(newSVpv(PJS_STR_ENCODING, 0)));
    XPUSHs(sv_mortalcopy(sv));
    PUTBACK;

    call_pv("Encode::encode", G_SCALAR);

    SPAGAIN;
    /* Copy the result so it is not released by FREETMPS below. */
    encoded = newSVsv(POPs);
    SAVEMORTALIZESV(encoded);
    bytes = SvPV(encoded, nbytes);
    PUTBACK;

    FREETMPS;
    LEAVE;

    *len = -(nbytes / 2);
    return bytes;
}
Ejemplo n.º 3
0
/*
 * sv_lower - lower-case the string value of sv in place.
 *
 * The scalar is forced to a string and every byte of it is passed through
 * toLOWER().  Returns the same sv.
 */
EXTERN SV*
sv_lower(pTHX_ SV* sv)
{
    STRLEN nbytes;
    char *cur = SvPV_force(sv, nbytes);
    char *stop = cur + nbytes;

    while (cur < stop) {
	*cur = toLOWER(*cur);
	cur++;
    }

    return sv;
}
Ejemplo n.º 4
0
/*
 * decode_entities - replace HTML character references in sv, in place.
 *
 * The string of sv is forced to a PV and rewritten using a read cursor (s)
 * and a write cursor (t).  t never passes s; when a replacement is longer
 * than the text it replaces, grow_gap() is called to make room.
 *
 *   sv             scalar whose string is decoded in place
 *   entity2char    hash mapping entity names to replacement strings; a name
 *                  is looked up as written and, if followed by ';', also
 *                  with the ';' appended.  May be NULL, in which case only
 *                  numeric references are decoded.
 *   expand_prefix  when true and the whole name is not found, progressively
 *                  shorter prefixes of the name are tried.
 *
 * Numeric references (&#NNN and &#xHHH, trailing ';' optional) are decoded
 * when the value is non-zero and at most 0x10FFFF.  Without
 * UNICODE_HTML_PARSER only values up to 255 are replaced.  With it, values
 * up to 255 in a non-UTF-8 string become a single byte, 0xFFFE and 0xFFFF
 * are left undecoded, a high-surrogate reference immediately followed by a
 * low-surrogate reference is combined into one code point, and lone
 * surrogates and other noncharacters become U+FFFD.  A non-UTF-8 string is
 * upgraded to UTF-8 when a UTF-8 replacement is needed.
 *
 * References that cannot be decoded are copied through unchanged.
 *
 * Returns sv.
 */
EXTERN SV*
decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
{
    STRLEN len;
    char *s = SvPV_force(sv, len);
    char *t = s;
    char *end = s + len;
    char *ent_start;

    char *repl;
    STRLEN repl_len;
#ifdef UNICODE_HTML_PARSER
    char buf[UTF8_MAXLEN];
    int repl_utf8;
    int high_surrogate = 0;
#else
    char buf[1];
#endif

#if defined(__GNUC__) && defined(UNICODE_HTML_PARSER)
    /* gcc -Wall reports this variable as possibly used uninitialized */
    repl_utf8 = 0;
#endif

    while (s < end) {
	assert(t <= s);

	/* Copy bytes through until an '&' has been copied. */
	if ((*t++ = *s++) != '&')
	    continue;

	ent_start = s;
	repl = 0;

	if (s < end && *s == '#') {
	    /* Numeric character reference. */
	    UV num = 0;
	    int ok = 0;
	    s++;
	    if (s < end && (*s == 'x' || *s == 'X')) {
		s++;
		while (s < end) {
		    char *tmp;
		    /* strchr() also matches the terminating NUL of
		     * PL_hexdigit, so an embedded NUL byte in the input
		     * must be rejected before the lookup. */
		    if (*s == '\0')
			break;
		    tmp = strchr(PL_hexdigit, *s);
		    if (!tmp)
			break;
		    num = num << 4 | ((tmp - PL_hexdigit) & 15);
		    if (num > 0x10FFFF) {
			/* overflow */
			ok = 0;
			break;
		    }
		    s++;
		    ok = 1;
		}
	    }
	    else {
		while (s < end && isDIGIT(*s)) {
		    num = num * 10 + (*s - '0');
		    if (num > 0x10FFFF) {
			/* overflow */
			ok = 0;
			break;
		    }
		    s++;
		    ok = 1;
		}
	    }
	    if (num && ok) {
#ifdef UNICODE_HTML_PARSER
		if (!SvUTF8(sv) && num <= 255) {
		    buf[0] = (char) num;
		    repl = buf;
		    repl_len = 1;
		    repl_utf8 = 0;
		}
		else if (num == 0xFFFE || num == 0xFFFF) {
		    /* illegal */
		}
		else {
		    char *tmp;
		    if ((num & 0xFFFFFC00) == 0xDC00) {  /* low-surrogate */
			if (high_surrogate != 0) {
			    t -= 3; /* Back up past 0xFFFD */
			    num = ((high_surrogate - 0xD800) << 10) +
				(num - 0xDC00) + 0x10000;
			    high_surrogate = 0;
			} else {
			    num = 0xFFFD;
			}
		    }
		    else if ((num & 0xFFFFFC00) == 0xD800) { /* high-surrogate */
			/* Emit U+FFFD for now; it is replaced if a
			 * low surrogate follows immediately. */
			high_surrogate = num;
			num = 0xFFFD;
		    }
		    else {
			high_surrogate = 0;
			/* otherwise invalid? */
			if ((num >= 0xFDD0 && num <= 0xFDEF) ||
			    ((num & 0xFFFE) == 0xFFFE) ||
			    num > 0x10FFFF)
			{
			    num = 0xFFFD;
			}
		    }

		    tmp = (char*)uvuni_to_utf8((U8*)buf, num);
		    repl = buf;
		    repl_len = tmp - buf;
		    repl_utf8 = 1;
		}
#else
		if (num <= 255) {
		    buf[0] = (char) num & 0xFF;
		    repl = buf;
		    repl_len = 1;
		}
#endif
	    }
	}
	else {
	    /* Named entity: collect the run of alphanumerics. */
	    char *ent_name = s;
	    while (s < end && isALNUM(*s))
		s++;
	    if (ent_name != s && entity2char) {
		SV** svp;
		if (              (svp = hv_fetch(entity2char, ent_name, s - ent_name, 0)) ||
		    (*s == ';' && (svp = hv_fetch(entity2char, ent_name, s - ent_name + 1, 0)))
		   )
		{
		    repl = SvPV(*svp, repl_len);
#ifdef UNICODE_HTML_PARSER
		    repl_utf8 = SvUTF8(*svp);
#endif
		}
		else if (expand_prefix) {
		    /* Try successively shorter prefixes of the name. */
		    char *ss = s - 1;
		    while (ss > ent_name) {
			svp = hv_fetch(entity2char, ent_name, ss - ent_name, 0);
			if (svp) {
			    repl = SvPV(*svp, repl_len);
#ifdef UNICODE_HTML_PARSER
			    repl_utf8 = SvUTF8(*svp);
#endif
			    s = ss;
			    break;
			}
			ss--;
		    }
		}
	    }
#ifdef UNICODE_HTML_PARSER
	    high_surrogate = 0;
#endif
	}

	if (repl) {
	    char *repl_allocated = 0;
	    if (s < end && *s == ';')
		s++;
	    t--;  /* '&' already copied, undo it */

#ifdef UNICODE_HTML_PARSER
	    /* A pending high surrogate only survives if another
	     * reference follows directly. */
	    if (*s != '&') {
		high_surrogate = 0;
	    }

	    if (!SvUTF8(sv) && repl_utf8) {
		/* need to upgrade sv before we continue */
		STRLEN before_gap_len = t - SvPVX(sv);
		char *before_gap = (char*)bytes_to_utf8((U8*)SvPVX(sv), &before_gap_len);
		STRLEN after_gap_len = end - s;
		char *after_gap = (char*)bytes_to_utf8((U8*)s, &after_gap_len);

		sv_setpvn(sv, before_gap, before_gap_len);
		sv_catpvn(sv, after_gap, after_gap_len);
		SvUTF8_on(sv);

		Safefree(before_gap);
		Safefree(after_gap);

		/* The gap is now empty; both cursors restart at the join. */
		s = t = SvPVX(sv) + before_gap_len;
		end = SvPVX(sv) + before_gap_len + after_gap_len;
	    }
	    else if (SvUTF8(sv) && !repl_utf8) {
		repl = (char*)bytes_to_utf8((U8*)repl, &repl_len);
		repl_allocated = repl;
	    }
#endif

	    if (t + repl_len > s) {
		/* need to grow the string */
		grow_gap(aTHX_ sv, repl_len - (s - t), &t, &s, &end);
	    }

	    /* copy replacement string into string */
	    while (repl_len--)
		*t++ = *repl++;

	    if (repl_allocated)
		Safefree(repl_allocated);
	}
	else {
#ifdef UNICODE_HTML_PARSER
	    /* The reference is copied through as literal text, so the
	     * output no longer ends in the U+FFFD placeholder of a
	     * pending high surrogate; forget it, otherwise a later low
	     * surrogate would back up over unrelated bytes. */
	    high_surrogate = 0;
#endif
	    while (ent_start < s)
		*t++ = *ent_start++;
	}
    }

    *t = '\0';
    SvCUR_set(sv, t - SvPVX(sv));

    return sv;
}