bool Perl_is_uni_idfirst(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXBYTES+1]; uvchr_to_utf8(tmpbuf, c); return _is_utf8_idstart(tmpbuf); }
STATIC I32 S_do_trans_simple(pTHX_ SV * const sv) { dVAR; I32 matches = 0; STRLEN len; U8 *s = (U8*)SvPV(sv,len); U8 * const send = s+len; const short * const tbl = (short*)cPVOP->op_pv; if (!tbl) Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); /* First, take care of non-UTF-8 input strings, because they're easy */ if (!SvUTF8(sv)) { while (s < send) { const I32 ch = tbl[*s]; if (ch >= 0) { matches++; *s = (U8)ch; } s++; } SvSETMAGIC(sv); } else { const I32 grows = PL_op->op_private & OPpTRANS_GROWS; U8 *d; U8 *dstart; /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ if (grows) Newx(d, len*2+1, U8); else d = s; dstart = d; while (s < send) { STRLEN ulen; I32 ch; /* Need to check this, otherwise 128..255 won't match */ const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); if (c < 0x100 && (ch = tbl[c]) >= 0) { matches++; d = uvchr_to_utf8(d, ch); s += ulen; } else { /* No match -> copy */ Move(s, d, ulen, U8); d += ulen; s += ulen; } } if (grows) { sv_setpvn(sv, (char*)dstart, d - dstart); Safefree(dstart); } else { *d = '\0'; SvCUR_set(sv, d - dstart); } SvUTF8_on(sv); SvSETMAGIC(sv); } return matches; }
STATIC I32 S_do_trans_complex(pTHX_ SV * const sv) { dVAR; STRLEN len; U8 *s = (U8*)SvPV(sv, len); U8 * const send = s+len; I32 matches = 0; const short * const tbl = (short*)cPVOP->op_pv; if (!tbl) Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); if (!SvUTF8(sv)) { U8 *d = s; U8 * const dstart = d; if (PL_op->op_private & OPpTRANS_SQUASH) { const U8* p = send; while (s < send) { const I32 ch = tbl[*s]; if (ch >= 0) { *d = (U8)ch; matches++; if (p != d - 1 || *p != *d) p = d++; } else if (ch == -1) /* -1 is unmapped character */ *d++ = *s; else if (ch == -2) /* -2 is delete character */ matches++; s++; } } else { while (s < send) { const I32 ch = tbl[*s]; if (ch >= 0) { matches++; *d++ = (U8)ch; } else if (ch == -1) /* -1 is unmapped character */ *d++ = *s; else if (ch == -2) /* -2 is delete character */ matches++; s++; } } *d = '\0'; SvCUR_set(sv, d - dstart); } else { /* is utf8 */ const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; const I32 grows = PL_op->op_private & OPpTRANS_GROWS; const I32 del = PL_op->op_private & OPpTRANS_DELETE; U8 *d; U8 *dstart; STRLEN rlen = 0; if (grows) Newx(d, len*2+1, U8); else d = s; dstart = d; if (complement && !del) rlen = tbl[0x100]; #ifdef MACOS_TRADITIONAL #define comp CoMP /* "comp" is a keyword in some compilers ... */ #endif if (PL_op->op_private & OPpTRANS_SQUASH) { UV pch = 0xfeedface; while (s < send) { STRLEN len; const UV comp = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT); I32 ch; if (comp > 0xff) { if (!complement) { Move(s, d, len, U8); d += len; } else { matches++; if (!del) { ch = (rlen == 0) ? (I32)comp : (comp - 0x100 < rlen) ? tbl[comp+1] : tbl[0x100+rlen]; if ((UV)ch != pch) { d = uvchr_to_utf8(d, ch); pch = (UV)ch; } s += len; continue; } } } else if ((ch = tbl[comp]) >= 0) { matches++; if ((UV)ch != pch) { d = uvchr_to_utf8(d, ch); pch = (UV)ch; } s += len; continue; } else if (ch == -1) { /* -1 is unmapped character */ Move(s, d, len, U8); d += len; } else if (ch == -2) /* -2 is delete character */ matches++; s += len; pch = 0xfeedface; } } else { while (s < send) { STRLEN len; const UV comp = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT); I32 ch; if (comp > 0xff) { if (!complement) { Move(s, d, len, U8); d += len; } else { matches++; if (!del) { if (comp - 0x100 < rlen) d = uvchr_to_utf8(d, tbl[comp+1]); else d = uvchr_to_utf8(d, tbl[0x100+rlen]); } } } else if ((ch = tbl[comp]) >= 0) { d = uvchr_to_utf8(d, ch); matches++; } else if (ch == -1) { /* -1 is unmapped character */ Move(s, d, len, U8); d += len; } else if (ch == -2) /* -2 is delete character */ matches++; s += len; } } if (grows) { sv_setpvn(sv, (char*)dstart, d - dstart); Safefree(dstart); } else { *d = '\0'; SvCUR_set(sv, d - dstart); } SvUTF8_on(sv); } SvSETMAGIC(sv); return matches; }
STATIC I32 S_do_trans_simple_utf8(pTHX_ SV * const sv) { U8 *s; U8 *send; U8 *d; U8 *start; U8 *dstart, *dend; I32 matches = 0; const I32 grows = PL_op->op_private & OPpTRANS_GROWS; STRLEN len; SV* const rv = #ifdef USE_ITHREADS PAD_SVl(cPADOP->op_padix); #else MUTABLE_SV(cSVOP->op_sv); #endif HV* const hv = MUTABLE_HV(SvRV(rv)); SV* const * svp = hv_fetchs(hv, "NONE", FALSE); const UV none = svp ? SvUV(*svp) : 0x7fffffff; const UV extra = none + 1; UV final = 0; U8 hibit = 0; PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8; s = (U8*)SvPV_nomg(sv, len); if (!SvUTF8(sv)) { const U8 *t = s; const U8 * const e = s + len; while (t < e) { const U8 ch = *t++; hibit = !NATIVE_BYTE_IS_INVARIANT(ch); if (hibit) { s = bytes_to_utf8(s, &len); break; } } } send = s + len; start = s; svp = hv_fetchs(hv, "FINAL", FALSE); if (svp) final = SvUV(*svp); if (grows) { /* d needs to be bigger than s, in case e.g. upgrading is required */ Newx(d, len * 3 + UTF8_MAXBYTES, U8); dend = d + len * 3; dstart = d; } else { dstart = d = s; dend = d + len; } while (s < send) { const UV uv = swash_fetch(rv, s, TRUE); if (uv < none) { s += UTF8SKIP(s); matches++; d = uvchr_to_utf8(d, uv); } else if (uv == none) { const int i = UTF8SKIP(s); Move(s, d, i, U8); d += i; s += i; } else if (uv == extra) { s += UTF8SKIP(s); matches++; d = uvchr_to_utf8(d, final); } else