Ejemplo n.º 1
0
char *
PJS_ConvertUC(
    pTHX_
    SV *sv,
    STRLEN *len
) {
    dSP;
    char *ret;
    STRLEN elen;
    SvPV_force(sv, elen);
    if(SvUTF8(sv) && !sv_utf8_downgrade(sv, 1)) {
	SV *svtmp;
	ENTER; SAVETMPS;
	PUSHMARK(SP);
	XPUSHs(sv_2mortal(newSVpv(PJS_STR_ENCODING, 0)));
	XPUSHs(sv_mortalcopy(sv));
	PUTBACK;
	call_pv("Encode::encode", G_SCALAR);
	SPAGAIN;
	svtmp = newSVsv(POPs);
	SAVEMORTALIZESV(svtmp);
	ret = SvPV(svtmp, elen);
	PUTBACK;
	FREETMPS; LEAVE;
	*len = -(elen / 2);
    } else {
	ret = SvPV(sv, elen);
	*len = elen;
    }
    return ret;
}
Ejemplo n.º 2
0
STATIC I32
S_do_trans_count(pTHX_ SV * const sv)
{
    dVAR;
    STRLEN len;
    const U8 *s = (const U8*)SvPV_const(sv, len);
    const U8 * const send = s + len;
    I32 matches = 0;

    const short * const tbl = (short*)cPVOP->op_pv;
    if (!tbl)
	Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);

    if (!SvUTF8(sv)) {
	while (s < send) {
            if (tbl[*s++] >= 0)
                matches++;
	}
    }
    else {
	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
	while (s < send) {
	    STRLEN ulen;
	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
	    if (c < 0x100) {
		if (tbl[c] >= 0)
		    matches++;
	    } else if (complement)
		matches++;
	    s += ulen;
	}
    }

    return matches;
}
Ejemplo n.º 3
0
static void
flush_pending_text(PSTATE* p_state, SV* self)
{
    dTHX;
    bool   old_unbroken_text = p_state->unbroken_text;
    SV*    old_pend_text     = p_state->pend_text;
    bool   old_is_cdata      = p_state->is_cdata;
    STRLEN old_offset        = p_state->offset;
    STRLEN old_line          = p_state->line;
    STRLEN old_column        = p_state->column;

    assert(p_state->pend_text && SvOK(p_state->pend_text));

    p_state->unbroken_text = 0;
    p_state->pend_text     = 0;
    p_state->is_cdata      = p_state->pend_text_is_cdata;
    p_state->offset        = p_state->pend_text_offset;
    p_state->line          = p_state->pend_text_line;
    p_state->column        = p_state->pend_text_column;

    report_event(p_state, E_TEXT,
		 SvPVX(old_pend_text), SvEND(old_pend_text), 
		 SvUTF8(old_pend_text), 0, 0, self);
    SvOK_off(old_pend_text);

    p_state->unbroken_text = old_unbroken_text;
    p_state->pend_text     = old_pend_text;
    p_state->is_cdata      = old_is_cdata;
    p_state->offset        = old_offset;
    p_state->line          = old_line;
    p_state->column        = old_column;
}
Ejemplo n.º 4
0
static void
my_sv_copypv(pTHX_ SV *const dsv, SV *const ssv)
{
    STRLEN len;
    const char * const s = SvPV_const(ssv,len);
    sv_setpvn(dsv,s,len);
    if (SvUTF8(ssv))
        SvUTF8_on(dsv);
    else
        SvUTF8_off(dsv);
}
Ejemplo n.º 5
0
/* Entry point for serialization. Dumps generic SVs and delegates
 * to more specialized functions for RVs, etc. */
void
ddl_dump_sv(pTHX_ ddl_encoder_t *enc, SV *src)
{
  SvGETMAGIC(src);

  /* dump strings */
  if (SvPOKp(src)) {
    STRLEN len;
    char *str = SvPV(src, len);
    BUF_SIZE_ASSERT(enc, 2 + len);
    ddl_dump_pv(aTHX_ enc, str, len, SvUTF8(src));
  }
  /* dump floats */
  else if (SvNOKp(src)) {
    BUF_SIZE_ASSERT(enc, NV_DIG + 32);
    Gconvert(SvNVX(src), NV_DIG, 0, enc->pos);
    enc->pos += strlen(enc->pos);
  }
  /* dump ints */
  else if (SvIOKp(src)) {
    /* we assume we can always read an IV as a UV and vice versa
     * we assume two's complement
     * we assume no aliasing issues in the union */
    if (SvIsUV(src) ? SvUVX(src) <= 59000
                    : SvIVX(src) <= 59000 && SvIVX(src) >= -59000)
    {
      /* optimise the "small number case"
       * code will likely be branchless and use only a single multiplication
       * works for numbers up to 59074 */
      I32 i = SvIVX(src);
      U32 u;
      char digit, nz = 0;

      BUF_SIZE_ASSERT(enc, 6);

      *enc->pos = '-'; enc->pos += i < 0 ? 1 : 0;
      u = i < 0 ? -i : i;

      /* convert to 4.28 fixed-point representation */
      u *= ((0xfffffff + 10000) / 10000); /* 10**5, 5 fractional digits */

      /* now output digit by digit, each time masking out the integer part
       * and multiplying by 5 while moving the decimal point one to the right,
       * resulting in a net multiplication by 10.
       * we always write the digit to memory but conditionally increment
       * the pointer, to enable the use of conditional move instructions. */
      digit = u >> 28; *enc->pos = digit + '0'; enc->pos += (nz = nz || digit); u = (u & 0xfffffffUL) * 5;
      digit = u >> 27; *enc->pos = digit + '0'; enc->pos += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5;
      digit = u >> 26; *enc->pos = digit + '0'; enc->pos += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5;
      digit = u >> 25; *enc->pos = digit + '0'; enc->pos += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5;
      digit = u >> 24; *enc->pos = digit + '0'; enc->pos += 1; /* correctly generate '0' */
    }
    else {
Ejemplo n.º 6
0
static void
Scalarize(pTHX_ SV *sv, AV *av)
{
 int n    = av_len(av)+1;
 if (n == 0)
  sv_setpvn(sv,"",0);
 else
  {
   SV **svp;
   if (n == 1 && (svp = av_fetch(av, 0, 0)))
    {
     STRLEN len = 0;
     char *s  = SvPV(*svp,len);
#ifdef SvUTF8
     int utf8 = SvUTF8(*svp);
     sv_setpvn(sv,s,len);
     if (utf8)
      SvUTF8_on(sv);
#else
     sv_setpvn(sv,s,len);
#endif
    }
   else
    {
     Tcl_DString ds;
     int i;
     Tcl_DStringInit(&ds);
     for (i=0; i < n; i++)
      {
       if ((svp = av_fetch(av, i, 0)))
        {
         SV *el = *svp;
         int temp = 0;
         if (SvROK(el) && !SvOBJECT(SvRV(el)) && SvTYPE(SvRV(el)) == SVt_PVAV)
          {
           el = newSVpv("",0);
           temp = 1;
           if ((AV *) SvRV(*svp) == av)
            abort();
           Scalarize(aTHX_ el,(AV *) SvRV(*svp));
          }
         Tcl_DStringAppendElement(&ds,Tcl_GetString(el));
         if (temp)
          SvREFCNT_dec(el);
        }
      }
     sv_setpvn(sv,Tcl_DStringValue(&ds), Tcl_DStringLength(&ds));
     sv_maybe_utf8(sv);
     Tcl_DStringFree(&ds);
    }
  }
}
Ejemplo n.º 7
0
U32 p5_sv_utf8(PerlInterpreter *my_perl, SV* sv) {
    if (SvUTF8(sv)) { // UTF-8 flag set -> can use string as-is
        return 1;
    }
    else { // pure 7 bit ASCII is valid UTF-8 as well
        STRLEN len;
        char * const pv  = SvPV(sv, len);
        STRLEN i;
        for (i = 0; i < len; i++)
            if (pv[i] < 0) // signed char!
                return 0;
        return 1;
    }
}
Ejemplo n.º 8
0
bool
LUCY_Doc_Equals_IMP(lucy_Doc *self, cfish_Obj *other) {
    if ((lucy_Doc*)other  == self)        { return true;  }
    if (!cfish_Obj_is_a(other, LUCY_DOC)) { return false; }
    lucy_DocIVARS *const ivars = lucy_Doc_IVARS(self);
    lucy_DocIVARS *const ovars = lucy_Doc_IVARS((lucy_Doc*)other);

    if (!ivars->doc_id == ovars->doc_id)   { return false; }
    if (!!ivars->fields ^ !!ovars->fields) { return false; }

    // Verify fields.  Don't allow any deep data structures.
    dTHX;
    HV *my_fields    = (HV*)ivars->fields;
    HV *other_fields = (HV*)ovars->fields;
    if (HvKEYS(my_fields) != HvKEYS(other_fields)) { return false; }
    I32 num_fields = hv_iterinit(my_fields);
    while (num_fields--) {
        HE *my_entry = hv_iternext(my_fields);
        SV *my_val_sv = HeVAL(my_entry);
        STRLEN key_len;
        char *key;

        if (HeKLEN(my_entry) == HEf_SVKEY) {
            SV *key_sv = HeKEY_sv(my_entry);
            key = SvPV(key_sv, key_len);
            if (SvUTF8(key_sv)) { key_len = -key_len; }
        }
        else {
            key_len = HeKLEN(my_entry);
            key = key_len ? HeKEY(my_entry) : Nullch;
            if (HeKUTF8(my_entry)) { key_len = -key_len; }
        }

        SV **const other_val = hv_fetch(other_fields, key, key_len, 0);
        if (!other_val) { return false; }
        if (!sv_eq(my_val_sv, *other_val)) { return false; }
    }

    return true;
}
Ejemplo n.º 9
0
STATIC I32
S_do_trans_count(pTHX_ SV *sv)
{
    U8 *s;
    U8 *send;
    I32 matches = 0;
    STRLEN len;
    short *tbl;
    I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;

    tbl = (short*)cPVOP->op_pv;
    if (!tbl)
	Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);

    s = (U8*)SvPV(sv, len);
    send = s + len;

    if (!SvUTF8(sv))
	while (s < send) {
            if (tbl[*s++] >= 0)
                matches++;
	}
    else
	while (s < send) {
	    UV c;
	    STRLEN ulen;
	    c = utf8n_to_uvchr(s, send - s, &ulen, 0);
	    if (c < 0x100) {
		if (tbl[c] >= 0)
		    matches++;
	    } else if (complement)
		matches++;
	    s += ulen;
	}

    return matches;
}
Ejemplo n.º 10
0
STATIC void
S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes,
                        HV *stash, HV *oldstash, SV *namesv)
{
    XPVHV* xhv;
    HE *entry;
    I32 riter = -1;
    I32 items = 0;
    const bool stash_had_name = stash && HvENAME(stash);
    bool fetched_isarev = FALSE;
    HV *seen = NULL;
    HV *isarev = NULL;
    SV **svp = NULL;

    PERL_ARGS_ASSERT_MRO_GATHER_AND_RENAME;

    /* We use the seen_stashes hash to keep track of which packages have
       been encountered so far. This must be separate from the main list of
       stashes, as we need to distinguish between stashes being assigned
       and stashes being replaced/deleted. (A nested stash can be on both
       sides of an assignment. We cannot simply skip iterating through a
       stash on the right if we have seen it on the left, as it will not
       get its ename assigned to it.)

       To avoid allocating extra SVs, instead of a bitfield we can make
       bizarre use of immortals:

        &PL_sv_undef:  seen on the left  (oldstash)
        &PL_sv_no   :  seen on the right (stash)
        &PL_sv_yes  :  seen on both sides

     */

    if(oldstash) {
        /* Add to the big list. */
        struct mro_meta * meta;
        HE * const entry
            = (HE *)
              hv_common(
                  seen_stashes, NULL, (const char *)&oldstash, sizeof(HV *), 0,
                  HV_FETCH_LVALUE|HV_FETCH_EMPTY_HE, NULL, 0
              );
        if(HeVAL(entry) == &PL_sv_undef || HeVAL(entry) == &PL_sv_yes) {
            oldstash = NULL;
            goto check_stash;
        }
        HeVAL(entry)
            = HeVAL(entry) == &PL_sv_no ? &PL_sv_yes : &PL_sv_undef;
        meta = HvMROMETA(oldstash);
        (void)
        hv_store(
            stashes, (const char *)&oldstash, sizeof(HV *),
            meta->isa
            ? SvREFCNT_inc_simple_NN((SV *)meta->isa)
            : &PL_sv_yes,
            0
        );
        CLEAR_LINEAR(meta);

        /* Update the effective name. */
        if(HvENAME_get(oldstash)) {
            const HEK * const enamehek = HvENAME_HEK(oldstash);
            if(SvTYPE(namesv) == SVt_PVAV) {
                items = AvFILLp((AV *)namesv) + 1;
                svp = AvARRAY((AV *)namesv);
            }
            else {
                items = 1;
                svp = &namesv;
            }
            while (items--) {
                const U32 name_utf8 = SvUTF8(*svp);
                STRLEN len;
                const char *name = SvPVx_const(*svp, len);
                if(PL_stashcache) {
                    DEBUG_o(Perl_deb(aTHX_ "mro_gather_and_rename clearing PL_stashcache for '%"SVf"'\n",
                                     SVfARG(*svp)));
                    (void)hv_delete(PL_stashcache, name, name_utf8 ? -(I32)len : (I32)len, G_DISCARD);
                }
                ++svp;
                hv_ename_delete(oldstash, name, len, name_utf8);

                if (!fetched_isarev) {
                    /* If the name deletion caused a name change, then we
                     * are not going to call mro_isa_changed_in with this
                     * name (and not at all if it has become anonymous) so
                     * we need to delete old isarev entries here, both
                     * those in the superclasses and this class's own list
                     * of subclasses. We simply delete the latter from
                     * PL_isarev, since we still need it. hv_delete morti-
                     * fies it for us, so sv_2mortal is not necessary. */
                    if(HvENAME_HEK(oldstash) != enamehek) {
                        if(meta->isa && HvARRAY(meta->isa))
                            mro_clean_isarev(meta->isa, name, len, 0, 0,
                                             name_utf8 ? HVhek_UTF8 : 0);
                        isarev = (HV *)hv_delete(PL_isarev, name,
                                                 name_utf8 ? -(I32)len : (I32)len, 0);
                        fetched_isarev=TRUE;
                    }
                }
            }
        }
    }
check_stash:
    if(stash) {
        if(SvTYPE(namesv) == SVt_PVAV) {
            items = AvFILLp((AV *)namesv) + 1;
            svp = AvARRAY((AV *)namesv);
        }
        else {
            items = 1;
            svp = &namesv;
        }
        while (items--) {
            const U32 name_utf8 = SvUTF8(*svp);
            STRLEN len;
            const char *name = SvPVx_const(*svp++, len);
            hv_ename_add(stash, name, len, name_utf8);
        }

        /* Add it to the big list if it needs
        * mro_isa_changed_in called on it. That happens if it was
        * detached from the symbol table (so it had no HvENAME) before
        * being assigned to the spot named by the 'name' variable, because
        * its cached isa linearisation is now stale (the effective name
        * having changed), and subclasses will then use that cache when
        * mro_package_moved calls mro_isa_changed_in. (See
        * [perl #77358].)
        *
        * If it did have a name, then its previous name is still
        * used in isa caches, and there is no need for
        * mro_package_moved to call mro_isa_changed_in.
        */

        entry
            = (HE *)
              hv_common(
                  seen_stashes, NULL, (const char *)&stash, sizeof(HV *), 0,
                  HV_FETCH_LVALUE|HV_FETCH_EMPTY_HE, NULL, 0
              );
        if(HeVAL(entry) == &PL_sv_yes || HeVAL(entry) == &PL_sv_no)
            stash = NULL;
        else {
            HeVAL(entry)
                = HeVAL(entry) == &PL_sv_undef ? &PL_sv_yes : &PL_sv_no;
            if(!stash_had_name)
            {
                struct mro_meta * const meta = HvMROMETA(stash);
                (void)
                hv_store(
                    stashes, (const char *)&stash, sizeof(HV *),
                    meta->isa
                    ? SvREFCNT_inc_simple_NN((SV *)meta->isa)
                    : &PL_sv_yes,
                    0
                );
                CLEAR_LINEAR(meta);
            }
        }
    }

    if(!stash && !oldstash)
        /* Both stashes have been encountered already. */
        return;

    /* Add all the subclasses to the big list. */
    if(!fetched_isarev) {
        /* If oldstash is not null, then we can use its HvENAME to look up
           the isarev hash, since all its subclasses will be listed there.
           It will always have an HvENAME. It the HvENAME was removed
           above, then fetch_isarev will be true, and this code will not be
           reached.

           If oldstash is null, then this is an empty spot with no stash in
           it, so subclasses could be listed in isarev hashes belonging to
           any of the names, so we have to check all of them.
         */
        assert(!oldstash || HvENAME(oldstash));
        if (oldstash) {
            /* Extra variable to avoid a compiler warning */
            const HEK * const hvename = HvENAME_HEK(oldstash);
            fetched_isarev = TRUE;
            svp = hv_fetchhek(PL_isarev, hvename, 0);
            if (svp) isarev = MUTABLE_HV(*svp);
        }
        else if(SvTYPE(namesv) == SVt_PVAV) {
            items = AvFILLp((AV *)namesv) + 1;
            svp = AvARRAY((AV *)namesv);
        }
        else {
            items = 1;
            svp = &namesv;
        }
    }
    if(
        isarev || !fetched_isarev
    ) {
        while (fetched_isarev || items--) {
            HE *iter;

            if (!fetched_isarev) {
                HE * const he = hv_fetch_ent(PL_isarev, *svp++, 0, 0);
                if (!he || !(isarev = MUTABLE_HV(HeVAL(he)))) continue;
            }

            hv_iterinit(isarev);
            while((iter = hv_iternext(isarev))) {
                HV* revstash = gv_stashsv(hv_iterkeysv(iter), 0);
                struct mro_meta * meta;

                if(!revstash) continue;
                meta = HvMROMETA(revstash);
                (void)
                hv_store(
                    stashes, (const char *)&revstash, sizeof(HV *),
                    meta->isa
                    ? SvREFCNT_inc_simple_NN((SV *)meta->isa)
                    : &PL_sv_yes,
                    0
                );
                CLEAR_LINEAR(meta);
            }

            if (fetched_isarev) break;
        }
    }

    /* This is partly based on code in hv_iternext_flags. We are not call-
       ing that here, as we want to avoid resetting the hash iterator. */

    /* Skip the entire loop if the hash is empty.   */
    if(oldstash && HvUSEDKEYS(oldstash)) {
        xhv = (XPVHV*)SvANY(oldstash);
        seen = (HV *) sv_2mortal((SV *)newHV());

        /* Iterate through entries in the oldstash, adding them to the
           list, meanwhile doing the equivalent of $seen{$key} = 1.
         */

        while (++riter <= (I32)xhv->xhv_max) {
            entry = (HvARRAY(oldstash))[riter];

            /* Iterate through the entries in this list */
            for(; entry; entry = HeNEXT(entry)) {
                const char* key;
                I32 len;

                /* If this entry is not a glob, ignore it.
                   Try the next.  */
                if (!isGV(HeVAL(entry))) continue;

                key = hv_iterkey(entry, &len);
                if ((len > 1 && key[len-2] == ':' && key[len-1] == ':')
                        || (len == 1 && key[0] == ':')) {
                    HV * const oldsubstash = GvHV(HeVAL(entry));
                    SV ** const stashentry
                        = stash ? hv_fetch(stash, key, HeUTF8(entry) ? -(I32)len : (I32)len, 0) : NULL;
                    HV *substash = NULL;

                    /* Avoid main::main::main::... */
                    if(oldsubstash == oldstash) continue;

                    if(
                        (
                            stashentry && *stashentry && isGV(*stashentry)
                            && (substash = GvHV(*stashentry))
                        )
                        || (oldsubstash && HvENAME_get(oldsubstash))
                    )
                    {
                        /* Add :: and the key (minus the trailing ::)
                           to each name. */
                        SV *subname;
                        if(SvTYPE(namesv) == SVt_PVAV) {
                            SV *aname;
                            items = AvFILLp((AV *)namesv) + 1;
                            svp = AvARRAY((AV *)namesv);
                            subname = sv_2mortal((SV *)newAV());
                            while (items--) {
                                aname = newSVsv(*svp++);
                                if (len == 1)
                                    sv_catpvs(aname, ":");
                                else {
                                    sv_catpvs(aname, "::");
                                    sv_catpvn_flags(
                                        aname, key, len-2,
                                        HeUTF8(entry)
                                        ? SV_CATUTF8 : SV_CATBYTES
                                    );
                                }
                                av_push((AV *)subname, aname);
                            }
                        }
                        else {
                            subname = sv_2mortal(newSVsv(namesv));
                            if (len == 1) sv_catpvs(subname, ":");
                            else {
                                sv_catpvs(subname, "::");
                                sv_catpvn_flags(
                                    subname, key, len-2,
                                    HeUTF8(entry) ? SV_CATUTF8 : SV_CATBYTES
                                );
                            }
                        }
                        mro_gather_and_rename(
                            stashes, seen_stashes,
                            substash, oldsubstash, subname
                        );
                    }

                    (void)hv_store(seen, key, HeUTF8(entry) ? -(I32)len : (I32)len, &PL_sv_yes, 0);
                }
            }
        }
    }

    /* Skip the entire loop if the hash is empty.   */
    if (stash && HvUSEDKEYS(stash)) {
        xhv = (XPVHV*)SvANY(stash);
        riter = -1;

        /* Iterate through the new stash, skipping $seen{$key} items,
           calling mro_gather_and_rename(stashes,seen,entry,NULL, ...). */
        while (++riter <= (I32)xhv->xhv_max) {
            entry = (HvARRAY(stash))[riter];

            /* Iterate through the entries in this list */
            for(; entry; entry = HeNEXT(entry)) {
                const char* key;
                I32 len;

                /* If this entry is not a glob, ignore it.
                   Try the next.  */
                if (!isGV(HeVAL(entry))) continue;

                key = hv_iterkey(entry, &len);
                if ((len > 1 && key[len-2] == ':' && key[len-1] == ':')
                        || (len == 1 && key[0] == ':')) {
                    HV *substash;

                    /* If this entry was seen when we iterated through the
                       oldstash, skip it. */
                    if(seen && hv_exists(seen, key, HeUTF8(entry) ? -(I32)len : (I32)len)) continue;

                    /* We get here only if this stash has no corresponding
                       entry in the stash being replaced. */

                    substash = GvHV(HeVAL(entry));
                    if(substash) {
                        SV *subname;

                        /* Avoid checking main::main::main::... */
                        if(substash == stash) continue;

                        /* Add :: and the key (minus the trailing ::)
                           to each name. */
                        if(SvTYPE(namesv) == SVt_PVAV) {
                            SV *aname;
                            items = AvFILLp((AV *)namesv) + 1;
                            svp = AvARRAY((AV *)namesv);
                            subname = sv_2mortal((SV *)newAV());
                            while (items--) {
                                aname = newSVsv(*svp++);
                                if (len == 1)
                                    sv_catpvs(aname, ":");
                                else {
                                    sv_catpvs(aname, "::");
                                    sv_catpvn_flags(
                                        aname, key, len-2,
                                        HeUTF8(entry)
                                        ? SV_CATUTF8 : SV_CATBYTES
                                    );
                                }
                                av_push((AV *)subname, aname);
                            }
                        }
                        else {
                            subname = sv_2mortal(newSVsv(namesv));
                            if (len == 1) sv_catpvs(subname, ":");
                            else {
                                sv_catpvs(subname, "::");
                                sv_catpvn_flags(
                                    subname, key, len-2,
                                    HeUTF8(entry) ? SV_CATUTF8 : SV_CATBYTES
                                );
                            }
                        }
                        mro_gather_and_rename(
                            stashes, seen_stashes,
                            substash, NULL, subname
                        );
                    }
                }
            }
        }
    }
}
Ejemplo n.º 11
0
/*
 * This is the main Load function.
 * It takes a yaml stream and turns it into 0 or more Perl objects.
 */
void
Load(SV *yaml_sv)
{
    dXSARGS;
    perl_yaml_loader_t loader;
    SV *node;
    char *yaml_str;
    STRLEN yaml_len;
    
    /* If UTF8, make copy and downgrade */
    if (SvPV_nolen(yaml_sv) && SvUTF8(yaml_sv)) {
        yaml_sv = sv_mortalcopy(yaml_sv);
    }
    yaml_str = SvPVbyte(yaml_sv, yaml_len);

    sp = mark;
    if (0 && (items || ax)) {} /* XXX Quiet the -Wall warnings for now. */

    yaml_parser_initialize(&loader.parser);
    loader.document = 0;
    yaml_parser_set_input_string(
        &loader.parser,
        (unsigned char *)yaml_str,
        yaml_len
    );

    /* Get the first event. Must be a STREAM_START */
    if (!yaml_parser_parse(&loader.parser, &loader.event))
        goto load_error;
    if (loader.event.type != YAML_STREAM_START_EVENT)
        croak(ERRMSG "Expected STREAM_START_EVENT; Got: %d != %d",
            loader.event.type,
            YAML_STREAM_START_EVENT
         );

    loader.anchors = newHV();
    sv_2mortal((SV*)loader.anchors);

    /* Keep calling load_node until end of stream */
    while (1) {
        loader.document++;
        if (!yaml_parser_parse(&loader.parser, &loader.event))
            goto load_error;
        if (loader.event.type == YAML_STREAM_END_EVENT)
            break;
        node = load_node(&loader);
        hv_clear(loader.anchors);
        if (! node) break;
        XPUSHs(sv_2mortal(node));
        if (!yaml_parser_parse(&loader.parser, &loader.event))
            goto load_error;
        if (loader.event.type != YAML_DOCUMENT_END_EVENT)
            croak(ERRMSG "Expected DOCUMENT_END_EVENT");
    }

    /* Make sure the last event is a STREAM_END */
    if (loader.event.type != YAML_STREAM_END_EVENT)
        croak(ERRMSG "Expected STREAM_END_EVENT; Got: %d != %d",
            loader.event.type,
            YAML_STREAM_END_EVENT
         );
    yaml_parser_delete(&loader.parser);
    PUTBACK;
    return;

load_error:
    croak(loader_error_msg(&loader, NULL));
}
Ejemplo n.º 12
0
STATIC I32
S_do_trans_simple_utf8(pTHX_ SV *sv)
{
    U8 *s;
    U8 *send;
    U8 *d;
    U8 *start;
    U8 *dstart, *dend;
    I32 matches = 0;
    I32 grows = PL_op->op_private & OPpTRANS_GROWS;
    STRLEN len;

    SV* rv = (SV*)cSVOP->op_sv;
    HV* hv = (HV*)SvRV(rv);
    SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
    UV none = svp ? SvUV(*svp) : 0x7fffffff;
    UV extra = none + 1;
    UV final = 0;
    UV uv;
    I32 isutf8;
    U8 hibit = 0;

    s = (U8*)SvPV(sv, len);
    isutf8 = SvUTF8(sv);
    if (!isutf8) {
	U8 *t = s, *e = s + len;
	while (t < e) {
	    U8 ch = *t++;
	    if ((hibit = !NATIVE_IS_INVARIANT(ch)))
		break;
	}
	if (hibit)
	    s = bytes_to_utf8(s, &len);
    }
    send = s + len;
    start = s;

    svp = hv_fetch(hv, "FINAL", 5, FALSE);
    if (svp)
	final = SvUV(*svp);

    if (grows) {
	/* d needs to be bigger than s, in case e.g. upgrading is required */
	New(0, d, len*3+UTF8_MAXLEN, U8);
	dend = d + len * 3;
	dstart = d;
    }
    else {
	dstart = d = s;
	dend = d + len;
    }

    while (s < send) {
	if ((uv = swash_fetch(rv, s, TRUE)) < none) {
	    s += UTF8SKIP(s);
	    matches++;
	    d = uvuni_to_utf8(d, uv);
	}
	else if (uv == none) {
	    int i = UTF8SKIP(s);
	    Move(s, d, i, U8);
	    d += i;
	    s += i;
	}
	else if (uv == extra) {
	    int i = UTF8SKIP(s);
	    s += i;
	    matches++;
	    d = uvuni_to_utf8(d, final);
	}
	else
Ejemplo n.º 13
0
STATIC I32
S_do_trans_simple_utf8(pTHX_ SV * const sv)
{
    dVAR;
    U8 *s;
    U8 *send;
    U8 *d;
    U8 *start;
    U8 *dstart, *dend;
    I32 matches = 0;
    const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
    STRLEN len;

    SV* const  rv =
#ifdef USE_ITHREADS
		    PAD_SVl(cPADOP->op_padix);
#else
		    (SV*)cSVOP->op_sv;
#endif
    HV* const  hv = (HV*)SvRV(rv);
    SV* const * svp = hv_fetchs(hv, "NONE", FALSE);
    const UV none = svp ? SvUV(*svp) : 0x7fffffff;
    const UV extra = none + 1;
    UV final = 0;
    U8 hibit = 0;

    s = (U8*)SvPV(sv, len);
    if (!SvUTF8(sv)) {
	const U8 *t = s;
	const U8 * const e = s + len;
	while (t < e) {
	    const U8 ch = *t++;
	    hibit = !NATIVE_IS_INVARIANT(ch);
	    if (hibit) {
		s = bytes_to_utf8(s, &len);
		break;
	    }
	}
    }
    send = s + len;
    start = s;

    svp = hv_fetchs(hv, "FINAL", FALSE);
    if (svp)
	final = SvUV(*svp);

    if (grows) {
	/* d needs to be bigger than s, in case e.g. upgrading is required */
	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
	dend = d + len * 3;
	dstart = d;
    }
    else {
	dstart = d = s;
	dend = d + len;
    }

    while (s < send) {
	const UV uv = swash_fetch(rv, s, TRUE);
	if (uv < none) {
	    s += UTF8SKIP(s);
	    matches++;
	    d = uvuni_to_utf8(d, uv);
	}
	else if (uv == none) {
	    const int i = UTF8SKIP(s);
	    Move(s, d, i, U8);
	    d += i;
	    s += i;
	}
	else if (uv == extra) {
	    s += UTF8SKIP(s);
	    matches++;
	    d = uvuni_to_utf8(d, final);
	}
	else
Ejemplo n.º 14
0
STATIC I32
S_do_trans_simple(pTHX_ SV * const sv)
{
    dVAR;
    I32 matches = 0;
    STRLEN len;
    U8 *s = (U8*)SvPV(sv,len);
    U8 * const send = s+len;

    const short * const tbl = (short*)cPVOP->op_pv;
    if (!tbl)
	Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);

    /* First, take care of non-UTF-8 input strings, because they're easy */
    if (!SvUTF8(sv)) {
	while (s < send) {
	    const I32 ch = tbl[*s];
	    if (ch >= 0) {
		matches++;
		*s = (U8)ch;
	    }
	    s++;
	}
	SvSETMAGIC(sv);
    }
    else {
	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
	U8 *d;
	U8 *dstart;

	/* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
	if (grows)
	    Newx(d, len*2+1, U8);
	else
	    d = s;
	dstart = d;
	while (s < send) {
	    STRLEN ulen;
	    I32 ch;

	    /* Need to check this, otherwise 128..255 won't match */
	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
	    if (c < 0x100 && (ch = tbl[c]) >= 0) {
		matches++;
		d = uvchr_to_utf8(d, ch);
		s += ulen;
	    }
	    else { /* No match -> copy */
		Move(s, d, ulen, U8);
		d += ulen;
		s += ulen;
	    }
	}
	if (grows) {
	    sv_setpvn(sv, (char*)dstart, d - dstart);
	    Safefree(dstart);
	}
	else {
	    *d = '\0';
	    SvCUR_set(sv, d - dstart);
	}
	SvUTF8_on(sv);
	SvSETMAGIC(sv);
    }
    return matches;
}
Ejemplo n.º 15
0
STATIC I32
S_do_trans_complex(pTHX_ SV * const sv)
{
    dVAR;
    STRLEN len;
    U8 *s = (U8*)SvPV(sv, len);
    U8 * const send = s+len;
    I32 matches = 0;

    const short * const tbl = (short*)cPVOP->op_pv;
    if (!tbl)
	Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);

    if (!SvUTF8(sv)) {
	U8 *d = s;
	U8 * const dstart = d;

	if (PL_op->op_private & OPpTRANS_SQUASH) {
	    const U8* p = send;
	    while (s < send) {
		const I32 ch = tbl[*s];
		if (ch >= 0) {
		    *d = (U8)ch;
		    matches++;
		    if (p != d - 1 || *p != *d)
			p = d++;
		}
		else if (ch == -1)	/* -1 is unmapped character */
		    *d++ = *s;	
		else if (ch == -2)	/* -2 is delete character */
		    matches++;
		s++;
	    }
	}
	else {
	    while (s < send) {
		const I32 ch = tbl[*s];
		if (ch >= 0) {
		    matches++;
		    *d++ = (U8)ch;
		}
		else if (ch == -1)	/* -1 is unmapped character */
		    *d++ = *s;
		else if (ch == -2)      /* -2 is delete character */
		    matches++;
		s++;
	    }
	}
	*d = '\0';
	SvCUR_set(sv, d - dstart);
    }
    else { /* is utf8 */
	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
	const I32 del = PL_op->op_private & OPpTRANS_DELETE;
	U8 *d;
	U8 *dstart;
	STRLEN rlen = 0;

	if (grows)
	    Newx(d, len*2+1, U8);
	else
	    d = s;
	dstart = d;
	if (complement && !del)
	    rlen = tbl[0x100];

#ifdef MACOS_TRADITIONAL
#define comp CoMP   /* "comp" is a keyword in some compilers ... */
#endif

	if (PL_op->op_private & OPpTRANS_SQUASH) {
	    UV pch = 0xfeedface;
	    while (s < send) {
		STRLEN len;
		const UV comp = utf8n_to_uvchr(s, send - s, &len,
					       UTF8_ALLOW_DEFAULT);
		I32 ch;

		if (comp > 0xff) {
		    if (!complement) {
			Move(s, d, len, U8);
			d += len;
		    }
		    else {
			matches++;
			if (!del) {
			    ch = (rlen == 0) ? (I32)comp :
				(comp - 0x100 < rlen) ?
				tbl[comp+1] : tbl[0x100+rlen];
			    if ((UV)ch != pch) {
				d = uvchr_to_utf8(d, ch);
				pch = (UV)ch;
			    }
			    s += len;
			    continue;
			}
		    }
		}
		else if ((ch = tbl[comp]) >= 0) {
		    matches++;
		    if ((UV)ch != pch) {
		        d = uvchr_to_utf8(d, ch);
		        pch = (UV)ch;
		    }
		    s += len;
		    continue;
		}
		else if (ch == -1) {	/* -1 is unmapped character */
		    Move(s, d, len, U8);
		    d += len;
		}
		else if (ch == -2)      /* -2 is delete character */
		    matches++;
		s += len;
		pch = 0xfeedface;
	    }
	}
	else {
	    while (s < send) {
		STRLEN len;
		const UV comp = utf8n_to_uvchr(s, send - s, &len,
					       UTF8_ALLOW_DEFAULT);
		I32 ch;
		if (comp > 0xff) {
		    if (!complement) {
			Move(s, d, len, U8);
			d += len;
		    }
		    else {
			matches++;
			if (!del) {
			    if (comp - 0x100 < rlen)
				d = uvchr_to_utf8(d, tbl[comp+1]);
			    else
				d = uvchr_to_utf8(d, tbl[0x100+rlen]);
			}
		    }
		}
		else if ((ch = tbl[comp]) >= 0) {
		    d = uvchr_to_utf8(d, ch);
		    matches++;
		}
		else if (ch == -1) {	/* -1 is unmapped character */
		    Move(s, d, len, U8);
		    d += len;
		}
		else if (ch == -2)      /* -2 is delete character */
		    matches++;
		s += len;
	    }
	}
	if (grows) {
	    sv_setpvn(sv, (char*)dstart, d - dstart);
	    Safefree(dstart);
	}
	else {
	    *d = '\0';
	    SvCUR_set(sv, d - dstart);
	}
	SvUTF8_on(sv);
    }
    SvSETMAGIC(sv);
    return matches;
}
Ejemplo n.º 16
0
EXTERN SV*
decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
{
    STRLEN len;
    char *s = SvPV_force(sv, len);
    char *t = s;
    char *end = s + len;
    char *ent_start;

    char *repl;
    STRLEN repl_len;
#ifdef UNICODE_HTML_PARSER
    char buf[UTF8_MAXLEN];
    int repl_utf8;
    int high_surrogate = 0;
#else
    char buf[1];
#endif

#if defined(__GNUC__) && defined(UNICODE_HTML_PARSER)
    /* gcc -Wall reports this variable as possibly used uninitialized */
    repl_utf8 = 0;
#endif

    while (s < end) {
	assert(t <= s);

	if ((*t++ = *s++) != '&')
	    continue;

	ent_start = s;
	repl = 0;

	if (s < end && *s == '#') {
	    UV num = 0;
	    int ok = 0;
	    s++;
	    if (s < end && (*s == 'x' || *s == 'X')) {
		s++;
		while (s < end) {
		    char *tmp = strchr(PL_hexdigit, *s);
		    if (!tmp)
			break;
		    num = num << 4 | ((tmp - PL_hexdigit) & 15);
		    if (num > 0x10FFFF) {
			/* overflow */
			ok = 0;
			break;
		    }
		    s++;
		    ok = 1;
		}
	    }
	    else {
		while (s < end && isDIGIT(*s)) {
		    num = num * 10 + (*s - '0');
		    if (num > 0x10FFFF) {
			/* overflow */
			ok = 0;
			break;
		    }
		    s++;
		    ok = 1;
		}
	    }
	    if (num && ok) {
#ifdef UNICODE_HTML_PARSER
		if (!SvUTF8(sv) && num <= 255) {
		    buf[0] = (char) num;
		    repl = buf;
		    repl_len = 1;
		    repl_utf8 = 0;
		}
		else if (num == 0xFFFE || num == 0xFFFF) {
		    /* illegal */
		}
		else {
		    char *tmp;
		    if ((num & 0xFFFFFC00) == 0xDC00) {  /* low-surrogate */
			if (high_surrogate != 0) {
			    t -= 3; /* Back up past 0xFFFD */
			    num = ((high_surrogate - 0xD800) << 10) +
				(num - 0xDC00) + 0x10000;
			    high_surrogate = 0;
			} else {
			    num = 0xFFFD;
			}
		    }
		    else if ((num & 0xFFFFFC00) == 0xD800) { /* high-surrogate */
			high_surrogate = num;
			num = 0xFFFD;
		    }
		    else {
			high_surrogate = 0;
			/* otherwise invalid? */
			if ((num >= 0xFDD0 && num <= 0xFDEF) ||
			    ((num & 0xFFFE) == 0xFFFE) ||
			    num > 0x10FFFF)
			{
			    num = 0xFFFD;
			}
		    }

		    tmp = (char*)uvuni_to_utf8((U8*)buf, num);
		    repl = buf;
		    repl_len = tmp - buf;
		    repl_utf8 = 1;
		}
#else
		if (num <= 255) {
		    buf[0] = (char) num & 0xFF;
		    repl = buf;
		    repl_len = 1;
		}
#endif
	    }
	}
	else {
	    char *ent_name = s;
	    while (s < end && isALNUM(*s))
		s++;
	    if (ent_name != s && entity2char) {
		SV** svp;
		if (              (svp = hv_fetch(entity2char, ent_name, s - ent_name, 0)) ||
		    (*s == ';' && (svp = hv_fetch(entity2char, ent_name, s - ent_name + 1, 0)))
		   )
		{
		    repl = SvPV(*svp, repl_len);
#ifdef UNICODE_HTML_PARSER
		    repl_utf8 = SvUTF8(*svp);
#endif
		}
		else if (expand_prefix) {
		    char *ss = s - 1;
		    while (ss > ent_name) {
			svp = hv_fetch(entity2char, ent_name, ss - ent_name, 0);
			if (svp) {
			    repl = SvPV(*svp, repl_len);
#ifdef UNICODE_HTML_PARSER
			    repl_utf8 = SvUTF8(*svp);
#endif
			    s = ss;
			    break;
			}
			ss--;
		    }
		}
	    }
#ifdef UNICODE_HTML_PARSER
	    high_surrogate = 0;
#endif
	}

	if (repl) {
	    char *repl_allocated = 0;
	    if (s < end && *s == ';')
		s++;
	    t--;  /* '&' already copied, undo it */

#ifdef UNICODE_HTML_PARSER
	    if (*s != '&') {
		high_surrogate = 0;
	    }

	    if (!SvUTF8(sv) && repl_utf8) {
		/* need to upgrade sv before we continue */
		STRLEN before_gap_len = t - SvPVX(sv);
		char *before_gap = (char*)bytes_to_utf8((U8*)SvPVX(sv), &before_gap_len);
		STRLEN after_gap_len = end - s;
		char *after_gap = (char*)bytes_to_utf8((U8*)s, &after_gap_len);

		sv_setpvn(sv, before_gap, before_gap_len);
		sv_catpvn(sv, after_gap, after_gap_len);
		SvUTF8_on(sv);

		Safefree(before_gap);
		Safefree(after_gap);

		s = t = SvPVX(sv) + before_gap_len;
		end = SvPVX(sv) + before_gap_len + after_gap_len;
	    }
	    else if (SvUTF8(sv) && !repl_utf8) {
		repl = (char*)bytes_to_utf8((U8*)repl, &repl_len);
		repl_allocated = repl;
	    }
#endif

	    if (t + repl_len > s) {
		/* need to grow the string */
		grow_gap(aTHX_ sv, repl_len - (s - t), &t, &s, &end);
	    }

	    /* copy replacement string into string */
	    while (repl_len--)
		*t++ = *repl++;

	    if (repl_allocated)
		Safefree(repl_allocated);
	}
	else {
	    while (ent_start < s)
		*t++ = *ent_start++;
	}
    }

    *t = '\0';
    SvCUR_set(sv, t - SvPVX(sv));

    return sv;
}
Ejemplo n.º 17
0
/* This is the main routine to deserialize a structure.
 * It rolls up all the other "top level" routines into one
 */
SV *
srl_decode_into(pTHX_ srl_decoder_t *dec, SV *src, SV* into, UV start_offset)
{
    assert(dec != NULL);
    if (SvUTF8(src))
        sv_utf8_downgrade(src, 0);
    srl_begin_decoding(aTHX_ dec, src, start_offset);
    srl_read_header(aTHX_ dec);
    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DECOMPRESS_SNAPPY)) {
        /* uncompress */
        uint32_t dest_len;
        SV *buf_sv;
        unsigned char *buf;
        unsigned char *old_pos;
        const ptrdiff_t sereal_header_len = dec->pos - dec->buf_start;
        const STRLEN compressed_packet_len =
                ( dec->proto_version_and_flags & SRL_PROTOCOL_ENCODING_MASK ) == SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL
                ? (STRLEN)srl_read_varint_uv_length(aTHX_ dec, " while reading compressed packet size")
                : (STRLEN)(dec->buf_end - dec->pos);
        int decompress_ok;
        int header_len;

        /* all decl's above here, or we break C89 compilers */

        dec->bytes_consumed= compressed_packet_len + (dec->pos - dec->buf_start);

        header_len = csnappy_get_uncompressed_length(
                            (char *)dec->pos,
                            compressed_packet_len,
                            &dest_len
                         );
        if (header_len == CSNAPPY_E_HEADER_BAD)
            SRL_ERROR("Invalid Snappy header in Snappy-compressed Sereal packet");

        /* Let perl clean this up. Yes, it's not the most efficient thing
         * ever, but it's just one mortal per full decompression, so not
         * a bottle-neck. */
        buf_sv = sv_2mortal( newSV(sereal_header_len + dest_len + 1 ));
        buf = (unsigned char *)SvPVX(buf_sv);

        /* FIXME probably unnecessary to copy the Sereal header! */
        Copy(dec->buf_start, buf, sereal_header_len, unsigned char);

        old_pos = dec->pos;
        dec->buf_start = buf;
        dec->pos = buf + sereal_header_len;
        dec->buf_end = dec->pos + dest_len;
        dec->buf_len = dest_len + sereal_header_len;

        decompress_ok = csnappy_decompress_noheader((char *)(old_pos + header_len),
                                                    compressed_packet_len - header_len,
                                                    (char *)dec->pos,
                                                    &dest_len);
        if (expect_false( decompress_ok != 0 ))
        {
            SRL_ERRORf1("Snappy decompression of Sereal packet payload failed with error %i!", decompress_ok);
        }
    }

    if (expect_true(!into)) {
        into= sv_2mortal(newSV_type(SVt_NULL));
    }
    srl_read_single_value(aTHX_ dec, into);
    /* assert(dec->pos == dec->buf_end); For now we disable this */
    if (expect_false(SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_NEEDS_FINALIZE))) {
        srl_finalize_structure(aTHX_ dec);
    }

    /* If we aren't reading from a decompressed buffer we have to remember the number
     * of bytes used for the user to query. */
    if (dec->bytes_consumed == 0)
        dec->bytes_consumed = dec->pos - dec->buf_start;

    if (SRL_DEC_HAVE_OPTION(dec, SRL_F_DECODER_DESTRUCTIVE_INCREMENTAL)) {
        STRLEN len;
        char *pv= SvPV(src,len);
        /* check the length here? do something different if the string is now exhausted? */
        sv_chop(src, pv + dec->bytes_consumed);
    }

    srl_clear_decoder(aTHX_ dec);
    return into;
}
Ejemplo n.º 18
0
/* helper for the default modify handler for builtin attributes */
static int
modify_SV_attributes(pTHX_ SV *sv, SV **retlist, SV **attrlist, int numattrs)
{
    SV *attr;
    int nret;

    for (nret = 0 ; numattrs && (attr = *attrlist++); numattrs--) {
	STRLEN len;
	const char *name = SvPV_const(attr, len);
	const bool negated = (*name == '-');
        HV *typestash;

	if (negated) {
	    name++;
	    len--;
	}
	switch (SvTYPE(sv)) {
	case SVt_PVCV:
	    switch ((int)len) {
	    case 4:
		if (memEQ(name, "pure", 4)) {
		    if (negated)
			Perl_croak(aTHX_ "Illegal :-pure attribute");
                    CvPURE_on(sv);
		    goto next_attr;
                }
		break;
	    case 5:
		if (memEQ(name, "const", 5)) {
		    if (negated)
			CvCONST_off(sv);
		    else {
#ifndef USE_CPERL
                        const bool warn = (!CvANON(sv) || CvCLONED(sv))
                                        && !CvCONST(sv);
                        CvCONST_on(sv);
                        if (warn)
                            break;
#else
                        CvCONST_on(sv);
#endif
		    }
		    goto next_attr;
		}
		break;
	    case 6:
		switch (name[3]) {
		case 'l':
		    if (memEQ(name, "lvalue", 6)) {
			bool warn =
			    !CvISXSUB(MUTABLE_CV(sv))
			 && CvROOT(MUTABLE_CV(sv))
			 && !CvLVALUE(MUTABLE_CV(sv)) != negated;
			if (negated)
			    CvFLAGS(MUTABLE_CV(sv)) &= ~CVf_LVALUE;
			else
			    CvFLAGS(MUTABLE_CV(sv)) |= CVf_LVALUE;
			if (warn) break;
                        goto next_attr;
		    }
		    break;
		case 'h':
		    if (memEQ(name, "method", 6)) {
			if (negated)
			    CvFLAGS(MUTABLE_CV(sv)) &= ~CVf_METHOD;
			else
			    CvFLAGS(MUTABLE_CV(sv)) |= CVf_METHOD;
                        goto next_attr;
		    }
		    break;
		}
		break;
	    default:
		if (len > 10 && memEQ(name, "prototype(", 10)) {
		    SV * proto = newSVpvn(name+10,len-11);
		    HEK *const hek = CvNAME_HEK((CV *)sv);
		    SV *subname;
		    if (name[len-1] != ')')
			Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
		    if (hek)
			subname = sv_2mortal(newSVhek(hek));
		    else
			subname=(SV *)CvGV((const CV *)sv);
		    if (ckWARN(WARN_ILLEGALPROTO))
			Perl_validate_proto(aTHX_ subname, proto, TRUE);
		    Perl_cv_ckproto_len_flags(aTHX_ (const CV *)sv,
		                                    (const GV *)subname,
		                                    name+10,
		                                    len-11,
		                                    SvUTF8(attr));
		    sv_setpvn(MUTABLE_SV(sv), name+10, len-11);
		    if (SvUTF8(attr)) SvUTF8_on(MUTABLE_SV(sv));
		    goto next_attr;
		}
		break;
	    }
            if (!negated && (typestash = gv_stashpvn(name, len, SvUTF8(attr)))) {
                CvTYPED_on(sv);
                CvTYPE_set((CV*)sv, typestash);
                continue;
            }
	    break;
	case SVt_IV:
	case SVt_PVIV:
	case SVt_PVMG:
            if (memEQ(name, "unsigned", 8)
                && (SvIOK(sv) || SvUOK(sv)))
            {
                if (negated) /* :-unsigned alias for :signed */
                    SvIsUV_off(sv);
                else
                    SvIsUV_on(sv);
                continue;
            }
            /* fallthru - all other data types */
	default:
            if (memEQ(name, "const", 5)
                && !(SvFLAGS(sv) & SVf_PROTECT))
            {
                if (negated)
                    SvREADONLY_off(sv);
                else
                    SvREADONLY_on(sv);
                continue;
            }
	    if (memEQs(name, len, "shared")) {
                if (negated)
                    Perl_croak(aTHX_ "A variable may not be unshared");
                SvSHARE(sv);
                continue;
	    }
	    break;
	}
	/* anything recognized had a 'continue' above */
	*retlist++ = attr;
	nret++;
    next_attr:
        ;
    }

    return nret;
}
Ejemplo n.º 19
0
static void
report_event(PSTATE* p_state,
	     event_id_t event,
	     char *beg, char *end, U32 utf8,
	     token_pos_t *tokens, int num_tokens,
	     SV* self
	    )
{
    struct p_handler *h;
    dTHX;
    dSP;
    AV *array;
    STRLEN my_na;
    char *argspec;
    char *s;

#ifdef UNICODE_HTML_PARSER
    #define CHR_DIST(a,b) (utf8 ? utf8_distance((U8*)(a),(U8*)(b)) : (a) - (b))
#else
    #define CHR_DIST(a,b) ((a) - (b))
#endif

    /* capture offsets */
    STRLEN offset = p_state->offset;
    STRLEN line = p_state->line;
    STRLEN column = p_state->column;

#if 0
    {  /* used for debugging at some point */
	char *s = beg;
	int i;

	/* print debug output */
	switch(event) {
	case E_DECLARATION: printf("DECLARATION"); break;
	case E_COMMENT:     printf("COMMENT"); break;
	case E_START:       printf("START"); break;
	case E_END:         printf("END"); break;
	case E_TEXT:        printf("TEXT"); break;
	case E_PROCESS:     printf("PROCESS"); break;
	case E_NONE:        printf("NONE"); break;
	default:            printf("EVENT #%d", event); break;
	}

	printf(" [");
	while (s < end) {
	    if (*s == '\n') {
		putchar('\\'); putchar('n');
	    }
	    else
		putchar(*s);
	    s++;
	}
	printf("] %d\n", end - beg);
	for (i = 0; i < num_tokens; i++) {
	    printf("  token %d: %d %d\n",
		   i,
		   tokens[i].beg - beg,
		   tokens[i].end - tokens[i].beg);
	}
    }
#endif

    if (p_state->pending_end_tag && event != E_TEXT && event != E_COMMENT) {
	token_pos_t t;
	char dummy;
	t.beg = p_state->pending_end_tag;
	t.end = p_state->pending_end_tag + strlen(p_state->pending_end_tag);
	p_state->pending_end_tag = 0;
	report_event(p_state, E_END, &dummy, &dummy, 0, &t, 1, self);
	SPAGAIN;
    }

    /* update offsets */
    p_state->offset += CHR_DIST(end, beg);
    if (line) {
	char *s = beg;
	char *nl = NULL;
	while (s < end) {
	    if (*s == '\n') {
		p_state->line++;
		nl = s;
	    }
	    s++;
	}
	if (nl)
	    p_state->column = CHR_DIST(end, nl) - 1;
	else
	    p_state->column += CHR_DIST(end, beg);
    }

    if (event == E_NONE)
	goto IGNORE_EVENT;
    
#ifdef MARKED_SECTION
    if (p_state->ms == MS_IGNORE)
	goto IGNORE_EVENT;
#endif

    /* tag filters */
    if (p_state->ignore_tags || p_state->report_tags || p_state->ignore_elements) {

	if (event == E_START || event == E_END) {
	    SV* tagname = p_state->tmp;

	    assert(num_tokens >= 1);
	    sv_setpvn(tagname, tokens[0].beg, tokens[0].end - tokens[0].beg);
	    if (utf8)
		SvUTF8_on(tagname);
	    else
		SvUTF8_off(tagname);
	    if (!CASE_SENSITIVE(p_state))
		sv_lower(aTHX_ tagname);

	    if (p_state->ignoring_element) {
		if (sv_eq(p_state->ignoring_element, tagname)) {
		    if (event == E_START)
			p_state->ignore_depth++;
		    else if (--p_state->ignore_depth == 0) {
			SvREFCNT_dec(p_state->ignoring_element);
			p_state->ignoring_element = 0;
		    }
		}
		goto IGNORE_EVENT;
	    }

	    if (p_state->ignore_elements &&
		hv_fetch_ent(p_state->ignore_elements, tagname, 0, 0))
	    {
		p_state->ignoring_element = newSVsv(tagname);
		p_state->ignore_depth = 1;
		goto IGNORE_EVENT;
	    }

	    if (p_state->ignore_tags &&
		hv_fetch_ent(p_state->ignore_tags, tagname, 0, 0))
	    {
		goto IGNORE_EVENT;
	    }
	    if (p_state->report_tags &&
		!hv_fetch_ent(p_state->report_tags, tagname, 0, 0))
	    {
		goto IGNORE_EVENT;
	    }
	}
	else if (p_state->ignoring_element) {
	    goto IGNORE_EVENT;
	}
    }

    h = &p_state->handlers[event];
    if (!h->cb) {
	/* event = E_DEFAULT; */
	h = &p_state->handlers[E_DEFAULT];
	if (!h->cb)
	    goto IGNORE_EVENT;
    }

    if (SvTYPE(h->cb) != SVt_PVAV && !SvTRUE(h->cb)) {
	/* FALSE scalar ('' or 0) means IGNORE this event */
	return;
    }

    if (p_state->unbroken_text && event == E_TEXT) {
	/* should buffer text */
	if (!p_state->pend_text)
	    p_state->pend_text = newSV(256);
	if (SvOK(p_state->pend_text)) {
	    if (p_state->is_cdata != p_state->pend_text_is_cdata) {
		flush_pending_text(p_state, self);
		SPAGAIN;
		goto INIT_PEND_TEXT;
	    }
	}
	else {
	INIT_PEND_TEXT:
	    p_state->pend_text_offset = offset;
	    p_state->pend_text_line = line;
	    p_state->pend_text_column = column;
	    p_state->pend_text_is_cdata = p_state->is_cdata;
	    sv_setpvn(p_state->pend_text, "", 0);
	    if (!utf8)
		SvUTF8_off(p_state->pend_text);
	}
#ifdef UNICODE_HTML_PARSER
	if (utf8 && !SvUTF8(p_state->pend_text))
	    sv_utf8_upgrade(p_state->pend_text);
	if (utf8 || !SvUTF8(p_state->pend_text)) {
	    sv_catpvn(p_state->pend_text, beg, end - beg);
	}
	else {
	    SV *tmp = newSVpvn(beg, end - beg);
	    sv_utf8_upgrade(tmp);
	    sv_catsv(p_state->pend_text, tmp);
	    SvREFCNT_dec(tmp);
	}
#else
	sv_catpvn(p_state->pend_text, beg, end - beg);
#endif
	return;
    }
    else if (p_state->pend_text && SvOK(p_state->pend_text)) {
	flush_pending_text(p_state, self);
	SPAGAIN;
    }

    /* At this point we have decided to generate an event callback */

    argspec = h->argspec ? SvPV(h->argspec, my_na) : "";

    if (SvTYPE(h->cb) == SVt_PVAV) {
	
	if (*argspec == ARG_FLAG_FLAT_ARRAY) {
	    argspec++;
	    array = (AV*)h->cb;
	}
	else {
	    /* start sub-array for accumulator array */
	    array = newAV();
	}
    }
    else {
	array = 0;
	if (*argspec == ARG_FLAG_FLAT_ARRAY)
	    argspec++;

	/* start argument stack for callback */
	ENTER;
	SAVETMPS;
	PUSHMARK(SP);
    }

    for (s = argspec; *s; s++) {
	SV* arg = 0;
	int push_arg = 1;
	enum argcode argcode = (enum argcode)*s;

	switch( argcode ) {

	case ARG_SELF:
	    arg = sv_mortalcopy(self);
	    break;

	case ARG_TOKENS:
	    if (num_tokens >= 1) {
		AV* av = newAV();
		SV* prev_token = &PL_sv_undef;
		int i;
		av_extend(av, num_tokens);
		for (i = 0; i < num_tokens; i++) {
		    if (tokens[i].beg) {
			prev_token = newSVpvn(tokens[i].beg, tokens[i].end-tokens[i].beg);
			if (utf8)
			    SvUTF8_on(prev_token);
			av_push(av, prev_token);
		    }
		    else { /* boolean */
			av_push(av, p_state->bool_attr_val
				? newSVsv(p_state->bool_attr_val)
				: newSVsv(prev_token));
		    }
		}
		arg = sv_2mortal(newRV_noinc((SV*)av));
	    }
	    break;

	case ARG_TOKENPOS:
	    if (num_tokens >= 1 && tokens[0].beg >= beg) {
		AV* av = newAV();
		int i;
		av_extend(av, num_tokens*2);
		for (i = 0; i < num_tokens; i++) {
		    if (tokens[i].beg) {
			av_push(av, newSViv(CHR_DIST(tokens[i].beg, beg)));
			av_push(av, newSViv(CHR_DIST(tokens[i].end, tokens[i].beg)));
		    }
		    else { /* boolean tag value */
			av_push(av, newSViv(0));
			av_push(av, newSViv(0));
		    }
		}
		arg = sv_2mortal(newRV_noinc((SV*)av));
	    }
	    break;

	case ARG_TOKEN0:
	case ARG_TAGNAME:
	    /* fall through */

	case ARG_TAG:
	    if (num_tokens >= 1) {
		arg = sv_2mortal(newSVpvn(tokens[0].beg,
					  tokens[0].end - tokens[0].beg));
		if (utf8)
		    SvUTF8_on(arg);
		if (!CASE_SENSITIVE(p_state) && argcode != ARG_TOKEN0)
		    sv_lower(aTHX_ arg);
		if (argcode == ARG_TAG && event != E_START) {
		    char *e_type = "!##/#?#";
		    sv_insert(arg, 0, 0, &e_type[event], 1);
		}
	    }
	    break;

	case ARG_ATTR:
	case ARG_ATTRARR:
	    if (event == E_START) {
		HV* hv;
		int i;
		if (argcode == ARG_ATTR) {
		    hv = newHV();
		    arg = sv_2mortal(newRV_noinc((SV*)hv));
		}
		else {
#ifdef __GNUC__
		    /* gcc -Wall reports this variable as possibly used uninitialized */
		    hv = 0;
#endif
		    push_arg = 0;  /* deal with argument pushing here */
		}

		for (i = 1; i < num_tokens; i += 2) {
		    SV* attrname = newSVpvn(tokens[i].beg,
					    tokens[i].end-tokens[i].beg);
		    SV* attrval;

		    if (utf8)
			SvUTF8_on(attrname);
		    if (tokens[i+1].beg) {
			char *beg = tokens[i+1].beg;
			STRLEN len = tokens[i+1].end - beg;
			if (*beg == '"' || *beg == '\'') {
			    assert(len >= 2 && *beg == beg[len-1]);
			    beg++; len -= 2;
			}
			attrval = newSVpvn(beg, len);
			if (utf8)
			    SvUTF8_on(attrval);
			if (!p_state->attr_encoded) {
#ifdef UNICODE_HTML_PARSER
			    if (p_state->utf8_mode)
				sv_utf8_decode(attrval);
#endif
			    decode_entities(aTHX_ attrval, p_state->entity2char, 0);
			    if (p_state->utf8_mode)
				SvUTF8_off(attrval);
			}
		    }
		    else { /* boolean */
			if (p_state->bool_attr_val)
			    attrval = newSVsv(p_state->bool_attr_val);
			else
			    attrval = newSVsv(attrname);
		    }

		    if (!CASE_SENSITIVE(p_state))
			sv_lower(aTHX_ attrname);

		    if (argcode == ARG_ATTR) {
			if (hv_exists_ent(hv, attrname, 0) ||
			    !hv_store_ent(hv, attrname, attrval, 0)) {
			    SvREFCNT_dec(attrval);
			}
			SvREFCNT_dec(attrname);
		    }
		    else { /* ARG_ATTRARR */
			if (array) {
			    av_push(array, attrname);
			    av_push(array, attrval);
			}
			else {
			    XPUSHs(sv_2mortal(attrname));
			    XPUSHs(sv_2mortal(attrval));
			}
		    }
		}
	    }
	    else if (argcode == ARG_ATTRARR) {
		push_arg = 0;
	    }
	    break;

	case ARG_ATTRSEQ:       /* (v2 compatibility stuff) */
	    if (event == E_START) {
		AV* av = newAV();
		int i;
		for (i = 1; i < num_tokens; i += 2) {
		    SV* attrname = newSVpvn(tokens[i].beg,
					    tokens[i].end-tokens[i].beg);
		    if (utf8)
			SvUTF8_on(attrname);
		    if (!CASE_SENSITIVE(p_state))
			sv_lower(aTHX_ attrname);
		    av_push(av, attrname);
		}
		arg = sv_2mortal(newRV_noinc((SV*)av));
	    }
	    break;
	
	case ARG_TEXT:
	    arg = sv_2mortal(newSVpvn(beg, end - beg));
	    if (utf8)
		SvUTF8_on(arg);
	    break;

	case ARG_DTEXT:
	    if (event == E_TEXT) {
		arg = sv_2mortal(newSVpvn(beg, end - beg));
		if (utf8)
		    SvUTF8_on(arg);
		if (!p_state->is_cdata) {
#ifdef UNICODE_HTML_PARSER
		    if (p_state->utf8_mode)
			sv_utf8_decode(arg);
#endif
		    decode_entities(aTHX_ arg, p_state->entity2char, 1);
		    if (p_state->utf8_mode)
			SvUTF8_off(arg);
		}
	    }
	    break;
      
	case ARG_IS_CDATA:
	    if (event == E_TEXT) {
		arg = boolSV(p_state->is_cdata);
	    }
	    break;

        case ARG_SKIPPED_TEXT:
	    arg = sv_2mortal(p_state->skipped_text);
	    p_state->skipped_text = newSVpvn("", 0);
            break;

	case ARG_OFFSET:
	    arg = sv_2mortal(newSViv(offset));
	    break;

	case ARG_OFFSET_END:
	    arg = sv_2mortal(newSViv(offset + CHR_DIST(end, beg)));
	    break;

	case ARG_LENGTH:
	    arg = sv_2mortal(newSViv(CHR_DIST(end, beg)));
	    break;

	case ARG_LINE:
	    arg = sv_2mortal(newSViv(line));
	    break;

	case ARG_COLUMN:
	    arg = sv_2mortal(newSViv(column));
	    break;

	case ARG_EVENT:
	    assert(event >= 0 && event < EVENT_COUNT);
	    arg = sv_2mortal(newSVpv(event_id_str[event], 0));
	    break;

	case ARG_LITERAL:
	{
	    int len = (unsigned char)s[1];
	    arg = sv_2mortal(newSVpvn(s+2, len));
	    if (SvUTF8(h->argspec))
		SvUTF8_on(arg);
	    s += len + 1;
	}
	break;

	case ARG_UNDEF:
	    arg = sv_mortalcopy(&PL_sv_undef);
	    break;
      
	default:
	    arg = sv_2mortal(newSVpvf("Bad argspec %d", *s));
	    break;
	}

	if (push_arg) {
	    if (!arg)
		arg = sv_mortalcopy(&PL_sv_undef);

	    if (array) {
		/* have to fix mortality here or add mortality to
		 * XPUSHs after removing it from the switch cases.
		 */
		av_push(array, SvREFCNT_inc(arg));
	    }
	    else {
		XPUSHs(arg);
	    }
	}
    }

    if (array) {
	if (array != (AV*)h->cb)
	    av_push((AV*)h->cb, newRV_noinc((SV*)array));
    }
    else {
	PUTBACK;

	if ((enum argcode)*argspec == ARG_SELF && !SvROK(h->cb)) {
	    char *method = SvPV(h->cb, my_na);
	    perl_call_method(method, G_DISCARD | G_EVAL | G_VOID);
	}
	else {
	    perl_call_sv(h->cb, G_DISCARD | G_EVAL | G_VOID);
	}

	if (SvTRUE(ERRSV)) {
	    RETHROW;
	}

	FREETMPS;
	LEAVE;
    }
    if (p_state->skipped_text)
	SvCUR_set(p_state->skipped_text, 0);
    return;

IGNORE_EVENT:
    if (p_state->skipped_text) {
	if (event != E_TEXT && p_state->pend_text && SvOK(p_state->pend_text))
	    flush_pending_text(p_state, self);
#ifdef UNICODE_HTML_PARSER
	if (utf8 && !SvUTF8(p_state->skipped_text))
	    sv_utf8_upgrade(p_state->skipped_text);
	if (utf8 || !SvUTF8(p_state->skipped_text)) {
#endif
	    sv_catpvn(p_state->skipped_text, beg, end - beg);
#ifdef UNICODE_HTML_PARSER
	}
	else {
	    SV *tmp = newSVpvn(beg, end - beg);
	    sv_utf8_upgrade(tmp);
	    sv_catsv(p_state->pend_text, tmp);
	    SvREFCNT_dec(tmp);
	}
#endif
    }
#undef CHR_DIST    
    return;
}
Ejemplo n.º 20
0
EXTERN void
parse(pTHX_
      PSTATE* p_state,
      SV* chunk,
      SV* self)
{
    char *s, *beg, *end;
    U32 utf8 = 0;
    STRLEN len;

    if (!chunk) {
	/* eof */
	char empty[1];
	if (p_state->buf && SvOK(p_state->buf)) {
	    /* flush it */
	    s = SvPV(p_state->buf, len);
	    end = s + len;
	    utf8 = SvUTF8(p_state->buf);
	    assert(len);

	    while (s < end) {
		if (p_state->literal_mode) {
		    if (strEQ(p_state->literal_mode, "plaintext") && !p_state->closing_plaintext)
			break;
		    p_state->pending_end_tag = p_state->literal_mode;
		    p_state->literal_mode = 0;
		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
		    continue;
		}

		if (!p_state->strict_comment && !p_state->no_dash_dash_comment_end && *s == '<') {
		    p_state->no_dash_dash_comment_end = 1;
		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
		    continue;
		}

		if (!p_state->strict_comment && *s == '<') {
		    /* some kind of unterminated markup.  Report rest as as comment */
		    token_pos_t token;
		    token.beg = s + 1;
		    token.end = end;
		    report_event(p_state, E_COMMENT, s, end, utf8, &token, 1, self);
		    s = end;
		}

		break;
	    }

	    if (s < end) {
		/* report rest as text */
		report_event(p_state, E_TEXT, s, end, utf8, 0, 0, self);
	    }
	    
	    SvREFCNT_dec(p_state->buf);
	    p_state->buf = 0;
	}
	if (p_state->pend_text && SvOK(p_state->pend_text))
	    flush_pending_text(p_state, self);

	if (p_state->ignoring_element) {
	    /* document not balanced */
	    SvREFCNT_dec(p_state->ignoring_element);
	    p_state->ignoring_element = 0;
	}
	report_event(p_state, E_END_DOCUMENT, empty, empty, 0, 0, 0, self);

	/* reset state */
	p_state->offset = 0;
	if (p_state->line)
	    p_state->line = 1;
	p_state->column = 0;
	p_state->literal_mode = 0;
	p_state->is_cdata = 0;
	return;
    }

#ifdef UNICODE_HTML_PARSER
    if (p_state->utf8_mode)
	sv_utf8_downgrade(chunk, 0);
#endif

    if (p_state->buf && SvOK(p_state->buf)) {
	sv_catsv(p_state->buf, chunk);
	beg = SvPV(p_state->buf, len);
	utf8 = SvUTF8(p_state->buf);
    }
    else {
	beg = SvPV(chunk, len);
	utf8 = SvUTF8(chunk);
	if (p_state->offset == 0) {
	    report_event(p_state, E_START_DOCUMENT, beg, beg, 0, 0, 0, self);

	    /* Print warnings if we find unexpected Unicode BOM forms */
#ifdef UNICODE_HTML_PARSER
	    if (DOWARN &&
		p_state->argspec_entity_decode &&
		!p_state->utf8_mode && (
                 (!utf8 && len >= 3 && strnEQ(beg, "\xEF\xBB\xBF", 3)) ||
		 (utf8 && len >= 6 && strnEQ(beg, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||
		 (!utf8 && probably_utf8_chunk(aTHX_ beg, len))
		)
	       )
	    {
		warn("Parsing of undecoded UTF-8 will give garbage when decoding entities");
	    }
	    if (DOWARN && utf8 && len >= 2 && strnEQ(beg, "\xFF\xFE", 2)) {
		warn("Parsing string decoded with wrong endianess");
	    }
#endif
	    if (DOWARN) {
		if (!utf8 && len >= 4 &&
		    (strnEQ(beg, "\x00\x00\xFE\xFF", 4) ||
		     strnEQ(beg, "\xFE\xFF\x00\x00", 4))
		    )
		{
		    warn("Parsing of undecoded UTF-32");
		}
		else if (!utf8 && len >= 2 &&
			 (strnEQ(beg, "\xFE\xFF", 2) || strnEQ(beg, "\xFF\xFE", 2))
		    )
		{
		    warn("Parsing of undecoded UTF-16");
		}
	    }
	}
    }

    if (!len)
	return; /* nothing to do */

    end = beg + len;
    s = parse_buf(aTHX_ p_state, beg, end, utf8, self);

    if (s == end || p_state->eof) {
	if (p_state->buf) {
	    SvOK_off(p_state->buf);
	}
    }
    else {
	/* need to keep rest in buffer */
	if (p_state->buf) {
	    /* chop off some chars at the beginning */
	    if (SvOK(p_state->buf)) {
		sv_chop(p_state->buf, s);
	    }
	    else {
		sv_setpvn(p_state->buf, s, end - s);
		if (utf8)
		    SvUTF8_on(p_state->buf);
		else
		    SvUTF8_off(p_state->buf);
	    }
	}
	else {
	    p_state->buf = newSVpv(s, end - s);
	    if (utf8)
		SvUTF8_on(p_state->buf);
	}
    }
    return;
}
Ejemplo n.º 21
0
EXTERN SV*
argspec_compile(SV* src, PSTATE* p_state)
{
    dTHX;
    SV* argspec = newSVpvn("", 0);
    STRLEN len;
    char *s = SvPV(src, len);
    char *end = s + len;

    if (SvUTF8(src))
	SvUTF8_on(argspec);

    while (isHSPACE(*s))
	s++;

    if (*s == '@') {
	/* try to deal with '@{ ... }' wrapping */
	char *tmp = s + 1;
	while (isHSPACE(*tmp))
	    tmp++;
	if (*tmp == '{') {
	    char c = ARG_FLAG_FLAT_ARRAY;
	    sv_catpvn(argspec, &c, 1);
	    tmp++;
	    while (isHSPACE(*tmp))
		tmp++;
	    s = tmp;
	}
    }
    while (s < end) {
	if (isHNAME_FIRST(*s) || *s == '@') {
	    char *name = s;
	    int a = ARG_SELF;
	    char **arg_name;

	    s++;
	    while (isHNAME_CHAR(*s))
		s++;

	    /* check identifier */
	    for ( arg_name = argname; a < ARG_LITERAL ; ++a, ++arg_name ) {
		if (strnEQ(*arg_name, name, s - name) &&
		    (*arg_name)[s - name] == '\0')
		    break;
	    }
	    if (a < ARG_LITERAL) {
		char c = (unsigned char) a;
		sv_catpvn(argspec, &c, 1);

		if (a == ARG_LINE || a == ARG_COLUMN) {
		    if (!p_state->line)
			p_state->line = 1; /* enable tracing of line/column */
		}
		if (a == ARG_SKIPPED_TEXT) {
		    if (!p_state->skipped_text) {
			p_state->skipped_text = newSVpvn("", 0);
                    }
                }
		if (a == ARG_ATTR || a == ARG_ATTRARR || a == ARG_DTEXT) {
		    p_state->argspec_entity_decode++;
		}
	    }
	    else {
		croak("Unrecognized identifier %.*s in argspec", s - name, name);
	    }
	}
	else if (*s == '"' || *s == '\'') {
	    char *string_beg = s;
	    s++;
	    while (s < end && *s != *string_beg && *s != '\\')
		s++;
	    if (*s == *string_beg) {
		/* literal */
		int len = s - string_beg - 1;
		unsigned char buf[2];
		if (len > 255)
		    croak("Literal string is longer than 255 chars in argspec");
		buf[0] = ARG_LITERAL;
		buf[1] = len;
		sv_catpvn(argspec, (char*)buf, 2);
		sv_catpvn(argspec, string_beg+1, len);
		s++;
	    }
	    else if (*s == '\\') {
		croak("Backslash reserved for literal string in argspec");
	    }
	    else {
		croak("Unterminated literal string in argspec");
	    }
	}
	else {
	    croak("Bad argspec (%s)", s);
	}

	while (isHSPACE(*s))
	    s++;
	
	if (*s == '}' && SvPVX(argspec)[0] == ARG_FLAG_FLAT_ARRAY) {
	    /* end of '@{ ... }' */
	    s++;
	    while (isHSPACE(*s))
		s++;
	    if (s < end)
		croak("Bad argspec: stuff after @{...} (%s)", s);
	}

	if (s == end)
	    break;
	if (*s != ',') {
	    croak("Missing comma separator in argspec");
	}
	s++;
	while (isHSPACE(*s))
	    s++;
    }
    return argspec;
}