Пример #1
0
IV
PerlIOEncode_flush(pTHX_ PerlIO * f)
{
    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
    IV code = 0;

    if (e->bufsv) {
	dSP;
	SV *str;
	char *s;
	STRLEN len;
	SSize_t count = 0;
	if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
	    if (e->inEncodeCall) return 0;
	    /* Write case - encode the buffer and write() to layer below */
	    PUSHSTACKi(PERLSI_MAGIC);
	    SPAGAIN;
	    ENTER;
	    SAVETMPS;
	    PUSHMARK(sp);
	    XPUSHs(e->enc);
	    SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
	    SvUTF8_on(e->bufsv);
	    XPUSHs(e->bufsv);
	    XPUSHs(e->chk);
	    PUTBACK;
	    e->inEncodeCall = 1;
	    if (call_method("encode", G_SCALAR) != 1) {
		e->inEncodeCall = 0;
		Perl_die(aTHX_ "panic: encode did not return a value");
	    }
	    e->inEncodeCall = 0;
	    SPAGAIN;
	    str = POPs;
	    PUTBACK;
	    s = SvPV(str, len);
	    count = PerlIO_write(PerlIONext(f),s,len);
	    if ((STRLEN)count != len) {
		code = -1;
	    }
	    FREETMPS;
	    LEAVE;
	    POPSTACK;
	    if (PerlIO_flush(PerlIONext(f)) != 0) {
		code = -1;
	    }
	    if (SvCUR(e->bufsv)) {
		/* Did not all translate */
		e->base.ptr = e->base.buf+SvCUR(e->bufsv);
		return code;
	    }
	}
	else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
	    /* read case */
	    /* if we have any untranslated stuff then unread that first */
	    /* FIXME - unread is fragile is there a better way ? */
	    if (e->dataSV && SvCUR(e->dataSV)) {
		s = SvPV(e->dataSV, len);
		count = PerlIO_unread(PerlIONext(f),s,len);
		if ((STRLEN)count != len) {
		    code = -1;
		}
		SvCUR_set(e->dataSV,0);
	    }
	    /* See if there is anything left in the buffer */
	    if (e->base.ptr < e->base.end) {
		if (e->inEncodeCall) return 0;
		/* Bother - have unread data.
		   re-encode and unread() to layer below
		 */
		PUSHSTACKi(PERLSI_MAGIC);
		SPAGAIN;
		ENTER;
		SAVETMPS;
		str = sv_newmortal();
		sv_upgrade(str, SVt_PV);
		SvPV_set(str, (char*)e->base.ptr);
		SvLEN_set(str, 0);
		SvCUR_set(str, e->base.end - e->base.ptr);
		SvPOK_only(str);
		SvUTF8_on(str);
		PUSHMARK(sp);
		XPUSHs(e->enc);
		XPUSHs(str);
		XPUSHs(e->chk);
		PUTBACK;
		e->inEncodeCall = 1;
		if (call_method("encode", G_SCALAR) != 1) {
		    e->inEncodeCall = 0;
		    Perl_die(aTHX_ "panic: encode did not return a value");
		}
		e->inEncodeCall = 0;
		SPAGAIN;
		str = POPs;
		PUTBACK;
		s = SvPV(str, len);
		count = PerlIO_unread(PerlIONext(f),s,len);
		if ((STRLEN)count != len) {
		    code = -1;
		}
		FREETMPS;
		LEAVE;
		POPSTACK;
	    }
	}
	e->base.ptr = e->base.end = e->base.buf;
	PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
    }
    return code;
}
Пример #2
0
STATIC char *
S_skipspace(pTHX_ register char *s, int incline)
{
    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
	while (s < PL_bufend && SPACE_OR_TAB(*s))
	    s++;
	return s;
    }
    for (;;) {
	STRLEN prevlen;
	SSize_t oldprevlen, oldoldprevlen;
	SSize_t oldloplen = 0, oldunilen = 0;
	while (s < PL_bufend && isSPACE(*s)) {
	    if (*s++ == '\n' && ((incline == 2) || PL_in_eval && !PL_rsfp && !incline))
		incline(s);
	}

	/* comment */
	if (s < PL_bufend && *s == '#') {
	    while (s < PL_bufend && *s != '\n')
		s++;
	    if (s < PL_bufend) {
		s++;
		if (PL_in_eval && !PL_rsfp && !incline) {
		    incline(s);
		    continue;
		}
	    }
	}

	/* also skip leading whitespace on the beginning of a line before deciding
	 * whether or not to recharge the linestr. --rafl
	 */
	while (s < PL_bufend && isSPACE(*s)) {
		if (*s++ == '\n' && PL_in_eval && !PL_rsfp && !incline)
			incline(s);
	}

	/* only continue to recharge the buffer if we're at the end
	 * of the buffer, we're not reading from a source filter, and
	 * we're in normal lexing mode
	 */
	if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
		PL_lex_state == LEX_FORMLINE)
	    return s;

	/* try to recharge the buffer */
	if ((s = filter_gets(PL_linestr, PL_rsfp,
			     (prevlen = SvCUR(PL_linestr)))) == Nullch)
	{
	    /* end of file.  Add on the -p or -n magic */
	    if (PL_minus_p) {
		sv_setpv(PL_linestr,
			 ";}continue{print or die qq(-p destination: $!\\n);}");
		PL_minus_n = PL_minus_p = 0;
	    }
	    else if (PL_minus_n) {
		sv_setpvn(PL_linestr, ";}", 2);
		PL_minus_n = 0;
	    }
	    else
		sv_setpvn(PL_linestr,";", 1);

	    /* reset variables for next time we lex */
	    PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
		= SvPVX(PL_linestr);
	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	    PL_last_lop = PL_last_uni = Nullch;

	    /* In perl versions previous to p4-rawid: //depot/perl@32954 -P
	     * preprocessors were supported here. We don't support -P at all, even
	     * on perls that support it, and use the following chunk from blead
	     * perl. (rafl)
	     */

	    /* Close the filehandle.  Could be from
	     * STDIN, or a regular file.  If we were reading code from
	     * STDIN (because the commandline held no -e or filename)
	     * then we don't close it, we reset it so the code can
	     * read from STDIN too.
	     */

	    if ((PerlIO*)PL_rsfp == PerlIO_stdin())
		PerlIO_clearerr(PL_rsfp);
	    else
		(void)PerlIO_close(PL_rsfp);
	    PL_rsfp = Nullfp;
	    return s;
	}

	/* not at end of file, so we only read another line */
	/* make corresponding updates to old pointers, for yyerror() */
	oldprevlen = PL_oldbufptr - PL_bufend;
	oldoldprevlen = PL_oldoldbufptr - PL_bufend;
	if (PL_last_uni)
	    oldunilen = PL_last_uni - PL_bufend;
	if (PL_last_lop)
	    oldloplen = PL_last_lop - PL_bufend;
	PL_linestart = PL_bufptr = s + prevlen;
	PL_bufend = s + SvCUR(PL_linestr);
	s = PL_bufptr;
	PL_oldbufptr = s + oldprevlen;
	PL_oldoldbufptr = s + oldoldprevlen;
	if (PL_last_uni)
	    PL_last_uni = s + oldunilen;
	if (PL_last_lop)
	    PL_last_lop = s + oldloplen;
	if (!incline)
		incline(s);

	/* debugger active and we're not compiling the debugger code,
	 * so store the line into the debugger's array of lines
	 */
	if (PERLDB_LINE && PL_curstash != PL_debstash) {
	    SV * const sv = NEWSV(85,0);

	    sv_upgrade(sv, SVt_PVMG);
	    sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
            (void)SvIOK_on(sv);
            SvIV_set(sv, 0);
	    av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
	}
    }
}
Пример #3
0
IV
PerlIOEncode_fill(pTHX_ PerlIO * f)
{
    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
    dSP;
    IV code = 0;
    PerlIO *n;
    SSize_t avail;

    if (PerlIO_flush(f) != 0)
	return -1;
    n  = PerlIONext(f);
    if (!PerlIO_fast_gets(n)) {
	/* Things get too messy if we don't have a buffer layer
	   push a :perlio to do the job */
	char mode[8];
	n  = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
	if (!n) {
	    Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
	}
    }
    PUSHSTACKi(PERLSI_MAGIC);
    SPAGAIN;
    ENTER;
    SAVETMPS;
  retry:
    avail = PerlIO_get_cnt(n);
    if (avail <= 0) {
	avail = PerlIO_fill(n);
	if (avail == 0) {
	    avail = PerlIO_get_cnt(n);
	}
	else {
	    if (!PerlIO_error(n) && PerlIO_eof(n))
		avail = 0;
	}
    }
    if (avail > 0 || (e->flags & NEEDS_LINES)) {
	STDCHAR *ptr = PerlIO_get_ptr(n);
	SSize_t use  = (avail >= 0) ? avail : 0;
	SV *uni;
	char *s = NULL;
	STRLEN len = 0;
	e->base.ptr = e->base.end = (STDCHAR *) NULL;
	(void) PerlIOEncode_get_base(aTHX_ f);
	if (!e->dataSV)
	    e->dataSV = newSV(0);
	if (SvTYPE(e->dataSV) < SVt_PV) {
	    sv_upgrade(e->dataSV,SVt_PV);
	}
	if (e->flags & NEEDS_LINES) {
	    /* Encoding needs whole lines (e.g. iso-2022-*)
	       search back from end of available data for
	       and line marker
	     */
	    STDCHAR *nl = ptr+use-1;
	    while (nl >= ptr) {
		if (*nl == '\n') {
		    break;
		}
		nl--;
	    }
	    if (nl >= ptr && *nl == '\n') {
		/* found a line - take up to and including that */
		use = (nl+1)-ptr;
	    }
	    else if (avail > 0) {
		/* No line, but not EOF - append avail to the pending data */
		sv_catpvn(e->dataSV, (char*)ptr, use);
		PerlIO_set_ptrcnt(n, ptr+use, 0);
		goto retry;
	    }
	    else if (!SvCUR(e->dataSV)) {
		goto end_of_file;
	    }
	}
	if (SvCUR(e->dataSV)) {
	    /* something left over from last time - create a normal
	       SV with new data appended
	     */
	    if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
		if (e->flags & NEEDS_LINES) {
		    /* Have to grow buffer */
		    e->base.bufsiz = use + SvCUR(e->dataSV);
		    PerlIOEncode_get_base(aTHX_ f);
		}
		else {
	       use = e->base.bufsiz - SvCUR(e->dataSV);
	    }
	    }
	    sv_catpvn(e->dataSV,(char*)ptr,use);
	}
	else {
	    /* Create a "dummy" SV to represent the available data from layer below */
	    if (SvLEN(e->dataSV) && SvPVX_const(e->dataSV)) {
		Safefree(SvPVX_mutable(e->dataSV));
	    }
	    if (use > (SSize_t)e->base.bufsiz) {
		if (e->flags & NEEDS_LINES) {
		    /* Have to grow buffer */
		    e->base.bufsiz = use;
		    PerlIOEncode_get_base(aTHX_ f);
		}
		else {
	       use = e->base.bufsiz;
	    }
	    }
	    SvPV_set(e->dataSV, (char *) ptr);
	    SvLEN_set(e->dataSV, 0);  /* Hands off sv.c - it isn't yours */
	    SvCUR_set(e->dataSV,use);
	    SvPOK_only(e->dataSV);
	}
	SvUTF8_off(e->dataSV);
	PUSHMARK(sp);
	XPUSHs(e->enc);
	XPUSHs(e->dataSV);
	XPUSHs(e->chk);
	PUTBACK;
	if (call_method("decode", G_SCALAR) != 1) {
	    Perl_die(aTHX_ "panic: decode did not return a value");
	}
	SPAGAIN;
	uni = POPs;
	PUTBACK;
	/* Now get translated string (forced to UTF-8) and use as buffer */
	if (SvPOK(uni)) {
	    s = SvPVutf8(uni, len);
#ifdef PARANOID_ENCODE_CHECKS
	    if (len && !is_utf8_string((U8*)s,len)) {
		Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
	    }
#endif
	}
	if (len > 0) {
	    /* Got _something */
	    /* if decode gave us back dataSV then data may vanish when
	       we do ptrcnt adjust - so take our copy now.
	       (The copy is a pain - need a put-it-here option for decode.)
	     */
	    sv_setpvn(e->bufsv,s,len);
	    e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
	    e->base.end = e->base.ptr + SvCUR(e->bufsv);
	    PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
	    SvUTF8_on(e->bufsv);

	    /* Adjust ptr/cnt not taking anything which
	       did not translate - not clear this is a win */
	    /* compute amount we took */
	    use -= SvCUR(e->dataSV);
	    PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
	    /* and as we did not take it it isn't pending */
	    SvCUR_set(e->dataSV,0);
	} else {
	    /* Got nothing - assume partial character so we need some more */
	    /* Make sure e->dataSV is a normal SV before re-filling as
	       buffer alias will change under us
	     */
	    s = SvPV(e->dataSV,len);
	    sv_setpvn(e->dataSV,s,len);
	    PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
	    goto retry;
	}
    }
    else {
    end_of_file:
	code = -1;
	if (avail == 0)
	    PerlIOBase(f)->flags |= PERLIO_F_EOF;
	else
	    PerlIOBase(f)->flags |= PERLIO_F_ERROR;
    }
    FREETMPS;
    LEAVE;
    POPSTACK;
    return code;
}
Пример #4
0
void
lucy_RegexTokenizer_tokenize_str(lucy_RegexTokenizer *self,
                                 const char *string, size_t string_len,
                                 lucy_Inversion *inversion) {
    uint32_t   num_code_points = 0;
    SV        *wrapper    = sv_newmortal();
#if (PERL_VERSION > 10)
    REGEXP    *rx         = (REGEXP*)self->token_re;
    regexp    *rx_struct  = (regexp*)SvANY(rx);
#else
    REGEXP    *rx         = (REGEXP*)self->token_re;
    regexp    *rx_struct  = rx;
#endif
    char      *string_beg = (char*)string;
    char      *string_end = string_beg + string_len;
    char      *string_arg = string_beg;


    // Fake up an SV wrapper to feed to the regex engine.
    sv_upgrade(wrapper, SVt_PV);
    SvREADONLY_on(wrapper);
    SvLEN(wrapper) = 0;
    SvUTF8_on(wrapper);

    // Wrap the string in an SV to please the regex engine.
    SvPVX(wrapper) = string_beg;
    SvCUR_set(wrapper, string_len);
    SvPOK_on(wrapper);

    while (pregexec(rx, string_arg, string_end, string_arg, 1, wrapper, 1)) {
#if ((PERL_VERSION >= 10) || (PERL_VERSION == 9 && PERL_SUBVERSION >= 5))
        char *const start_ptr = string_arg + rx_struct->offs[0].start;
        char *const end_ptr   = string_arg + rx_struct->offs[0].end;
#else
        char *const start_ptr = string_arg + rx_struct->startp[0];
        char *const end_ptr   = string_arg + rx_struct->endp[0];
#endif
        uint32_t start, end;

        // Get start and end offsets in Unicode code points.
        for (; string_arg < start_ptr; num_code_points++) {
            string_arg += lucy_StrHelp_UTF8_COUNT[(uint8_t)(*string_arg)];
            if (string_arg > string_end) {
                THROW(LUCY_ERR, "scanned past end of '%s'", string_beg);
            }
        }
        start = num_code_points;
        for (; string_arg < end_ptr; num_code_points++) {
            string_arg += lucy_StrHelp_UTF8_COUNT[(uint8_t)(*string_arg)];
            if (string_arg > string_end) {
                THROW(LUCY_ERR, "scanned past end of '%s'", string_beg);
            }
        }
        end = num_code_points;

        // Add a token to the new inversion.
        Lucy_Inversion_Append(inversion,
                              lucy_Token_new(
                                  start_ptr,
                                  (end_ptr - start_ptr),
                                  start,
                                  end,
                                  1.0f,   // boost always 1 for now
                                  1       // position increment
                              )
                             );
    }
}