/*
 * PerlIOEncode_flush - flush the encoding layer's buffer for handle f.
 *
 * Nothing is done (and 0 is returned) when the layer has no buffer SV
 * (e->bufsv).  Otherwise one of two paths runs:
 *
 *  - Write case (PERLIO_F_WRBUF set and bytes are buffered): the "encode"
 *    method is called on e->enc with e->bufsv and e->chk, the returned
 *    string is written to the layer below, and that layer is flushed.
 *    If encode left anything in e->bufsv, base.ptr is positioned after
 *    the leftover and the function returns without resetting the buffer.
 *
 *  - Read case (PERLIO_F_RDBUF set): pending bytes held in e->dataSV are
 *    pushed back to the layer below with PerlIO_unread(); any part of the
 *    buffer not yet consumed is re-encoded and pushed back as well.
 *
 * On the normal exit path the buffer pointers are reset to base.buf and
 * both RDBUF and WRBUF flags are cleared.
 *
 * Returns 0 on success and -1 if a write/unread was short or flushing the
 * layer below failed.  Returns 0 immediately, doing nothing, if a buffer
 * needs encoding while an encode call is already in progress
 * (e->inEncodeCall).  Dies if "encode" does not return exactly one value.
 */
IV
PerlIOEncode_flush(pTHX_ PerlIO * f)
{
    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
    IV code = 0;

    if (e->bufsv) {
        dSP;
        SV *str;
        char *s;
        STRLEN len;
        SSize_t count = 0;
        if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
            /* Guard against re-entry: bail out before touching the stack
               if this flush was triggered from inside our own encode call */
            if (e->inEncodeCall) return 0;
            /* Write case - encode the buffer and write() to layer below */
            PUSHSTACKi(PERLSI_MAGIC);
            SPAGAIN;
            ENTER;
            SAVETMPS;
            PUSHMARK(sp);
            XPUSHs(e->enc);
            /* Make bufsv's length reflect how much has been buffered so far */
            SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
            SvUTF8_on(e->bufsv);
            XPUSHs(e->bufsv);
            XPUSHs(e->chk);
            PUTBACK;
            e->inEncodeCall = 1;
            if (call_method("encode", G_SCALAR) != 1) {
                /* Clear the guard before dying so it is not left set */
                e->inEncodeCall = 0;
                Perl_die(aTHX_ "panic: encode did not return a value");
            }
            e->inEncodeCall = 0;
            SPAGAIN;
            str = POPs;
            PUTBACK;
            s = SvPV(str, len);
            count = PerlIO_write(PerlIONext(f),s,len);
            /* A short write is recorded as failure but processing continues */
            if ((STRLEN)count != len) {
                code = -1;
            }
            FREETMPS;
            LEAVE;
            POPSTACK;
            if (PerlIO_flush(PerlIONext(f)) != 0) {
                code = -1;
            }
            if (SvCUR(e->bufsv)) {
                /* Did not all translate - keep the leftover in the buffer
                   and skip the pointer/flag reset at the bottom */
                e->base.ptr = e->base.buf+SvCUR(e->bufsv);
                return code;
            }
        }
        else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
            /* read case */
            /* if we have any untranslated stuff then unread that first */
            /* FIXME - unread is fragile is there a better way ? */
            if (e->dataSV && SvCUR(e->dataSV)) {
                s = SvPV(e->dataSV, len);
                count = PerlIO_unread(PerlIONext(f),s,len);
                if ((STRLEN)count != len) {
                    code = -1;
                }
                SvCUR_set(e->dataSV,0);
            }
            /* See if there is anything left in the buffer */
            if (e->base.ptr < e->base.end) {
                /* Same re-entrancy guard as in the write case */
                if (e->inEncodeCall) return 0;
                /* Bother - have unread data.
                   re-encode and unread() to layer below */
                PUSHSTACKi(PERLSI_MAGIC);
                SPAGAIN;
                ENTER;
                SAVETMPS;
                /* Build a mortal SV that points straight at the unread tail
                   of the buffer instead of copying it.
                   NOTE(review): SvLEN of 0 presumably marks the string as
                   not owned by this SV so it is not freed with it - confirm */
                str = sv_newmortal();
                sv_upgrade(str, SVt_PV);
                SvPV_set(str, (char*)e->base.ptr);
                SvLEN_set(str, 0);
                SvCUR_set(str, e->base.end - e->base.ptr);
                SvPOK_only(str);
                SvUTF8_on(str);
                PUSHMARK(sp);
                XPUSHs(e->enc);
                XPUSHs(str);
                XPUSHs(e->chk);
                PUTBACK;
                e->inEncodeCall = 1;
                if (call_method("encode", G_SCALAR) != 1) {
                    e->inEncodeCall = 0;
                    Perl_die(aTHX_ "panic: encode did not return a value");
                }
                e->inEncodeCall = 0;
                SPAGAIN;
                str = POPs;
                PUTBACK;
                s = SvPV(str, len);
                count = PerlIO_unread(PerlIONext(f),s,len);
                if ((STRLEN)count != len) {
                    code = -1;
                }
                FREETMPS;
                LEAVE;
                POPSTACK;
            }
        }
        /* Buffer is now empty: reset pointers and drop both buffer flags */
        e->base.ptr = e->base.end = e->base.buf;
        PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
    }
    return code;
}
/*
 * S_skipspace - advance s past whitespace and '#' comments in the lexer
 * buffer, reading further lines from PL_rsfp when the buffer runs out.
 *
 * s        current position in the lexer buffer (bounded by PL_bufend).
 * incline  selects when incline() is invoked for a skipped newline:
 *            2  - on every newline skipped by the first whitespace loop;
 *            0  - only while PL_in_eval is set and PL_rsfp is not, and
 *                 also after each freshly read line;
 *            any other non-zero value - never.
 *          (The parameter shares its name with the incline() call; the
 *          bare identifier is the parameter, the call form is the routine.)
 *
 * When a format is being lexed (PL_lex_formbrack set and
 * PL_lex_brackets <= PL_lex_formbrack) only spaces and tabs are skipped.
 *
 * Returns the position of the first character that was not skipped.  At
 * end of input PL_linestr is replaced by a closing fragment (depending on
 * PL_minus_p / PL_minus_n), the buffer pointers are reset, PL_rsfp is
 * closed (or merely error-cleared when it is stdin) and set to Nullfp, and
 * the start of that fragment is returned.
 */
STATIC char *
S_skipspace(pTHX_ register char *s, int incline)
{
    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
        while (s < PL_bufend && SPACE_OR_TAB(*s))
            s++;
        return s;
    }
    for (;;) {
        STRLEN prevlen;
        SSize_t oldprevlen, oldoldprevlen;
        SSize_t oldloplen = 0, oldunilen = 0;
        /* skip whitespace, reporting newlines via incline() when requested */
        while (s < PL_bufend && isSPACE(*s)) {
            if (*s++ == '\n' && ((incline == 2) || PL_in_eval && !PL_rsfp && !incline))
                incline(s);
        }

        /* comment: skip to end of line and step over the newline */
        if (s < PL_bufend && *s == '#') {
            while (s < PL_bufend && *s != '\n')
                s++;
            if (s < PL_bufend) {
                s++;
                if (PL_in_eval && !PL_rsfp && !incline) {
                    incline(s);
                    /* restart the loop to skip whatever follows the comment */
                    continue;
                }
            }
        }

        /* also skip leading whitespace on the beginning of a line before deciding
         * whether or not to recharge the linestr. --rafl
         */
        while (s < PL_bufend && isSPACE(*s)) {
            if (*s++ == '\n' && PL_in_eval && !PL_rsfp && !incline)
                incline(s);
        }

        /* only continue to recharge the buffer if we're at the end
         * of the buffer, we're not reading from a source filter, and
         * we're in normal lexing mode
         */
        if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
                PL_lex_state == LEX_FORMLINE)
            return s;

        /* try to recharge the buffer; prevlen remembers how long
         * PL_linestr was before the read */
        if ((s = filter_gets(PL_linestr, PL_rsfp,
                             (prevlen = SvCUR(PL_linestr)))) == Nullch)
        {
            /* end of file.  Add on the -p or -n magic */
            if (PL_minus_p) {
                sv_setpv(PL_linestr,
                         ";}continue{print or die qq(-p destination: $!\\n);}");
                PL_minus_n = PL_minus_p = 0;
            }
            else if (PL_minus_n) {
                sv_setpvn(PL_linestr, ";}", 2);
                PL_minus_n = 0;
            }
            else
                sv_setpvn(PL_linestr,";", 1);

            /* reset variables for next time we lex */
            PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
                = SvPVX(PL_linestr);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
            PL_last_lop = PL_last_uni = Nullch;

            /* In perl versions previous to p4-rawid: //depot/perl@32954 -P
             * preprocessors were supported here. We don't support -P at all, even
             * on perls that support it, and use the following chunk from blead
             * perl. (rafl)
             */

            /* Close the filehandle.  Could be from
             * STDIN, or a regular file.  If we were reading code from
             * STDIN (because the commandline held no -e or filename)
             * then we don't close it, we reset it so the code can
             * read from STDIN too.
             */
            if ((PerlIO*)PL_rsfp == PerlIO_stdin())
                PerlIO_clearerr(PL_rsfp);
            else
                (void)PerlIO_close(PL_rsfp);
            PL_rsfp = Nullfp;
            return s;
        }

        /* not at end of file, so we only read another line */
        /* make corresponding updates to old pointers, for yyerror() */
        /* The saved positions are first turned into offsets relative to the
         * old end of buffer, then re-applied relative to the start of the
         * newly read text (s + prevlen).
         * NOTE(review): presumably needed because filter_gets() may have
         * moved the string buffer - confirm */
        oldprevlen = PL_oldbufptr - PL_bufend;
        oldoldprevlen = PL_oldoldbufptr - PL_bufend;
        if (PL_last_uni)
            oldunilen = PL_last_uni - PL_bufend;
        if (PL_last_lop)
            oldloplen = PL_last_lop - PL_bufend;
        PL_linestart = PL_bufptr = s + prevlen;
        PL_bufend = s + SvCUR(PL_linestr);
        s = PL_bufptr;
        PL_oldbufptr = s + oldprevlen;
        PL_oldoldbufptr = s + oldoldprevlen;
        if (PL_last_uni)
            PL_last_uni = s + oldunilen;
        if (PL_last_lop)
            PL_last_lop = s + oldloplen;
        if (!incline)
            incline(s);

        /* debugger active and we're not compiling the debugger code,
         * so store the line into the debugger's array of lines
         */
        if (PERLDB_LINE && PL_curstash != PL_debstash) {
            SV * const sv = NEWSV(85,0);

            sv_upgrade(sv, SVt_PVMG);
            /* copy of the text just read, stored at the current line number */
            sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
            (void)SvIOK_on(sv);
            SvIV_set(sv, 0);
            av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
        }
    }
}
/*
 * PerlIOEncode_fill - refill the encoding layer's read buffer for handle f.
 *
 * Flushes f first, then takes raw bytes from the layer below (pushing a
 * :perlio layer underneath when the layer below does not support fast
 * gets), passes them to the "decode" method of e->enc together with
 * e->chk, and installs the decoded UTF-8 string as the read buffer
 * (e->bufsv / e->base.*), setting PERLIO_F_RDBUF.
 *
 * Bytes that decode() leaves behind in e->dataSV are not consumed from the
 * layer below.  If decode() yields nothing, the bytes seen so far are kept
 * in e->dataSV and more input is fetched (retry).  With NEEDS_LINES set,
 * data is handed to decode() only up to and including the last newline
 * available; input with no newline is accumulated in e->dataSV.
 *
 * Returns 0 when decoded data was placed in the buffer.  Returns -1 when
 * the initial flush fails, or when no data could be obtained; in the latter
 * case PERLIO_F_EOF is set if avail is 0 and PERLIO_F_ERROR otherwise.
 * Dies if :perlio cannot be pushed or decode() does not return exactly one
 * value.
 */
IV
PerlIOEncode_fill(pTHX_ PerlIO * f)
{
    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
    dSP;
    IV code = 0;
    PerlIO *n;
    SSize_t avail;

    /* This early return happens before the stack/scope setup below,
       so nothing needs to be unwound */
    if (PerlIO_flush(f) != 0)
        return -1;
    n = PerlIONext(f);
    if (!PerlIO_fast_gets(n)) {
        /* Things get too messy if we don't have a buffer layer
           push a :perlio to do the job */
        char mode[8];
        n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
        if (!n) {
            Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
        }
    }
    PUSHSTACKi(PERLSI_MAGIC);
    SPAGAIN;
    ENTER;
    SAVETMPS;
  retry:
    /* Work out how many raw bytes the layer below can offer, filling it
       if its buffer is empty */
    avail = PerlIO_get_cnt(n);
    if (avail <= 0) {
        avail = PerlIO_fill(n);
        if (avail == 0) {
            avail = PerlIO_get_cnt(n);
        }
        else {
            /* fill failed: treat as EOF (0) unless an error is flagged,
               in which case avail keeps the failing return value */
            if (!PerlIO_error(n) && PerlIO_eof(n))
                avail = 0;
        }
    }
    if (avail > 0 || (e->flags & NEEDS_LINES)) {
        STDCHAR *ptr = PerlIO_get_ptr(n);
        SSize_t use = (avail >= 0) ? avail : 0;
        SV *uni;
        char *s = NULL;
        STRLEN len = 0;
        e->base.ptr = e->base.end = (STDCHAR *) NULL;
        (void) PerlIOEncode_get_base(aTHX_ f);
        /* dataSV holds raw bytes waiting to be decoded; create it lazily */
        if (!e->dataSV)
            e->dataSV = newSV(0);
        if (SvTYPE(e->dataSV) < SVt_PV) {
            sv_upgrade(e->dataSV,SVt_PV);
        }
        if (e->flags & NEEDS_LINES) {
            /* Encoding needs whole lines (e.g. iso-2022-*)
               search back from end of available data for
               a line marker
             */
            STDCHAR *nl = ptr+use-1;
            while (nl >= ptr) {
                if (*nl == '\n') {
                    break;
                }
                nl--;
            }
            if (nl >= ptr && *nl == '\n') {
                /* found a line - take up to and including that */
                use = (nl+1)-ptr;
            }
            else if (avail > 0) {
                /* No line, but not EOF - append avail to the pending data */
                sv_catpvn(e->dataSV, (char*)ptr, use);
                PerlIO_set_ptrcnt(n, ptr+use, 0);
                goto retry;
            }
            else if (!SvCUR(e->dataSV)) {
                /* nothing new and nothing pending: report EOF/error */
                goto end_of_file;
            }
        }
        if (SvCUR(e->dataSV)) {
            /* something left over from last time - create a normal
               SV with new data appended
             */
            if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
                if (e->flags & NEEDS_LINES) {
                    /* Have to grow buffer */
                    e->base.bufsiz = use + SvCUR(e->dataSV);
                    PerlIOEncode_get_base(aTHX_ f);
                }
                else {
                    /* otherwise take only as much as fits */
                    use = e->base.bufsiz - SvCUR(e->dataSV);
                }
            }
            sv_catpvn(e->dataSV,(char*)ptr,use);
        }
        else {
            /* Create a "dummy" SV to represent the available data from layer below */
            /* Release any string dataSV owns before aliasing it to ptr */
            if (SvLEN(e->dataSV) && SvPVX_const(e->dataSV)) {
                Safefree(SvPVX_mutable(e->dataSV));
            }
            if (use > (SSize_t)e->base.bufsiz) {
                if (e->flags & NEEDS_LINES) {
                    /* Have to grow buffer */
                    e->base.bufsiz = use;
                    PerlIOEncode_get_base(aTHX_ f);
                }
                else {
                    use = e->base.bufsiz;
                }
            }
            SvPV_set(e->dataSV, (char *) ptr);
            SvLEN_set(e->dataSV, 0);  /* Hands off sv.c - it isn't yours */
            SvCUR_set(e->dataSV,use);
            SvPOK_only(e->dataSV);
        }
        /* the input to decode() is raw bytes, not UTF-8 */
        SvUTF8_off(e->dataSV);
        PUSHMARK(sp);
        XPUSHs(e->enc);
        XPUSHs(e->dataSV);
        XPUSHs(e->chk);
        PUTBACK;
        if (call_method("decode", G_SCALAR) != 1) {
            Perl_die(aTHX_ "panic: decode did not return a value");
        }
        SPAGAIN;
        uni = POPs;
        PUTBACK;
        /* Now get translated string (forced to UTF-8) and use as buffer */
        if (SvPOK(uni)) {
            s = SvPVutf8(uni, len);
#ifdef PARANOID_ENCODE_CHECKS
            if (len && !is_utf8_string((U8*)s,len)) {
                Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
            }
#endif
        }
        if (len > 0) {
            /* Got _something */
            /* if decode gave us back dataSV then data may vanish when
               we do ptrcnt adjust - so take our copy now.
               (The copy is a pain - need a put-it-here option for decode.)
             */
            sv_setpvn(e->bufsv,s,len);
            e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
            e->base.end = e->base.ptr + SvCUR(e->bufsv);
            PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
            SvUTF8_on(e->bufsv);

            /* Adjust ptr/cnt not taking anything which
               did not translate - not clear this is a win */
            /* compute amount we took: whatever is still in dataSV after
               decode() is treated as not consumed */
            use -= SvCUR(e->dataSV);
            PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
            /* and as we did not take it it isn't pending */
            SvCUR_set(e->dataSV,0);
        } else {
            /* Got nothing - assume partial character so we need some more */
            /* Make sure e->dataSV is a normal SV before re-filling as
               buffer alias will change under us
             */
            s = SvPV(e->dataSV,len);
            sv_setpvn(e->dataSV,s,len);
            PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
            goto retry;
        }
    }
    else {
    /* also reached by goto from the NEEDS_LINES branch above */
    end_of_file:
        code = -1;
        if (avail == 0)
            PerlIOBase(f)->flags |= PERLIO_F_EOF;
        else
            PerlIOBase(f)->flags |= PERLIO_F_ERROR;
    }
    /* unwind the scope and stack set up before the retry label */
    FREETMPS;
    LEAVE;
    POPSTACK;
    return code;
}
void lucy_RegexTokenizer_tokenize_str(lucy_RegexTokenizer *self, const char *string, size_t string_len, lucy_Inversion *inversion) { uint32_t num_code_points = 0; SV *wrapper = sv_newmortal(); #if (PERL_VERSION > 10) REGEXP *rx = (REGEXP*)self->token_re; regexp *rx_struct = (regexp*)SvANY(rx); #else REGEXP *rx = (REGEXP*)self->token_re; regexp *rx_struct = rx; #endif char *string_beg = (char*)string; char *string_end = string_beg + string_len; char *string_arg = string_beg; // Fake up an SV wrapper to feed to the regex engine. sv_upgrade(wrapper, SVt_PV); SvREADONLY_on(wrapper); SvLEN(wrapper) = 0; SvUTF8_on(wrapper); // Wrap the string in an SV to please the regex engine. SvPVX(wrapper) = string_beg; SvCUR_set(wrapper, string_len); SvPOK_on(wrapper); while (pregexec(rx, string_arg, string_end, string_arg, 1, wrapper, 1)) { #if ((PERL_VERSION >= 10) || (PERL_VERSION == 9 && PERL_SUBVERSION >= 5)) char *const start_ptr = string_arg + rx_struct->offs[0].start; char *const end_ptr = string_arg + rx_struct->offs[0].end; #else char *const start_ptr = string_arg + rx_struct->startp[0]; char *const end_ptr = string_arg + rx_struct->endp[0]; #endif uint32_t start, end; // Get start and end offsets in Unicode code points. for (; string_arg < start_ptr; num_code_points++) { string_arg += lucy_StrHelp_UTF8_COUNT[(uint8_t)(*string_arg)]; if (string_arg > string_end) { THROW(LUCY_ERR, "scanned past end of '%s'", string_beg); } } start = num_code_points; for (; string_arg < end_ptr; num_code_points++) { string_arg += lucy_StrHelp_UTF8_COUNT[(uint8_t)(*string_arg)]; if (string_arg > string_end) { THROW(LUCY_ERR, "scanned past end of '%s'", string_beg); } } end = num_code_points; // Add a token to the new inversion. Lucy_Inversion_Append(inversion, lucy_Token_new( start_ptr, (end_ptr - start_ptr), start, end, 1.0f, // boost always 1 for now 1 // position increment ) ); } }