STATIC char * S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) { register char *d = dest; register char * const e = d + destlen - 3; /* two-character token, ending NUL */ for (;;) { if (d >= e) Perl_croak(aTHX_ ident_too_long); if (isALNUM(*s)) /* UTF handled below */ *d++ = *s++; else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) { *d++ = ':'; *d++ = ':'; s++; } else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') { *d++ = *s++; *d++ = *s++; } else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { char *t = s + UTF8SKIP(s); while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t)) t += UTF8SKIP(t); if (d + (t - s) > e) Perl_croak(aTHX_ ident_too_long); Copy(s, d, t - s, char); d += t - s; s = t; } else {
/*
 * sar_checkNodeContainChar_c
 *
 * Reports whether checkChar is accepted by a node of the given class.
 *
 *   checkNode - node whose pathChar is compared when nodeClass is SAR_CHAR;
 *               it is not dereferenced for any other class
 *   checkChar - the character being tested
 *   nodeClass - which kind of test to apply
 *
 * Returns SAR_TRUE when the character passes the test selected by
 * nodeClass, SAR_FALSE otherwise (including for any class not listed
 * below).  SAR_DOT accepts every character.
 */
sar_bool
sar_checkNodeContainChar_c(sarNode_p checkNode, char checkChar,
                           sar_nodeClass nodeClass)
{
    if (nodeClass == SAR_CHAR)
        return (checkNode->pathChar == checkChar) ? SAR_TRUE : SAR_FALSE;

    if (nodeClass == SAR_DIGIT)
        return isDIGIT(checkChar) ? SAR_TRUE : SAR_FALSE;

    if (nodeClass == SAR_ALPHA_NUM)
        return isALNUM(checkChar) ? SAR_TRUE : SAR_FALSE;

    if (nodeClass == SAR_ALPHA)
        return isALPHA(checkChar) ? SAR_TRUE : SAR_FALSE;

    if (nodeClass == SAR_SPACE)
        return isSPACE(checkChar) ? SAR_TRUE : SAR_FALSE;

    /* the "dot" class places no restriction on the character */
    if (nodeClass == SAR_DOT)
        return SAR_TRUE;

    return SAR_FALSE;
}
/*
 * S_grok_bslash_c
 *
 * Handles the character that follows "\c": returns toCTRL(source) and emits
 * the associated diagnostics.
 *
 *   source         - the character following "\c"
 *   utf8           - when true, a non-ASCII source is a croak instead of a
 *                    warning
 *   output_warning - when true, warn if the result is not a control
 *                    character (isCNTRL is false for it)
 *
 * The non-ASCII deprecation warning is issued regardless of
 * output_warning.
 */
STATIC char
S_grok_bslash_c(pTHX_ const char source, const bool utf8,
                const bool output_warning)
{
    U8 ctrl;

    /* Trying to deprecate non-ASCII usages.  This construct has never
     * worked for a utf8 variant.  So, even though we are accepting
     * non-ASCII Latin1 in 5.14, there is no need to make them work under
     * utf8 */
    if (utf8 && ! isASCII(source)) {
        Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII");
    }

    ctrl = toCTRL(source);

    if (! isASCII(source)) {
        Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
                         "Character following \"\\c\" must be ASCII");
        return ctrl;
    }

    /* Nothing more to say when the result is a control character or the
     * caller did not ask for warnings */
    if (isCNTRL(ctrl) || ! output_warning) {
        return ctrl;
    }

    if (source == '{') {
        Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
                         "\"\\c{\" is deprecated and is more clearly written as \";\"");
    }
    else {
        /* Build the suggested spelling: the resulting character, preceded
         * by a backslash unless it is alphanumeric, then a NUL */
        U8 suggestion[3];
        U8 used = 0;

        if (! isALNUM(ctrl)) {
            suggestion[used++] = '\\';
        }
        suggestion[used++] = ctrl;
        suggestion[used++] = '\0';

        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
                       "\"\\c%c\" is more clearly written simply as \"%s\"",
                       source,
                       suggestion);
    }

    return ctrl;
}
void Perl_taint_env(pTHX) { SV** svp; MAGIC* mg; char** e; static char* misc_env[] = { "IFS", /* most shells' inter-field separators */ "CDPATH", /* ksh dain bramage #1 */ "ENV", /* ksh dain bramage #2 */ "BASH_ENV", /* bash dain bramage -- I guess it's contagious */ NULL }; /* Don't bother if there's no *ENV glob */ if (!PL_envgv) return; /* If there's no %ENV hash of if it's not magical, croak, because * it probably doesn't reflect the actual environment */ if (!GvHV(PL_envgv) || !(SvRMAGICAL(GvHV(PL_envgv)) && mg_find((SV*)GvHV(PL_envgv), PERL_MAGIC_env))) { bool was_tainted = PL_tainted; char *name = GvENAME(PL_envgv); PL_tainted = TRUE; if (strEQ(name,"ENV")) /* hash alias */ taint_proper("%%ENV is aliased to %s%s", "another variable"); else /* glob alias: report it in the error message */ taint_proper("%%ENV is aliased to %%%s%s", name); /* this statement is reached under -t or -U */ PL_tainted = was_tainted; } #ifdef VMS { int i = 0; char name[10 + TYPE_DIGITS(int)] = "DCL$PATH"; while (1) { if (i) (void)sprintf(name,"DCL$PATH;%d", i); svp = hv_fetch(GvHVn(PL_envgv), name, strlen(name), FALSE); if (!svp || *svp == &PL_sv_undef) break; if (SvTAINTED(*svp)) { TAINT; taint_proper("Insecure %s%s", "$ENV{DCL$PATH}"); } if ((mg = mg_find(*svp, PERL_MAGIC_envelem)) && MgTAINTEDDIR(mg)) { TAINT; taint_proper("Insecure directory in %s%s", "$ENV{DCL$PATH}"); } i++; } } #endif /* VMS */ svp = hv_fetch(GvHVn(PL_envgv),"PATH",4,FALSE); if (svp && *svp) { if (SvTAINTED(*svp)) { TAINT; taint_proper("Insecure %s%s", "$ENV{PATH}"); } if ((mg = mg_find(*svp, PERL_MAGIC_envelem)) && MgTAINTEDDIR(mg)) { TAINT; taint_proper("Insecure directory in %s%s", "$ENV{PATH}"); } } #ifndef VMS /* tainted $TERM is okay if it contains no metachars */ svp = hv_fetch(GvHVn(PL_envgv),"TERM",4,FALSE); if (svp && *svp && SvTAINTED(*svp)) { STRLEN n_a; bool was_tainted = PL_tainted; char *t = SvPV(*svp, n_a); char *e = t + n_a; PL_tainted = was_tainted; if (t < e && isALNUM(*t)) t++; while (t < e 
&& (isALNUM(*t) || strchr("-_.+", *t))) t++; if (t < e) { TAINT; taint_proper("Insecure $ENV{%s}%s", "TERM"); } } #endif /* !VMS */ for (e = misc_env; *e; e++) { svp = hv_fetch(GvHVn(PL_envgv), *e, strlen(*e), FALSE); if (svp && *svp != &PL_sv_undef && SvTAINTED(*svp)) { TAINT; taint_proper("Insecure $ENV{%s}%s", *e); } } }
/*
 * decode_entities
 *
 * Replaces character references in sv, in place, and returns sv.
 *
 *   sv            - string to decode; forced to a string with SvPV_force()
 *                   and rewritten in its own buffer
 *   entity2char   - hash mapping entity names (looked up both without and,
 *                   when a ';' follows, with the trailing ';') to their
 *                   replacement strings; may be NULL, in which case only
 *                   numeric references are decoded
 *   expand_prefix - when true and the full name is unknown, successively
 *                   shorter prefixes of the name are tried
 *
 * Numeric references ("&#NNN" / "&#xHHH", optional ';') are decoded when
 * the value is non-zero and does not exceed 0x10FFFF.  With
 * UNICODE_HTML_PARSER defined, values above 255 are stored as UTF-8 (the
 * string is upgraded when needed), a high surrogate immediately followed
 * by a low surrogate reference is combined into one code point, and
 * unpaired surrogates / noncharacters become U+FFFD.  Without it, only
 * values up to 255 are decoded.
 *
 * Anything that cannot be decoded is copied through unchanged.
 *
 * s is the read cursor and t the write cursor (t <= s); the region between
 * them is the "gap" that grow_gap() enlarges when a replacement is longer
 * than the text it replaces.
 *
 * NOTE(review): a few tests read *s without checking s < end; this relies
 * on the buffer having a trailing NUL at 'end' -- confirm.
 */
EXTERN SV*
decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
{
    STRLEN len;
    char *s = SvPV_force(sv, len);
    char *t = s;
    char *end = s + len;
    char *ent_start;

    char *repl;
    STRLEN repl_len;
#ifdef UNICODE_HTML_PARSER
    char buf[UTF8_MAXLEN];
    int repl_utf8;
    int high_surrogate = 0;
#else
    char buf[1];
#endif

#if defined(__GNUC__) && defined(UNICODE_HTML_PARSER)
    /* gcc -Wall reports this variable as possibly used uninitialized */
    repl_utf8 = 0;
#endif

    while (s < end) {
        assert(t <= s);

        if ((*t++ = *s++) != '&')
            continue;

        ent_start = s;
        repl = 0;

        if (s < end && *s == '#') {
            UV num = 0;
            int ok = 0;
            s++;
            if (s < end && (*s == 'x' || *s == 'X')) {
                s++;
                while (s < end) {
                    char *tmp;
                    /* strchr() matches the terminating NUL of
                     * PL_hexdigit, which would make an embedded NUL
                     * count as the hex digit 0 */
                    if (*s == '\0')
                        break;
                    tmp = strchr(PL_hexdigit, *s);
                    if (!tmp)
                        break;
                    num = num << 4 | ((tmp - PL_hexdigit) & 15);
                    if (num > 0x10FFFF) { /* overflow */
                        ok = 0;
                        break;
                    }
                    s++;
                    ok = 1;
                }
            }
            else {
                while (s < end && isDIGIT(*s)) {
                    num = num * 10 + (*s - '0');
                    if (num > 0x10FFFF) { /* overflow */
                        ok = 0;
                        break;
                    }
                    s++;
                    ok = 1;
                }
            }
            if (num && ok) {
#ifdef UNICODE_HTML_PARSER
                if (!SvUTF8(sv) && num <= 255) {
                    buf[0] = (char) num;
                    repl = buf;
                    repl_len = 1;
                    repl_utf8 = 0;
                }
                else if (num == 0xFFFE || num == 0xFFFF) {
                    /* illegal */
                }
                else {
                    char *tmp;
                    if ((num & 0xFFFFFC00) == 0xDC00) {  /* low-surrogate */
                        if (high_surrogate != 0) {
                            t -= 3; /* Back up past 0xFFFD */
                            num = ((high_surrogate - 0xD800) << 10) +
                                (num - 0xDC00) + 0x10000;
                            high_surrogate = 0;
                        } else {
                            num = 0xFFFD;
                        }
                    }
                    else if ((num & 0xFFFFFC00) == 0xD800) { /* high-surrogate */
                        /* emit U+FFFD for now; a directly following low
                         * surrogate will overwrite it */
                        high_surrogate = num;
                        num = 0xFFFD;
                    }
                    else {
                        high_surrogate = 0;
                        /* otherwise invalid? */
                        if ((num >= 0xFDD0 && num <= 0xFDEF) ||
                            ((num & 0xFFFE) == 0xFFFE) ||
                            num > 0x10FFFF)
                        {
                            num = 0xFFFD;
                        }
                    }

                    tmp = (char*)uvuni_to_utf8((U8*)buf, num);
                    repl = buf;
                    repl_len = tmp - buf;
                    repl_utf8 = 1;
                }
#else
                if (num <= 255) {
                    buf[0] = (char) num & 0xFF;
                    repl = buf;
                    repl_len = 1;
                }
#endif
            }
        }
        else {
            char *ent_name = s;
            while (s < end && isALNUM(*s))
                s++;
            if (ent_name != s && entity2char) {
                SV** svp;
                /* try the bare name first, then the name including ';' */
                if (              (svp = hv_fetch(entity2char, ent_name, s - ent_name, 0)) ||
                    (*s == ';' && (svp = hv_fetch(entity2char, ent_name, s - ent_name + 1, 0)))
                   )
                {
                    repl = SvPV(*svp, repl_len);
#ifdef UNICODE_HTML_PARSER
                    repl_utf8 = SvUTF8(*svp);
#endif
                }
                else if (expand_prefix) {
                    /* longest known prefix wins; the rest of the name is
                     * left in the input by moving s back */
                    char *ss = s - 1;
                    while (ss > ent_name) {
                        svp = hv_fetch(entity2char, ent_name, ss - ent_name, 0);
                        if (svp) {
                            repl = SvPV(*svp, repl_len);
#ifdef UNICODE_HTML_PARSER
                            repl_utf8 = SvUTF8(*svp);
#endif
                            s = ss;
                            break;
                        }
                        ss--;
                    }
                }
            }
#ifdef UNICODE_HTML_PARSER
            high_surrogate = 0;
#endif
        }

        if (repl) {
            char *repl_allocated = 0;
            if (s < end && *s == ';')
                s++;
            t--;  /* '&' already copied, undo it */

#ifdef UNICODE_HTML_PARSER
            /* a pending high surrogate only survives if another reference
             * follows immediately */
            if (*s != '&') {
                high_surrogate = 0;
            }

            if (!SvUTF8(sv) && repl_utf8) {
                /* need to upgrade sv before we continue */
                STRLEN before_gap_len = t - SvPVX(sv);
                char *before_gap = (char*)bytes_to_utf8((U8*)SvPVX(sv), &before_gap_len);
                STRLEN after_gap_len = end - s;
                char *after_gap = (char*)bytes_to_utf8((U8*)s, &after_gap_len);

                sv_setpvn(sv, before_gap, before_gap_len);
                sv_catpvn(sv, after_gap, after_gap_len);
                SvUTF8_on(sv);

                Safefree(before_gap);
                Safefree(after_gap);

                /* the gap is empty after rebuilding the string */
                s = t = SvPVX(sv) + before_gap_len;
                end = SvPVX(sv) + before_gap_len + after_gap_len;
            }
            else if (SvUTF8(sv) && !repl_utf8) {
                repl = (char*)bytes_to_utf8((U8*)repl, &repl_len);
                repl_allocated = repl;
            }
#endif

            if (t + repl_len > s) {
                /* need to grow the string */
                grow_gap(aTHX_ sv, repl_len - (s - t), &t, &s, &end);
            }

            /* copy replacement string into string */
            while (repl_len--)
                *t++ = *repl++;

            if (repl_allocated)
                Safefree(repl_allocated);
        }
        else {
#ifdef UNICODE_HTML_PARSER
            /* Nothing was decoded, so the text is copied literally and a
             * pending high surrogate is no longer directly in front of
             * whatever follows.  Without this reset, a later low
             * surrogate would back up 3 bytes over unrelated output. */
            high_surrogate = 0;
#endif
            while (ent_start < s)
                *t++ = *ent_start++;
        }
    }

    *t = '\0';
    SvCUR_set(sv, t - SvPVX(sv));

    return sv;
}
/*
 * sar_lookPathPos_c
 *
 * Recursively walks the node graph starting at currNode while consuming
 * checkStr from position currPos.
 *
 *   currNode - node being visited
 *   checkStr - the string being examined
 *   startPos - passed through unchanged to the callbacks and to
 *              sar_matchPlusNode_c
 *   currPos  - index of the character examined at this node
 *   len      - positions >= len are treated as end of input
 *   negative - when true, successors are followed for characters that do
 *              NOT match their path character / class
 *
 * Every entry of currNode->callFunc (up to the first NULL) is run through
 * sar_runCallFunc_c before anything else; the result is SAR_TRUE if at
 * least one such callback existed here or any followed successor reported
 * a match.
 *
 * Successors reached through currNode->plusNode are handed to
 * sar_matchPlusNode_c at the same position; ordinary successors are
 * visited at currPos + 1 in non-negative mode, except negativeNode, which
 * is visited at currPos in negative mode.
 */
sar_bool
sar_lookPathPos_c(sarNode_p currNode, const char * checkStr, long startPos,
                  long currPos, long len, sar_bool negative)
{
    sar_bool matched = SAR_FALSE;
    int cbIdx;

    /* fire every callback attached to this node */
    for (cbIdx = 0; currNode->callFunc[cbIdx] != (SV*)NULL; ++cbIdx) {
        matched = SAR_TRUE;
        sar_runCallFunc_c(currNode->callFunc[cbIdx], startPos, currPos);
    }

    if (currPos >= len) {
        return matched;
    }

    char ch = checkStr[currPos];
    sarNode_p plus = currNode->plusNode;

    /* ---- successors hanging off the "plus" node ---- */
    if (plus != (sarNode_p)NULL) {
        if (negative) {
            int i;

            for (i = 0; i < plus->charNumber; ++i) {
                if (ch != plus->sarPathChars[i]) {
                    matched = sar_matchPlusNode_c(plus->sarNodes[i], checkStr,
                                                  startPos, currPos, len,
                                                  SAR_CHAR, negative)
                              || matched;
                }
            }

            if (plus->digitNode != (sarNode_p)NULL && ! isDIGIT(ch)) {
                matched = sar_matchPlusNode_c(plus->digitNode, checkStr,
                                              startPos, currPos, len,
                                              SAR_DIGIT, negative)
                          || matched;
            }
        }
        else {
            int hit = sar_searchChar_c(plus->sarPathChars, plus->charNumber, ch);

            if (hit >= 0) {
                matched = sar_matchPlusNode_c(plus->sarNodes[hit], checkStr,
                                              startPos, currPos, len,
                                              SAR_CHAR, negative)
                          || matched;
            }

            if (plus->dotNode != (sarNode_p)NULL) {
                matched = sar_matchPlusNode_c(plus->dotNode, checkStr,
                                              startPos, currPos, len,
                                              SAR_DOT, negative)
                          || matched;
            }

            if (plus->digitNode != (sarNode_p)NULL && isDIGIT(ch)) {
                matched = sar_matchPlusNode_c(plus->digitNode, checkStr,
                                              startPos, currPos, len,
                                              SAR_DIGIT, negative)
                          || matched;
            }

            if (plus->alphaNumNode != (sarNode_p)NULL && isALNUM(ch)) {
                matched = sar_matchPlusNode_c(plus->alphaNumNode, checkStr,
                                              startPos, currPos, len,
                                              SAR_ALPHA_NUM, negative)
                          || matched;
            }

            if (plus->alphaNode != (sarNode_p)NULL && isALPHA(ch)) {
                matched = sar_matchPlusNode_c(plus->alphaNode, checkStr,
                                              startPos, currPos, len,
                                              SAR_ALPHA, negative)
                          || matched;
            }

            if (plus->spaceNode != (sarNode_p)NULL && isSPACE(ch)) {
                matched = sar_matchPlusNode_c(plus->spaceNode, checkStr,
                                              startPos, currPos, len,
                                              SAR_SPACE, negative)
                          || matched;
            }
        }
    }

    /* ---- ordinary successors of this node ---- */
    if (negative) {
        int i;

        for (i = 0; i < currNode->charNumber; ++i) {
            if (ch != currNode->sarPathChars[i]) {
                matched = sar_lookPathPos_c(currNode->sarNodes[i], checkStr,
                                            startPos, currPos + 1, len,
                                            SAR_FALSE)
                          || matched;
            }
        }

        if (currNode->spaceNode != (sarNode_p)NULL && ! isSPACE(ch)) {
            matched = sar_lookPathPos_c(currNode->spaceNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        if (currNode->digitNode != (sarNode_p)NULL && ! isDIGIT(ch)) {
            matched = sar_lookPathPos_c(currNode->digitNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        if (currNode->alphaNumNode != (sarNode_p)NULL && ! isALNUM(ch)) {
            matched = sar_lookPathPos_c(currNode->alphaNumNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        if (currNode->alphaNode != (sarNode_p)NULL && ! isALPHA(ch)) {
            matched = sar_lookPathPos_c(currNode->alphaNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }
    }
    else {
        int hit = sar_searchChar_c(currNode->sarPathChars,
                                   currNode->charNumber, ch);

        if (hit >= 0) {
            matched = sar_lookPathPos_c(currNode->sarNodes[hit], checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        /* the negative node looks at the SAME character, in negative mode */
        if (currNode->negativeNode != (sarNode_p)NULL) {
            matched = sar_lookPathPos_c(currNode->negativeNode, checkStr,
                                        startPos, currPos, len, SAR_TRUE)
                      || matched;
        }

        if (currNode->dotNode != (sarNode_p)NULL) {
            matched = sar_lookPathPos_c(currNode->dotNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        if (currNode->spaceNode != (sarNode_p)NULL && isSPACE(ch)) {
            matched = sar_lookPathPos_c(currNode->spaceNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        if (currNode->digitNode != (sarNode_p)NULL && isDIGIT(ch)) {
            matched = sar_lookPathPos_c(currNode->digitNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        if (currNode->alphaNumNode != (sarNode_p)NULL && isALNUM(ch)) {
            matched = sar_lookPathPos_c(currNode->alphaNumNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }

        if (currNode->alphaNode != (sarNode_p)NULL && isALPHA(ch)) {
            matched = sar_lookPathPos_c(currNode->alphaNode, checkStr,
                                        startPos, currPos + 1, len, SAR_FALSE)
                      || matched;
        }
    }

    return matched;
}
void Perl_taint_env(pTHX) { SV** svp; MAGIC* mg; char** e; static char* misc_env[] = { "IFS", /* most shells' inter-field separators */ "CDPATH", /* ksh dain bramage #1 */ "ENV", /* ksh dain bramage #2 */ "BASH_ENV", /* bash dain bramage -- I guess it's contagious */ NULL }; if (!PL_envgv) return; #ifdef VMS { int i = 0; char name[10 + TYPE_DIGITS(int)] = "DCL$PATH"; while (1) { if (i) (void)sprintf(name,"DCL$PATH;%d", i); svp = hv_fetch(GvHVn(PL_envgv), name, strlen(name), FALSE); if (!svp || *svp == &PL_sv_undef) break; if (SvTAINTED(*svp)) { TAINT; taint_proper("Insecure %s%s", "$ENV{DCL$PATH}"); } if ((mg = mg_find(*svp, PERL_MAGIC_envelem)) && MgTAINTEDDIR(mg)) { TAINT; taint_proper("Insecure directory in %s%s", "$ENV{DCL$PATH}"); } i++; } } #endif /* VMS */ svp = hv_fetch(GvHVn(PL_envgv),"PATH",4,FALSE); if (svp && *svp) { if (SvTAINTED(*svp)) { TAINT; taint_proper("Insecure %s%s", "$ENV{PATH}"); } if ((mg = mg_find(*svp, PERL_MAGIC_envelem)) && MgTAINTEDDIR(mg)) { TAINT; taint_proper("Insecure directory in %s%s", "$ENV{PATH}"); } } #ifndef VMS /* tainted $TERM is okay if it contains no metachars */ svp = hv_fetch(GvHVn(PL_envgv),"TERM",4,FALSE); if (svp && *svp && SvTAINTED(*svp)) { STRLEN n_a; bool was_tainted = PL_tainted; char *t = SvPV(*svp, n_a); char *e = t + n_a; PL_tainted = was_tainted; if (t < e && isALNUM(*t)) t++; while (t < e && (isALNUM(*t) || strchr("-_.+", *t))) t++; if (t < e) { TAINT; taint_proper("Insecure $ENV{%s}%s", "TERM"); } } #endif /* !VMS */ for (e = misc_env; *e; e++) { svp = hv_fetch(GvHVn(PL_envgv), *e, strlen(*e), FALSE); if (svp && *svp != &PL_sv_undef && SvTAINTED(*svp)) { TAINT; taint_proper("Insecure $ENV{%s}%s", *e); } } }
static int fmm_ascmagic(unsigned char *buf, size_t nbytes, char **mime_type) { int has_escapes = 0; unsigned char *s; char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ char *token; register struct names *p; int small_nbytes; char *strtok_state; unsigned char *tp; /* these are easy, do them first */ /* * for troff, look for . + letter + letter or .\"; this must be done to * disambiguate tar archives' ./file and other trash from real troff * input. */ if (*buf == '.') { tp = buf + 1; while (isSPACE(*tp)) ++tp; /* skip leading whitespace */ if ((isALNUM(*tp) || *tp == '\\') && (isALNUM(*(tp + 1)) || *tp == '"')) { strcpy(*mime_type, "application/x-troff"); return 0; } } if ((*buf == 'c' || *buf == 'C') && isSPACE(*(buf + 1))) { /* Fortran */ strcpy(*mime_type, "text/plain"); return 0; } /* look for tokens from names.h - this is expensive!, so we'll limit * ourselves to only SMALL_HOWMANY bytes */ small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes; /* make a copy of the buffer here because strtok() will destroy it */ s = (unsigned char *) memcpy(nbuf, buf, small_nbytes); s[small_nbytes] = '\0'; has_escapes = (memchr(s, '\033', small_nbytes) != NULL); while ((token = strtok_r((char *) s, " \t\n\r\f", &strtok_state)) != NULL) { s = NULL; /* make strtok() keep on tokin' */ for (p = names; p < names + NNAMES; p++) { if (strEQ(p->name, token)) { strcpy(*mime_type, types[p->type]); if (has_escapes) strcat(*mime_type, " (with escape sequences)"); return 0; } } } int is_tarball = is_tar(buf, nbytes); if ( is_tarball == 1 || is_tarball == 2 ) { /* 1: V7 tar archive */ /* 2: POSIX tar archive */ strcpy(*mime_type, "application/x-tar"); return 0; } /* all else fails, but it is ascii... */ strcpy(*mime_type, "text/plain"); return 0; }