/*
 * glob_fixed() stores the literal text currently held in glob->glob_buffer
 * as a one-element set in the next free pattern slot
 * (glob->pattern[glob->size]).
 *
 * glob:   parser state; glob_buffer must hold a NUL-terminated literal.
 * amount: running number of URL combinations.
 *
 * Returns GLOB_OK on success, or the GLOB_NO_MEM error produced through
 * GLOBERROR when an allocation fails.
 *
 * The caller is responsible for advancing glob->size.
 */
static GlobCode glob_fixed(URLGlob *glob, unsigned long *amount)
{
  URLPattern *pat = &glob->pattern[glob->size];

  pat->type = UPTSet;
  pat->content.Set.size = 1;
  pat->content.Set.ptr_s = 0;
  pat->globindex = -1; /* literals are not counted as "actual" globs */

  /* A literal offers exactly one alternative, so it must leave the running
     product of combinations untouched. Only a counter that has not been
     seeded yet (zero) is bumped to 1; unconditionally incrementing it would
     turn e.g. "{a,b}c" into 3 combinations instead of 2.
     NOTE(review): assumes 0 means "not yet initialised" - confirm against
     the caller. */
  if(!*amount)
    *amount = 1;

  pat->content.Set.elements = malloc(sizeof(char*));
  if(!pat->content.Set.elements)
    return GLOBERROR("out of memory", 0, GLOB_NO_MEM);

  pat->content.Set.elements[0] = strdup(glob->glob_buffer);
  if(!pat->content.Set.elements[0])
    return GLOBERROR("out of memory", 0, GLOB_NO_MEM);

  return GLOB_OK;
}
/*
 * glob_fixed() records a literal URL fragment as a set with a single
 * element in the pattern slot at index glob->size.
 *
 * glob:  parser state that owns the pattern array.
 * fixed: start of the literal text (need not be NUL-terminated at len).
 * len:   number of bytes of 'fixed' to copy.
 *
 * Returns CURLE_OK, or the CURLE_OUT_OF_MEMORY error produced through
 * GLOBERROR if either allocation fails.
 */
static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
{
  URLPattern *pat = &glob->pattern[glob->size];
  char **list;
  char *copy;

  pat->globindex = -1;
  pat->type = UPTSet;
  pat->content.Set.ptr_s = 0;
  pat->content.Set.size = 1;

  /* one-slot array holding the single element */
  list = malloc(sizeof(char*));
  pat->content.Set.elements = list;
  if(!list)
    return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);

  /* private, NUL-terminated copy of the literal */
  copy = malloc(len+1);
  list[0] = copy;
  if(!copy)
    return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);

  memcpy(copy, fixed, len);
  copy[len] = 0;

  return CURLE_OK;
}
/*
 * glob_set() parses a set expression. On entry *patternp points just behind
 * the opening '{'; ','-separated elements are gathered until the matching
 * '}' is found.
 *
 * glob:      parser state; the set is stored in glob->pattern[glob->size]
 *            and glob->glob_buffer is used as scratch space per element.
 * patternp:  in/out read position; on success it is left just behind '}'.
 * posp:      in/out column counter used for error reporting.
 * amount:    running number of combinations, multiplied by the set size.
 * globindex: index assigned to this glob.
 *
 * Returns CURLE_OK, or an error produced through GLOBERROR.
 */
static CURLcode glob_set(URLGlob *glob, char **patternp,
                         size_t *posp, unsigned long *amount,
                         int globindex)
{
  char *start = *patternp;          /* first byte behind the '{' */
  char *cur = start;                /* current read position */
  char *out = glob->glob_buffer;    /* write position for current element */
  size_t brace_pos = *posp - 1;     /* column of the opening brace */
  URLPattern *pat = &glob->pattern[glob->size];

  pat->globindex = globindex;
  pat->type = UPTSet;
  pat->content.Set.elements = NULL;
  pat->content.Set.size = 0;
  pat->content.Set.ptr_s = 0;

  for(;;) {
    char ch = *cur;

    if(ch == '\0')
      /* URL ended while the set was still open */
      return GLOBERROR("unmatched brace", brace_pos, CURLE_URL_MALFORMAT);

    if(ch == '{' || ch == '[')
      /* nested expressions are not supported */
      return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);

    if(ch == ']')
      return GLOBERROR("unexpected close bracket", *posp,
                       CURLE_URL_MALFORMAT);

    if(ch == '}' || ch == ',') {
      char **list;

      if(ch == '}') {
        /* the set is being closed */
        if(cur == start)
          return GLOBERROR("empty string within braces", *posp,
                           CURLE_URL_MALFORMAT);

        /* +1 because the final element is only counted further down */
        if(multiply(amount, pat->content.Set.size+1))
          return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
      }

      /* terminate the element collected in the scratch buffer */
      *out = '\0';

      /* make room for one more element pointer */
      if(pat->content.Set.elements) {
        list = realloc(pat->content.Set.elements,
                       (pat->content.Set.size + 1) * sizeof(char*));
        if(!list)
          return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
      }
      else {
        list = malloc(sizeof(char*));
        if(!list)
          return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
      }
      pat->content.Set.elements = list;

      list[pat->content.Set.size] = strdup(glob->glob_buffer);
      if(!list[pat->content.Set.size])
        return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
      ++pat->content.Set.size;

      ++cur; /* step over the ',' or the closing '}' */
      if(ch == '}')
        break;

      /* more elements follow: restart the scratch buffer */
      out = glob->glob_buffer;
      ++(*posp);
      continue;
    }

    if(ch == '\\' && cur[1]) {
      /* escaped character: drop the backslash, keep what follows */
      ++cur;
      ++(*posp);
    }

    /* ordinary character, copy it into the current element */
    *out++ = *cur++;
    ++(*posp);
  }

  *patternp = cur; /* hand the new read position back to the caller */
  return CURLE_OK;
}
/*
 * glob_parse() walks a URL and splits it into pattern slots: literal
 * fragments go to glob_fixed(), '{' starts a set handled by glob_set() and
 * '[' starts a range handled by glob_range() (unless peek_ipv6() reports
 * that the bracket begins an IPv6 literal, which is copied as literal
 * text).
 *
 * glob:    parser state; glob->pattern[] is filled and glob->size advanced.
 * pattern: NUL-terminated URL to parse.
 * pos:     column counter used for error reporting.
 * amount:  out; set to 1 here and then scaled by each set/range.
 *
 * Returns CURLE_OK, or an error produced through GLOBERROR / a helper.
 */
static CURLcode glob_parse(URLGlob *glob, char *pattern,
                           size_t pos, unsigned long *amount)
{
  CURLcode res = CURLE_OK;
  int globindex = 0; /* count "actual" globs */

  *amount = 1;

  while(*pattern && !res) {
    char *buf = glob->glob_buffer;
    size_t sublen = 0;

    /* Refuse BEFORE touching the next slot. Checking only after the slot
       has been filled lets index GLOB_PATTERN_NUM be written first.
       NOTE(review): assumes glob->pattern[] has GLOB_PATTERN_NUM entries -
       confirm against the struct definition. */
    if(glob->size >= GLOB_PATTERN_NUM)
      return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);

    while(*pattern && *pattern != '{') {
      if(*pattern == '[') {
        /* Skip over potential IPv6 literals. */
        size_t skip;
        if(peek_ipv6(pattern, &skip)) {
          memcpy(buf, pattern, skip);
          buf += skip;
          pattern += skip;
          sublen += skip;
          continue;
        }
        break; /* a real range starts here */
      }
      if(*pattern == '}' || *pattern == ']')
        return GLOBERROR("unmatched close brace/bracket", pos,
                         CURLE_URL_MALFORMAT);

      /* only allow \ to escape known "special letters" */
      if(*pattern == '\\' &&
         (*(pattern+1) == '{' || *(pattern+1) == '[' ||
          *(pattern+1) == '}' || *(pattern+1) == ']')) {
        /* escape character, skip '\' */
        ++pattern;
        ++pos;
      }
      *buf++ = *pattern++; /* copy character to literal */
      ++pos;
      sublen++;
    }

    if(sublen) {
      /* we got a literal string, add it as a single-item list */
      *buf = '\0';
      res = glob_fixed(glob, glob->glob_buffer, sublen);
    }
    else {
      switch(*pattern) {
      case '{':
        /* process set pattern */
        pattern++;
        pos++;
        res = glob_set(glob, &pattern, &pos, amount, globindex++);
        break;

      case '[':
        /* process range pattern */
        pattern++;
        pos++;
        res = glob_range(glob, &pattern, &pos, amount, globindex++);
        break;

      default:
        /* '\0': done */
        break;
      }
    }

    /* the slot counts as used even when the helper failed, exactly as
       before; the limit itself is enforced at the top of the loop */
    ++glob->size;
  }
  return res;
}
/*
 * glob_range() parses a range expression. On entry *patternp points just
 * behind the opening '['. Accepted forms:
 *   - char range:                   "a-z]", "B-Q]"
 *   - num range:                    "0-9]", "17-2000]"
 *   - num range with leading zeros: "001-999]"
 * each optionally followed by ":<step>" before the closing ']'.
 *
 * glob:      parser state; the range is stored in glob->pattern[glob->size].
 * patternp:  in/out read position; on success it is left just behind ']'.
 * posp:      in/out column counter used for error reporting.
 * amount:    running number of combinations, multiplied by the range size.
 * globindex: index assigned to this glob.
 *
 * Returns CURLE_OK, or a CURLE_URL_MALFORMAT error produced through
 * GLOBERROR when the expression is malformed or the count overflows.
 */
static CURLcode glob_range(URLGlob *glob, char **patternp,
                           size_t *posp, unsigned long *amount,
                           int globindex)
{
  URLPattern *pat;
  int rc;
  char *pattern = *patternp;

  pat = &glob->pattern[glob->size];
  pat->globindex = globindex;

  if(ISALPHA(*pattern)) {
    /* character range detected */
    char min_c;
    char max_c;
    int step = 1;

    pat->type = UPTCharRange;

    rc = sscanf(pattern, "%c-%c", &min_c, &max_c);

    /* pattern[3] is only inspected after three characters were matched, so
       the read stays inside the string */
    if(rc == 2) {
      if(pattern[3] == ':') {
        char *endp;
        unsigned long lstep;
        errno = 0;
        lstep = strtoul(&pattern[4], &endp, 10);
        if(errno || (*endp != ']'))
          step = -1;
        else {
          pattern = endp+1;
          /* validate as unsigned long before narrowing to int so a huge
             step cannot wrap around into an acceptable value */
          if((max_c <= min_c) ||
             (lstep > (unsigned long)(max_c - min_c)))
            step = -1;
          else
            step = (int)lstep;
        }
      }
      else if(pattern[3] != ']')
        /* neither a step nor the closing bracket follows: malformed */
        rc = 0;
      else
        /* end of range */
        pattern += 4;
    }

    *posp += (pattern - *patternp);

    if((rc != 2) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a')) ||
       (step <= 0))
      /* the pattern is not well-formed */
      return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);

    /* if there was a ":[num]" thing, use that as step or else use 1 */
    pat->content.CharRange.step = step;
    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
    pat->content.CharRange.max_c = max_c;

    if(multiply(amount, (pat->content.CharRange.max_c -
                         pat->content.CharRange.min_c) /
                         pat->content.CharRange.step + 1))
      return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
  }
  else if(ISDIGIT(*pattern)) {
    /* numeric range detected */
    unsigned long min_n;
    unsigned long max_n = 0;
    unsigned long step_n = 0;
    char *endp;

    pat->type = UPTNumRange;
    pat->content.NumRange.padlength = 0;

    if(*pattern == '0') {
      /* leading zero specified, count the digits of the lower bound: that
         padding length is used for all instances of this pattern */
      char *c = pattern;
      while(ISDIGIT(*c)) {
        c++;
        ++pat->content.NumRange.padlength;
      }
    }

    errno = 0;
    min_n = strtoul(pattern, &endp, 10);
    if(errno || (endp == pattern))
      endp = NULL;
    else if(*endp != '-')
      endp = NULL;
    else {
      pattern = endp+1;
      if(!ISDIGIT(*pattern))
        /* strtoul() would silently accept whitespace or a sign here */
        endp = NULL;
      else {
        errno = 0;
        max_n = strtoul(pattern, &endp, 10);
        if(errno)
          /* upper bound out of range */
          endp = NULL;
        else if(*endp == ':') {
          pattern = endp+1;
          errno = 0;
          step_n = strtoul(pattern, &endp, 10);
          if(errno)
            /* over/underflow situation */
            endp = NULL;
        }
        else
          step_n = 1;

        if(endp && (*endp == ']'))
          pattern = endp+1;
        else
          endp = NULL;
      }
    }

    *posp += (pattern - *patternp);

    if(!endp || (min_n > max_n) || (step_n > (max_n - min_n)) || !step_n)
      /* the pattern is not well-formed */
      return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);

    /* NOTE(review): nothing above limits the values to 31 bits; confirm
       that the NumRange fields are wide enough for unsigned long */
    pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
    pat->content.NumRange.max_n = max_n;
    pat->content.NumRange.step = step_n;

    if(multiply(amount, (pat->content.NumRange.max_n -
                         pat->content.NumRange.min_n) /
                         pat->content.NumRange.step + 1))
      return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
  }
  else
    return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);

  *patternp = pattern;
  return CURLE_OK;
}
/*
 * glob_parse() walks a URL and splits it into pattern slots: literal
 * fragments go to glob_fixed(), '{' starts a set handled by glob_set() and
 * '[' starts a range handled by glob_range().
 *
 * glob:    parser state; glob->pattern[] is filled and glob->size advanced.
 * pattern: NUL-terminated URL to parse.
 * pos:     column counter used for error reporting.
 * amount:  running number of combinations; seeded to 1 before the first
 *          set/range if it is still zero.
 *
 * Returns GLOB_OK, or an error produced through GLOBERROR / a helper.
 */
static GlobCode glob_parse(URLGlob *glob, char *pattern,
                           size_t pos, unsigned long *amount)
{
  GlobCode res = GLOB_OK;
  int globindex = 0; /* count "actual" globs */

  while(*pattern && !res) {
    /* Every literal starts at the beginning of the scratch buffer.
       glob_fixed() duplicates the buffer from its start, so continuing to
       append where a previous fragment ended would prefix this literal
       with stale data. */
    char *buf = glob->glob_buffer;
    int sublen = 0;

    /* Refuse BEFORE touching the next slot. Checking only after the slot
       has been filled lets index GLOB_PATTERN_NUM be written first.
       NOTE(review): assumes glob->pattern[] has GLOB_PATTERN_NUM entries -
       confirm against the struct definition. */
    if(glob->size >= GLOB_PATTERN_NUM)
      return GLOBERROR("too many globs", pos, GLOB_ERROR);

    while(*pattern && *pattern != '{' && *pattern != '[') {
      if(*pattern == '}' || *pattern == ']')
        return GLOBERROR("unmatched close brace/bracket", pos, GLOB_ERROR);

      /* only allow \ to escape known "special letters" */
      if(*pattern == '\\' &&
         (*(pattern+1) == '{' || *(pattern+1) == '[' ||
          *(pattern+1) == '}' || *(pattern+1) == ']')) {
        /* escape character, skip '\' */
        ++pattern;
        ++pos;
      }
      *buf++ = *pattern++; /* copy character to literal */
      ++pos;
      sublen++;
    }

    if(sublen) {
      /* we got a literal string, add it as a single-item list */
      *buf = '\0';
      res = glob_fixed(glob, amount);
    }
    else {
      if(!*amount)
        *amount = 1;

      switch(*pattern) {
      case '{':
        /* process set pattern */
        pattern++;
        pos++;
        res = glob_set(glob, &pattern, &pos, amount, globindex++);
        break;

      case '[':
        /* process range pattern */
        pattern++;
        pos++;
        res = glob_range(glob, &pattern, &pos, amount, globindex++);
        break;

      default:
        /* '\0': done */
        break;
      }
    }

    /* the slot counts as used even when the helper failed, exactly as
       before; the limit itself is enforced at the top of the loop */
    ++glob->size;
  }
  return res;
}