/**
 * Fill the byte translation table `map' the first time it is called;
 * later calls return immediately.
 *
 * For each index accepted by utf8_byte_is_allowed() (index 0 excluded):
 * ASCII upper-case letters are stored lower-cased, ASCII punctuation,
 * control and space characters are stored as a blank, and anything else
 * is stored unchanged.  All other entries are set to 0.
 */
static void
setup_map(void)
{
	static gboolean done;
	guint idx;

	if (done)
		return;

	for (idx = 0; idx < G_N_ELEMENTS(map); idx++) {
		guchar mapped = 0;		/* Value for rejected bytes */

		if (idx > 0 && utf8_byte_is_allowed(idx)) {
			if (is_ascii_upper(idx))
				mapped = ascii_tolower(idx);
			else if (
				is_ascii_punct(idx) ||
				is_ascii_cntrl(idx) ||
				is_ascii_space(idx)
			)
				mapped = ' ';
			else
				mapped = idx;
		}

		map[idx] = mapped;
	}

	done = TRUE;
}
/**
 * Strip leading and trailing blanks in text string.
 *
 * The text itself is not modified: stripping is expressed through the
 * returned start pointer and the length written to `len_ptr'.
 *
 * @param text		start of text to strip (NUL-terminated)
 * @param len_ptr	updated with new length if non-NULL
 *
 * @returns start of new text, and adjusted length in case we stripped.
 * When the text holds only blanks (or is empty), the original start is
 * returned with a length of 0.
 */
static const char *
xfmt_strip_blanks(const char *text, size_t *len_ptr)
{
	const char *p = text;
	const char *first_non_blank = text;	/* First non-blank char */
	const char *end_non_blank = text;	/* One past last non-blank char */
	bool seen_non_blank = FALSE;
	unsigned retlen;
	int c;

	/*
	 * Text is NUL-terminated, so we can use utf8_decode_char_fast().
	 */

	while ('\0' != (c = utf8_decode_char_fast(p, &retlen))) {
		p += retlen;			/* Now points to the next char */

		if (is_ascii_space(c))
			continue;			/* Blanks never move the end marker */

		if (!seen_non_blank) {
			seen_non_blank = TRUE;
			first_non_blank = p - retlen;	/* This char */
		}
		end_non_blank = p;		/* Next char */
	}

	if (len_ptr != NULL)
		*len_ptr = end_non_blank - first_non_blank;

	return first_non_blank;
}
/**
 * Compute character mask "hash", using one bit per letter of the alphabet,
 * plus one for any digit.
 *
 * ASCII white space is ignored, any ASCII digit sets MASK_DIGIT, and a
 * character whose lower-cased form falls within 'a'..'z' sets the matching
 * MASK_LETTER() bit.  Other characters do not contribute.
 *
 * @param s		the NUL-terminated string to hash
 *
 * @return the computed bit mask.
 */
static guint32
mask_hash(const char *s)
{
	guint32 bits = 0;
	const char *p;

	for (p = s; '\0' != *p; p++) {
		guchar ch = (guchar) *p;
		int letter;

		if (is_ascii_space(ch))
			continue;

		if (is_ascii_digit(ch)) {
			bits |= MASK_DIGIT;
			continue;
		}

		letter = ascii_tolower(ch) - 97;	/* 97 is ASCII 'a' */
		if (letter >= 0 && letter < 26)
			bits |= MASK_LETTER(letter);
	}

	return bits;
}
/** * Dump field on specified file descriptor. */ static void hfield_dump(const header_field_t *h, FILE *out) { slist_iter_t *iter; bool first; header_field_check(h); g_assert(h->lines); fprintf(out, "%s: ", h->name); first = TRUE; iter = slist_iter_on_head(h->lines); for (/* NOTHING */; slist_iter_has_item(iter); slist_iter_next(iter)) { const char *s; if (first) first = FALSE; else fputs(" ", out); /* Continuation line */ s = slist_iter_current(iter); if (is_printable_iso8859_string(s)) { fputs(s, out); } else { char buf[80]; const char *p = s; int c; size_t len = strlen(s); str_bprintf(buf, sizeof buf, "<%u non-printable byte%s>", (unsigned) len, plural(len)); fputs(buf, out); while ((c = *p++)) { if (is_ascii_print(c) || is_ascii_space(c)) fputc(c, out); else fputc('.', out); /* Less visual clutter than '?' */ } } fputc('\n', out); } slist_iter_free(&iter); }
/** * Intuit the media type they are searching based on the first XML tag * we find in the meta data string, using simplistic lexical parsing which * will encompass 99% of the cases. */ static uint32 g2_node_intuit_media_type(const char *md) { const char *p = md; const char *start; int c; uint32 flags; while ('<' != (c = *p++) && c != 0) /* empty */; if (0 == c) return 0; /* Did not find any tag opening */ start = p = skip_ascii_spaces(p); while (0 != (c = *p)) { if (is_ascii_space(c) || '/' == c || '>' == c) { char *name; /* Found end of word, we got the tag name */ name = h_strndup(start, p - start); flags = TOKENIZE(name, g2_q2_md); if (0 == flags) { g_warning("%s(): unknown tag \"%s\", XML string was \"%s\"", G_STRFUNC, name, md); } hfree(name); return flags; } p++; } return 0; }
/**
 * Remove trailing white space from line held within buffer.
 *
 * This is meant to be used to validate the line returned by fgets() and to
 * remove final "\n", or "\r\n" markers as well as any other trailing white
 * space.
 *
 * Nothing is modified when FALSE is returned, and `lenptr' is only written
 * when TRUE is returned.
 *
 * @param line		buffer where line is held
 * @param size		buffer size
 * @param lenptr	if non-NULL, the final string length is written there
 *
 * @return TRUE if we were facing a line terminated by "\n", FALSE otherwise.
 */
bool
file_line_chomp_tail(char *line, size_t size, size_t *lenptr)
{
	size_t n = clamp_strlen(line, size);
	char *tail;

	if (0 == n || n == size)
		return FALSE;		/* Empty string or no NUL found */

	if (line[n - 1] != '\n')
		return FALSE;		/* Truncated line, reading buffer was too small */

	/*
	 * Walk backwards from the final "\n", erasing white space until we
	 * hit a non-space character or the start of the buffer.
	 */

	tail = &line[n - 1];

	for (;;) {
		*tail = '\0';
		if (tail == line)
			break;
		tail--;
		if (!is_ascii_space(*tail))
			break;
	}

	if (lenptr != NULL) {
		size_t kept = tail - line;

		if ('\0' != *tail)
			kept++;			/* `tail' is on the last kept character */

		*lenptr = kept;
	}

	return TRUE;
}
/**
 * Decodes "chunked" data.
 *
 * The function returns as soon as it needs more data to proceed, on
 * error, if the state CHUNK_STATE_END was reached, or if the state
 * CHUNK_STATE_DATA was reached. In the latter case the chunk payload
 * itself must be consumed and this function must not be called again
 * until the state CHUNK_STATE_DATA_CRLF is reached.
 *
 * The parsing state (current state, collected hexadecimal digits, flags,
 * remaining chunk data) is kept in the `struct attr' found in rx->opaque,
 * so that parsing can resume where it stopped on the next call.
 *
 * @param rx			the current RX driver.
 * @param src			the chunk data.
 * @param size			the amount of bytes available in ``src'', which
 *						must be greater than zero.
 * @param p_error_str	if not NULL and parse_chunk() fails, it will point
 *						to an informational error message; it is set to
 *						NULL on success.
 *
 * @return 0 on failure; non-zero amount of consumed bytes on success.
 */
static size_t
parse_chunk(rxdrv_t *rx, const char *src, size_t size,
	const char **p_error_str)
{
	struct attr *attr = rx->opaque;
	const char *error_str;
	size_t len;			/* Amount of bytes not consumed yet */

	g_assert(attr);
	g_assert(src);
	g_assert(size > 0);
	g_assert(attr->state < NUM_CHUNK_STATES);
	g_assert(0 == attr->data_remain);

	len = size;

	do {
		switch (attr->state) {
		case CHUNK_STATE_DATA_CRLF:
			/* The chunk-data must be followed by a CRLF */
			while (len > 0) {
				uchar c;

				len--;
				c = *src++;
				if ('\r' == c) {
					/*
					 * This allows more than one CR but we must consume
					 * some data or keep state over this otherwise.
					 */
					continue;
				} else if ('\n' == c) {
					attr->state = CHUNK_STATE_SIZE;
					break;
				} else {
					/*
					 * Normally it is an error, there should be CRLF after
					 * the chunk data.  However, they might have forgotten
					 * to send the '\n' or the whole sequence.
					 *
					 * If what follows looks like a valid chunk size, then
					 * we should be able to resync properly: Unread the
					 * character and move on to the chunk size decoding.
					 */

					/* Warn only once, IF_NO_CRLF remembers we did */
					if (!(attr->flags & IF_NO_CRLF)) {
						attr->flags |= IF_NO_CRLF;
						g_warning("Host %s forgot CRLF after data",
							gnet_host_to_string(&rx->host));
					}
					len++;
					src--;
					attr->state = CHUNK_STATE_SIZE;
					break;
				}
			}
			break;

		case CHUNK_STATE_SIZE:
			g_assert(attr->hex_pos < sizeof attr->hex_buf);
			while (len > 0) {
				uchar c;

				len--;
				c = *src++;
				if (is_ascii_xdigit(c)) {
					if (attr->hex_pos >= sizeof attr->hex_buf) {
						error_str = "Overflow in chunk-size";
						goto error;
					}
					/* Collect up to 16 hex characters */
					attr->hex_buf[attr->hex_pos++] = c;
				} else {
					/*
					 * There might be a chunk-extension after the
					 * hexadecimal chunk-size but there shouldn't be
					 * anything else.
					 *
					 * NOTE(review): the terminating character is consumed
					 * here, so if it is already the '\n' then the
					 * CHUNK_STATE_EXT state waits for another one --
					 * confirm that a bare LF after the size cannot happen.
					 */
					if (
						0 == attr->hex_pos ||
						(!is_ascii_space(c) && ';' != c)
					) {
						error_str = "Bad chunk-size";
						goto error;
					}
					attr->state = CHUNK_STATE_EXT;
					break;
				}
			}
			break;

		case CHUNK_STATE_EXT:
			/* Just skip over the chunk-extension */
			while (len > 0) {
				len--;
				if ('\n' == *src++) {
					/*
					 * Pick up the collected hex digits and
					 * calculate the chunk-size.
					 */

					g_assert(attr->hex_pos > 0);
					g_assert(attr->hex_pos <= sizeof attr->hex_buf);

					{
						uint64 v = 0;
						uint i;

						for (i = 0; i < attr->hex_pos; i++)
							v = (v << 4) | hex2int_inline(attr->hex_buf[i]);

						attr->data_remain = v;
						attr->hex_pos = 0;
					}

					/* A zero-sized chunk is followed by the trailers */
					attr->state = 0 != attr->data_remain
						? CHUNK_STATE_DATA
						: CHUNK_STATE_TRAILER_START;
					break;
				}
			}
			break;

		case CHUNK_STATE_TRAILER_START:
			/* We've reached another trailer line */
			if (len < 1)
				break;
			if ('\r' == src[0]) {
				/*
				 * This allows more than one CR but we must consume
				 * some data or keep state over this otherwise.
				 */
				src++;
				len--;
			}
			if (len < 1)
				break;
			if ('\n' == src[0]) {
				/* An empty line means the end of all trailers was reached */
				src++;
				len--;
				attr->state = CHUNK_STATE_END;
				break;
			}
			attr->state = CHUNK_STATE_TRAILER;
			/* FALL THROUGH */

		case CHUNK_STATE_TRAILER:
			/* Just skip over the trailer line */
			while (len > 0) {
				len--;
				if ('\n' == *src++) {
					/*
					 * Now check whether there's another trailer
					 * line or whether we've reached the end
					 */

					attr->state = CHUNK_STATE_TRAILER_START;
					break;
				}
			}
			break;

		case CHUNK_STATE_END:
			/*
			 * We're not supposed to receive data after the chunk stream
			 * has been ended.  But if we do, it means either we
			 * misinterpreted the chunk end stream or the other end is just
			 * going berserk.
			 */

			error_str = "Remaining data after chunk end";
			goto error;

		case CHUNK_STATE_DATA:
		case CHUNK_STATE_ERROR:
		case NUM_CHUNK_STATES:
			/* These states must never be seen when entering the parser */
			g_assert_not_reached();
			break;
		}

		/* NB: Some data from ``src'' must have been consumed or an
		 *		infinite loop may occur.
		 */

		if (CHUNK_STATE_DATA == attr->state) {
			/* Stop here: the caller has to consume the chunk payload */
			if (GNET_PROPERTY(rx_debug) > 9)
				g_debug("parse_chunk: chunk size %s bytes",
					uint64_to_string(attr->data_remain));
			break;
		}

	} while (len > 0 && CHUNK_STATE_END != attr->state);

	if (p_error_str)
		*p_error_str = NULL;

	return size - len;		/* Amount of bytes consumed */

error:

	if (p_error_str)
		*p_error_str = error_str;

	attr->state = CHUNK_STATE_ERROR;
	return 0;
}
/**
 * Retrieves the major and minor version from a feature in the X-Features
 * header, if no support was found both major and minor are 0 and FALSE
 * is returned.
 *
 * A feature is recognized when its name (compared case-insensitively) is
 * immediately followed by '/' and is either at the very start of the header
 * value or preceded by a white space, ',' or ';'.
 *
 * @param name		the feature name to look for
 * @param header	the header to inspect
 * @param major		if non-NULL, where the major version is written
 * @param minor		if non-NULL, where the minor version is written
 *
 * @return TRUE if the feature was found and its version could be parsed.
 */
bool
header_get_feature(const char *name, const header_t *header,
	uint *major, uint *minor)
{
	static const char x_features[] = "X-Features";
	char *buf, *start;
	size_t namelen;

	if (major)
		*major = 0;
	if (minor)
		*minor = 0;

	buf = header_get(header, x_features);

	/*
	 * We could also try to scan for the header: name, so this would
	 * make this function even more generic. But I would suggest another
	 * function for this though.
	 */

	if (buf == NULL) {
		/*
		 * Actually the 'specs' say we should assume it is supported if the
		 * X-Features header is not there. But I wouldn't count on it, and
		 * it was only for "legacy" attributes in the HTTP file exchange.
		 *
		 * Also, for optimization purposes, the X-Features line will be sent
		 * once per persistent HTTP connection, as the client is expected to
		 * cache the supported features.
		 */

		return FALSE;
	}

	/*
	 * An empty name would match everywhere without ever making progress
	 * in the loop below: there is no such feature.
	 */

	namelen = strlen(name);
	if (0 == namelen)
		return FALSE;

	/*
	 * We must locate the name exactly, and not a subpart of another
	 * feature.  If we look for "bar", then we must not match on "foobar"
	 * nor on "barcode".
	 */

	start = buf;

	for (;;) {
		char *match;

		match = ascii_strcasestr(buf, name);

		if (match == NULL)
			return FALSE;

		/*
		 * Since we're looking for whole words separated by a space or the
		 * regular header punctuation, the next match can't occur before
		 * the end of the current string we matched...
		 */

		buf = match + namelen;

		if (*buf != '/')
			continue;		/* Matched "barcode" when looking for "bar" */

		if (match == start)
			break;			/* Found it, leading the header value */

		{
			int pc = *(match - 1);		/* Previous char */

			if (is_ascii_space(pc) || pc == ',' || pc == ';')
				break;		/* Found it! */
		}
	}

	buf++;					/* Skip the '/' separator */

	if (*buf == '\0')
		return FALSE;

	return 0 == parse_major_minor(buf, NULL, major, minor);
}
static void html_render_text(struct render_context *ctx, const struct array text) { unsigned c_len; size_t i; bool whitespace = FALSE; struct array entity, current; entity = zero_array; current = zero_array; for (i = 0; i < text.size; i += c_len) { const unsigned char c = text.data[i]; bool is_whitespace; is_whitespace = FALSE; c_len = utf8_first_byte_length_hint(c); if (!ctx->preformatted && is_ascii_space(c)) { if (whitespace) continue; is_whitespace = TRUE; whitespace = TRUE; if (0x20 == c && i > 0 && i < text.size - c_len) { const unsigned char next_c = text.data[i + c_len]; if (!is_ascii_space(next_c)) is_whitespace = FALSE; } } else { whitespace = FALSE; } if ('&' == c || ';' == c || is_whitespace) { if (current.size > 0) { html_output_print(ctx->output, current); current = zero_array; } } if (is_whitespace) { if (i > 0 || ctx->closing) html_output_print(ctx->output, array_from_string(" ")); } else if ('&' == c) { if (entity.data) { html_render_entity(ctx, entity); } entity.data = deconstify_gchar(&text.data[i + c_len]); entity.size = 0; continue; } else if (';' == c) { if (entity.data) { html_render_entity(ctx, entity); entity = zero_array; continue; } } else if (entity.data) { entity.size += c_len; } else { if (!current.data) current.data = &text.data[i]; current.size += c_len; } } if (current.size > 0) { html_output_print(ctx->output, current); } }
/**
 * Default implementation to get the default gateway by parsing the
 * output of the "netstat -rn" command.
 *
 * The command is looked for in /bin and then in /usr/bin.  The first output
 * line starting with "default" or "0.0.0.0", followed by white space and
 * then something string_to_ip() converts to a non-zero address, supplies
 * the gateway.
 *
 * @param addrp		where gateway address is to be written
 *
 * @return 0 on success, -1 on failure with errno set: ENOENT when netstat
 * could not be launched, ENETUNREACH when no default route was found.
 */
static G_GNUC_COLD int
parse_netstat(host_addr_t *addrp)
#ifdef HAS_POPEN
{
	FILE *f = NULL;
	char tmp[80];
	uint32 gate = 0;		/* Zero means: no gateway found yet */

	/*
	 * This implementation should be a safe default on UNIX platforms, but
	 * it is inefficient and as such can only constitute a fallback.
	 */

	if (-1 != access("/bin/netstat", X_OK)) {
		f = popen("/bin/netstat -rn", "r");
	} else if (-1 != access("/usr/bin/netstat", X_OK)) {
		f = popen("/usr/bin/netstat -rn", "r");
	}

	/* Also reached when popen() itself failed on an existing program */
	if (NULL == f) {
		errno = ENOENT;		/* netstat not found */
		return -1;
	}

	/*
	 * Typical netstat -rn output:
	 *
	 * Destination        Gateway            Flags .....
	 * 0.0.0.0            192.168.0.200      UG
	 * default            192.168.0.200      UG
	 *
	 * Some systems like linux display "0.0.0.0", but traditional UNIX
	 * output is "default" for the default route.
	 */

	while (fgets(tmp, sizeof tmp, f)) {
		char *p;
		uint32 ip;

		p = is_strprefix(tmp, "default");
		if (NULL == p)
			p = is_strprefix(tmp, "0.0.0.0");

		/* The prefix must be a whole word, hence followed by a space */
		if (NULL == p || !is_ascii_space(*p))
			continue;

		/* NOTE(review): presumably string_to_ip() skips leading blanks */
		ip = string_to_ip(p);
		if (ip != 0) {
			gate = ip;
			break;
		}
	}

	pclose(f);

	if (0 == gate) {
		errno = ENETUNREACH;
		return -1;
	}

	*addrp = host_addr_get_ipv4(gate);
	return 0;
}
struct array html_get_attribute(const struct array *tag, enum html_attr attribute) { size_t i = 0; if ( !tag || !tag->data || NUM_HTML_ATTR == attribute || HTML_ATTR_UNKNOWN == attribute ) goto not_found; /** <tag-name>([<space>][<attr>[<space>]'='[<space>]'"'<value>'"'])* */ /* skip <tag-name> */ while (i < tag->size && !is_ascii_space(tag->data[i])) i++; while (i < tag->size) { struct array value, attr; /* skip <space> */ while (i < tag->size && is_ascii_space(tag->data[i])) i++; attr = array_init(&tag->data[i], tag->size - i); /* skip <attr> */ while (i < tag->size) { const unsigned char c = tag->data[i]; if ('=' == c || is_ascii_space(c)) break; i++; } /* skip <space> */ while (i < tag->size && is_ascii_space(tag->data[i])) i++; if (i < tag->size && '=' == tag->data[i]) { bool quoted = FALSE; size_t start; i++; /* skip <space> */ while (i < tag->size && is_ascii_space(tag->data[i])) i++; if (i < tag->size && '"' == tag->data[i]) { i++; quoted = TRUE; } start = i; /* skip <value> */ while (i < tag->size) { const unsigned char c = tag->data[i]; if (quoted) { if ('"' == c) break; } else if (is_ascii_space(c)) { break; } i++; } value = array_init(&tag->data[start], i - start); } else { value = array_init(&tag->data[i], 0); } if (attribute == parse_attribute(attr)) return value; } not_found: return zero_array; }
/** * Parse an IPv4 Geo IP line and record the range in the database. */ static void gip_parse_ipv4(const char *line, int linenum) { const char *end; uint16 code; int c; struct range_context ctx; /* * Each line looks like: * * 15.0.0.0 - 15.130.191.255 fr * * So we just have to parse the two IP addresses, then compute * all the ranges they cover in order to insert them into * the IP database. */ end = strchr(line, '-'); if (end == NULL) { g_warning("%s, line %d: no IP address separator in \"%s\"", gip_source[GIP_IPV4].file, linenum, line); return; } if (!string_to_ip_strict(line, &ctx.ip1, NULL)) { g_warning("%s, line %d: invalid first IP in \"%s\"", gip_source[GIP_IPV4].file, linenum, line); return; } /* * Skip spaces until the second IP. */ end++; /* Go past the minus, parsing the second IP */ while ((c = *end)) { if (!is_ascii_space(c)) break; end++; } if (!string_to_ip_strict(end, &ctx.ip2, &end)) { g_warning("%s, line %d: invalid second IP in \"%s\"", gip_source[GIP_IPV4].file, linenum, line); return; } /* * Make sure the IP addresses are ordered correctly */ if (ctx.ip1 > ctx.ip2) { g_warning("%s, line %d: invalid IP order in \"%s\"", gip_source[GIP_IPV4].file, linenum, line); return; } /* * Skip spaces until the country code. */ while ((c = *end)) { if (!is_ascii_space(c)) break; end++; } if (c == '\0') { g_warning("%s, line %d: missing country code in \"%s\"", gip_source[GIP_IPV4].file, linenum, line); return; } code = iso3166_encode_cc(end); if (ISO3166_INVALID == code) { g_warning("%s, line %d: bad country code in \"%s\"", gip_source[GIP_IPV4].file, linenum, line); return; } /* code must not be zero and the LSB must be zero due to using it as * as key for ipranges */ ctx.country = (code + 1) << 1; ctx.line = line; ctx.linenum = linenum; /* * Now compute the CIDR ranges between the ip1 and ip2 addresses * and insert each range into the database, linking it to the * country code. */ ip_range_split(ctx.ip1, ctx.ip2, gip_add_cidr, &ctx); }
/**
 * Do an actual search.
 *
 * The query string is canonized, then the smallest bin among those indexed
 * by each pair of consecutive non-space characters of the query is scanned.
 * Entries are first filtered out by their character mask and by their
 * length, before being matched against the words of the query.
 *
 * @param table			table containing organized entries to search from
 * @param search_term	the query string
 * @param callback		routine to invoke for each match
 * @param ctx			user-supplied data to pass on to callback
 * @param max_res		maximum amount of results to return
 * @param qhv			query hash vector built from query string, for routing
 *						(may be NULL if not needed)
 *
 * @return number of hits we produced, i.e. the number of matches for which
 * the callback returned a non-zero value.
 */
G_GNUC_HOT int
st_search(
	search_table_t *table,
	const char *search_term,
	st_search_callback callback,
	gpointer ctx,
	int max_res,
	query_hashvec_t *qhv)
{
	char *search;
	int key, nres = 0;
	guint i, len;
	struct st_bin *best_bin = NULL;
	int best_bin_size = INT_MAX;
	word_vec_t *wovec;
	guint wocnt;
	cpattern_t **pattern;
	struct st_entry **vals;
	guint vcnt;
	int scanned = 0;		/* measure search mask efficiency */
	guint32 search_mask;
	size_t minlen;
	guint random_offset;	/* Randomizer for search returns */

	/* May return `search_term' itself, see the cleanup at "finish" */
	search = UNICODE_CANONIZE(search_term);

	if (GNET_PROPERTY(query_debug) > 4 && 0 != strcmp(search, search_term)) {
		char *safe_search = hex_escape(search, FALSE);
		char *safe_search_term = hex_escape(search_term, FALSE);
		g_debug("original search term: \"%s\"", safe_search_term);
		g_debug("canonical search term: \"%s\"", safe_search);
		/* Only free the escaped strings when they are actual copies */
		if (safe_search != search)
			HFREE_NULL(safe_search);
		if (safe_search_term != search_term)
			HFREE_NULL(safe_search_term);
	}
	len = strlen(search);

	/*
	 * Find smallest bin
	 */

	if (len >= 2) {
		for (i = 0; i < len - 1; i++) {
			struct st_bin *bin;
			if (is_ascii_space(search[i]) || is_ascii_space(search[i+1]))
				continue;
			key = st_key(table, search + i);
			if ((bin = table->bins[key]) == NULL) {
				/* No entry holds that pair of characters: no match */
				best_bin = NULL;
				break;
			}
			if (bin->nvals < best_bin_size) {
				best_bin = bin;
				best_bin_size = bin->nvals;
			}
		}

		if (GNET_PROPERTY(matching_debug) > 4)
			g_debug("MATCH st_search(): str=\"%s\", len=%d, best_bin_size=%d",
				lazy_safe_search(search_term), len, best_bin_size);
	}

	/*
	 * If the best_bin is NULL, we did not find a matching bin, and we're
	 * sure we won't be able to find the search string.
	 *
	 * Note that on search strings like "r e m ", we always have a letter
	 * followed by spaces, so we won't search that.
	 *		--RAM, 06/10/2001
	 */

	if (best_bin == NULL) {
		/*
		 * If we have a `qhv', we need to compute the word vector anyway,
		 * for query routing...
		 */

		if (qhv == NULL)
			goto finish;
	}

	/*
	 * Prepare matching patterns
	 */

	wocnt = word_vec_make(search, &wovec);

	/*
	 * Compute the query hashing information for query routing, if needed.
	 */

	if (qhv != NULL) {
		for (i = 0; i < wocnt; i++) {
			if (wovec[i].len >= QRP_MIN_WORD_LENGTH)
				qhvec_add(qhv, wovec[i].word, QUERY_H_WORD);
		}
	}

	if (wocnt == 0 || best_bin == NULL) {
		if (wocnt > 0)
			word_vec_free(wovec, wocnt);
		goto finish;
	}

	g_assert(best_bin_size > 0);	/* Allocated bin, it must hold something */

	/* One pattern slot per word, all initially NULL */
	pattern = walloc0(wocnt * sizeof *pattern);

	/*
	 * Prepare matching optimization, an idea from Mike Green.
	 *
	 * At library building time, we computed a mask hash, made from the
	 * lowercased file name, using one bit per different letter, roughly
	 * (see mask_hash() for the exact algorithm).
	 *
	 * We're now going to compute the same mask on the query, and compare
	 * it bitwise with the mask for each file.  If the file does not hold
	 * at least all the chars present in the query, it's no use applying
	 * the pattern matching algorithm, it won't match at all.
	 *
	 *		--RAM, 01/10/2001
	 */

	search_mask = mask_hash(search);

	/*
	 * Prepare second matching optimization: since all words in the query
	 * must match the exact amount of time, we can compute the minimum length
	 * the searched file must have.  We add one character after each word
	 * but the last, to account for space between words.
	 *		--RAM, 11/07/2002
	 */

	for (minlen = 0, i = 0; i < wocnt; i++)
		minlen += wovec[i].len + 1;
	minlen--;
	g_assert(minlen <= INT_MAX);

	/*
	 * Search through the smallest bin
	 */

	vcnt = best_bin->nvals;
	vals = best_bin->vals;
	random_offset = random_u32() % vcnt;

	nres = 0;
	for (i = 0; i < vcnt; i++) {
		const struct st_entry *e;
		shared_file_t *sf;
		size_t canonic_len;

		/*
		 * As we only return a limited count of results, pick a random
		 * offset, so that repeated searches will match different items
		 * instead of always the first - with some probability.
		 */
		e = vals[(i + random_offset) % vcnt];

		if ((e->mask & search_mask) != search_mask)
			continue;		/* Can't match */

		sf = e->sf;

		canonic_len = shared_file_name_canonic_len(sf);
		if (canonic_len < minlen)
			continue;		/* Can't match */

		scanned++;

		if (entry_match(e->string, canonic_len, pattern, wovec, wocnt)) {
			if (GNET_PROPERTY(matching_debug) > 4) {
				g_debug("MATCH \"%s\" matches %s",
					search, shared_file_name_nfc(sf));
			}

			/* Only matches accepted by the callback count as results */
			if ((*callback)(ctx, sf)) {
				nres++;
				if (nres >= max_res)
					break;
			}
		}
	}

	if (GNET_PROPERTY(matching_debug) > 3)
		g_debug("MATCH st_search(): scanned %d entr%s from the %d in bin, "
			"got %d match%s",
			scanned, 1 == scanned ? "y" : "ies",
			best_bin_size, nres, 1 == nres ? "" : "es");

	for (i = 0; i < wocnt; i++)
		if (pattern[i])					/* Lazily compiled by entry_match() */
			pattern_free(pattern[i]);

	wfree(pattern, wocnt * sizeof *pattern);
	word_vec_free(wovec, wocnt);

finish:
	/* The canonized string is only ours to free when it is a copy */
	if (search != search_term) {
		HFREE_NULL(search);
	}

	return nres;
}
G_GNUC_COLD void upload_stats_load_history(void) { FILE *upload_stats_file; file_path_t fp; char line[FILENAME_MAX + 64]; guint lineno = 0; gcu_upload_stats_gui_freeze(); file_path_set(&fp, settings_config_dir(), ul_stats_file); /* open file for reading */ upload_stats_file = file_config_open_read(ul_stats_what, &fp, 1); if (upload_stats_file == NULL) goto done; /* parse, insert names into ul_stats_clist */ while (fgets(line, sizeof(line), upload_stats_file)) { static const struct ul_stats zero_item; struct ul_stats item; struct sha1 sha1_buf; const char *p; size_t i; lineno++; if (line[0] == '#' || line[0] == '\n') continue; p = strchr(line, '\t'); if (NULL == p) goto corrupted; line[p - line] = '\0'; /* line is now the URL-escaped file name */ p++; /* URL-unescape in-place */ if (!url_unescape(line, TRUE)) goto corrupted; item = zero_item; item.pathname = line; for (i = 0; i < 8; i++) { guint64 v; int error; const char *endptr; p = skip_ascii_spaces(p); /* SVN versions up to 15322 had only 6 fields in the history */ if (5 == i && '\0' == *p) break; switch (i) { case 7: /* We have a SHA1 or '*' if none known */ if ('*' != *p) { size_t len = clamp_strlen(p, SHA1_BASE32_SIZE); error = !parse_base32_sha1(p, len, &sha1_buf); item.sha1 = error ? 
NULL : &sha1_buf; } else { error = FALSE; } p = skip_ascii_non_spaces(p); v = 0; break; default: v = parse_uint64(p, &endptr, 10, &error); p = deconstify_gchar(endptr); } if (error || !is_ascii_space(*endptr)) goto corrupted; switch (i) { case 0: item.size = v; break; case 1: item.attempts = v; break; case 2: item.complete = v; break; case 3: item.bytes_sent |= ((guint64) (guint32) v) << 32; break; case 4: item.bytes_sent |= (guint32) v; break; case 5: item.rtime = MIN(v + (time_t) 0, TIME_T_MAX + (guint64) 0); case 6: item.dtime = MIN(v + (time_t) 0, TIME_T_MAX + (guint64) 0); case 7: break; /* Already stored above */ default: g_assert_not_reached(); goto corrupted; } } /* * We store the filenames UTF-8 encoded but the file might have been * edited or corrupted. */ if (is_absolute_path(item.pathname)) { item.filename = lazy_filename_to_utf8_normalized( filepath_basename(item.pathname), UNI_NORM_NFC); } else { item.filename = lazy_unknown_to_utf8_normalized( filepath_basename(item.pathname), UNI_NORM_NFC, NULL); } if (upload_stats_find(NULL, item.pathname, item.size)) { g_warning("upload_stats_load_history():" " Ignoring line %u due to duplicate file.", lineno); } else if (upload_stats_find(item.sha1, item.pathname, item.size)) { g_warning("upload_stats_load_history():" " Ignoring line %u due to duplicate file.", lineno); } else { upload_stats_add(item.pathname, item.size, item.filename, item.attempts, item.complete, item.bytes_sent, item.rtime, item.dtime, item.sha1); } continue; corrupted: g_warning("upload statistics file corrupted at line %u.", lineno); } /* close file */ fclose(upload_stats_file); done: gcu_upload_stats_gui_thaw(); return; }
/** * Creates a valid and sanitized filename from the supplied string. For most * Unix-like platforms anything goes but for security reasons, shell meta * characters are replaced by harmless characters. * * @param filename the suggested filename. * @param no_spaces if TRUE, spaces are replaced with underscores. * @param no_evil if TRUE, "evil" characters are replaced with underscores. * * @returns a newly allocated string using halloc() or ``filename'' * if it was a valid filename already. */ char * filename_sanitize(const char *filename, bool no_spaces, bool no_evil) { const char *p; const char *s; char *q; g_assert(filename); /* Almost all evil characters are forbidden on Windows, anyway */ no_evil |= is_running_on_mingw(); /* Leading spaces are just confusing */ p = skip_ascii_spaces(filename); /* Make sure the filename isn't too long */ if (strlen(p) >= FILENAME_MAXBYTES) { q = halloc(FILENAME_MAXBYTES); filename_shrink(p, q, FILENAME_MAXBYTES); s = q; } else { s = p; q = NULL; } /* * Replace shell meta characters and likely problematic characters. * * Although parentheses are not evil per se, they make it a pain to * copy-n-paste filenames without going through the shell's auto- * completion (which normally does the necessary escaping). * * To keep things "readable", we replace parentheses with brackets. * Although brackets are meaningful for the shells, they are only * interpreted in the presence of "*" or "?", two characters that we * strip already. * --RAM, 2013-11-03 */ { size_t i; uchar c; for (i = 0; '\0' != (c = s[i]); i++) { if ( c < 32 || is_ascii_cntrl(c) || G_DIR_SEPARATOR == c || '/' == c || (0 == i && ('.' 
== c || '-' == c)) || (no_spaces && is_ascii_space(c)) || (no_evil && filename_is_evil_char(c)) ) { if (!q) q = h_strdup(s); q[i] = '_'; /* replace undesired char with underscore */ } else if ('(' == c) { if (!q) q = h_strdup(s); q[i] = '['; } else if (')' == c) { if (!q) q = h_strdup(s); q[i] = ']'; } } /** * Windows does not like filenames ending with a space or period. */ while (i-- > 0 && (is_ascii_space(s[i]) || '.' == s[i])) { if (!q) q = h_strdup(s); q[i] = '\0'; /* truncate string */ } } if (filename_is_reserved(q ? q : s)) { HFREE_NULL(q); q = h_strdup("noname"); } if (NULL == q && s != filename) q = h_strdup(s); /* Trimmed leading white space, must copy */ return q ? q : deconstify_gchar(s); }
/** * Loads the whitelist into memory. */ static void G_COLD whitelist_retrieve(void) { char line[1024]; FILE *f; filestat_t st; unsigned linenum = 0; file_path_t fp[1]; whitelist_generation++; file_path_set(fp, settings_config_dir(), whitelist_file); f = file_config_open_read_norename("Host Whitelist", fp, N_ITEMS(fp)); if (!f) return; if (fstat(fileno(f), &st)) { g_warning("%s(): fstat() failed: %m", G_STRFUNC); fclose(f); return; } while (fgets(line, sizeof line, f)) { pslist_t *sl_addr, *sl; const char *endptr, *start; host_addr_t addr; uint16 port; uint8 bits; bool item_ok; bool use_tls; char *hname; linenum++; if (!file_line_chomp_tail(line, sizeof line, NULL)) { g_warning("%s(): line %u too long, aborting", G_STRFUNC, linenum); break; } if (file_line_is_skipable(line)) continue; sl_addr = NULL; addr = zero_host_addr; endptr = NULL; hname = NULL; endptr = is_strprefix(line, "tls:"); if (endptr) { use_tls = TRUE; start = endptr; } else { use_tls = FALSE; start = line; } port = 0; if (string_to_host_addr_port(start, &endptr, &addr, &port)) { sl_addr = name_to_host_addr(host_addr_to_string(addr), settings_dns_net()); } else if (string_to_host_or_addr(start, &endptr, &addr)) { uchar c = *endptr; switch (c) { case '\0': case ':': case '/': break; default: if (!is_ascii_space(c)) endptr = NULL; } if (!endptr) { g_warning("%s(): line %d: " "expected a hostname or IP address \"%s\"", G_STRFUNC, linenum, line); continue; } /* Terminate the string for name_to_host_addr() */ hname = h_strndup(start, endptr - start); } else { g_warning("%s(): line %d: expected hostname or IP address \"%s\"", G_STRFUNC, linenum, line); continue; } g_assert(sl_addr != NULL || hname != NULL); g_assert(NULL != endptr); bits = 0; item_ok = TRUE; /* * When an explicit address is given (no hostname) and with no * port, one can suffix the address with bits to indicate a CIDR * range of whitelisted addresses. 
*/ if (0 == port) { /* Ignore trailing items separated by a space */ while ('\0' != *endptr && !is_ascii_space(*endptr)) { uchar c = *endptr++; if (':' == c) { int error; uint32 v; if (0 != port) { g_warning("%s(): line %d: multiple colons after host", G_STRFUNC, linenum); item_ok = FALSE; break; } v = parse_uint32(endptr, &endptr, 10, &error); port = (error || v > 0xffff) ? 0 : v; if (0 == port) { g_warning("%s(): line %d: " "invalid port value after host", G_STRFUNC, linenum); item_ok = FALSE; break; } } else if ('/' == c) { const char *ep; uint32 mask; if (0 != bits) { g_warning("%s(): line %d: " "multiple slashes after host", G_STRFUNC, linenum); item_ok = FALSE; break; } if (string_to_ip_strict(endptr, &mask, &ep)) { if (!host_addr_is_ipv4(addr)) { g_warning("%s(): line %d: " "IPv4 netmask after non-IPv4 address", G_STRFUNC, linenum); item_ok = FALSE; break; } endptr = ep; if (0 == (bits = netmask_to_cidr(mask))) { g_warning("%s(): line %d: " "IPv4 netmask after non-IPv4 address", G_STRFUNC, linenum); item_ok = FALSE; break; } } else { int error; uint32 v; v = parse_uint32(endptr, &endptr, 10, &error); if ( error || 0 == v || (v > 32 && host_addr_is_ipv4(addr)) || (v > 128 && host_addr_is_ipv6(addr)) ) { g_warning("%s(): line %d: " "invalid numeric netmask after host", G_STRFUNC, linenum); item_ok = FALSE; break; } bits = v; } } else { g_warning("%s(): line %d: " "unexpected character after host", G_STRFUNC, linenum); item_ok = FALSE; break; } } } if (item_ok) { struct whitelist *item; if (hname) { item = whitelist_hostname_create(use_tls, hname, port); whitelist_dns_resolve(item, FALSE); } else { PSLIST_FOREACH(sl_addr, sl) { host_addr_t *aptr = sl->data; g_assert(aptr != NULL); item = whitelist_addr_create(use_tls, *aptr, port, bits); whitelist_add(item); } } } else {
/**
 * Append a new line of text at the end of the header.
 * A private copy of the text is made.
 *
 * An empty line marks the end of the header.  A line starting with white
 * space is a continuation of the previous header field.  Any other line
 * must start with a field name, followed by ':' and the field value.
 *
 * @param o		the header object
 * @param text	the NUL-terminated line to append
 * @param len	the length of the line (zero signals the end of the header)
 *
 * @return an error code, or HEAD_OK if appending was successful.
 */
int
header_append(header_t *o, const char *text, int len)
{
	char buf[MAX_LINE_SIZE];
	const char *p = text;
	uchar c;
	header_field_t *hf;

	header_check(o);
	g_assert(len >= 0);

	if (o->flags & HEAD_F_EOH)
		return HEAD_EOH_REACHED;

	/*
	 * If empty line, we reached EOH.
	 */

	if (len == 0) {
		o->flags |= HEAD_F_EOH;				/* Mark we reached EOH */
		return HEAD_EOH;
	}

	/*
	 * Sanity checks.
	 */

	if (o->size >= HEAD_MAX_SIZE)
		return HEAD_TOO_LARGE;

	if (++(o->num_lines) >= HEAD_MAX_LINES)
		return HEAD_MANY_LINES;

	/*
	 * Detect whether line is a new header or a continuation.
	 */

	c = *p;
	if (is_ascii_space(c)) {

		/*
		 * It's a continuation.
		 *
		 * Make sure we already have recorded something, or we have
		 * an unexpected continuation line.
		 */

		if (NULL == o->fields)
			return HEAD_CONTINUATION;		/* Unexpected continuation */

		/*
		 * When a previous header line was malformed, we cannot accept
		 * further continuation lines.
		 */

		if (o->flags & HEAD_F_SKIP)
			return HEAD_SKIPPED;

		/*
		 * We strip leading spaces of all continuations before storing
		 * them.  If we have to dump the header, we will have to put
		 * some spaces, but we don't guarantee we'll put the same amount.
		 */

		p++;								/* First char is known space */
		while ((c = *p)) {
			if (!is_ascii_space(c))
				break;
			p++;
		}

		/*
		 * If we've reached the end of the line, then the continuation
		 * line was made of spaces only.  Weird, but we can ignore it.
		 * Note that it's not an EOH mark.
		 */

		if (*p == '\0')
			return HEAD_OK;

		/*
		 * Save the continuation line by appending into the last header
		 * field we handled, and also append the data in the hash table.
		 */

		hf = slist_tail(o->fields);
		hfield_append(hf, p);
		add_continuation(o, hf->name, p);
		o->size += len - (p - text);	/* Count only effective text */

	} else {
		char *b;
		char * const limit = &buf[sizeof buf - 1];	/* Leaves room for NUL */
		bool seen_space = FALSE;

		/*
		 * It's a new header line.
		 */

		o->flags &= ~HEAD_F_SKIP;		/* Assume this line will be OK */

		/*
		 * Parse header field.  Must be composed of ascii chars only.
		 * (no control characters, no space, no ISO Latin or other extension).
		 * The field name ends with ':', after possible white spaces.
		 */

		for (b = buf, c = *p; c; c = *(++p)) {
			if (c == ':') {
				*b++ = '\0';			/* Reached end of field */
				break;					/* Done, buf[] holds field name */
			}
			if (is_ascii_space(c)) {
				seen_space = TRUE;		/* Only trailing spaces allowed */
				continue;
			}
			if (
				seen_space || (c != '-' && c != '.' &&
					(!isascii(c) || is_ascii_cntrl(c) || is_ascii_punct(c)))
			) {
				o->flags |= HEAD_F_SKIP;
				return HEAD_BAD_CHARS;
			}
			if (b >= limit) {
				/* Field name would not fit in buf[], cannot be valid */
				o->flags |= HEAD_F_SKIP;
				return HEAD_MALFORMED;
			}
			*b++ = c;
		}

		/*
		 * If buf[] does not end with a NUL, we did not fully recognize
		 * the header: we reached the end of the line without encountering
		 * the ':' marker.
		 *
		 * If the buffer starts with a NUL char, it's also clearly malformed:
		 * the line started with ':' and there is no field name.
		 */

		g_assert(b > buf || (b == buf && *text == '\0'));

		if (b == buf || *(b-1) != '\0' || '\0' == buf[0]) {
			o->flags |= HEAD_F_SKIP;
			return HEAD_MALFORMED;
		}

		/*
		 * We have a valid header field in buf[].
		 */

		hf = hfield_make(buf);

		/*
		 * Strip leading spaces in the value.
		 */

		g_assert(*p == ':');

		p++;							/* First char is field separator */
		p = skip_ascii_spaces(p);

		/*
		 * Record field value.
		 */

		hfield_append(hf, p);
		add_header(o, buf, p);

		if (!o->fields) {
			o->fields = slist_new();
		}
		slist_append(o->fields, hf);
		o->size += len - (p - text);	/* Count only effective text */
	}

	return HEAD_OK;
}
/** * Parse an IPv6 Geo IP line and record the range in the database. */ static void gip_parse_ipv6(const char *line, int linenum) { const char *end; uint16 code; int error; uint8 ip[16]; unsigned bits; /* * Each line looks like: * * 2a03:be00::/32 nl * * The leading part up to the space is the IPv6 network in CIDR format. * The trailing word is the 2-letter ISO country code. */ if (!parse_ipv6_addr(line, ip, &end)) { g_warning("%s, line %d: bad IPv6 network address \"%s\"", gip_source[GIP_IPV6].file, linenum, line); return; } if ('/' != *end) { g_warning("%s, line %d: missing network separator in \"%s\"", gip_source[GIP_IPV6].file, linenum, line); return; } bits = parse_uint(end + 1, &end, 10, &error); if (error) { g_warning("%s, line %d: cannot parse network bit amount in \"%s\"", gip_source[GIP_IPV6].file, linenum, line); return; } if (bits > 128) { g_warning("%s, line %d: invalid bit amount %u in \"%s\"", gip_source[GIP_IPV6].file, linenum, bits, line); return; } if (!is_ascii_space(*end)) { g_warning("%s, line %d: missing spaces after network in \"%s\"", gip_source[GIP_IPV6].file, linenum, line); return; } while (is_ascii_space(*end)) end++; if ('\0' == *end) { g_warning("%s, line %d: missing country code in \"%s\"", gip_source[GIP_IPV6].file, linenum, line); return; } code = iso3166_encode_cc(end); if (ISO3166_INVALID == code) { g_warning("%s, line %d: bad country code in \"%s\"", gip_source[GIP_IPV6].file, linenum, line); return; } error = iprange_add_cidr6(geo_db, ip, bits, (code + 1) << 1); if (IPR_ERR_OK != error) { g_warning("%s, line %d: cannot insert %s/%u: %s", gip_source[GIP_IPV6].file, linenum, ipv6_to_string(ip), bits, iprange_strerror(error)); } }