Beispiel #1
0
static void
setup_map(void)
{
	static gboolean done;
	guint i;

	if (done)
		return;

	for (i = 0; i < G_N_ELEMENTS(map); i++)	{
		guchar c;

		if (i > 0 && utf8_byte_is_allowed(i)) {
			if (is_ascii_upper(i)) {
				c = ascii_tolower(i);
			} else if (
				is_ascii_punct(i) || is_ascii_cntrl(i) || is_ascii_space(i)
			) {
				c = ' ';
			} else { 
				c = i;
			}
		} else {
			c = 0;
		}
		map[i] = c;
	}

	done = TRUE;
}
Beispiel #2
0
/**
 * Strip leading and trailing blanks in text string.
 *
 * @param text		start of text to strip (NUL-terminated)
 * @param len_ptr	updated with new length if non-NULL
 *
 * @returns start of new text, and adjusted length in case we stripped.
 */
static const char *
xfmt_strip_blanks(const char *text, size_t *len_ptr)
{
    const char *p = text;
    unsigned retlen;
    int c;
    bool seen_non_blank = FALSE;
    const char *last_non_blank;
    const char *first_non_blank;

    first_non_blank = last_non_blank = p = text;

    /*
     * Text is NUL-terminated, so we can use utf8_decode_char_fast().
     */

    while ('\0' != (c = utf8_decode_char_fast(p, &retlen))) {
        p += retlen;

        if (seen_non_blank) {
            if (is_ascii_space(c))
                last_non_blank = p;				/* next char */
        } else {
            if (!is_ascii_space(c)) {
                seen_non_blank = TRUE;
                last_non_blank = p;				/* next char */
                first_non_blank = p - retlen;	/* this char */
            }
        }
    }

    if (len_ptr != NULL)
        *len_ptr = last_non_blank - first_non_blank;

    return first_non_blank;
}
Beispiel #3
0
/**
 * Compute character mask "hash", using one bit per letter of the alphabet,
 * plus one for any digit.
 */
static guint32
mask_hash(const char *s) {
	guchar c;
	guint32 mask = 0;

	while ((c = (guchar) *s++)) {
		if (is_ascii_space(c))
			continue;
		else if (is_ascii_digit(c))
			mask |= MASK_DIGIT;
		else {
			int idx = ascii_tolower(c) - 97;
			if (idx >= 0 && idx < 26)
				mask |= MASK_LETTER(idx);
		}
	}

	return mask;
}
Beispiel #4
0
/**
 * Dump field on specified file descriptor.
 */
static void
hfield_dump(const header_field_t *h, FILE *out)
{
	slist_iter_t *iter;
	bool first;

	header_field_check(h);
	g_assert(h->lines);

	fprintf(out, "%s: ", h->name);

	first = TRUE;
	iter = slist_iter_on_head(h->lines);
	for (/* NOTHING */; slist_iter_has_item(iter); slist_iter_next(iter)) {
		const char *s;

		if (first)
			first = FALSE;
		else
			fputs("    ", out);			/* Continuation line */

		s = slist_iter_current(iter);
		if (is_printable_iso8859_string(s)) {
			fputs(s, out);
		} else {
			char buf[80];
			const char *p = s;
			int c;
			size_t len = strlen(s);
			str_bprintf(buf, sizeof buf, "<%u non-printable byte%s>",
				(unsigned) len, plural(len));
			fputs(buf, out);
			while ((c = *p++)) {
				if (is_ascii_print(c) || is_ascii_space(c))
					fputc(c, out);
				else
					fputc('.', out);	/* Less visual clutter than '?' */
			}
		}
		fputc('\n', out);
	}
	slist_iter_free(&iter);
}
Beispiel #5
0
/**
 * Intuit the media type they are searching based on the first XML tag
 * we find in the meta data string, using simplistic lexical parsing which
 * will encompass 99% of the cases.
 */
static uint32
g2_node_intuit_media_type(const char *md)
{
	const char *p = md;
	const char *start;
	int c;
	uint32 flags;

	while ('<' != (c = *p++) && c != 0)
		/* empty */;

	if (0 == c)
		return 0;		/* Did not find any tag opening */

	start = p = skip_ascii_spaces(p);

	while (0 != (c = *p)) {
		if (is_ascii_space(c) || '/' == c || '>' == c) {
			char *name;

			/* Found end of word, we got the tag name */

			name = h_strndup(start, p - start);
			flags = TOKENIZE(name, g2_q2_md);
			if (0 == flags) {
				g_warning("%s(): unknown tag \"%s\", XML string was \"%s\"",
					G_STRFUNC, name, md);
			}
			hfree(name);
			return flags;
		}
		p++;
	}

	return 0;
}
Beispiel #6
0
/**
 * Remove trailing white space from line held within buffer.
 *
 * This is meant to be used to validate the line returned by fgets() and to
 * remove final "\n", or "\r\n" markers as well as any other trailing white
 * space.
 *
 * @param line		buffer where line is held
 * @param size		buffer size
 * @paran lenptr	if non-NULL, the final string length is written there
 *
 * @return TRUE if we were facing a line terminated by "\n", FALSE otherwise.
 */
bool
file_line_chomp_tail(char *line, size_t size, size_t *lenptr)
{
	size_t len;
	char *p;

	len = clamp_strlen(line, size);

	if (size == len || 0 == len)
		return FALSE;		/* No NUL found or empty string */

	if ('\n' != line[len - 1])
		return FALSE;		/* Truncated line, reading buffer was too small */

	p = &line[len - 1];
	do {
		*p = '\0';
	} while (p != line && is_ascii_space(*--p));

	if (lenptr != NULL)
		*lenptr = p - line + ('\0' == *p ? 0 : 1);

	return TRUE;
}
Beispiel #7
0
/**
 * Decodes "chunked" data.
 *
 * The function returns as soon as it needs more data to proceed, on
 * error, if the state CHUNK_STATE_END was reached, or if the state
 * CHUNK_STATE_DATA was reached. In the latter case the chunk payload
 * itself must be consumed and this function must not be called again
 * until the state CHUNK_STATE_DATA_CRLF is reached.
 *
 * @param rx			the current RX driver.
 * @param src			the chunk data.
 * @param size			no document.
 * @param p_error_str	if not NULL and parse_chunk() fails, it will point
 *						to an informational error message.
 *
 * @return 0 on failure; non-zero amount of consumed bytes on success.
 */
static size_t
parse_chunk(rxdrv_t *rx, const char *src, size_t size,
	const char **p_error_str)
{
	struct attr *attr = rx->opaque;
	const char *error_str;
	size_t len;

	g_assert(attr);
	g_assert(src);
	g_assert(size > 0);
	g_assert(attr->state < NUM_CHUNK_STATES);
	g_assert(0 == attr->data_remain);

	len = size;

	do {
		switch (attr->state) {
		case CHUNK_STATE_DATA_CRLF:
			/* The chunk-data must be followed by a CRLF */
			while (len > 0) {
				uchar c;

				len--;
				c = *src++;
				if ('\r' == c) {
				   /*
					* This allows more than one CR but we must consume
				 	* some data or keep state over this otherwise.
					*/
					continue;
				} else if ('\n' == c) {
					attr->state = CHUNK_STATE_SIZE;
					break;
				} else {
					/*
					 * Normally it is an error, there should be CRLF after
					 * the chunk data.  However, they might have forgotten
					 * to send the '\n' or the whole sequence.
					 *
					 * If what follows looks like a valid chunk size, then
					 * we should be able to resync properly: Unread the
					 * character and move on to the chunk size decoding.
					 */

					if (!(attr->flags & IF_NO_CRLF)) {
						attr->flags |= IF_NO_CRLF;
						g_warning("Host %s forgot CRLF after data",
							gnet_host_to_string(&rx->host));
					}

					len++;
					src--;
					attr->state = CHUNK_STATE_SIZE;
					break;
				}
			}
			break;

		case CHUNK_STATE_SIZE:
			g_assert(attr->hex_pos < sizeof attr->hex_buf);
			while (len > 0) {
				uchar c;

				len--;
				c = *src++;
				if (is_ascii_xdigit(c)) {
					if (attr->hex_pos >= sizeof attr->hex_buf) {
						error_str = "Overflow in chunk-size";
						goto error;
					}
					/* Collect up to 16 hex characters */
					attr->hex_buf[attr->hex_pos++] = c;
				} else {
					/*
					 * There might be a chunk-extension after the
					 * hexadecimal chunk-size but there shouldn't
					 * anything else.
					 */

					if (
						0 == attr->hex_pos ||
						(!is_ascii_space(c) && ';' != c)
					) {
						error_str = "Bad chunk-size";
						goto error;
					}
					attr->state = CHUNK_STATE_EXT;
					break;
				}
			}
			break;

		case CHUNK_STATE_EXT:
			/* Just skip over the chunk-extension */
			while (len > 0) {
				len--;
				if ('\n' == *src++) {

					/*
					 * Pick up the collected hex digits and
					 * calculate the chunk-size.
					 */

					g_assert(attr->hex_pos > 0);
					g_assert(attr->hex_pos <= sizeof attr->hex_buf);

					{
						uint64 v = 0;
						uint i;

						for (i = 0; i < attr->hex_pos; i++)
							v = (v << 4) | hex2int_inline(attr->hex_buf[i]);

						attr->data_remain = v;
						attr->hex_pos = 0;
					}

					attr->state = 0 != attr->data_remain
						? CHUNK_STATE_DATA
						: CHUNK_STATE_TRAILER_START;
					break;
				}
			}
			break;

		case CHUNK_STATE_TRAILER_START:
			/* We've reached another trailer line */
			if (len < 1)
				break;
			if ('\r' == src[0]) {
				/*
				 * This allows more than one CR but we must consume
				 * some data or keep state over this otherwise.
				 */
				src++;
				len--;
			}
			if (len < 1)
				break;
			if ('\n' == src[0]) {
				/* An empty line means the end of all trailers was reached */
				src++;
				len--;
				attr->state = CHUNK_STATE_END;
				break;
			}
			attr->state = CHUNK_STATE_TRAILER;
			/* FALL THROUGH */

		case CHUNK_STATE_TRAILER:
			/* Just skip over the trailer line */
			while (len > 0) {
				len--;
				if ('\n' == *src++) {
					/*
					 * Now check whether there's another trailer
					 * line or whether we've reached the end
					 */

					attr->state = CHUNK_STATE_TRAILER_START;
					break;
				}
			}
			break;

		case CHUNK_STATE_END:
			/*
			 * We're not supposed to receive data after the chunk stream
			 * has been ended.  But if we do, it means either we
			 * misinterpreted the chunk end stream or the other end is just
			 * going berserk.
			 */

			error_str = "Remaining data after chunk end";
			goto error;

		case CHUNK_STATE_DATA:
		case CHUNK_STATE_ERROR:
		case NUM_CHUNK_STATES:
			g_assert_not_reached();
			break;
		}

		/* NB: Some data from ``src'' must have been consumed or an
		 *     infinite loop may occur.
		 */

		if (CHUNK_STATE_DATA == attr->state) {
			if (GNET_PROPERTY(rx_debug) > 9)
				g_debug("parse_chunk: chunk size %s bytes",
					uint64_to_string(attr->data_remain));
			break;
		}

	} while (len > 0 && CHUNK_STATE_END != attr->state);

	if (p_error_str)
		*p_error_str = NULL;

	return size - len;

error:

	if (p_error_str)
		*p_error_str = error_str;

	attr->state = CHUNK_STATE_ERROR;
	return 0;
}
Beispiel #8
0
/**
 * Retrieves the major and minor version from a feature in the X-Features
 * header, if no support was found both major and minor are 0 and FALSE
 * is returned.
 */
bool
header_get_feature(const char *name, const header_t *header,
	uint *major, uint *minor)
{
	static const char x_features[] = "X-Features";
	char *buf, *start;

	if (major)
		*major = 0;
	if (minor)
		*minor = 0;

	buf = header_get(header, x_features);

	/*
	 * We could also try to scan for the header: name, so this would
     * make this function even more generic. But I would suggest another
     * function for this though.
     */

	if (buf == NULL) {
		/*
		 * Actually the 'specs' say we should assume it is supported if the
		 * X-Features header is not there. But I wouldn't count on it, and
		 * it was only for "legacy" attributes in the HTTP file exchange.
		 *
		 * Also, for optimization purposes, the X-Features line will be sent
		 * once per persistent HTTP connection, as the client is expected to
		 * cache the supported features.
		 */

		return FALSE;
	}

	/*
	 * We must locate the name exactly, and not a subpart of another
	 * feature.  If we look for "bar", then we must not match on "foobar".
	 */

	start = buf;
	for (;;) {
		int pc;			/* Previous char */

		buf = ascii_strcasestr(buf, name);

		if (buf == NULL)
			return FALSE;
		if (buf == start)
			break;

		/*
		 * Since we're looking for whole words separated by a space or the
		 * regular header punctuation, the next match can't occur before
		 * the end of the current string we matched...
		 */

		pc = *(buf - 1);
		buf += strlen(name);

		if (*buf != '/')
			continue;		/* Matched "barcode" when looking for "bar" */

		if (is_ascii_space(pc) || pc == ',' || pc == ';')
			break;			/* Found it! */
	}

	buf++;

	if (*buf == '\0')
		return FALSE;

	return 0 == parse_major_minor(buf, NULL, major, minor);
}
Beispiel #9
0
static void
html_render_text(struct render_context *ctx, const struct array text)
{
	unsigned c_len;
	size_t i;
	bool whitespace = FALSE;
	struct array entity, current;

	entity = zero_array;
	current = zero_array;

	for (i = 0; i < text.size; i += c_len) {
		const unsigned char c = text.data[i];
		bool is_whitespace;

		is_whitespace = FALSE;
		c_len = utf8_first_byte_length_hint(c);
		if (!ctx->preformatted && is_ascii_space(c)) {
			if (whitespace)
				continue;
			is_whitespace = TRUE;
			whitespace = TRUE;
			if (0x20 == c && i > 0 && i < text.size - c_len) {
				const unsigned char next_c = text.data[i + c_len];

				if (!is_ascii_space(next_c))
					is_whitespace = FALSE;
			}
		} else {
			whitespace = FALSE;
		}
		if ('&' == c || ';' == c || is_whitespace) {
			if (current.size > 0) {
				html_output_print(ctx->output, current);
				current = zero_array;
			}
		}
		if (is_whitespace) {
			if (i > 0 || ctx->closing)
				html_output_print(ctx->output, array_from_string(" "));
		} else if ('&' == c) {
			if (entity.data) {
				html_render_entity(ctx, entity);
			}
			entity.data = deconstify_gchar(&text.data[i + c_len]);
			entity.size = 0;
			continue;
		} else if (';' == c) {
			if (entity.data) {
				html_render_entity(ctx, entity);
				entity = zero_array;
				continue;
			}
		} else if (entity.data) {
			entity.size += c_len;
		} else {
			if (!current.data)
				current.data = &text.data[i];
			current.size += c_len;
		}
	}
	if (current.size > 0) {
		html_output_print(ctx->output, current);
	}
}
Beispiel #10
0
/**
 * Default implementation to get the default gateway by parsing the
 * output of the "netstat -rn" command.
 *
 * @param addrp		where gateway address is to be written
 *
 * @return 0 on success, -1 on failure with errno set.
 */
static G_GNUC_COLD int
parse_netstat(host_addr_t *addrp)
#ifdef HAS_POPEN
{
	FILE *f = NULL;
	char tmp[80];
	uint32 gate = 0;

	/*
	 * This implementation should be a safe default on UNIX platforms, but
	 * it is inefficient and as such can only constitute a fallback.
	 */

	if (-1 != access("/bin/netstat", X_OK)) {
		f = popen("/bin/netstat -rn", "r");
	} else if (-1 != access("/usr/bin/netstat", X_OK)) {
		f = popen("/usr/bin/netstat -rn", "r");
	}

	if (NULL == f) {
		errno = ENOENT;		/* netstat not found */
		return -1;
	}

	/*
	 * Typical netstat -rn output:
	 *
	 * Destination        Gateway            Flags .....
	 * 0.0.0.0            192.168.0.200      UG
	 * default            192.168.0.200      UG
	 *
	 * Some systems like linux display "0.0.0.0", but traditional UNIX
	 * output is "default" for the default route.
	 */

	while (fgets(tmp, sizeof tmp, f)) {
		char *p;
		uint32 ip;

		p = is_strprefix(tmp, "default");
		if (NULL == p)
			p = is_strprefix(tmp, "0.0.0.0");

		if (NULL == p || !is_ascii_space(*p))
			continue;

		ip = string_to_ip(p);
		if (ip != 0) {
			gate = ip;
			break;
		}
	}

	pclose(f);

	if (0 == gate) {
		errno = ENETUNREACH;
		return -1;
	}

	*addrp = host_addr_get_ipv4(gate);
	return 0;
}
Beispiel #11
0
struct array
html_get_attribute(const struct array *tag, enum html_attr attribute)
{
	size_t i = 0;

	if (
		!tag || !tag->data ||
		NUM_HTML_ATTR == attribute || HTML_ATTR_UNKNOWN == attribute
	)
		goto not_found;

	/**
	   <tag-name>([<space>][<attr>[<space>]'='[<space>]'"'<value>'"'])*
	 */

	/* skip <tag-name> */
	while (i < tag->size && !is_ascii_space(tag->data[i]))
		i++;

	while (i < tag->size) {
		struct array value, attr;

		/* skip <space> */
		while (i < tag->size && is_ascii_space(tag->data[i]))
			i++;

		attr = array_init(&tag->data[i], tag->size - i);

		/* skip <attr> */
		while (i < tag->size) {
			const unsigned char c = tag->data[i];
			if ('=' == c || is_ascii_space(c))
				break;
			i++;
		}

		/* skip <space> */
		while (i < tag->size && is_ascii_space(tag->data[i]))
			i++;

		if (i < tag->size && '=' == tag->data[i]) {
			bool quoted = FALSE;
			size_t start;

			i++;

			/* skip <space> */
			while (i < tag->size && is_ascii_space(tag->data[i]))
				i++;

			if (i < tag->size && '"' == tag->data[i]) {
				i++;
				quoted = TRUE;
			}
			start = i;

			/* skip <value> */
			while (i < tag->size) {
				const unsigned char c = tag->data[i];
				if (quoted) {
					if ('"' == c)
						break;
				} else if (is_ascii_space(c)) {
					break;
				}
				i++;
			}
			value = array_init(&tag->data[start], i - start);
		} else {
			value = array_init(&tag->data[i], 0);
		}

		if (attribute == parse_attribute(attr))
			return value;
	}

not_found:
	return zero_array;
}
Beispiel #12
0
/**
 * Parse an IPv4 Geo IP line and record the range in the database.
 */
static void
gip_parse_ipv4(const char *line, int linenum)
{
	const char *end;
	uint16 code;
	int c;
	struct range_context ctx;

	/*
	 * Each line looks like:
	 *
	 *    15.0.0.0 - 15.130.191.255 fr
	 *
	 * So we just have to parse the two IP addresses, then compute
	 * all the ranges they cover in order to insert them into
	 * the IP database.
	 */

	end = strchr(line, '-');
	if (end == NULL) {
		g_warning("%s, line %d: no IP address separator in \"%s\"",
			gip_source[GIP_IPV4].file, linenum, line);
		return;
	}

	if (!string_to_ip_strict(line, &ctx.ip1, NULL)) {
		g_warning("%s, line %d: invalid first IP in \"%s\"",
			gip_source[GIP_IPV4].file, linenum, line);
		return;
	}

	/*
	 * Skip spaces until the second IP.
	 */

	end++;			/* Go past the minus, parsing the second IP */

	while ((c = *end)) {
		if (!is_ascii_space(c))
			break;
		end++;
	}

	if (!string_to_ip_strict(end, &ctx.ip2, &end)) {
		g_warning("%s, line %d: invalid second IP in \"%s\"",
			gip_source[GIP_IPV4].file, linenum, line);
		return;
	}

	/*
	 * Make sure the IP addresses are ordered correctly
	 */

	if (ctx.ip1 > ctx.ip2) {
		g_warning("%s, line %d: invalid IP order in \"%s\"",
			gip_source[GIP_IPV4].file, linenum, line);
		return;
	}

	/*
	 * Skip spaces until the country code.
	 */

	while ((c = *end)) {
		if (!is_ascii_space(c))
			break;
		end++;
	}

	if (c == '\0') {
		g_warning("%s, line %d: missing country code in \"%s\"",
			gip_source[GIP_IPV4].file, linenum, line);
		return;
	}

	code = iso3166_encode_cc(end);
	if (ISO3166_INVALID == code) {
		g_warning("%s, line %d: bad country code in \"%s\"",
			gip_source[GIP_IPV4].file, linenum, line);
		return;
	}

	/* code must not be zero and the LSB must be zero due to using it as
	 * as key for ipranges */
	ctx.country = (code + 1) << 1;
	ctx.line = line;
	ctx.linenum = linenum;

	/*
	 * Now compute the CIDR ranges between the ip1 and ip2 addresses
	 * and insert each range into the database, linking it to the
	 * country code.
	 */

	ip_range_split(ctx.ip1, ctx.ip2, gip_add_cidr, &ctx);
}
Beispiel #13
0
/**
 * Do an actual search.
 *
 * @param table			table containing organized entries to search from
 * @param search_term	the query string
 * @param callback		routine to invoke for each match
 * @param ctx			user-supplied data to pass on to callback
 * @param max_res		maximum amount of results to return
 * @param qhv			query hash vector built from query string, for routing
 *
 * @return number of hits we produced
 */
G_GNUC_HOT int
st_search(
	search_table_t *table,
	const char *search_term,
	st_search_callback callback,
	gpointer ctx,
	int max_res,
	query_hashvec_t *qhv)
{
	char *search;
	int key, nres = 0;
	guint i, len;
	struct st_bin *best_bin = NULL;
	int best_bin_size = INT_MAX;
	word_vec_t *wovec;
	guint wocnt;
	cpattern_t **pattern;
	struct st_entry **vals;
	guint vcnt;
	int scanned = 0;		/* measure search mask efficiency */
	guint32 search_mask;
	size_t minlen;
	guint random_offset;  /* Randomizer for search returns */

	search = UNICODE_CANONIZE(search_term);

	if (GNET_PROPERTY(query_debug) > 4 && 0 != strcmp(search, search_term)) {
		char *safe_search = hex_escape(search, FALSE);
		char *safe_search_term = hex_escape(search_term, FALSE);
		g_debug("original search term: \"%s\"", safe_search_term);
		g_debug("canonical search term: \"%s\"", safe_search);
		if (safe_search != search)
			HFREE_NULL(safe_search);
		if (safe_search_term != search_term)
			HFREE_NULL(safe_search_term);
	}
	len = strlen(search);

	/*
	 * Find smallest bin
	 */

	if (len >= 2) {
		for (i = 0; i < len - 1; i++) {
			struct st_bin *bin;
			if (is_ascii_space(search[i]) || is_ascii_space(search[i+1]))
				continue;
			key = st_key(table, search + i);
			if ((bin = table->bins[key]) == NULL) {
				best_bin = NULL;
				break;
			}
			if (bin->nvals < best_bin_size) {
				best_bin = bin;
				best_bin_size = bin->nvals;
			}
		}

		if (GNET_PROPERTY(matching_debug) > 4)
			g_debug("MATCH st_search(): str=\"%s\", len=%d, best_bin_size=%d",
				lazy_safe_search(search_term), len, best_bin_size);
	}

	/*
	 * If the best_bin is NULL, we did not find a matching bin, and we're
	 * sure we won't be able to find the search string.
	 *
	 * Note that on search strings like "r e m ", we always have a letter
	 * followed by spaces, so we won't search that.
	 *		--RAM, 06/10/2001
	 */

	if (best_bin == NULL) {
		/*
		 * If we have a `qhv', we need to compute the word vector anway,
		 * for query routing...
		 */

		if (qhv == NULL)
			goto finish;
	}

	/*
	 * Prepare matching patterns
	 */

	wocnt = word_vec_make(search, &wovec);

	/*
	 * Compute the query hashing information for query routing, if needed.
	 */

	if (qhv != NULL) {
		for (i = 0; i < wocnt; i++) {
			if (wovec[i].len >= QRP_MIN_WORD_LENGTH)
				qhvec_add(qhv, wovec[i].word, QUERY_H_WORD);
		}
	}

	if (wocnt == 0 || best_bin == NULL) {
		if (wocnt > 0)
			word_vec_free(wovec, wocnt);
		goto finish;
	}

	g_assert(best_bin_size > 0);	/* Allocated bin, it must hold something */


	pattern = walloc0(wocnt * sizeof *pattern);

	/*
	 * Prepare matching optimization, an idea from Mike Green.
	 *
	 * At library building time, we computed a mask hash, made from the
	 * lowercased file name, using one bit per different letter, roughly
	 * (see mask_hash() for the exact algorigthm).
	 *
	 * We're now going to compute the same mask on the query, and compare
	 * it bitwise with the mask for each file.  If the file does not hold
	 * at least all the chars present in the query, it's no use applying
	 * the pattern matching algorithm, it won't match at all.
	 *
	 *		--RAM, 01/10/2001
	 */

	search_mask = mask_hash(search);

	/*
	 * Prepare second matching optimization: since all words in the query
	 * must match the exact amount of time, we can compute the minimum length
	 * the searched file must have.  We add one character after each word
	 * but the last, to account for space between words.
	 *		--RAM, 11/07/2002
	 */

	for (minlen = 0, i = 0; i < wocnt; i++)
		minlen += wovec[i].len + 1;
	minlen--;
	g_assert(minlen <= INT_MAX);

	/*
	 * Search through the smallest bin
	 */

	vcnt = best_bin->nvals;
	vals = best_bin->vals;
	random_offset = random_u32() % vcnt;

	nres = 0;
	for (i = 0; i < vcnt; i++) {
		const struct st_entry *e;
		shared_file_t *sf;
		size_t canonic_len;

		/*
		 * As we only return a limited count of results, pick a random
		 * offset, so that repeated searches will match different items
		 * instead of always the first - with some probability.
		 */
		e = vals[(i + random_offset) % vcnt];
		
		if ((e->mask & search_mask) != search_mask)
			continue;		/* Can't match */

		sf = e->sf;

		canonic_len = shared_file_name_canonic_len(sf);
		if (canonic_len < minlen)
			continue;		/* Can't match */

		scanned++;

		if (entry_match(e->string, canonic_len, pattern, wovec, wocnt)) {
			if (GNET_PROPERTY(matching_debug) > 4) {
				g_debug("MATCH \"%s\" matches %s",
					search, shared_file_name_nfc(sf));
			}

			if ((*callback)(ctx, sf)) {
				nres++;
				if (nres >= max_res)
					break;
			}
		}
	}

	if (GNET_PROPERTY(matching_debug) > 3)
		g_debug("MATCH st_search(): scanned %d entr%s from the %d in bin, "
			"got %d match%s",
			scanned, 1 == scanned ? "y" : "ies",
			best_bin_size, nres, 1 == nres ? "" : "es");

	for (i = 0; i < wocnt; i++)
		if (pattern[i])					/* Lazily compiled by entry_match() */
			pattern_free(pattern[i]);

	wfree(pattern, wocnt * sizeof *pattern);
	word_vec_free(wovec, wocnt);

finish:
	if (search != search_term) {
		HFREE_NULL(search);
	}

	return nres;
}
Beispiel #14
0
G_GNUC_COLD void
upload_stats_load_history(void)
{
	FILE *upload_stats_file;
	file_path_t fp;
	char line[FILENAME_MAX + 64];
	guint lineno = 0;

	gcu_upload_stats_gui_freeze();
	
	file_path_set(&fp, settings_config_dir(), ul_stats_file);

	/* open file for reading */
	upload_stats_file = file_config_open_read(ul_stats_what, &fp, 1);
	if (upload_stats_file == NULL)
		goto done;

	/* parse, insert names into ul_stats_clist */
	while (fgets(line, sizeof(line), upload_stats_file)) {
		static const struct ul_stats zero_item;
		struct ul_stats item;
		struct sha1 sha1_buf;
		const char *p;
		size_t i;

		lineno++;
		if (line[0] == '#' || line[0] == '\n')
			continue;

		p = strchr(line, '\t');
		if (NULL == p)
			goto corrupted;

		line[p - line] = '\0';		/* line is now the URL-escaped file name */
		p++;

		/* URL-unescape in-place */
		if (!url_unescape(line, TRUE))
			goto corrupted;

		item = zero_item;
		item.pathname = line;

		for (i = 0; i < 8; i++) {
			guint64 v;
			int error;
			const char *endptr;

			p = skip_ascii_spaces(p);

			/* SVN versions up to 15322 had only 6 fields in the history */
			if (5 == i && '\0' == *p)
				break;

			switch (i) {
			case 7:
				/* We have a SHA1 or '*' if none known */
				if ('*' != *p) {
					size_t len = clamp_strlen(p, SHA1_BASE32_SIZE);
					
					error = !parse_base32_sha1(p, len, &sha1_buf);
					item.sha1 = error ? NULL : &sha1_buf;
				} else {
					error = FALSE;
				}
				p = skip_ascii_non_spaces(p);
				v = 0;
				break;
			default:
				v = parse_uint64(p, &endptr, 10, &error);
				p = deconstify_gchar(endptr);
			}

			if (error || !is_ascii_space(*endptr))
				goto corrupted;

			switch (i) {
			case 0: item.size = v; break;
			case 1: item.attempts = v; break;
			case 2: item.complete = v; break;
			case 3: item.bytes_sent |= ((guint64) (guint32) v) << 32; break;
			case 4: item.bytes_sent |= (guint32) v; break;
			case 5: item.rtime = MIN(v + (time_t) 0, TIME_T_MAX + (guint64) 0);
			case 6: item.dtime = MIN(v + (time_t) 0, TIME_T_MAX + (guint64) 0); 
			case 7: break;	/* Already stored above */
			default:
				g_assert_not_reached();
				goto corrupted;
			}
		}

		/* 
		 * We store the filenames UTF-8 encoded but the file might have been
		 * edited or corrupted.
		 */
		if (is_absolute_path(item.pathname)) {
			item.filename = lazy_filename_to_utf8_normalized(
						filepath_basename(item.pathname), UNI_NORM_NFC);
		} else {
			item.filename = lazy_unknown_to_utf8_normalized(
						filepath_basename(item.pathname), UNI_NORM_NFC, NULL);
		}

		if (upload_stats_find(NULL, item.pathname, item.size)) {
			g_warning("upload_stats_load_history():"
				" Ignoring line %u due to duplicate file.", lineno);
		} else if (upload_stats_find(item.sha1, item.pathname, item.size)) {
			g_warning("upload_stats_load_history():"
				" Ignoring line %u due to duplicate file.", lineno);
		} else {
			upload_stats_add(item.pathname, item.size, item.filename,
				item.attempts, item.complete, item.bytes_sent,
				item.rtime, item.dtime, item.sha1);
		}
		continue;

	corrupted:
		g_warning("upload statistics file corrupted at line %u.", lineno);
	}

	/* close file */
	fclose(upload_stats_file);

done:
	gcu_upload_stats_gui_thaw();
	return;
}
Beispiel #15
0
/**
 * Creates a valid and sanitized filename from the supplied string. For most
 * Unix-like platforms anything goes but for security reasons, shell meta
 * characters are replaced by harmless characters.
 *
 * @param filename the suggested filename.
 * @param no_spaces if TRUE, spaces are replaced with underscores.
 * @param no_evil if TRUE, "evil" characters are replaced with underscores.
 *
 * @returns a newly allocated string using halloc() or ``filename''
 *			if it was a valid filename already.
 */
char *
filename_sanitize(const char *filename, bool no_spaces, bool no_evil)
{
	const char *p;
	const char *s;
	char *q;

	g_assert(filename);

	/* Almost all evil characters are forbidden on Windows, anyway */
	no_evil |= is_running_on_mingw();

	/* Leading spaces are just confusing */
	p = skip_ascii_spaces(filename);

	/* Make sure the filename isn't too long */
	if (strlen(p) >= FILENAME_MAXBYTES) {
		q = halloc(FILENAME_MAXBYTES);
		filename_shrink(p, q, FILENAME_MAXBYTES);
		s = q;
	} else {
		s = p;
		q = NULL;
	}

	/*
	 * Replace shell meta characters and likely problematic characters.
	 *
	 * Although parentheses are not evil per se, they make it a pain to
	 * copy-n-paste filenames without going through the shell's auto-
	 * completion (which normally does the necessary escaping).
	 *
	 * To keep things "readable", we replace parentheses with brackets.
	 * Although brackets are meaningful for the shells, they are only
	 * interpreted in the presence of "*" or "?", two characters that we
	 * strip already.
	 *		--RAM, 2013-11-03
	 */
	{
		size_t i;
		uchar c;

		for (i = 0; '\0' != (c = s[i]); i++) {
			if (
				c < 32
				|| is_ascii_cntrl(c)
				|| G_DIR_SEPARATOR == c
				|| '/' == c
				|| (0 == i && ('.' == c || '-' == c))
				|| (no_spaces && is_ascii_space(c))
				|| (no_evil && filename_is_evil_char(c))
			) {
				if (!q)
					q = h_strdup(s);
				q[i] = '_';	/* replace undesired char with underscore */
			} else if ('(' == c) {
				if (!q)
					q = h_strdup(s);
				q[i] = '[';
			} else if (')' == c) {
				if (!q)
					q = h_strdup(s);
				q[i] = ']';
			}
		}

		/**
		 * Windows does not like filenames ending with a space or period.
		 */
		while (i-- > 0 && (is_ascii_space(s[i]) || '.' == s[i])) {
			if (!q)
				q = h_strdup(s);
			q[i] = '\0';	/* truncate string */
		}
	}

	if (filename_is_reserved(q ? q : s)) {
		HFREE_NULL(q);
		q = h_strdup("noname");
	}

	if (NULL == q && s != filename)
		q = h_strdup(s);		/* Trimmed leading white space, must copy */

	return q ? q : deconstify_gchar(s);
}
Beispiel #16
0
/**
 * Loads the whitelist into memory.
 */
static void G_COLD
whitelist_retrieve(void)
{
	char line[1024];
	FILE *f;
	filestat_t st;
	unsigned linenum = 0;
	file_path_t fp[1];

	whitelist_generation++;

	file_path_set(fp, settings_config_dir(), whitelist_file);
	f = file_config_open_read_norename("Host Whitelist", fp, N_ITEMS(fp));
	if (!f)
		return;

	if (fstat(fileno(f), &st)) {
		g_warning("%s(): fstat() failed: %m", G_STRFUNC);
		fclose(f);
		return;
	}

    while (fgets(line, sizeof line, f)) {
		pslist_t *sl_addr, *sl;
		const char *endptr, *start;
		host_addr_t addr;
    	uint16 port;
		uint8 bits;
		bool item_ok;
		bool use_tls;
		char *hname;

        linenum++;

		if (!file_line_chomp_tail(line, sizeof line, NULL)) {
			g_warning("%s(): line %u too long, aborting", G_STRFUNC, linenum);
			break;
		}

        if (file_line_is_skipable(line))
			continue;

		sl_addr = NULL;
		addr = zero_host_addr;
		endptr = NULL;
		hname = NULL;

		endptr = is_strprefix(line, "tls:");
		if (endptr) {
			use_tls = TRUE;
			start = endptr;
		} else {
			use_tls = FALSE;
			start = line;
		}

		port = 0;
		if (string_to_host_addr_port(start, &endptr, &addr, &port)) {
       		sl_addr = name_to_host_addr(host_addr_to_string(addr),
							settings_dns_net());
		} else if (string_to_host_or_addr(start, &endptr, &addr)) {
			uchar c = *endptr;

			switch (c) {
			case '\0':
			case ':':
			case '/':
				break;
			default:
				if (!is_ascii_space(c))
					endptr = NULL;
			}

			if (!endptr) {
				g_warning("%s(): line %d: "
					"expected a hostname or IP address \"%s\"",
					G_STRFUNC, linenum, line);
				continue;
			}

			/* Terminate the string for name_to_host_addr() */
			hname = h_strndup(start, endptr - start);
		} else {
            g_warning("%s(): line %d: expected hostname or IP address \"%s\"",
				G_STRFUNC, linenum, line);
			continue;
		}

       	g_assert(sl_addr != NULL || hname != NULL);
		g_assert(NULL != endptr);
		bits = 0;
		item_ok = TRUE;

		/*
		 * When an explicit address is given (no hostname) and with no
		 * port, one can suffix the address with bits to indicate a CIDR
		 * range of whitelisted addresses.
		 */

		if (0 == port) {
			/* Ignore trailing items separated by a space */
			while ('\0' != *endptr && !is_ascii_space(*endptr)) {
				uchar c = *endptr++;

				if (':' == c) {
					int error;
					uint32 v;

					if (0 != port) {
						g_warning("%s(): line %d: multiple colons after host",
							G_STRFUNC, linenum);
						item_ok = FALSE;
						break;
					}

					v = parse_uint32(endptr, &endptr, 10, &error);
					port = (error || v > 0xffff) ? 0 : v;
					if (0 == port) {
						g_warning("%s(): line %d: "
							"invalid port value after host",
							G_STRFUNC, linenum);
						item_ok = FALSE;
						break;
					}
				} else if ('/' == c) {
					const char *ep;
					uint32 mask;

					if (0 != bits) {
						g_warning("%s(): line %d: "
							"multiple slashes after host", G_STRFUNC, linenum);
						item_ok = FALSE;
						break;
					}

					if (string_to_ip_strict(endptr, &mask, &ep)) {
						if (!host_addr_is_ipv4(addr)) {
							g_warning("%s(): line %d: "
								"IPv4 netmask after non-IPv4 address",
								G_STRFUNC, linenum);
							item_ok = FALSE;
							break;
						}
						endptr = ep;

						if (0 == (bits = netmask_to_cidr(mask))) {
							g_warning("%s(): line %d: "
								"IPv4 netmask after non-IPv4 address",
								G_STRFUNC, linenum);
							item_ok = FALSE;
							break;
						}

					} else {
						int error;
						uint32 v;

						v = parse_uint32(endptr, &endptr, 10, &error);
						if (
							error ||
							0 == v ||
							(v > 32 && host_addr_is_ipv4(addr)) ||
							(v > 128 && host_addr_is_ipv6(addr))
						) {
							g_warning("%s(): line %d: "
								"invalid numeric netmask after host",
								G_STRFUNC, linenum);
							item_ok = FALSE;
							break;
						}
						bits = v;
					}
				} else {
					g_warning("%s(): line %d: "
						"unexpected character after host", G_STRFUNC, linenum);
					item_ok = FALSE;
					break;
				}
			}
		}

		if (item_ok) {
			struct whitelist *item;
			if (hname) {
				item = whitelist_hostname_create(use_tls, hname, port);
				whitelist_dns_resolve(item, FALSE);
			} else {
				PSLIST_FOREACH(sl_addr, sl) {
					host_addr_t *aptr = sl->data;
					g_assert(aptr != NULL);
					item = whitelist_addr_create(use_tls, *aptr, port, bits);
					whitelist_add(item);
				}
			}
		} else {
Beispiel #17
0
/**
 * Append a new line of text at the end of the header.
 * A private copy of the text is made.
 *
 * @return an error code, or HEAD_OK if appending was successful.
 */
int
header_append(header_t *o, const char *text, int len)
{
	char buf[MAX_LINE_SIZE];
	const char *p = text;
	uchar c;
	header_field_t *hf;

	header_check(o);
	g_assert(len >= 0);

	if (o->flags & HEAD_F_EOH)
		return HEAD_EOH_REACHED;

	/*
	 * If empty line, we reached EOH.
	 */

	if (len == 0) {
		o->flags |= HEAD_F_EOH;				/* Mark we reached EOH */
		return HEAD_EOH;
	}

	/*
	 * Sanity checks.
	 */

	if (o->size >= HEAD_MAX_SIZE)
		return HEAD_TOO_LARGE;

	if (++(o->num_lines) >= HEAD_MAX_LINES)
		return HEAD_MANY_LINES;

	/*
	 * Detect whether line is a new header or a continuation.
	 */

	c = *p;
	if (is_ascii_space(c)) {

		/*
		 * It's a continuation.
		 *
		 * Make sure we already have recorded something, or we have
		 * an unexpected continuation line.
		 */

		if (NULL == o->fields)
			return HEAD_CONTINUATION;		/* Unexpected continuation */

		/*
		 * When a previous header line was malformed, we cannot accept
		 * further continuation lines.
		 */

		if (o->flags & HEAD_F_SKIP)
			return HEAD_SKIPPED;

		/*
		 * We strip leading spaces of all continuations before storing
		 * them.  If we have to dump the header, we will have to put
		 * some spaces, but we don't guarantee we'll put the same amount.
		 */

		p++;								/* First char is known space */
		while ((c = *p)) {
			if (!is_ascii_space(c))
				break;
			p++;
		}

		/*
		 * If we've reached the end of the line, then the continuation
		 * line was made of spaces only.  Weird, but we can ignore it.
		 * Note that it's not an EOH mark.
		 */

		if (*p == '\0')
			return HEAD_OK;

		/*
		 * Save the continuation line by appending into the last header
		 * field we handled.
		 */

		hf = slist_tail(o->fields);
		hfield_append(hf, p);
		add_continuation(o, hf->name, p);
		o->size += len - (p - text);	/* Count only effective text */

		/*
		 * Also append the data in the hash table.
		 */

	} else {
		char *b;
		bool seen_space = FALSE;

		/*
		 * It's a new header line.
		 */

		o->flags &= ~HEAD_F_SKIP;		/* Assume this line will be OK */

		/*
		 * Parse header field.  Must be composed of ascii chars only.
		 * (no control characters, no space, no ISO Latin or other extension).
		 * The field name ends with ':', after possible white spaces.
		 */

		for (b = buf, c = *p; c; c = *(++p)) {
			if (c == ':') {
				*b++ = '\0';			/* Reached end of field */
				break;					/* Done, buf[] holds field name */
			}
			if (is_ascii_space(c)) {
				seen_space = TRUE;		/* Only trailing spaces allowed */
				continue;
			}
			if (
				seen_space || (c != '-' && c != '.' &&
					(!isascii(c) || is_ascii_cntrl(c) || is_ascii_punct(c)))
			) {
				o->flags |= HEAD_F_SKIP;
				return HEAD_BAD_CHARS;
			}
			*b++ = c;
		}

		/*
		 * If buf[] does not end with a NUL, we did not fully recognize
		 * the header: we reached the end of the line without encountering
		 * the ':' marker.
		 *
		 * If the buffer starts with a NUL char, it's also clearly malformed.
		 */

		g_assert(b > buf || (b == buf && *text == '\0'));

		if (b == buf || *(b-1) != '\0') {
			o->flags |= HEAD_F_SKIP;
			return HEAD_MALFORMED;
		}

		/*
		 * We have a valid header field in buf[].
		 */

		hf = hfield_make(buf);

		/*
		 * Strip leading spaces in the value.
		 */

		g_assert(*p == ':');

		p++;							/* First char is field separator */
		p = skip_ascii_spaces(p);

		/*
		 * Record field value.
		 */

		hfield_append(hf, p);
		add_header(o, buf, p);
		if (!o->fields) {
			o->fields = slist_new();
		}
		slist_append(o->fields, hf);
		o->size += len - (p - text);	/* Count only effective text */
	}

	return HEAD_OK;
}
Beispiel #18
0
/**
 * Parse an IPv6 Geo IP line and record the range in the database.
 */
static void
gip_parse_ipv6(const char *line, int linenum)
{
	const char *end;
	uint16 code;
	int error;
	uint8 ip[16];
	unsigned bits;

	/*
	 * Each line looks like:
	 *
	 *    2a03:be00::/32 nl
	 *
	 * The leading part up to the space is the IPv6 network in CIDR format.
	 * The trailing word is the 2-letter ISO country code.
	 */

	if (!parse_ipv6_addr(line, ip, &end)) {
		g_warning("%s, line %d: bad IPv6 network address \"%s\"",
			gip_source[GIP_IPV6].file, linenum, line);
		return;
	}

	if ('/' != *end) {
		g_warning("%s, line %d: missing network separator in \"%s\"",
			gip_source[GIP_IPV6].file, linenum, line);
		return;
	}

	bits = parse_uint(end + 1, &end, 10, &error);

	if (error) {
		g_warning("%s, line %d: cannot parse network bit amount in \"%s\"",
			gip_source[GIP_IPV6].file, linenum, line);
		return;
	}

	if (bits > 128) {
		g_warning("%s, line %d: invalid bit amount %u in \"%s\"",
			gip_source[GIP_IPV6].file, linenum, bits, line);
		return;
	}

	if (!is_ascii_space(*end)) {
		g_warning("%s, line %d: missing spaces after network in \"%s\"",
			gip_source[GIP_IPV6].file, linenum, line);
		return;
	}

	while (is_ascii_space(*end))
		end++;

	if ('\0' == *end) {
		g_warning("%s, line %d: missing country code in \"%s\"",
			gip_source[GIP_IPV6].file, linenum, line);
		return;
	}

	code = iso3166_encode_cc(end);
	if (ISO3166_INVALID == code) {
		g_warning("%s, line %d: bad country code in \"%s\"",
			gip_source[GIP_IPV6].file, linenum, line);
		return;
	}

	error = iprange_add_cidr6(geo_db, ip, bits, (code + 1) << 1);

	if (IPR_ERR_OK != error) {
		g_warning("%s, line %d: cannot insert %s/%u: %s",
			gip_source[GIP_IPV6].file, linenum, ipv6_to_string(ip),
			bits, iprange_strerror(error));
	}
}