Пример #1
0
/**
 * Retry publishing after some delay.
 *
 * @param pe		the entry to publish
 * @param delay		delay in seconds
 * @param msg		if non-NULL, logging message explaining the delay
 */
static void
publisher_retry(struct publisher_entry *pe, int delay, const char *msg)
{
	struct pubdata *pd;

	publisher_check(pe);
	g_assert(NULL == pe->publish_ev);
	g_assert(delay > 0);

	pd = get_pubdata(pe->sha1);
	if (pd != NULL) {
		pd->next_enqueue = time_advance(tm_time(), UNSIGNED(delay));
		dbmw_write(db_pubdata, pe->sha1, pd, sizeof *pd);
	}

	pe->publish_ev = cq_insert(publish_cq, delay * 1000, handle_entry, pe);
	pe->last_delayed = tm_time();

	if (GNET_PROPERTY(publisher_debug) > 3) {
		shared_file_t *sf = shared_file_by_sha1(pe->sha1);
		g_debug("PUBLISHER will retry SHA-1 %s %s\"%s\" in %s: %s",
			sha1_to_string(pe->sha1),
			(sf && sf != SHARE_REBUILDING && shared_file_is_partial(sf)) ?
				"partial " : "",
			(sf && sf != SHARE_REBUILDING) ? shared_file_name_nfc(sf) : "",
			compact_time(delay), msg != NULL ? msg : "<no reason>");
		shared_file_unref(&sf);
	}
}
Пример #2
0
/**
 * Add `comp' to the current completed count, and update the amount of
 * bytes transferred.  Note that `comp' can be zero.
 * When `update_dtime' is TRUE, we update the "done time", otherwise we
 * change the "last request time".
 *
 * If the row does not exist (race condition: deleted since upload started),
 * recreate one.
 */
static void
upload_stats_file_add(
	const shared_file_t *sf,
	int comp, guint64 sent, gboolean update_dtime)
{
	const char *pathname = shared_file_path(sf);
	filesize_t size = shared_file_size(sf);
	struct ul_stats *s;
	const struct sha1 *sha1;

	g_assert(comp >= 0);

	sha1 = sha1_hash_available(sf) ? shared_file_sha1(sf) : NULL;

	/* find this file in the ul_stats_clist */
	s = upload_stats_find(sha1, pathname, size);

	/* increment the completed counter */
	if (NULL == s) {
		/* uh oh, row has since been deleted, add it: 1 attempt */
		upload_stats_add(pathname, size, shared_file_name_nfc(sf),
			1, comp, sent, tm_time(), tm_time(), sha1);
	} else {
		s->bytes_sent += sent;
		s->norm = 1.0 * s->bytes_sent / s->size;
		s->complete += comp;
		if (update_dtime)
			s->dtime = tm_time();
		else
			s->rtime = tm_time();
		gcu_upload_stats_gui_update(s);
	}

	dirty = TRUE;		/* Request asynchronous save of stats */
}
Пример #3
0
/**
 * Called when an upload starts.
 */
void
upload_stats_file_begin(const shared_file_t *sf)
{
	struct ul_stats *s;
	const char *pathname;
	filesize_t size;
	const struct sha1 *sha1;

	g_return_if_fail(sf);
	pathname = shared_file_path(sf);
	size = shared_file_size(sf);
	sha1 = sha1_hash_available(sf) ? shared_file_sha1(sf) : NULL;

	/* find this file in the ul_stats_clist */
	s = upload_stats_find(sha1, pathname, size);

	/* increment the attempted counter */
	if (NULL == s) {
		upload_stats_add(pathname, size, shared_file_name_nfc(sf),
			1, 0, 0, tm_time(), 0, sha1);
	} else {
		s->attempts++;
		s->rtime = tm_time();
		gcu_upload_stats_gui_update(s);
	}

	dirty = TRUE;		/* Request asynchronous save of stats */
}
Пример #4
0
/**
 * Make sure the filename associated to a SHA1 is given the name of
 * the shared file and no longer bears the name of the partial file.
 * This can happen when the partial file is seeded then the file is
 * renamed and shared.
 */
void
upload_stats_enforce_local_filename(const shared_file_t *sf)
{
	struct ul_stats *s;
	const struct sha1 *sha1;
	const char *name;

	if (!upload_stats_by_sha1)
		return;		/* Nothing known by SHA1 yet */

	sha1 = sha1_hash_available(sf) ? shared_file_sha1(sf) : NULL;

	if (!sha1)
		return;		/* File's SHA1 not known yet, nothing to do here */

	s = g_hash_table_lookup(upload_stats_by_sha1, sha1);

	if (NULL == s)
		return;							/* SHA1 not in stats, nothing to do */

	name = shared_file_name_nfc(sf);
	if (name == s->filename)			/* Both are string atoms */
		return;							/* Everything is fine */

	/*
	 * We need to update the filename to match the shared file.
	 */

	hash_list_remove(upload_stats_list, s);
	atom_str_change(&s->pathname, shared_file_path(sf));
	atom_str_change(&s->filename, name);
	hash_list_append(upload_stats_list, s);

	gcu_upload_stats_gui_update_name(s);
}
Пример #5
0
/**
 * Delete pubdata from database.
 */
static void
delete_pubdata(const sha1_t *sha1)
{
	dbmw_delete(db_pubdata, sha1);

	if (GNET_PROPERTY(publisher_debug) > 2) {
		shared_file_t *sf = shared_file_by_sha1(sha1);
		g_debug("PUBLISHER SHA-1 %s %s\"%s\" reclaimed",
			sha1_to_string(sha1),
			(sf && sf != SHARE_REBUILDING && shared_file_is_partial(sf)) ?
				"partial " : "",
			(sf && sf != SHARE_REBUILDING) ? shared_file_name_nfc(sf) : "");
		shared_file_unref(&sf);
	}
}
Пример #6
0
static bool
huge_spam_check(shared_file_t *sf, const struct sha1 *sha1)
{
	if (NULL != sha1 && spam_sha1_check(sha1)) {
		g_warning("file \"%s\" is listed as spam (SHA1)", shared_file_path(sf));
		return TRUE;
	}

	if (
		spam_check_filename_size(shared_file_name_nfc(sf),
			shared_file_size(sf))
	) {
		g_warning("file \"%s\" is listed as spam (Name)", shared_file_path(sf));
		return TRUE;
	}
	return FALSE;
}
Пример #7
0
/**
 * Writes the browse host data of the context ``ctx'' to the buffer
 * ``dest''. This must be called multiple times to retrieve the complete
 * data until zero is returned i.e., the end of file is reached.
 *
 * This routine deals with HTML data generation.
 *
 * @param ctx an initialized browse host context.
 * @param dest the destination buffer.
 * @param size the amount of bytes ``dest'' can hold.
 *
 * @return -1 on failure, zero at the end-of-file condition or if size
 *         was zero. On success, the amount of bytes copied to ``dest''
 *         is returned.
 */
static ssize_t
browse_host_read_html(struct special_upload *ctx,
	void *const dest, size_t size)
{
	static const char header[] =
		"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\r\n"
		"<html>\r\n"
		"<head>\r\n"
		"<title>Browse Host</title>\r\n"
		"</head>\r\n"
		"<body>\r\n";
	static const char trailer[] = "</ul>\r\n</body>\r\n</html>\r\n";
	struct browse_host_upload *bh = cast_to_browse_host_upload(ctx);
	char *p = dest;

	g_assert(NULL != bh);
	g_assert(NULL != dest);
	g_assert(size <= INT_MAX);

	g_assert(UNSIGNED(bh->state) < NUM_BH_STATES);
	g_assert(bh->b_size <= INT_MAX);
	g_assert(bh->b_offset <= bh->b_size);

	do {
		switch (bh->state) {
		case BH_STATE_HEADER:
			if (!bh->b_data) {
				bh->b_data = header;
				bh->b_size = CONST_STRLEN(header);
			}
			p += browse_host_read_data(bh, p, &size);
			if (bh->b_size == bh->b_offset)
				browse_host_next_state(bh, BH_STATE_LIBRARY_INFO);
			break;

		case BH_STATE_LIBRARY_INFO:
			if (!bh->b_data) {
				bh->w_buf_size = w_concat_strings(&bh->w_buf,
					"<h1>", product_get_name(), "</h1>\r\n"
					"<h3>", version_get_string(),
				   	" sharing ",
					uint64_to_string(shared_files_scanned()),
					" file",
					shared_files_scanned() == 1 ? "" : "s",
					" ",
					short_kb_size(shared_kbytes_scanned(),
						GNET_PROPERTY(display_metric_units)),
					" total</h3>\r\n"
					"<ul>\r\n", (void *) 0);
				bh->b_data = bh->w_buf;
				bh->b_size = bh->w_buf_size - 1; /* minus trailing NUL */
				bh->b_offset = 0;
			}
			p += browse_host_read_data(bh, p, &size);
			if (bh->b_size == bh->b_offset)
				browse_host_next_state(bh, BH_STATE_FILES);
			break;

		case BH_STATE_TRAILER:
			if (!bh->b_data) {
				bh->b_data = trailer;
				bh->b_size = CONST_STRLEN(trailer);
			}
			p += browse_host_read_data(bh, p, &size);
			if (bh->b_size == bh->b_offset)
				browse_host_next_state(bh, BH_STATE_EOF);
			break;

		case BH_STATE_FILES:
			if (bh->b_data && bh->b_size == bh->b_offset) {
				g_assert(bh->w_buf == bh->b_data);
				wfree(bh->w_buf, bh->w_buf_size);
				bh->w_buf = NULL;
				bh->w_buf_size = 0;
				bh->b_data = NULL;
			}

			if (!bh->b_data) {
				const shared_file_t *sf;

				bh->file_index++;
				sf = shared_file_sorted(bh->file_index);
				if (!sf) {
				   	if (bh->file_index > shared_files_scanned())
						browse_host_next_state(bh, BH_STATE_TRAILER);
					/* Skip holes in the file_index table */
				} else if (SHARE_REBUILDING == sf) {
					browse_host_next_state(bh, BH_STATE_REBUILDING);
				} else {
					const char * const name_nfc = shared_file_name_nfc(sf);
					const filesize_t file_size = shared_file_size(sf);
					size_t html_size;
					char *html_name;

					{
						const char *dir;
						char *name;
						
						dir = shared_file_relative_path(sf);
						if (dir) {
							name = h_strconcat(dir, "/", name_nfc, (void *) 0);
						} else {
							name = deconstify_char(name_nfc);
						}

						html_size = 1 + html_escape(name, NULL, 0);
						html_name = walloc(html_size);
						html_escape(name, html_name, html_size);
						if (name != name_nfc) {
							HFREE_NULL(name);
						}
					}

					if (sha1_hash_available(sf)) {
						const struct sha1 *sha1 = shared_file_sha1(sf);

						bh->w_buf_size = w_concat_strings(&bh->w_buf,
							"<li><a href=\"/uri-res/N2R?urn:sha1:",
							sha1_base32(sha1),
							"\">", html_name, "</a>&nbsp;[",
							short_html_size(file_size,
								GNET_PROPERTY(display_metric_units)),
							"]</li>\r\n",
							(void *) 0);
					} else {
						char *escaped;

						escaped = url_escape(name_nfc);
						bh->w_buf_size = w_concat_strings(&bh->w_buf,
							"<li><a href=\"/get/",
							uint32_to_string(shared_file_index(sf)),
							"/", escaped, "\">", html_name, "</a>"
							"&nbsp;[",
							short_html_size(file_size,
								GNET_PROPERTY(display_metric_units)),
							"]</li>\r\n", (void *) 0);

						if (escaped != name_nfc) {
							HFREE_NULL(escaped);
						}
					}

					wfree(html_name, html_size);
					bh->b_data = bh->w_buf;
					bh->b_size = bh->w_buf_size - 1; /* minus trailing NUL */
					bh->b_offset = 0;
				}
			}

			if (bh->b_data)
				p += browse_host_read_data(bh, p, &size);

			break;

		case BH_STATE_REBUILDING:
			if (!bh->b_data) {
				static const char msg[] =
					"<li>"
						"<b>"
							"The library is currently being rebuild. Please, "
							"try again in a moment."
						"</b>"
					"</li>";

				bh->b_data = msg;
				bh->b_size = CONST_STRLEN(msg);
			}
			p += browse_host_read_data(bh, p, &size);
			if (bh->b_size == bh->b_offset)
				browse_host_next_state(bh, BH_STATE_TRAILER);
			break;

		case BH_STATE_EOF:
			return p - cast_to_char_ptr(dest);

		case NUM_BH_STATES:
			g_assert_not_reached();
		}
	} while (size > 0);

	return p - cast_to_char_ptr(dest);
}
Пример #8
0
/**
 * Add file to the current query hit.
 *
 * @return TRUE if we kept the file, FALSE if we did not include it in the hit.
 */
static bool
g2_build_qh2_add(struct g2_qh2_builder *ctx, const shared_file_t *sf)
{
	const sha1_t *sha1;
	g2_tree_t *h, *c;

	shared_file_check(sf);

	/*
	 * Make sure the file is still in the library.
	 */

	if (0 == shared_file_index(sf))
		return FALSE;

	/*
	 * On G2, the H/URN child is required, meaning we need the SHA1 at least.
	 */

	if (!sha1_hash_available(sf))
		return FALSE;

	/*
	 * Do not send duplicates, as determined by the SHA1 of the resource.
	 *
	 * A user may share several files with different names but the same SHA1,
	 * and if all of them are hits, we only want to send one instance.
	 *
	 * When generating hits for host-browsing, we do not care about duplicates
	 * and ctx->hs is NULL then.
	 */

	sha1 = shared_file_sha1(sf);		/* This is an atom */

	if (ctx->hs != NULL) {
		if (hset_contains(ctx->hs, sha1))
			return FALSE;

		hset_insert(ctx->hs, sha1);
	}

	/*
	 * Create the "H" child and attach it to the current tree.
	 */

	if (NULL == ctx->t)
		g2_build_qh2_start(ctx);

	h = g2_tree_alloc_empty("H");
	g2_tree_add_child(ctx->t, h);

	/*
	 * URN -- Universal Resource Name
	 *
	 * If there is a known TTH, then we can generate a bitprint, otherwise
	 * we just convey the SHA1.
	 */

	{
		const tth_t * const tth = shared_file_tth(sf);
		char payload[SHA1_RAW_SIZE + TTH_RAW_SIZE + sizeof G2_URN_BITPRINT];
		char *p = payload;

		if (NULL == tth) {
			p = mempcpy(p, G2_URN_SHA1, sizeof G2_URN_SHA1);
			p += clamp_memcpy(p, sizeof payload - ptr_diff(p, payload),
				sha1, SHA1_RAW_SIZE);
		} else {
			p = mempcpy(p, G2_URN_BITPRINT, sizeof G2_URN_BITPRINT);
			p += clamp_memcpy(p, sizeof payload - ptr_diff(p, payload),
				sha1, SHA1_RAW_SIZE);
			p += clamp_memcpy(p, sizeof payload - ptr_diff(p, payload),
				tth, TTH_RAW_SIZE);
		}

		g_assert(ptr_diff(p, payload) <= sizeof payload);

		c = g2_tree_alloc_copy("URN", payload, ptr_diff(p, payload));
		g2_tree_add_child(h, c);
	}

	/*
	 * URL -- empty to indicate that we share the file via uri-res.
	 */

	if (ctx->flags & QHIT_F_G2_URL) {
		uint known;
		uint16 csc;

		c = g2_tree_alloc_empty("URL");
		g2_tree_add_child(h, c);

		/*
		 * CSC -- if we know alternate sources, indicate how many in "CSC".
		 *
		 * This child is only emitted when they requested "URL".
		 */

		known = dmesh_count(sha1);
		csc = MIN(known, MAX_INT_VAL(uint16));

		if (csc != 0) {
			char payload[2];

			poke_le16(payload, csc);
			c = g2_tree_alloc_copy("CSC", payload, sizeof payload);
			g2_tree_add_child(h, c);
		}

		/*
		 * PART -- if we only have a partial file, indicate how much we have.
		 *
		 * This child is only emitted when they requested "URL".
		 */

		if (shared_file_is_partial(sf) && !shared_file_is_finished(sf)) {
			filesize_t available = shared_file_available(sf);
			char payload[8];	/* If we have to encode file size as 64-bit */
			uint32 av32;
			time_t mtime = shared_file_modification_time(sf);

			c = g2_tree_alloc_empty("PART");
			g2_tree_add_child(h, c);

			av32 = available;
			if (av32 == available) {
				/* Fits within a 32-bit quantity */
				poke_le32(payload, av32);
				g2_tree_set_payload(c, payload, sizeof av32, TRUE);
			} else {
				/* Encode as a 64-bit quantity then */
				poke_le64(payload, available);
				g2_tree_set_payload(c, payload, sizeof payload, TRUE);
			}

			/*
			 * GTKG extension: encode the last modification time of the
			 * partial file in an "MT" child.  This lets the other party
			 * determine whether the host is still able to actively complete
			 * the file.
			 */

			poke_le32(payload, (uint32) mtime);
			g2_tree_add_child(c,
				g2_tree_alloc_copy("MT", payload, sizeof(uint32)));
		}

		/*
		 * CT -- creation time of the resource (GTKG extension).
		 */

		{
			time_t create_time = shared_file_creation_time(sf);

			if ((time_t) -1 != create_time) {
				char payload[8];
				int n;

				create_time = MAX(0, create_time);
				n = vlint_encode(create_time, payload);
				g2_tree_add_child(h,
					g2_tree_alloc_copy("CT", payload, n));	/* No trailing 0s */
			}
		}
	}

	/*
	 * DN -- distinguished name.
	 *
	 * Note that the presence of DN also governs the presence of SZ if the
	 * file length does not fit a 32-bit unsigned quantity.
	 */

	if (ctx->flags & QHIT_F_G2_DN) {
		char payload[8];		/* If we have to encode file size as 64-bit */
		uint32 fs32;
		filesize_t fs = shared_file_size(sf);
		const char *name;
		const char *rp;

		c = g2_tree_alloc_empty("DN");

		fs32 = fs;
		if (fs32 == fs) {
			/* Fits within a 32-bit quantity */
			poke_le32(payload, fs32);
			g2_tree_set_payload(c, payload, sizeof fs32, TRUE);
		} else {
			/* Does not fit a 32-bit quantity, emit a SZ child */
			poke_le64(payload, fs);
			g2_tree_add_child(h,
				g2_tree_alloc_copy("SZ", payload, sizeof payload));
		}

		name = shared_file_name_nfc(sf);
		g2_tree_append_payload(c, name, shared_file_name_nfc_len(sf));
		g2_tree_add_child(h, c);

		/*
		 * GTKG extension: if there is a file path, expose it as a "P" child
		 * under the DN node.
		 */

		rp = shared_file_relative_path(sf);
		if (rp != NULL) {
			g2_tree_add_child(c, g2_tree_alloc_copy("P", rp, strlen(rp)));
		}
	}

	/*
	 * GTKG extension: if they requested alt-locs in the /Q2/I with "A", then
	 * send them some known alt-locs in an "ALT" child.
	 *
	 * Note that these alt-locs can be for Gnutella hosts: since both Gnutella
	 * and G2 share a common HTTP-based file transfer mechanism with compatible
	 * extra headers, there is no need to handle them separately.
	 */

	if (ctx->flags & QHIT_F_G2_ALT) {
		gnet_host_t hvec[G2_BUILD_QH2_MAX_ALT];
		int hcnt = 0;

		hcnt = dmesh_fill_alternate(sha1, hvec, N_ITEMS(hvec));

		if (hcnt > 0) {
			int i;

			c = g2_tree_alloc_empty("ALT");

			for (i = 0; i < hcnt; i++) {
				host_addr_t addr;
				uint16 port;

				addr = gnet_host_get_addr(&hvec[i]);
				port = gnet_host_get_port(&hvec[i]);

				if (host_addr_is_ipv4(addr)) {
					char payload[6];

					host_ip_port_poke(payload, addr, port, NULL);
					g2_tree_append_payload(c, payload, sizeof payload);
				}
			}

			/*
			 * If the payload is still empty, then drop the "ALT" child.
			 * Otherwise, attach it to the "H" node.
			 */

			if (NULL == g2_tree_node_payload(c, NULL)) {
				g2_tree_free_null(&c);
			} else {
				g2_tree_add_child(h, c);
			}
		}
	}

	/*
	 * Update the size of the query hit we're generating.
	 */

	ctx->current_size += g2_frame_serialize(h, NULL, 0);

	return TRUE;
}
Пример #9
0
/**
 * Do an actual search.
 *
 * @param table			table containing organized entries to search from
 * @param search_term	the query string
 * @param callback		routine to invoke for each match
 * @param ctx			user-supplied data to pass on to callback
 * @param max_res		maximum amount of results to return
 * @param qhv			query hash vector built from query string, for routing
 *
 * @return number of hits we produced
 */
G_GNUC_HOT int
st_search(
	search_table_t *table,
	const char *search_term,
	st_search_callback callback,
	gpointer ctx,
	int max_res,
	query_hashvec_t *qhv)
{
	char *search;
	int key, nres = 0;
	guint i, len;
	struct st_bin *best_bin = NULL;
	int best_bin_size = INT_MAX;
	word_vec_t *wovec;
	guint wocnt;
	cpattern_t **pattern;
	struct st_entry **vals;
	guint vcnt;
	int scanned = 0;		/* measure search mask efficiency */
	guint32 search_mask;
	size_t minlen;
	guint random_offset;  /* Randomizer for search returns */

	search = UNICODE_CANONIZE(search_term);

	if (GNET_PROPERTY(query_debug) > 4 && 0 != strcmp(search, search_term)) {
		char *safe_search = hex_escape(search, FALSE);
		char *safe_search_term = hex_escape(search_term, FALSE);
		g_debug("original search term: \"%s\"", safe_search_term);
		g_debug("canonical search term: \"%s\"", safe_search);
		if (safe_search != search)
			HFREE_NULL(safe_search);
		if (safe_search_term != search_term)
			HFREE_NULL(safe_search_term);
	}
	len = strlen(search);

	/*
	 * Find smallest bin
	 */

	if (len >= 2) {
		for (i = 0; i < len - 1; i++) {
			struct st_bin *bin;
			if (is_ascii_space(search[i]) || is_ascii_space(search[i+1]))
				continue;
			key = st_key(table, search + i);
			if ((bin = table->bins[key]) == NULL) {
				best_bin = NULL;
				break;
			}
			if (bin->nvals < best_bin_size) {
				best_bin = bin;
				best_bin_size = bin->nvals;
			}
		}

		if (GNET_PROPERTY(matching_debug) > 4)
			g_debug("MATCH st_search(): str=\"%s\", len=%d, best_bin_size=%d",
				lazy_safe_search(search_term), len, best_bin_size);
	}

	/*
	 * If the best_bin is NULL, we did not find a matching bin, and we're
	 * sure we won't be able to find the search string.
	 *
	 * Note that on search strings like "r e m ", we always have a letter
	 * followed by spaces, so we won't search that.
	 *		--RAM, 06/10/2001
	 */

	if (best_bin == NULL) {
		/*
		 * If we have a `qhv', we need to compute the word vector anway,
		 * for query routing...
		 */

		if (qhv == NULL)
			goto finish;
	}

	/*
	 * Prepare matching patterns
	 */

	wocnt = word_vec_make(search, &wovec);

	/*
	 * Compute the query hashing information for query routing, if needed.
	 */

	if (qhv != NULL) {
		for (i = 0; i < wocnt; i++) {
			if (wovec[i].len >= QRP_MIN_WORD_LENGTH)
				qhvec_add(qhv, wovec[i].word, QUERY_H_WORD);
		}
	}

	if (wocnt == 0 || best_bin == NULL) {
		if (wocnt > 0)
			word_vec_free(wovec, wocnt);
		goto finish;
	}

	g_assert(best_bin_size > 0);	/* Allocated bin, it must hold something */


	pattern = walloc0(wocnt * sizeof *pattern);

	/*
	 * Prepare matching optimization, an idea from Mike Green.
	 *
	 * At library building time, we computed a mask hash, made from the
	 * lowercased file name, using one bit per different letter, roughly
	 * (see mask_hash() for the exact algorigthm).
	 *
	 * We're now going to compute the same mask on the query, and compare
	 * it bitwise with the mask for each file.  If the file does not hold
	 * at least all the chars present in the query, it's no use applying
	 * the pattern matching algorithm, it won't match at all.
	 *
	 *		--RAM, 01/10/2001
	 */

	search_mask = mask_hash(search);

	/*
	 * Prepare second matching optimization: since all words in the query
	 * must match the exact amount of time, we can compute the minimum length
	 * the searched file must have.  We add one character after each word
	 * but the last, to account for space between words.
	 *		--RAM, 11/07/2002
	 */

	for (minlen = 0, i = 0; i < wocnt; i++)
		minlen += wovec[i].len + 1;
	minlen--;
	g_assert(minlen <= INT_MAX);

	/*
	 * Search through the smallest bin
	 */

	vcnt = best_bin->nvals;
	vals = best_bin->vals;
	random_offset = random_u32() % vcnt;

	nres = 0;
	for (i = 0; i < vcnt; i++) {
		const struct st_entry *e;
		shared_file_t *sf;
		size_t canonic_len;

		/*
		 * As we only return a limited count of results, pick a random
		 * offset, so that repeated searches will match different items
		 * instead of always the first - with some probability.
		 */
		e = vals[(i + random_offset) % vcnt];
		
		if ((e->mask & search_mask) != search_mask)
			continue;		/* Can't match */

		sf = e->sf;

		canonic_len = shared_file_name_canonic_len(sf);
		if (canonic_len < minlen)
			continue;		/* Can't match */

		scanned++;

		if (entry_match(e->string, canonic_len, pattern, wovec, wocnt)) {
			if (GNET_PROPERTY(matching_debug) > 4) {
				g_debug("MATCH \"%s\" matches %s",
					search, shared_file_name_nfc(sf));
			}

			if ((*callback)(ctx, sf)) {
				nres++;
				if (nres >= max_res)
					break;
			}
		}
	}

	if (GNET_PROPERTY(matching_debug) > 3)
		g_debug("MATCH st_search(): scanned %d entr%s from the %d in bin, "
			"got %d match%s",
			scanned, 1 == scanned ? "y" : "ies",
			best_bin_size, nres, 1 == nres ? "" : "es");

	for (i = 0; i < wocnt; i++)
		if (pattern[i])					/* Lazily compiled by entry_match() */
			pattern_free(pattern[i]);

	wfree(pattern, wocnt * sizeof *pattern);
	word_vec_free(wovec, wocnt);

finish:
	if (search != search_term) {
		HFREE_NULL(search);
	}

	return nres;
}
Пример #10
0
/**
 * Handle a SHA-1 entry, publishing its alt-loc to the DHT if still shared.
 */
static void
publisher_handle(struct publisher_entry *pe)
{
	shared_file_t *sf;
	bool is_partial = FALSE;
	int alt_locs;
	time_delta_t min_uptime;
	uint32 avg_uptime;

	publisher_check(pe);
	g_assert(NULL == pe->publish_ev);

	sf = shared_file_by_sha1(pe->sha1);

	/*
	 * Remove SHA1 if no longer shared.
	 */

	if (NULL == sf) {
		fileinfo_t *fi = file_info_by_sha1(pe->sha1);

		/*
		 * If a partial file has lees than the minimum amount of data for PFSP,
		 * shared_file_by_sha1() will return NULL, hence we need to explicitly
		 * check for existence through file_info_by_sha1() and that the file
		 * still exists.
		 */

		if (fi != NULL && file_exists(fi->pathname)) {
			/* Waiting for more data to be able to share, or PFSP re-enabled */
			publisher_retry(pe, PUBLISH_BUSY, "partial file missing");
			return;
		}

		if (GNET_PROPERTY(publisher_debug)) {
			g_debug("PUBLISHER SHA-1 %s is no longer shared",
				sha1_to_string(pe->sha1));
		}
		publisher_entry_free(pe, TRUE);
		return;
	}

	/*
	 * Wait when rebuilding the library.
	 */

	if (SHARE_REBUILDING == sf) {
		publisher_retry(pe, PUBLISH_BUSY, "library being rebuilt");
		return;
	}

	is_partial = shared_file_is_partial(sf);

	/*
	 * If the SHA1 is not available, wait.
	 */

	if (
		!is_partial &&
		(!sha1_hash_available(sf) || !sha1_hash_is_uptodate(sf))
	) {
		publisher_retry(pe, PUBLISH_BUSY, "SHA-1 of file unknown yet");
		goto done;
	}

	/*
	 * Look whether this node has a sufficient average uptime.
	 *
	 * We're stricter to publish partial files because we want to favor
	 * publishing of full files in the DHT, and the benefits of publishing
	 * partial entries come only if we're up for a long enough time.
	 *
	 * Since publishing imposes lookup traffic in the DHT, it is not efficient
	 * to have transient nodes publish file sharing information because this
	 * will likely never be useful.
	 */

	min_uptime = PUBLISH_TRANSIENT;
	if (is_partial)
		min_uptime *= 2;

	avg_uptime = get_average_servent_uptime(tm_time());

	if (avg_uptime < UNSIGNED(min_uptime)) {
		time_delta_t delay = min_uptime - avg_uptime;

		delay = MAX(delay, PUBLISH_BUSY);
		publisher_retry(pe, delay, "minimum average uptime not reached yet");
		goto done;
	}

	/*
	 * If we are dealing with a file for which we know enough alternate
	 * locations, assume it is popular and do not publish it yet.
	 *
	 * We do not publish the SHA-1 of a partial file for which we know
	 * of at least two alternate locations because the purpose of us publishing
	 * these partial SHA-1s is to attract other PFSP-aware hosts and
	 * recreate a mesh.
	 */

	alt_locs = dmesh_count(pe->sha1);
	is_partial = is_partial && !shared_file_is_finished(sf);

	if (alt_locs > (is_partial ? PUBLISH_PARTIAL_MAX : PUBLISH_DMESH_MAX)) {
		if (GNET_PROPERTY(publisher_debug)) {
			g_debug("PUBLISHER SHA-1 %s %s\"%s\" has %d download mesh "
				"entr%s, skipped", sha1_to_string(pe->sha1),
				is_partial ? "partial " : "", shared_file_name_nfc(sf),
				alt_locs, plural_y(alt_locs));
		}
		publisher_hold(pe, PUBLISH_POPULAR, "popular file");
		goto done;
	}

	/*
	 * If the DHT is not enabled, postpone processing.
	 */

	if (!dht_enabled()) {
		publisher_hold(pe, PUBLISH_BUSY, "DHT  disabled");
		goto done;
	}

	/*
	 * If this is a partial file for which we have less than the minimum
	 * for PFSP sharing, or if PFSP has been disabled, skip it.
	 */

	if (shared_file_is_partial(sf)) {
		fileinfo_t *fi = shared_file_fileinfo(sf);

		if (
			!file_info_partial_shareable(fi) ||
			fi->done < GNET_PROPERTY(pfsp_minimum_filesize)
		) {
			publisher_hold(pe, PUBLISH_BUSY, "PFSP minima not reached");
			goto done;
		}
	}

	/*
	 * Check whether it is time to process the entry, in case we're
	 * restarting quickly after a shutdown.
	 */

	if (0 == pe->last_publish) {
		struct pubdata *pd = get_pubdata(pe->sha1);

		if (pd != NULL) {
			time_t now = tm_time();
			time_delta_t enqueue = delta_time(pd->next_enqueue, now);
			time_delta_t expire = delta_time(pd->expiration, now);

			if (enqueue > 0 && (0 == pd->expiration || expire > 0)) {
				int delay = MIN(enqueue, PUBLISH_POPULAR);
				if (pd->expiration != 0)
					delay = MIN(delay, expire);

				if (GNET_PROPERTY(publisher_debug) > 1) {
					g_debug("PUBLISHER SHA-1 %s delayed by %s",
						sha1_to_string(pe->sha1), compact_time(enqueue));
				}

				publisher_retry(pe, delay, "first-time delay");
				goto done;
			}
		}
	}

	/*
	 * Cancel possible remaining backgrounded publishing.
	 */

	if (pe->backgrounded) {
		pdht_cancel_file(pe->sha1, FALSE);
		pe->backgrounded = FALSE;
	}

	/*
	 * OK, we can publish this alternate location.
	 */

	if (pe->last_publish) {
		if (GNET_PROPERTY(publisher_debug) > 2) {
			g_debug("PUBLISHER SHA-1 %s re-enqueued %d secs "
				"after last publish", sha1_to_string(pe->sha1),
				(int) delta_time(tm_time(), pe->last_publish));
		}
	}

	pe->last_enqueued = tm_time();
	pdht_publish_file(sf, publisher_done, pe);

	/* FALL THROUGH */

done:
	shared_file_unref(&sf);
}
Пример #11
0
/**
 * Publishing callback invoked when asynchronous publication is completed,
 * or ended with an error.
 *
 * @return TRUE if we accept the publishing, FALSE otherwise to get the
 * publishing layer to continue attempts to failed STORE roots and report
 * on progress using the same callback.
 */
static bool
publisher_done(void *arg, pdht_error_t code, const pdht_info_t *info)
{
	struct publisher_entry *pe = arg;
	struct pubdata *pd;
	int delay = PUBLISH_BUSY;
	bool expired = FALSE;
	bool accepted = TRUE;

	publisher_check(pe);

	pd = get_pubdata(pe->sha1);

	/*
	 * Update stats on republishing before value expiration.
	 */

	if (PDHT_E_OK == code) {
		if (pe->last_publish && info->roots > 0) {
			if (pd != NULL) {
				if (pd->expiration && delta_time(tm_time(), pd->expiration) > 0)
					expired = TRUE;
			} else {
				time_delta_t elapsed = delta_time(tm_time(), pe->last_publish);
				if (elapsed > DHT_VALUE_ALOC_EXPIRE)
					expired = TRUE;
			}
			if (expired)
				gnet_stats_inc_general(GNR_DHT_REPUBLISHED_LATE);
		}
	}

	/*
	 * Compute retry delay.
	 */

	switch (code) {
	case PDHT_E_OK:
		/*
		 * If we were not able to publish to KDA_K nodes, decrease the
		 * delay before republishing.  We use a non-linear decimation of
		 * the republish time, as a function of the number of nodes to which
		 * we could publish.
		 */

		delay = publisher_delay(info, DHT_VALUE_ALOC_EXPIRE);
		accepted = publisher_is_acceptable(info);
		break;
	case PDHT_E_POPULAR:
		/*
		 * Compute the suitable delay: the first time, we use PUBLISH_POPULAR,
		 * and then we double each time until we reach PUBLISH_POPULAR_MAX.
		 *
		 * If we already tried to publish the entry, pe->last_delayed will
		 * be non-zero.
		 */
		if (0 != pe->last_delayed) {
			time_delta_t elapsed = delta_time(tm_time(), pe->last_delayed);
			if (elapsed < PUBLISH_POPULAR) {
				delay = PUBLISH_POPULAR;
			} else if (elapsed >= PUBLISH_POPULAR_MAX / 2) {
				delay = PUBLISH_POPULAR_MAX;
			} else {
				delay = elapsed * 2;
			}
		} else {
			delay = PUBLISH_POPULAR;
		}
		break;
	case PDHT_E_NOT_SHARED:
	case PDHT_E_LOOKUP_EXPIRED:
	case PDHT_E_LOOKUP:
	case PDHT_E_UDP_CLOGGED:
	case PDHT_E_PUBLISH_EXPIRED:
	case PDHT_E_PUBLISH_ERROR:
	case PDHT_E_SHA1:
	case PDHT_E_PENDING:
	case PDHT_E_CANCELLED:
	case PDHT_E_GGEP:
	case PDHT_E_NONE:
		delay = PUBLISH_BUSY;
		break;
	case PDHT_E_MAX:
		g_assert_not_reached();
	}

	/*
	 * For a backgrounded entry publishing, we need to adjust the computed
	 * delay with the time that was elapsed
	 */

	g_assert(!pe->backgrounded == !(pe->publish_ev != NULL));

	if (pe->backgrounded) {
		time_delta_t elapsed = delta_time(tm_time(), pe->last_delayed);
		g_assert(pe->last_delayed > 0);
		cq_cancel(&pe->publish_ev);
		if (delay > elapsed) {
			delay -= elapsed;
		} else {
			delay = 1;
		}
	}

	/*
	 * Logging.
	 */

	if (GNET_PROPERTY(publisher_debug) > 1) {
		shared_file_t *sf = shared_file_by_sha1(pe->sha1);
		char retry[80];
		char after[80];
		const char *late = "";

		after[0] = '\0';
		if (pe->last_publish) {
			time_delta_t elapsed = delta_time(tm_time(), pe->last_publish);

			str_bprintf(after, sizeof after,
				" after %s", compact_time(elapsed));

			if (pd != NULL) {
				if (expired)
					late = "late, ";
			} else {
				late = "no data, ";
			}
		}

		str_bprintf(retry, sizeof retry, "%s", compact_time(delay));

		g_debug("PUBLISHER SHA-1 %s %s%s\"%s\" %spublished to %u node%s%s: %s"
			" (%stook %s, total %u node%s, proba %.3f%%, retry in %s,"
			" %s bg, path %u) [%s]",
			sha1_to_string(pe->sha1),
			pe->backgrounded ? "[bg] " : "",
			(sf && sf != SHARE_REBUILDING && shared_file_is_partial(sf)) ?
				"partial " : "",
			(sf && sf != SHARE_REBUILDING) ? shared_file_name_nfc(sf) : "",
			pe->last_publish ? "re" : "",
			info->roots, plural(info->roots),
			after, pdht_strerror(code), late,
			compact_time(delta_time(tm_time(), pe->last_enqueued)),
			info->all_roots, plural(info->all_roots),
			info->presence * 100.0, retry,
			info->can_bg ? "can" : "no", info->path_len,
			accepted ? "OK" : "INCOMPLETE");

		shared_file_unref(&sf);
	}

	/*
	 * Update last publishing time and remember expiration time.
	 */

	if (PDHT_E_OK == code && info->roots > 0) {
		pe->last_publish = tm_time();
		if (pd != NULL) {
			pd->expiration =
				time_advance(pe->last_publish, DHT_VALUE_ALOC_EXPIRE);
			dbmw_write(db_pubdata, pe->sha1, pd, sizeof *pd);
		}
	}

	/*
	 * If entry was deemed popular, we're going to delay its republishing
	 * by a larger amount of time and any data we published already about
	 * it will surely expire.  Since this is our decision, we do not want
	 * to be told that republishing, if it occurs again, was done later than
	 * required.  Hence call publisher_hold() to mark that we don't care.
	 */

	if (PDHT_E_POPULAR == code)
		publisher_hold(pe, delay, "popular entry");
	else
		publisher_retry(pe, delay, accepted ? "accepted publish" : "published");

	pe->backgrounded = !accepted;

	return accepted;
}