/**
 * Retry publishing after some delay.
 *
 * @param pe        the entry to publish
 * @param delay     delay in seconds
 * @param msg       if non-NULL, logging message explaining the delay
 */
static void
publisher_retry(struct publisher_entry *pe, int delay, const char *msg)
{
    struct pubdata *pd;

    publisher_check(pe);
    g_assert(NULL == pe->publish_ev);
    g_assert(delay > 0);

    pd = get_pubdata(pe->sha1);
    if (pd != NULL) {
        pd->next_enqueue = time_advance(tm_time(), UNSIGNED(delay));
        dbmw_write(db_pubdata, pe->sha1, pd, sizeof *pd);
    }

    pe->publish_ev = cq_insert(publish_cq, delay * 1000, handle_entry, pe);
    pe->last_delayed = tm_time();

    if (GNET_PROPERTY(publisher_debug) > 3) {
        shared_file_t *sf = shared_file_by_sha1(pe->sha1);
        g_debug("PUBLISHER will retry SHA-1 %s %s\"%s\" in %s: %s",
            sha1_to_string(pe->sha1),
            (sf && sf != SHARE_REBUILDING && shared_file_is_partial(sf)) ?
                "partial " : "",
            (sf && sf != SHARE_REBUILDING) ? shared_file_name_nfc(sf) : "",
            compact_time(delay),
            msg != NULL ? msg : "<no reason>");
        shared_file_unref(&sf);
    }
}
/**
 * Add `comp' to the current completed count, and update the amount of
 * bytes transferred.  Note that `comp' can be zero.
 *
 * When `update_dtime' is TRUE, we update the "done time", otherwise we
 * change the "last request time".
 *
 * If the row does not exist (race condition: deleted since upload started),
 * recreate one.
 */
static void
upload_stats_file_add(
    const shared_file_t *sf, int comp, guint64 sent, gboolean update_dtime)
{
    const char *pathname = shared_file_path(sf);
    filesize_t size = shared_file_size(sf);
    struct ul_stats *s;
    const struct sha1 *sha1;

    g_assert(comp >= 0);

    sha1 = sha1_hash_available(sf) ? shared_file_sha1(sf) : NULL;

    /* find this file in the ul_stats_clist */
    s = upload_stats_find(sha1, pathname, size);

    /* increment the completed counter */
    if (NULL == s) {
        /* uh oh, row has since been deleted, add it: 1 attempt */
        upload_stats_add(pathname, size, shared_file_name_nfc(sf),
            1, comp, sent, tm_time(), tm_time(), sha1);
    } else {
        s->bytes_sent += sent;
        s->norm = 1.0 * s->bytes_sent / s->size;
        s->complete += comp;
        if (update_dtime)
            s->dtime = tm_time();
        else
            s->rtime = tm_time();
        gcu_upload_stats_gui_update(s);
    }

    dirty = TRUE;       /* Request asynchronous save of stats */
}
/**
 * Called when an upload starts.
 */
void
upload_stats_file_begin(const shared_file_t *sf)
{
    struct ul_stats *s;
    const char *pathname;
    filesize_t size;
    const struct sha1 *sha1;

    g_return_if_fail(sf);
    pathname = shared_file_path(sf);
    size = shared_file_size(sf);
    sha1 = sha1_hash_available(sf) ? shared_file_sha1(sf) : NULL;

    /* find this file in the ul_stats_clist */
    s = upload_stats_find(sha1, pathname, size);

    /* increment the attempted counter */
    if (NULL == s) {
        upload_stats_add(pathname, size, shared_file_name_nfc(sf),
            1, 0, 0, tm_time(), 0, sha1);
    } else {
        s->attempts++;
        s->rtime = tm_time();
        gcu_upload_stats_gui_update(s);
    }

    dirty = TRUE;       /* Request asynchronous save of stats */
}
/**
 * Make sure the filename associated to a SHA1 is given the name of
 * the shared file and no longer bears the name of the partial file.
 * This can happen when the partial file is seeded then the file is
 * renamed and shared.
 */
void
upload_stats_enforce_local_filename(const shared_file_t *sf)
{
    struct ul_stats *s;
    const struct sha1 *sha1;
    const char *name;

    if (!upload_stats_by_sha1)
        return;     /* Nothing known by SHA1 yet */

    sha1 = sha1_hash_available(sf) ? shared_file_sha1(sf) : NULL;
    if (!sha1)
        return;     /* File's SHA1 not known yet, nothing to do here */

    s = g_hash_table_lookup(upload_stats_by_sha1, sha1);
    if (NULL == s)
        return;     /* SHA1 not in stats, nothing to do */

    name = shared_file_name_nfc(sf);
    if (name == s->filename)    /* Both are string atoms */
        return;                 /* Everything is fine */

    /*
     * We need to update the filename to match the shared file.
     * Remove the entry from the hash list before mutating the atoms
     * it may be keyed on, then re-insert it.
     */
    hash_list_remove(upload_stats_list, s);
    atom_str_change(&s->pathname, shared_file_path(sf));
    atom_str_change(&s->filename, name);
    hash_list_append(upload_stats_list, s);
    gcu_upload_stats_gui_update_name(s);
}
/**
 * Delete pubdata from database.
 */
static void
delete_pubdata(const sha1_t *sha1)
{
    dbmw_delete(db_pubdata, sha1);

    if (GNET_PROPERTY(publisher_debug) > 2) {
        shared_file_t *sf = shared_file_by_sha1(sha1);
        g_debug("PUBLISHER SHA-1 %s %s\"%s\" reclaimed",
            sha1_to_string(sha1),
            (sf && sf != SHARE_REBUILDING && shared_file_is_partial(sf)) ?
                "partial " : "",
            (sf && sf != SHARE_REBUILDING) ? shared_file_name_nfc(sf) : "");
        shared_file_unref(&sf);
    }
}
/**
 * Check whether the file is flagged as spam, either by its SHA1 or by
 * its name and size.
 *
 * @return TRUE if the file is listed as spam.
 */
static bool
huge_spam_check(shared_file_t *sf, const struct sha1 *sha1)
{
    if (NULL != sha1 && spam_sha1_check(sha1)) {
        g_warning("file \"%s\" is listed as spam (SHA1)",
            shared_file_path(sf));
        return TRUE;
    }
    if (
        spam_check_filename_size(shared_file_name_nfc(sf),
            shared_file_size(sf))
    ) {
        g_warning("file \"%s\" is listed as spam (Name)",
            shared_file_path(sf));
        return TRUE;
    }
    return FALSE;
}
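/*
 * Illustrative sketch, not part of the original sources: how a library
 * scanning loop could use huge_spam_check() to skip spam entries.  The
 * library_add() helper is a hypothetical placeholder for whatever routine
 * actually records the shared file.
 */
static void
library_scan_one_sketch(shared_file_t *sf)
{
    const struct sha1 *sha1 =
        sha1_hash_available(sf) ? shared_file_sha1(sf) : NULL;

    if (huge_spam_check(sf, sha1))
        return;             /* Spam entries are never shared */

    library_add(sf);        /* hypothetical */
}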
/**
 * Writes the browse host data of the context ``ctx'' to the buffer
 * ``dest''.  This must be called multiple times to retrieve the complete
 * data until zero is returned, i.e., the end of file is reached.
 *
 * This routine deals with HTML data generation.
 *
 * @param ctx   an initialized browse host context.
 * @param dest  the destination buffer.
 * @param size  the amount of bytes ``dest'' can hold.
 *
 * @return -1 on failure, zero at the end-of-file condition or if size
 *         was zero.  On success, the amount of bytes copied to ``dest''
 *         is returned.
 */
static ssize_t
browse_host_read_html(struct special_upload *ctx,
    void *const dest, size_t size)
{
    static const char header[] =
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\r\n"
        "<html>\r\n"
        "<head>\r\n"
        "<title>Browse Host</title>\r\n"
        "</head>\r\n"
        "<body>\r\n";
    static const char trailer[] = "</ul>\r\n</body>\r\n</html>\r\n";
    struct browse_host_upload *bh = cast_to_browse_host_upload(ctx);
    char *p = dest;

    g_assert(NULL != bh);
    g_assert(NULL != dest);
    g_assert(size <= INT_MAX);
    g_assert(UNSIGNED(bh->state) < NUM_BH_STATES);
    g_assert(bh->b_size <= INT_MAX);
    g_assert(bh->b_offset <= bh->b_size);

    do {
        switch (bh->state) {
        case BH_STATE_HEADER:
            if (!bh->b_data) {
                bh->b_data = header;
                bh->b_size = CONST_STRLEN(header);
            }
            p += browse_host_read_data(bh, p, &size);
            if (bh->b_size == bh->b_offset)
                browse_host_next_state(bh, BH_STATE_LIBRARY_INFO);
            break;

        case BH_STATE_LIBRARY_INFO:
            if (!bh->b_data) {
                bh->w_buf_size = w_concat_strings(&bh->w_buf,
                    "<h1>", product_get_name(), "</h1>\r\n"
                    "<h3>", version_get_string(),
                    " sharing ",
                    uint64_to_string(shared_files_scanned()),
                    " file",
                    shared_files_scanned() == 1 ? "" : "s",
                    " ",
                    short_kb_size(shared_kbytes_scanned(),
                        GNET_PROPERTY(display_metric_units)),
                    " total</h3>\r\n"
                    "<ul>\r\n",
                    (void *) 0);
                bh->b_data = bh->w_buf;
                bh->b_size = bh->w_buf_size - 1;    /* minus trailing NUL */
                bh->b_offset = 0;
            }
            p += browse_host_read_data(bh, p, &size);
            if (bh->b_size == bh->b_offset)
                browse_host_next_state(bh, BH_STATE_FILES);
            break;

        case BH_STATE_TRAILER:
            if (!bh->b_data) {
                bh->b_data = trailer;
                bh->b_size = CONST_STRLEN(trailer);
            }
            p += browse_host_read_data(bh, p, &size);
            if (bh->b_size == bh->b_offset)
                browse_host_next_state(bh, BH_STATE_EOF);
            break;

        case BH_STATE_FILES:
            if (bh->b_data && bh->b_size == bh->b_offset) {
                g_assert(bh->w_buf == bh->b_data);
                wfree(bh->w_buf, bh->w_buf_size);
                bh->w_buf = NULL;
                bh->w_buf_size = 0;
                bh->b_data = NULL;
            }

            if (!bh->b_data) {
                const shared_file_t *sf;

                bh->file_index++;
                sf = shared_file_sorted(bh->file_index);
                if (!sf) {
                    if (bh->file_index > shared_files_scanned())
                        browse_host_next_state(bh, BH_STATE_TRAILER);
                    /* Skip holes in the file_index table */
                } else if (SHARE_REBUILDING == sf) {
                    browse_host_next_state(bh, BH_STATE_REBUILDING);
                } else {
                    const char * const name_nfc = shared_file_name_nfc(sf);
                    const filesize_t file_size = shared_file_size(sf);
                    size_t html_size;
                    char *html_name;

                    {
                        const char *dir;
                        char *name;

                        dir = shared_file_relative_path(sf);
                        if (dir) {
                            name = h_strconcat(dir, "/", name_nfc,
                                (void *) 0);
                        } else {
                            name = deconstify_char(name_nfc);
                        }

                        html_size = 1 + html_escape(name, NULL, 0);
                        html_name = walloc(html_size);
                        html_escape(name, html_name, html_size);

                        if (name != name_nfc) {
                            HFREE_NULL(name);
                        }
                    }

                    if (sha1_hash_available(sf)) {
                        const struct sha1 *sha1 = shared_file_sha1(sf);

                        bh->w_buf_size = w_concat_strings(&bh->w_buf,
                            "<li><a href=\"/uri-res/N2R?urn:sha1:",
                            sha1_base32(sha1),
                            "\">", html_name, "</a> [",
                            short_html_size(file_size,
                                GNET_PROPERTY(display_metric_units)),
                            "]</li>\r\n",
                            (void *) 0);
                    } else {
                        char *escaped;

                        escaped = url_escape(name_nfc);
                        bh->w_buf_size = w_concat_strings(&bh->w_buf,
                            "<li><a href=\"/get/",
                            uint32_to_string(shared_file_index(sf)),
                            "/", escaped, "\">", html_name, "</a>"
                            " [",
                            short_html_size(file_size,
                                GNET_PROPERTY(display_metric_units)),
                            "]</li>\r\n",
                            (void *) 0);

                        if (escaped != name_nfc) {
                            HFREE_NULL(escaped);
                        }
                    }
                    wfree(html_name, html_size);

                    bh->b_data = bh->w_buf;
                    bh->b_size = bh->w_buf_size - 1; /* minus trailing NUL */
                    bh->b_offset = 0;
                }
            }

            if (bh->b_data)
                p += browse_host_read_data(bh, p, &size);
            break;

        case BH_STATE_REBUILDING:
            if (!bh->b_data) {
                static const char msg[] =
                    "<li>"
                    "<b>"
                    "The library is currently being rebuilt.  Please "
                    "try again in a moment."
                    "</b>"
                    "</li>";

                bh->b_data = msg;
                bh->b_size = CONST_STRLEN(msg);
            }
            p += browse_host_read_data(bh, p, &size);
            if (bh->b_size == bh->b_offset)
                browse_host_next_state(bh, BH_STATE_TRAILER);
            break;

        case BH_STATE_EOF:
            return p - cast_to_char_ptr(dest);

        case NUM_BH_STATES:
            g_assert_not_reached();
        }
    } while (size > 0);

    return p - cast_to_char_ptr(dest);
}
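/*
 * Illustrative sketch, not part of the original sources: how a caller
 * could drive browse_host_read_html() until end-of-file, per the contract
 * documented above (repeat until zero, -1 means failure).  The chunk size
 * and the consume_data() sink are hypothetical assumptions made for the
 * example.
 */
static void
browse_host_html_drain_sketch(struct special_upload *ctx)
{
    char buf[4096];     /* Arbitrary chunk size, an assumption */
    ssize_t r;

    /* Keep reading until zero (EOF) or -1 (failure) is returned */
    while ((r = browse_host_read_html(ctx, buf, sizeof buf)) > 0) {
        consume_data(buf, (size_t) r);      /* hypothetical sink */
    }

    if (r < 0)
        g_warning("browse host HTML generation failed");
}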
/**
 * Add file to the current query hit.
 *
 * @return TRUE if we kept the file, FALSE if we did not include it in the hit.
 */
static bool
g2_build_qh2_add(struct g2_qh2_builder *ctx, const shared_file_t *sf)
{
    const sha1_t *sha1;
    g2_tree_t *h, *c;

    shared_file_check(sf);

    /*
     * Make sure the file is still in the library.
     */
    if (0 == shared_file_index(sf))
        return FALSE;

    /*
     * On G2, the H/URN child is required, meaning we need the SHA1 at least.
     */
    if (!sha1_hash_available(sf))
        return FALSE;

    /*
     * Do not send duplicates, as determined by the SHA1 of the resource.
     *
     * A user may share several files with different names but the same SHA1,
     * and if all of them are hits, we only want to send one instance.
     *
     * When generating hits for host-browsing, we do not care about duplicates
     * and ctx->hs is NULL then.
     */
    sha1 = shared_file_sha1(sf);        /* This is an atom */

    if (ctx->hs != NULL) {
        if (hset_contains(ctx->hs, sha1))
            return FALSE;
        hset_insert(ctx->hs, sha1);
    }

    /*
     * Create the "H" child and attach it to the current tree.
     */
    if (NULL == ctx->t)
        g2_build_qh2_start(ctx);

    h = g2_tree_alloc_empty("H");
    g2_tree_add_child(ctx->t, h);

    /*
     * URN -- Universal Resource Name
     *
     * If there is a known TTH, then we can generate a bitprint, otherwise
     * we just convey the SHA1.
     */
    {
        const tth_t * const tth = shared_file_tth(sf);
        char payload[SHA1_RAW_SIZE + TTH_RAW_SIZE + sizeof G2_URN_BITPRINT];
        char *p = payload;

        if (NULL == tth) {
            p = mempcpy(p, G2_URN_SHA1, sizeof G2_URN_SHA1);
            p += clamp_memcpy(p, sizeof payload - ptr_diff(p, payload),
                sha1, SHA1_RAW_SIZE);
        } else {
            p = mempcpy(p, G2_URN_BITPRINT, sizeof G2_URN_BITPRINT);
            p += clamp_memcpy(p, sizeof payload - ptr_diff(p, payload),
                sha1, SHA1_RAW_SIZE);
            p += clamp_memcpy(p, sizeof payload - ptr_diff(p, payload),
                tth, TTH_RAW_SIZE);
        }

        g_assert(ptr_diff(p, payload) <= sizeof payload);

        c = g2_tree_alloc_copy("URN", payload, ptr_diff(p, payload));
        g2_tree_add_child(h, c);
    }

    /*
     * URL -- empty to indicate that we share the file via uri-res.
     */
    if (ctx->flags & QHIT_F_G2_URL) {
        uint known;
        uint16 csc;

        c = g2_tree_alloc_empty("URL");
        g2_tree_add_child(h, c);

        /*
         * CSC -- if we know alternate sources, indicate how many in "CSC".
         *
         * This child is only emitted when they requested "URL".
         */
        known = dmesh_count(sha1);
        csc = MIN(known, MAX_INT_VAL(uint16));

        if (csc != 0) {
            char payload[2];

            poke_le16(payload, csc);
            c = g2_tree_alloc_copy("CSC", payload, sizeof payload);
            g2_tree_add_child(h, c);
        }

        /*
         * PART -- if we only have a partial file, indicate how much we have.
         *
         * This child is only emitted when they requested "URL".
         */
        if (shared_file_is_partial(sf) && !shared_file_is_finished(sf)) {
            filesize_t available = shared_file_available(sf);
            char payload[8];  /* If we have to encode file size as 64-bit */
            uint32 av32;
            time_t mtime = shared_file_modification_time(sf);

            c = g2_tree_alloc_empty("PART");
            g2_tree_add_child(h, c);

            av32 = available;
            if (av32 == available) {
                /* Fits within a 32-bit quantity */
                poke_le32(payload, av32);
                g2_tree_set_payload(c, payload, sizeof av32, TRUE);
            } else {
                /* Encode as a 64-bit quantity then */
                poke_le64(payload, available);
                g2_tree_set_payload(c, payload, sizeof payload, TRUE);
            }

            /*
             * GTKG extension: encode the last modification time of the
             * partial file in an "MT" child.  This lets the other party
             * determine whether the host is still able to actively complete
             * the file.
             */
            poke_le32(payload, (uint32) mtime);
            g2_tree_add_child(c,
                g2_tree_alloc_copy("MT", payload, sizeof(uint32)));
        }

        /*
         * CT -- creation time of the resource (GTKG extension).
         */
        {
            time_t create_time = shared_file_creation_time(sf);

            if ((time_t) -1 != create_time) {
                char payload[8];
                int n;

                create_time = MAX(0, create_time);
                n = vlint_encode(create_time, payload);
                g2_tree_add_child(h,
                    g2_tree_alloc_copy("CT", payload, n)); /* No trailing 0s */
            }
        }
    }

    /*
     * DN -- distinguished name.
     *
     * Note that the presence of DN also governs the presence of SZ if the
     * file length does not fit a 32-bit unsigned quantity.
     */
    if (ctx->flags & QHIT_F_G2_DN) {
        char payload[8];    /* If we have to encode file size as 64-bit */
        uint32 fs32;
        filesize_t fs = shared_file_size(sf);
        const char *name;
        const char *rp;

        c = g2_tree_alloc_empty("DN");

        fs32 = fs;
        if (fs32 == fs) {
            /* Fits within a 32-bit quantity */
            poke_le32(payload, fs32);
            g2_tree_set_payload(c, payload, sizeof fs32, TRUE);
        } else {
            /* Does not fit a 32-bit quantity, emit a SZ child */
            poke_le64(payload, fs);
            g2_tree_add_child(h,
                g2_tree_alloc_copy("SZ", payload, sizeof payload));
        }

        name = shared_file_name_nfc(sf);
        g2_tree_append_payload(c, name, shared_file_name_nfc_len(sf));
        g2_tree_add_child(h, c);

        /*
         * GTKG extension: if there is a file path, expose it as a "P" child
         * under the DN node.
         */
        rp = shared_file_relative_path(sf);
        if (rp != NULL) {
            g2_tree_add_child(c, g2_tree_alloc_copy("P", rp, strlen(rp)));
        }
    }

    /*
     * GTKG extension: if they requested alt-locs in the /Q2/I with "A", then
     * send them some known alt-locs in an "ALT" child.
     *
     * Note that these alt-locs can be for Gnutella hosts: since both Gnutella
     * and G2 share a common HTTP-based file transfer mechanism with compatible
     * extra headers, there is no need to handle them separately.
     */
    if (ctx->flags & QHIT_F_G2_ALT) {
        gnet_host_t hvec[G2_BUILD_QH2_MAX_ALT];
        int hcnt = 0;

        hcnt = dmesh_fill_alternate(sha1, hvec, N_ITEMS(hvec));

        if (hcnt > 0) {
            int i;

            c = g2_tree_alloc_empty("ALT");

            for (i = 0; i < hcnt; i++) {
                host_addr_t addr;
                uint16 port;

                addr = gnet_host_get_addr(&hvec[i]);
                port = gnet_host_get_port(&hvec[i]);

                if (host_addr_is_ipv4(addr)) {
                    char payload[6];

                    host_ip_port_poke(payload, addr, port, NULL);
                    g2_tree_append_payload(c, payload, sizeof payload);
                }
            }

            /*
             * If the payload is still empty, then drop the "ALT" child.
             * Otherwise, attach it to the "H" node.
             */
            if (NULL == g2_tree_node_payload(c, NULL)) {
                g2_tree_free_null(&c);
            } else {
                g2_tree_add_child(h, c);
            }
        }
    }

    /*
     * Update the size of the query hit we're generating.
     */
    ctx->current_size += g2_frame_serialize(h, NULL, 0);

    return TRUE;
}
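/*
 * Minimal sketch, not in the original sources, of the 32/64-bit encoding
 * pattern used above for the "DN" and "PART" payloads: emit a little-endian
 * 32-bit value when the filesize fits, falling back to 64 bits otherwise.
 * The helper name is an assumption made for illustration only.
 *
 * @return the number of payload bytes written to `dest', which must be
 * able to hold 8 bytes.
 */
static size_t
g2_poke_filesize_sketch(char *dest, filesize_t fs)
{
    uint32 fs32 = fs;

    if (fs32 == fs) {           /* Fits within a 32-bit quantity */
        poke_le32(dest, fs32);
        return sizeof fs32;
    }
    poke_le64(dest, fs);        /* Requires the full 64 bits */
    return sizeof(uint64);
}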
/**
 * Do an actual search.
 *
 * @param table         table containing organized entries to search from
 * @param search_term   the query string
 * @param callback      routine to invoke for each match
 * @param ctx           user-supplied data to pass on to callback
 * @param max_res       maximum amount of results to return
 * @param qhv           query hash vector built from query string, for routing
 *
 * @return number of hits we produced
 */
G_GNUC_HOT int
st_search(
    search_table_t *table,
    const char *search_term,
    st_search_callback callback,
    gpointer ctx,
    int max_res,
    query_hashvec_t *qhv)
{
    char *search;
    int key, nres = 0;
    guint i, len;
    struct st_bin *best_bin = NULL;
    int best_bin_size = INT_MAX;
    word_vec_t *wovec;
    guint wocnt;
    cpattern_t **pattern;
    struct st_entry **vals;
    guint vcnt;
    int scanned = 0;            /* measure search mask efficiency */
    guint32 search_mask;
    size_t minlen;
    guint random_offset;        /* Randomizer for search returns */

    search = UNICODE_CANONIZE(search_term);

    if (GNET_PROPERTY(query_debug) > 4 && 0 != strcmp(search, search_term)) {
        char *safe_search = hex_escape(search, FALSE);
        char *safe_search_term = hex_escape(search_term, FALSE);
        g_debug("original search term: \"%s\"", safe_search_term);
        g_debug("canonical search term: \"%s\"", safe_search);
        if (safe_search != search)
            HFREE_NULL(safe_search);
        if (safe_search_term != search_term)
            HFREE_NULL(safe_search_term);
    }

    len = strlen(search);

    /*
     * Find smallest bin
     */
    if (len >= 2) {
        for (i = 0; i < len - 1; i++) {
            struct st_bin *bin;
            if (is_ascii_space(search[i]) || is_ascii_space(search[i+1]))
                continue;
            key = st_key(table, search + i);
            if ((bin = table->bins[key]) == NULL) {
                best_bin = NULL;
                break;
            }
            if (bin->nvals < best_bin_size) {
                best_bin = bin;
                best_bin_size = bin->nvals;
            }
        }

        if (GNET_PROPERTY(matching_debug) > 4)
            g_debug("MATCH st_search(): str=\"%s\", len=%d, best_bin_size=%d",
                lazy_safe_search(search_term), len, best_bin_size);
    }

    /*
     * If the best_bin is NULL, we did not find a matching bin, and we're
     * sure we won't be able to find the search string.
     *
     * Note that on search strings like "r e m ", we always have a letter
     * followed by spaces, so we won't search that.
     *      --RAM, 06/10/2001
     */
    if (best_bin == NULL) {
        /*
         * If we have a `qhv', we need to compute the word vector anyway,
         * for query routing...
         */
        if (qhv == NULL)
            goto finish;
    }

    /*
     * Prepare matching patterns
     */
    wocnt = word_vec_make(search, &wovec);

    /*
     * Compute the query hashing information for query routing, if needed.
     */
    if (qhv != NULL) {
        for (i = 0; i < wocnt; i++) {
            if (wovec[i].len >= QRP_MIN_WORD_LENGTH)
                qhvec_add(qhv, wovec[i].word, QUERY_H_WORD);
        }
    }

    if (wocnt == 0 || best_bin == NULL) {
        if (wocnt > 0)
            word_vec_free(wovec, wocnt);
        goto finish;
    }

    g_assert(best_bin_size > 0);   /* Allocated bin, it must hold something */

    pattern = walloc0(wocnt * sizeof *pattern);

    /*
     * Prepare matching optimization, an idea from Mike Green.
     *
     * At library building time, we computed a mask hash, made from the
     * lowercased file name, using one bit per different letter, roughly
     * (see mask_hash() for the exact algorithm).
     *
     * We're now going to compute the same mask on the query, and compare
     * it bitwise with the mask for each file.  If the file does not hold
     * at least all the chars present in the query, it's no use applying
     * the pattern matching algorithm, it won't match at all.
     *      --RAM, 01/10/2001
     */
    search_mask = mask_hash(search);

    /*
     * Prepare second matching optimization: since all the words in the
     * query must be present in the name, we can compute the minimum length
     * the searched file name must have.  We add one character after each
     * word but the last, to account for space between words.
     *      --RAM, 11/07/2002
     */
    for (minlen = 0, i = 0; i < wocnt; i++)
        minlen += wovec[i].len + 1;
    minlen--;
    g_assert(minlen <= INT_MAX);

    /*
     * Search through the smallest bin
     */
    vcnt = best_bin->nvals;
    vals = best_bin->vals;
    random_offset = random_u32() % vcnt;

    nres = 0;
    for (i = 0; i < vcnt; i++) {
        const struct st_entry *e;
        shared_file_t *sf;
        size_t canonic_len;

        /*
         * As we only return a limited count of results, pick a random
         * offset, so that repeated searches will match different items
         * instead of always the first - with some probability.
         */
        e = vals[(i + random_offset) % vcnt];

        if ((e->mask & search_mask) != search_mask)
            continue;       /* Can't match */

        sf = e->sf;
        canonic_len = shared_file_name_canonic_len(sf);
        if (canonic_len < minlen)
            continue;       /* Can't match */

        scanned++;

        if (entry_match(e->string, canonic_len, pattern, wovec, wocnt)) {
            if (GNET_PROPERTY(matching_debug) > 4) {
                g_debug("MATCH \"%s\" matches %s",
                    search, shared_file_name_nfc(sf));
            }

            if ((*callback)(ctx, sf)) {
                nres++;
                if (nres >= max_res)
                    break;
            }
        }
    }

    if (GNET_PROPERTY(matching_debug) > 3)
        g_debug("MATCH st_search(): scanned %d entr%s from the %d in bin, "
            "got %d match%s",
            scanned, 1 == scanned ? "y" : "ies",
            best_bin_size, nres, 1 == nres ? "" : "es");

    for (i = 0; i < wocnt; i++)
        if (pattern[i])             /* Lazily compiled by entry_match() */
            pattern_free(pattern[i]);

    wfree(pattern, wocnt * sizeof *pattern);
    word_vec_free(wovec, wocnt);

finish:
    if (search != search_term) {
        HFREE_NULL(search);
    }
    return nres;
}
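/*
 * Simplified sketch of the bitmask pre-filter described in st_search()
 * above: fold each lowercased character of a name into one of 32 bit
 * positions.  If the query mask contains a bit the file mask lacks, the
 * file cannot contain all the query's characters and pattern matching can
 * be skipped.  This is an illustrative assumption, not the production
 * mask_hash() algorithm.
 */
static guint32
mask_hash_sketch(const char *s)
{
    guint32 mask = 0;
    int c;

    while ('\0' != (c = (guchar) *s++)) {
        if (is_ascii_space(c))
            continue;
        mask |= 1U << (ascii_tolower(c) & 31);  /* One bit per letter class */
    }
    return mask;
}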
/**
 * Handle a SHA-1 entry, publishing its alt-loc to the DHT if still shared.
 */
static void
publisher_handle(struct publisher_entry *pe)
{
    shared_file_t *sf;
    bool is_partial = FALSE;
    int alt_locs;
    time_delta_t min_uptime;
    uint32 avg_uptime;

    publisher_check(pe);
    g_assert(NULL == pe->publish_ev);

    sf = shared_file_by_sha1(pe->sha1);

    /*
     * Remove SHA1 if no longer shared.
     */
    if (NULL == sf) {
        fileinfo_t *fi = file_info_by_sha1(pe->sha1);

        /*
         * If a partial file has less than the minimum amount of data for
         * PFSP, shared_file_by_sha1() will return NULL, hence we need to
         * explicitly check for existence through file_info_by_sha1() and
         * that the file still exists.
         */
        if (fi != NULL && file_exists(fi->pathname)) {
            /* Waiting for more data to be able to share, or PFSP re-enabled */
            publisher_retry(pe, PUBLISH_BUSY, "partial file missing");
            return;
        }

        if (GNET_PROPERTY(publisher_debug)) {
            g_debug("PUBLISHER SHA-1 %s is no longer shared",
                sha1_to_string(pe->sha1));
        }
        publisher_entry_free(pe, TRUE);
        return;
    }

    /*
     * Wait when rebuilding the library.
     */
    if (SHARE_REBUILDING == sf) {
        publisher_retry(pe, PUBLISH_BUSY, "library being rebuilt");
        return;
    }

    is_partial = shared_file_is_partial(sf);

    /*
     * If the SHA1 is not available, wait.
     */
    if (
        !is_partial &&
        (!sha1_hash_available(sf) || !sha1_hash_is_uptodate(sf))
    ) {
        publisher_retry(pe, PUBLISH_BUSY, "SHA-1 of file unknown yet");
        goto done;
    }

    /*
     * Look whether this node has a sufficient average uptime.
     *
     * We're stricter to publish partial files because we want to favor
     * publishing of full files in the DHT, and the benefits of publishing
     * partial entries come only if we're up for a long enough time.
     *
     * Since publishing imposes lookup traffic in the DHT, it is not efficient
     * to have transient nodes publish file sharing information because this
     * will likely never be useful.
     */
    min_uptime = PUBLISH_TRANSIENT;
    if (is_partial)
        min_uptime *= 2;

    avg_uptime = get_average_servent_uptime(tm_time());

    if (avg_uptime < UNSIGNED(min_uptime)) {
        time_delta_t delay = min_uptime - avg_uptime;

        delay = MAX(delay, PUBLISH_BUSY);
        publisher_retry(pe, delay, "minimum average uptime not reached yet");
        goto done;
    }

    /*
     * If we are dealing with a file for which we know enough alternate
     * locations, assume it is popular and do not publish it yet.
     *
     * We do not publish the SHA-1 of a partial file for which we know
     * of at least two alternate locations because the purpose of us
     * publishing these partial SHA-1s is to attract other PFSP-aware hosts
     * and recreate a mesh.
     */
    alt_locs = dmesh_count(pe->sha1);
    is_partial = is_partial && !shared_file_is_finished(sf);

    if (alt_locs > (is_partial ? PUBLISH_PARTIAL_MAX : PUBLISH_DMESH_MAX)) {
        if (GNET_PROPERTY(publisher_debug)) {
            g_debug("PUBLISHER SHA-1 %s %s\"%s\" has %d download mesh "
                "entr%s, skipped",
                sha1_to_string(pe->sha1),
                is_partial ? "partial " : "",
                shared_file_name_nfc(sf),
                alt_locs, plural_y(alt_locs));
        }
        publisher_hold(pe, PUBLISH_POPULAR, "popular file");
        goto done;
    }

    /*
     * If the DHT is not enabled, postpone processing.
     */
    if (!dht_enabled()) {
        publisher_hold(pe, PUBLISH_BUSY, "DHT disabled");
        goto done;
    }

    /*
     * If this is a partial file for which we have less than the minimum
     * for PFSP sharing, or if PFSP has been disabled, skip it.
     */
    if (shared_file_is_partial(sf)) {
        fileinfo_t *fi = shared_file_fileinfo(sf);

        if (
            !file_info_partial_shareable(fi) ||
            fi->done < GNET_PROPERTY(pfsp_minimum_filesize)
        ) {
            publisher_hold(pe, PUBLISH_BUSY, "PFSP minima not reached");
            goto done;
        }
    }

    /*
     * Check whether it is time to process the entry, in case we're
     * restarting quickly after a shutdown.
     */
    if (0 == pe->last_publish) {
        struct pubdata *pd = get_pubdata(pe->sha1);

        if (pd != NULL) {
            time_t now = tm_time();
            time_delta_t enqueue = delta_time(pd->next_enqueue, now);
            time_delta_t expire = delta_time(pd->expiration, now);

            if (enqueue > 0 && (0 == pd->expiration || expire > 0)) {
                int delay = MIN(enqueue, PUBLISH_POPULAR);

                if (pd->expiration != 0)
                    delay = MIN(delay, expire);

                if (GNET_PROPERTY(publisher_debug) > 1) {
                    g_debug("PUBLISHER SHA-1 %s delayed by %s",
                        sha1_to_string(pe->sha1), compact_time(enqueue));
                }

                publisher_retry(pe, delay, "first-time delay");
                goto done;
            }
        }
    }

    /*
     * Cancel possible remaining backgrounded publishing.
     */
    if (pe->backgrounded) {
        pdht_cancel_file(pe->sha1, FALSE);
        pe->backgrounded = FALSE;
    }

    /*
     * OK, we can publish this alternate location.
     */
    if (pe->last_publish) {
        if (GNET_PROPERTY(publisher_debug) > 2) {
            g_debug("PUBLISHER SHA-1 %s re-enqueued %d secs "
                "after last publish",
                sha1_to_string(pe->sha1),
                (int) delta_time(tm_time(), pe->last_publish));
        }
    }

    pe->last_enqueued = tm_time();
    pdht_publish_file(sf, publisher_done, pe);

    /* FALL THROUGH */

done:
    shared_file_unref(&sf);
}
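/*
 * Sketch extracted for illustration only (the logic is inlined in
 * publisher_handle() above): compute the retry delay imposed on transient
 * nodes.  Partial files require twice the minimum average uptime, and when
 * the requirement is not met the delay is the uptime deficit, floored at
 * PUBLISH_BUSY.  A zero return means no gating applies.  The function name
 * is an assumption; it is not part of the original sources.
 */
static time_delta_t
publisher_uptime_delay_sketch(bool is_partial, uint32 avg_uptime)
{
    time_delta_t min_uptime = PUBLISH_TRANSIENT;
    time_delta_t delay;

    if (is_partial)
        min_uptime *= 2;            /* Stricter for partial files */

    if (avg_uptime >= UNSIGNED(min_uptime))
        return 0;                   /* Uptime requirement met */

    delay = min_uptime - avg_uptime;
    return MAX(delay, PUBLISH_BUSY);
}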
/**
 * Publishing callback invoked when asynchronous publication is completed,
 * or ended with an error.
 *
 * @return TRUE if we accept the publishing, FALSE otherwise to get the
 * publishing layer to continue attempts to failed STORE roots and report
 * on progress using the same callback.
 */
static bool
publisher_done(void *arg, pdht_error_t code, const pdht_info_t *info)
{
    struct publisher_entry *pe = arg;
    struct pubdata *pd;
    int delay = PUBLISH_BUSY;
    bool expired = FALSE;
    bool accepted = TRUE;

    publisher_check(pe);

    pd = get_pubdata(pe->sha1);

    /*
     * Update stats on republishing before value expiration.
     */
    if (PDHT_E_OK == code) {
        if (pe->last_publish && info->roots > 0) {
            if (pd != NULL) {
                if (pd->expiration &&
                        delta_time(tm_time(), pd->expiration) > 0)
                    expired = TRUE;
            } else {
                time_delta_t elapsed =
                    delta_time(tm_time(), pe->last_publish);
                if (elapsed > DHT_VALUE_ALOC_EXPIRE)
                    expired = TRUE;
            }

            if (expired)
                gnet_stats_inc_general(GNR_DHT_REPUBLISHED_LATE);
        }
    }

    /*
     * Compute retry delay.
     */
    switch (code) {
    case PDHT_E_OK:
        /*
         * If we were not able to publish to KDA_K nodes, decrease the
         * delay before republishing.  We use a non-linear decimation of
         * the republish time, as a function of the number of nodes to
         * which we could publish.
         */
        delay = publisher_delay(info, DHT_VALUE_ALOC_EXPIRE);
        accepted = publisher_is_acceptable(info);
        break;
    case PDHT_E_POPULAR:
        /*
         * Compute the suitable delay: the first time, we use
         * PUBLISH_POPULAR, and then we double each time until we reach
         * PUBLISH_POPULAR_MAX.
         *
         * If we already tried to publish the entry, pe->last_delayed will
         * be non-zero.
         */
        if (0 != pe->last_delayed) {
            time_delta_t elapsed = delta_time(tm_time(), pe->last_delayed);

            if (elapsed < PUBLISH_POPULAR) {
                delay = PUBLISH_POPULAR;
            } else if (elapsed >= PUBLISH_POPULAR_MAX / 2) {
                delay = PUBLISH_POPULAR_MAX;
            } else {
                delay = elapsed * 2;
            }
        } else {
            delay = PUBLISH_POPULAR;
        }
        break;
    case PDHT_E_NOT_SHARED:
    case PDHT_E_LOOKUP_EXPIRED:
    case PDHT_E_LOOKUP:
    case PDHT_E_UDP_CLOGGED:
    case PDHT_E_PUBLISH_EXPIRED:
    case PDHT_E_PUBLISH_ERROR:
    case PDHT_E_SHA1:
    case PDHT_E_PENDING:
    case PDHT_E_CANCELLED:
    case PDHT_E_GGEP:
    case PDHT_E_NONE:
        delay = PUBLISH_BUSY;
        break;
    case PDHT_E_MAX:
        g_assert_not_reached();
    }

    /*
     * For a backgrounded entry publishing, we need to adjust the computed
     * delay by the time that has elapsed.
     */
    g_assert(!pe->backgrounded == !(pe->publish_ev != NULL));

    if (pe->backgrounded) {
        time_delta_t elapsed = delta_time(tm_time(), pe->last_delayed);

        g_assert(pe->last_delayed > 0);
        cq_cancel(&pe->publish_ev);
        if (delay > elapsed) {
            delay -= elapsed;
        } else {
            delay = 1;
        }
    }

    /*
     * Logging.
     */
    if (GNET_PROPERTY(publisher_debug) > 1) {
        shared_file_t *sf = shared_file_by_sha1(pe->sha1);
        char retry[80];
        char after[80];
        const char *late = "";

        after[0] = '\0';
        if (pe->last_publish) {
            time_delta_t elapsed = delta_time(tm_time(), pe->last_publish);

            str_bprintf(after, sizeof after,
                " after %s", compact_time(elapsed));

            if (pd != NULL) {
                if (expired)
                    late = "late, ";
            } else {
                late = "no data, ";
            }
        }

        str_bprintf(retry, sizeof retry, "%s", compact_time(delay));

        g_debug("PUBLISHER SHA-1 %s %s%s\"%s\" %spublished to %u node%s%s: %s"
            " (%stook %s, total %u node%s, proba %.3f%%, retry in %s,"
            " %s bg, path %u) [%s]",
            sha1_to_string(pe->sha1),
            pe->backgrounded ? "[bg] " : "",
            (sf && sf != SHARE_REBUILDING && shared_file_is_partial(sf)) ?
                "partial " : "",
            (sf && sf != SHARE_REBUILDING) ? shared_file_name_nfc(sf) : "",
            pe->last_publish ? "re" : "",
            info->roots, plural(info->roots),
            after, pdht_strerror(code), late,
            compact_time(delta_time(tm_time(), pe->last_enqueued)),
            info->all_roots, plural(info->all_roots),
            info->presence * 100.0,
            retry,
            info->can_bg ? "can" : "no",
            info->path_len,
            accepted ? "OK" : "INCOMPLETE");

        shared_file_unref(&sf);
    }

    /*
     * Update last publishing time and remember expiration time.
     */
    if (PDHT_E_OK == code && info->roots > 0) {
        pe->last_publish = tm_time();
        if (pd != NULL) {
            pd->expiration =
                time_advance(pe->last_publish, DHT_VALUE_ALOC_EXPIRE);
            dbmw_write(db_pubdata, pe->sha1, pd, sizeof *pd);
        }
    }

    /*
     * If the entry was deemed popular, we're going to delay its republishing
     * by a larger amount of time and any data we already published about
     * it will surely expire.  Since this is our decision, we do not want
     * to be told that republishing, if it occurs again, was done later than
     * required.  Hence call publisher_hold() to mark that we don't care.
     */
    if (PDHT_E_POPULAR == code)
        publisher_hold(pe, delay, "popular entry");
    else
        publisher_retry(pe, delay,
            accepted ? "accepted publish" : "published");

    pe->backgrounded = !accepted;

    return accepted;
}