static void req_log(struct msg *req) { struct msg *rsp; /* peer message (response) */ int64_t req_time; /* time cost for this request */ char *peer_str; /* peer client ip:port */ uint32_t req_len, rsp_len; /* request and response length */ struct string *req_type; /* request type string */ if (log_loggable(LOG_NOTICE) == 0) { return; } /* a fragment? */ if (req->frag_id != 0 && req->frag_owner != req) { return; } /* conn close normally? */ if (req->mlen == 0) { return; } req_time = nc_usec_now() - req->start_ts; rsp = req->peer; req_len = req->mlen; rsp_len = (rsp != NULL) ? rsp->mlen : 0; if (req->key_end) { req->key_end[0] = '\0'; } /* * FIXME: add backend addr here * Maybe we can store addrstr just like server_pool in conn struct * when connections are resolved */ peer_str = nc_unresolve_peer_desc(req->owner->sd); req_type = msg_type_string(req->type); log_debug(LOG_NOTICE, "req %"PRIu64" done on c %d req_time %"PRIi64".%03"PRIi64 " msec type %.*s narg %"PRIu32" req_len %"PRIu32" rsp_len %"PRIu32 " key0 '%s' peer '%s' done %d error %d", req->id, req->owner->sd, req_time / 1000, req_time % 1000, req_type->len, req_type->data, req->narg, req_len, rsp_len, req->key_start, peer_str, req->done, req->error); }
static void server_failure(struct context *ctx, struct server *server) { struct server_pool *pool = server->owner; int64_t now, next; rstatus_t status; if (!pool->auto_eject_hosts) { return; } server->failure_count++; log_debug(LOG_VERB, "server '%.*s' failure count %"PRIu32" limit %"PRIu32, server->pname.len, server->pname.data, server->failure_count, pool->server_failure_limit); if (server->failure_count < pool->server_failure_limit) { return; } now = nc_usec_now(); if (now < 0) { return; } stats_server_set_ts(ctx, server, server_ejected_at, now); next = now + pool->server_retry_timeout; log_debug(LOG_INFO, "update pool %"PRIu32" '%.*s' to delete server '%.*s' " "for next %"PRIu32" secs", pool->idx, pool->name.len, pool->name.data, server->pname.len, server->pname.data, pool->server_retry_timeout / 1000 / 1000); stats_pool_incr(ctx, pool, server_ejects); server->failure_count = 0; server->next_retry = next; status = server_pool_run(pool); if (status != NC_OK) { log_error("updating pool %"PRIu32" '%.*s' failed: %s", pool->idx, pool->name.len, pool->name.data, strerror(errno)); } }
static rstatus_t server_pool_update(struct server_pool *pool) { rstatus_t status; int64_t now; uint32_t pnlive_server; /* prev # live server */ if (!pool->auto_eject_hosts) { return NC_OK; } if (pool->next_rebuild == 0LL) { return NC_OK; } now = nc_usec_now(); if (now < 0) { return NC_ERROR; } if (now <= pool->next_rebuild) { if (pool->nlive_server == 0) { errno = ECONNREFUSED; return NC_ERROR; } return NC_OK; } pnlive_server = pool->nlive_server; status = server_pool_run(pool); if (status != NC_OK) { log_error("updating pool %"PRIu32" with dist %d failed: %s", pool->idx, pool->dist_type, strerror(errno)); return status; } log_debug(LOG_INFO, "update pool %"PRIu32" '%.*s' to add %"PRIu32" servers", pool->idx, pool->name.len, pool->name.data, pool->nlive_server - pnlive_server); return NC_OK; }
/*
 * Return the current time in milliseconds since Epoch.
 *
 * Callers of nc_usec_now() treat a negative result as failure. Integer
 * division truncates toward zero, so dividing a small negative value such
 * as -1 by 1000 would produce 0 and hide that failure. Negative values
 * are therefore returned unchanged instead of being scaled.
 */
int64_t
nc_msec_now(void)
{
    int64_t usec = nc_usec_now();

    if (usec < 0) {
        return usec;
    }

    return usec / 1000LL;
}
static void req_log(struct msg *req) { struct msg *rsp; /* peer message (response) */ int64_t req_time; /* time cost for this request */ char *peer_str; /* peer client ip:port */ uint32_t req_len, rsp_len; /* request and response length */ struct string *req_type; /* request type string */ struct keypos *kpos; if (log_loggable(LOG_NOTICE) == 0) { return; } /* a fake request? */ if (req->owner == NULL) { return; } /* a fragment? */ if (req->frag_id != 0 && req->frag_owner != req) { return; } /* conn close normally? */ if (req->mlen == 0) { return; } /* * there is a race scenario where a requests comes in, the log level is not LOG_NOTICE, * and before the response arrives you modify the log level to LOG_NOTICE * using SIGTTIN OR SIGTTOU, then req_log() wouldn't have msg->start_ts set */ if (req->start_ts == 0) { return; } req_time = nc_usec_now() - req->start_ts; rsp = req->peer; req_len = req->mlen; rsp_len = (rsp != NULL) ? rsp->mlen : 0; if (array_n(req->keys) < 1) { return; } kpos = array_get(req->keys, 0); if (kpos->end != NULL) { *(kpos->end) = '\0'; } /* * FIXME: add backend addr here * Maybe we can store addrstr just like server_pool in conn struct * when connections are resolved */ peer_str = nc_unresolve_peer_desc(req->owner->sd); req_type = msg_type_string(req->type); log_debug(LOG_NOTICE, "req %"PRIu64" done on c %d req_time %"PRIi64".%03"PRIi64 " msec type %.*s narg %"PRIu32" req_len %"PRIu32" rsp_len %"PRIu32 " key0 '%s' peer '%s' done %d error %d", req->id, req->owner->sd, req_time / 1000, req_time % 1000, req_type->len, req_type->data, req->narg, req_len, rsp_len, kpos->start, peer_str, req->done, req->error); }
rstatus_t modula_update(struct server_pool *pool) { uint32_t nserver; /* # server - live and dead */ uint32_t nlive_server; /* # live server */ uint32_t pointer_per_server; /* pointers per server proportional to weight */ uint32_t pointer_counter; /* # pointers on continuum */ uint32_t points_per_server; /* points per server */ uint32_t continuum_index; /* continuum index */ uint32_t continuum_addition; /* extra space in the continuum */ uint32_t server_index; /* server index */ uint32_t weight_index; /* weight index */ uint32_t total_weight; /* total live server weight */ int64_t now; /* current timestamp in usec */ now = nc_usec_now(); if (now < 0) { return NC_ERROR; } nserver = array_n(&pool->server); nlive_server = 0; total_weight = 0; pool->next_rebuild = 0LL; for (server_index = 0; server_index < nserver; server_index++) { struct server *server = array_get(&pool->server, server_index); if (pool->auto_eject_hosts) { if (server->fail == 0) { nlive_server++; } } else { nlive_server++; } ASSERT(server->weight > 0); /* count weight only for live servers */ if (!pool->auto_eject_hosts || server->fail == 0) { total_weight += server->weight; } } pool->nlive_server = nlive_server; if (nlive_server == 0) { ASSERT(pool->continuum != NULL); ASSERT(pool->ncontinuum != 0); log_debug(LOG_DEBUG, "no live servers for pool %"PRIu32" '%.*s'", pool->idx, pool->name.len, pool->name.data); return NC_OK; } log_debug(LOG_DEBUG, "%"PRIu32" of %"PRIu32" servers are live for pool " "%"PRIu32" '%.*s'", nlive_server, nserver, pool->idx, pool->name.len, pool->name.data); continuum_addition = MODULA_CONTINUUM_ADDITION; points_per_server = MODULA_POINTS_PER_SERVER; /* * Allocate the continuum for the pool, the first time, and every time we * add a new server to the pool */ if (total_weight > pool->nserver_continuum) { struct continuum *continuum; uint32_t nserver_continuum = total_weight + MODULA_CONTINUUM_ADDITION; uint32_t ncontinuum = nserver_continuum * MODULA_POINTS_PER_SERVER; 
continuum = nc_realloc(pool->continuum, sizeof(*continuum) * ncontinuum); if (continuum == NULL) { return NC_ENOMEM; } pool->continuum = continuum; pool->nserver_continuum = nserver_continuum; /* pool->ncontinuum is initialized later as it could be <= ncontinuum */ } /* update the continuum with the servers that are live */ continuum_index = 0; pointer_counter = 0; for (server_index = 0; server_index < nserver; server_index++) { struct server *server = array_get(&pool->server, server_index); if (pool->auto_eject_hosts && server->next_retry > now) { continue; } for (weight_index = 0; weight_index < server->weight; weight_index++) { pointer_per_server = 1; pool->continuum[continuum_index].index = server_index; pool->continuum[continuum_index++].value = 0; pointer_counter += pointer_per_server; } } pool->ncontinuum = pointer_counter; log_debug(LOG_VERB, "updated pool %"PRIu32" '%.*s' with %"PRIu32" of " "%"PRIu32" servers live in %"PRIu32" slots and %"PRIu32" " "active points in %"PRIu32" slots", pool->idx, pool->name.len, pool->name.data, nlive_server, nserver, pool->nserver_continuum, pool->ncontinuum, (pool->nserver_continuum + continuum_addition) * points_per_server); return NC_OK; }
rstatus_t ketama_update(struct server_pool *pool) { uint32_t nserver; /* # server - live and dead */ uint32_t nlive_server; /* # live server */ uint32_t pointer_per_server; /* pointers per server proportional to weight */ uint32_t pointer_per_hash; /* pointers per hash */ uint32_t pointer_counter; /* # pointers on continuum */ uint32_t pointer_index; /* pointer index */ uint32_t points_per_server; /* points per server */ uint32_t continuum_index; /* continuum index */ uint32_t continuum_addition; /* extra space in the continuum */ uint32_t server_index; /* server index */ uint32_t value; /* continuum value */ uint32_t total_weight; /* total live server weight */ int64_t now; /* current timestamp in usec */ ASSERT(array_n(&pool->server) > 0); now = nc_usec_now(); if (now < 0) { return NC_ERROR; } /* * Count live servers and total weight, and also update the next time to * rebuild the distribution */ nserver = array_n(&pool->server); nlive_server = 0; total_weight = 0; pool->next_rebuild = 0LL; for (server_index = 0; server_index < nserver; server_index++) { struct server *server = array_get(&pool->server, server_index); if (pool->auto_eject_hosts) { if (server->next_retry <= now) { server->next_retry = 0LL; nlive_server++; } else if (pool->next_rebuild == 0LL || server->next_retry < pool->next_rebuild) { pool->next_rebuild = server->next_retry; } } else { nlive_server++; } ASSERT(server->weight > 0); /* count weight only for live servers */ if (!pool->auto_eject_hosts || server->next_retry <= now) { total_weight += server->weight; } } pool->nlive_server = nlive_server; if (nlive_server == 0) { log_debug(LOG_DEBUG, "no live servers for pool %"PRIu32" '%.*s'", pool->idx, pool->name.len, pool->name.data); return NC_OK; } log_debug(LOG_DEBUG, "%"PRIu32" of %"PRIu32" servers are live for pool " "%"PRIu32" '%.*s'", nlive_server, nserver, pool->idx, pool->name.len, pool->name.data); continuum_addition = KETAMA_CONTINUUM_ADDITION; points_per_server = 
KETAMA_POINTS_PER_SERVER; /* * Allocate the continuum for the pool, the first time, and every time we * add a new server to the pool */ if (nlive_server > pool->nserver_continuum) { struct continuum *continuum; uint32_t nserver_continuum = nlive_server + continuum_addition; uint32_t ncontinuum = nserver_continuum * points_per_server; continuum = nc_realloc(pool->continuum, sizeof(*continuum) * ncontinuum); if (continuum == NULL) { return NC_ENOMEM; } pool->continuum = continuum; pool->nserver_continuum = nserver_continuum; /* pool->ncontinuum is initialized later as it could be <= ncontinuum */ } /* * Build a continuum with the servers that are live and points from * these servers that are proportial to their weight */ continuum_index = 0; pointer_counter = 0; for (server_index = 0; server_index < nserver; server_index++) { struct server *server; float pct; server = array_get(&pool->server, server_index); if (pool->auto_eject_hosts && pool->gutter == NULL && server->next_retry > now) { continue; } pct = (float)server->weight / (float)total_weight; pointer_per_server = (uint32_t) ((floorf((float) (pct * KETAMA_POINTS_PER_SERVER / 4 * (float)nlive_server + 0.0000000001))) * 4); pointer_per_hash = 4; log_debug(LOG_VERB, "%.*s:%"PRIu16" weight %"PRIu32" of %"PRIu32" " "pct %0.5f points per server %"PRIu32"", server->name.len, server->name.data, server->port, server->weight, total_weight, pct, pointer_per_server); for (pointer_index = 1; pointer_index <= pointer_per_server / pointer_per_hash; pointer_index++) { char host[KETAMA_MAX_HOSTLEN]= ""; size_t hostlen; uint32_t x; hostlen = snprintf(host, KETAMA_MAX_HOSTLEN, "%.*s-%u", server->name.len, server->name.data, pointer_index - 1); for (x = 0; x < pointer_per_hash; x++) { value = ketama_hash(host, hostlen, x); pool->continuum[continuum_index].index = server_index; pool->continuum[continuum_index++].value = value; } } pointer_counter += pointer_per_server; } pool->ncontinuum = pointer_counter; qsort(pool->continuum, 
pool->ncontinuum, sizeof(*pool->continuum), ketama_item_cmp); for (pointer_index = 0; pointer_index < ((nlive_server * KETAMA_POINTS_PER_SERVER) - 1); pointer_index++) { if (pointer_index + 1 >= pointer_counter) { break; } ASSERT(pool->continuum[pointer_index].value <= pool->continuum[pointer_index + 1].value); } log_debug(LOG_VERB, "updated pool %"PRIu32" '%.*s' with %"PRIu32" of " "%"PRIu32" servers live in %"PRIu32" slots and %"PRIu32" " "active points in %"PRIu32" slots", pool->idx, pool->name.len, pool->name.data, nlive_server, nserver, pool->nserver_continuum, pool->ncontinuum, (pool->nserver_continuum + continuum_addition) * points_per_server); return NC_OK; }
rstatus_t ketama_update(struct server_pool *pool) { uint32_t nserver; /* # server - live and dead */ uint32_t nlive_server; /* # live server */ uint32_t pointer_per_server; /* pointers per server proportional to weight */ uint32_t pointer_per_hash; /* pointers per hash */ uint32_t pointer_counter; /* # pointers on continuum */ uint32_t pointer_index; /* pointer index */ uint32_t points_per_server; /* points per server */ uint32_t continuum_index; /* continuum index */ uint32_t continuum_addition; /* extra space in the continuum */ uint32_t server_index; /* server index */ uint32_t value; /* continuum value */ uint32_t total_weight; /* total live server weight */ int64_t now; /* current timestamp in usec */ ASSERT(array_n(&pool->server) > 0); now = nc_usec_now(); if (now < 0) { return NC_ERROR; } /* * Count live servers and total weight, and also update the next time to * rebuild the distribution */ nserver = array_n(&pool->server); nlive_server = 0; total_weight = 0; pool->next_rebuild = 0LL; for (server_index = 0; server_index < nserver; server_index++) { struct server *server = array_get(&pool->server, server_index); if (pool->auto_eject_hosts) { if (server->next_retry <= now) { server->next_retry = 0LL; nlive_server++; } else if (pool->next_rebuild == 0LL || server->next_retry < pool->next_rebuild) { pool->next_rebuild = server->next_retry; } } else { nlive_server++; } ASSERT(server->weight > 0); /* count weight only for live servers */ if (!pool->auto_eject_hosts || server->next_retry <= now) { total_weight += server->weight; } } pool->nlive_server = nlive_server; if (nlive_server == 0) { log_debug(LOG_DEBUG, "no live servers for pool %"PRIu32" '%.*s'", pool->idx, pool->name.len, pool->name.data); return NC_OK; } //log_debug(LOG_DEBUG, "%"PRIu32" of %"PRIu32" servers are live for pool " loga("%"PRIu32" of %"PRIu32" servers are live for pool " "%"PRIu32" '%.*s'", nlive_server, nserver, pool->idx, pool->name.len, pool->name.data); /* * Allocate the continuum for 
the pool, the first time, and every time we * add a new server to the pool */ if (nlive_server > pool->nserver_continuum) { struct continuum *continuum; //uint32_t nserver_continuum = nlive_server + continuum_addition; //uint32_t ncontinuum = nserver_continuum * points_per_server; uint32_t nserver_continuum = nlive_server; uint32_t ncontinuum = nserver_continuum; continuum = nc_realloc(pool->continuum, sizeof(*continuum) * ncontinuum); if (continuum == NULL) { return NC_ENOMEM; } pool->continuum = continuum; pool->nserver_continuum = nserver_continuum; pool->ncontinuum = ncontinuum; loga("pool->continuum: %u pool->ncontinuum: %u, pool->nserver_continuum: %u", pool->continuum, pool->ncontinuum, pool->nserver_continuum); /* pool->ncontinuum is initialized later as it could be <= ncontinuum */ } /* * Build a continuum with the servers that are live and points from * these servers that are proportial to their weight */ continuum_index = 0; pointer_counter = 0; for (server_index = 0; server_index < nserver; server_index++) { struct server *server; float pct; server = array_get(&pool->server, server_index); if (pool->auto_eject_hosts && server->next_retry > now) { continue; } uint32_t hash_value = strtoul (server->name.data, NULL, 10); pool->continuum[continuum_index].index = server_index; pool->continuum[continuum_index++].value = hash_value; loga("+++++++ hash_value: %u index: %u +++++++", hash_value, server_index); } return NC_OK; }