/*
 * Remove the entry stored under key from the cache, if there is one.
 *
 * key must be non-NULL with key_len > 0, and the byte right after the
 * key must be either NUL or a space (checked by assert only).
 * A NULL cache is ignored. The cache lock, when present, is held
 * around the lookup and removal.
 */
void mcache_delete(mcache *m, char *key, int key_len) {
    void *victim = NULL;

    /* The parameters are otherwise only read by the asserts below. */
    (void)key_len;
    (void)key;

    assert(key);
    assert(key_len > 0);
    assert(key[key_len] == '\0' || key[key_len] == ' ');

    if (m == NULL) {
        return;
    }

    if (m->lock) {
        cb_mutex_enter(m->lock);
    }

    if (m->map != NULL) {
        victim = genhash_find(m->map, key);
    }

    if (victim != NULL) {
        /* Detach the item via mcache_item_unlink() before dropping it from the map. */
        mcache_item_unlink(m, victim);
        genhash_delete(m->map, key);
        m->tot_deletes++;

        if (settings.verbose > 1) {
            moxi_log_write("mcache delete: %s\n", key);
        }
    }

    if (m->lock) {
        cb_mutex_exit(m->lock);
    }
}
/*
 * Look up key (nkey bytes) in the hash table obtained from handle and
 * store the lookup result through the item out-parameter.
 *
 * Returns ENGINE_SUCCESS when the lookup produced a non-NULL entry and
 * ENGINE_KEY_ENOENT otherwise. The cookie argument is not used.
 */
static ENGINE_ERROR_CODE mock_get(ENGINE_HANDLE* handle, const void* cookie, item** item, const void* key, const int nkey)
{
    *item = genhash_find(get_ht(handle), key, nkey);

    if (*item == NULL) {
        return ENGINE_KEY_ENOENT;
    }
    return ENGINE_SUCCESS;
}
/*
 * Look up key (nkey bytes) in the hash table obtained from handle and
 * store the lookup result through itm.
 *
 * Returns ENGINE_SUCCESS when an entry was found, ENGINE_KEY_ENOENT
 * when the lookup returned NULL. cookie and vbucket are ignored.
 */
static ENGINE_ERROR_CODE mock_get(ENGINE_HANDLE* handle, const void* cookie, item** itm, const void* key, const int nkey, uint16_t vbucket)
{
    ENGINE_ERROR_CODE status = ENGINE_KEY_ENOENT;

    (void)cookie;
    (void)vbucket;

    *itm = genhash_find(get_ht(handle), key, nkey);
    if (*itm) {
        status = ENGINE_SUCCESS;
    }

    return status;
}
/*
 * Test helper: verify that looking up key in h yields expected.
 *
 * expected == NULL means the key must be absent. Otherwise the value
 * found is treated as a C string and compared to expected with
 * strcmp(). On any mismatch a diagnostic is written to stderr and the
 * process is terminated with abort().
 */
static void assert_hash_val(char *expected, genhash_t* h, const char* key) {
    char *found = (char*)genhash_find(h, key);

    if (expected == NULL) {
        if (found != NULL) {
            fprintf(stderr, "Expected NULL for ``%s'', got ``%s''\n",
                    key, found);
            abort();
        }
        return;
    }

    /*
     * A missing entry has to be reported explicitly: handing NULL to
     * strcmp() (or to a %s conversion) is undefined behavior, so the
     * comparison below must only run with a real string.
     */
    if (found == NULL) {
        fprintf(stderr, "Expected ``%s'' for ``%s'', got NULL\n",
                expected, key);
        abort();
    }

    if (strcmp(expected, found) != 0) {
        fprintf(stderr, "Expected ``%s'' for ``%s'', got ``%s''\n",
                expected, key, found);
        abort();
    }
}
/**
 * Observe callback. Called internally by libcouchbase's observe handlers
 *
 * Updates the durability entry matching the key in resp:
 *  - a response with a NULL key only signals dset_done_waiting();
 *  - an entry already marked done is left untouched;
 *  - an error is recorded on the entry and nothing else is evaluated;
 *  - otherwise the response counter is bumped, a CAS reported by the
 *    master is recorded (a mismatch with the requested CAS completes
 *    the entry with LCB_KEY_EEXISTS), and the positive or negative
 *    durability check is run depending on the check_delete option.
 *
 * @param instance unused
 * @param dset     the durability set the response belongs to
 * @param err      status delivered together with the response
 * @param resp     the observe response
 */
void lcb_durability_dset_update(lcb_t instance, lcb_durability_set_t *dset, lcb_error_t err, const lcb_observe_resp_t *resp)
{
    lcb_durability_entry_t *ent;

    (void)instance;

    /**
     * So we have two counters to decrement. One is the global 'done' counter
     * and the other is the iteration counter.
     *
     * The iteration counter is only decremented when we receive a NULL signal
     * in the callback, whereas the global counter is decremented once, whenever
     * the entry's criteria have been satisfied
     */
    if (resp->v.v0.key == NULL) {
        dset_done_waiting(dset);
        return;
    }

    if (dset->nentries == 1) {
        /* a single-entry set keeps its entry inline; no table lookup */
        ent = &dset->single.ent;
    } else {
        ent = genhash_find(dset->ht, resp->v.v0.key, resp->v.v0.nkey);
    }

    if (ent == NULL) {
        /* the lookup found no entry for this key; there is nothing to update */
        return;
    }

    if (ent->done) {
        /* ignore subsequent errors */
        return;
    }

    if (err != LCB_SUCCESS) {
        RESFLD(ent, err) = err;
        return;
    }

    RESFLD(ent, nresponses)++;

    if (resp->v.v0.cas && resp->v.v0.from_master) {
        RESFLD(ent, cas) = resp->v.v0.cas;

        /* a requested CAS that differs from the master's CAS ends the entry */
        if (REQFLD(ent, cas) && REQFLD(ent, cas) != resp->v.v0.cas) {
            RESFLD(ent, err) = LCB_KEY_EEXISTS;
            ent_set_resdone(ent);
            return;
        }
    }

    if (DSET_OPTFLD(ent->parent, check_delete)) {
        check_negative_durability(ent, resp);
    } else {
        check_positive_durability(ent, resp);
    }

    if (ent_is_complete(ent)) {
        /* clear any transient errors */
        RESFLD(ent, err) = LCB_SUCCESS;
        ent_set_resdone(ent);
    }
}
/**
 * Forwards the keys of one or more paused upstream ascii get/gets
 * commands to the downstream conns of d.
 *
 * Every upstream conn on the uc->next chain is visited. Its command
 * text (cmd_start) is split on single spaces and each non-empty key is
 * handled in this order:
 *  - for a plain "get", a front_cache hit is answered immediately;
 *  - when the downstream lookup reports "self", the key is answered
 *    from item_get() (hit) or counted as a miss, without any write;
 *  - otherwise, when d->multiget exists (more than one upstream conn),
 *    an entry is chained into it; the key is written to the chosen
 *    downstream conn via emit_start()/emit_skey() unless an entry for
 *    the same key was already present (de-duplicated).
 *
 * Afterwards each downstream conn with queued message data
 * (msgused > 1 or msgbytes > 0) gets emit_end() and is switched to
 * conn_mwrite with a write event.
 *
 * @param d           downstream; d->multiget must be NULL on entry
 * @param uc          head of the upstream conn chain; must not be noreply
 * @param emit_start  writes the command start to a downstream conn
 * @param emit_skey   writes one key, including its preceding space
 * @param emit_end    terminates the request on a downstream conn
 * @param front_cache cache consulted for plain "get" keys
 * @return true when at least one downstream conn was scheduled for
 *         writing; false otherwise, including when preparing a
 *         downstream conn for write failed.
 */
bool multiget_ascii_downstream(downstream *d, conn *uc,
                               int (*emit_start)(conn *c, char *cmd, int cmd_len),
                               int (*emit_skey)(conn *c, char *skey, int skey_len),
                               int (*emit_end)(conn *c),
                               mcache *front_cache) {
    assert(d != NULL);
    assert(d->downstream_conns != NULL);
    assert(d->multiget == NULL);
    assert(uc != NULL);
    assert(uc->noreply == false);

    proxy_td *ptd = d->ptd;
    assert(ptd != NULL);

    proxy_stats_cmd *psc_get =
        &ptd->stats.stats_cmd[STATS_CMD_TYPE_REGULAR][STATS_CMD_GET];
    proxy_stats_cmd *psc_get_key =
        &ptd->stats.stats_cmd[STATS_CMD_TYPE_REGULAR][STATS_CMD_GET_KEY];

    int nwrite = 0;
    int nconns = mcs_server_count(&d->mst);

    // Give up on the first downstream conn that cannot be prepared.
    //
    for (int i = 0; i < nconns; i++) {
        if (d->downstream_conns[i] != NULL &&
            cproxy_prep_conn_for_write(d->downstream_conns[i]) == false) {
            d->ptd->stats.stats.err_downstream_write_prep++;
            cproxy_close_conn(d->downstream_conns[i]);
            return false;
        }
    }

    if (uc->next != NULL) {
        // More than one upstream conn, so we need a hashtable
        // to track keys for de-duplication.
        //
        d->multiget = genhash_init(128, skeyhash_ops);
        if (settings.verbose > 1) {
            fprintf(stderr, "cproxy multiget hash table new\n");
        }
    }

    // Snapshot the volatile only once.
    //
    uint32_t msec_current_time_snapshot = msec_current_time;

    int uc_num = 0;
    conn *uc_cur = uc;

    while (uc_cur != NULL) {
        assert(uc_cur->cmd == -1);
        assert(uc_cur->item == NULL);
        assert(uc_cur->state == conn_pause);
        assert(IS_ASCII(uc_cur->protocol));
        assert(IS_PROXY(uc_cur->protocol));

        char *command = uc_cur->cmd_start;
        assert(command != NULL);

        char *space = strchr(command, ' ');
        assert(space > command);

        int cmd_len = space - command;
        assert(cmd_len == 3 || cmd_len == 4); // Either get or gets.

        int cas_emit = (command[3] == 's');

        if (settings.verbose > 1) {
            fprintf(stderr, "forward multiget %s (%d %d)\n",
                    command, cmd_len, uc_num);
        }

        while (space != NULL) {
            // key points into the command text, so it is terminated
            // by the next space (or by NUL for the last key only).
            //
            char *key = space + 1;
            char *next_space = strchr(key, ' ');
            int key_len;

            if (next_space != NULL) {
                key_len = next_space - key;
            } else {
                key_len = strlen(key);

                // We've reached the last key.
                //
                psc_get->read_bytes += (key - command + key_len);
            }

            // This key_len check helps skip consecutive spaces.
            //
            if (key_len > 0) {
                ptd->stats.stats.tot_multiget_keys++;

                psc_get_key->seen++;
                psc_get_key->read_bytes += key_len;

                // Update key-based statistics.
                //
                bool do_key_stats =
                    matcher_check(&ptd->key_stats_matcher,
                                  key, key_len, true) == true &&
                    matcher_check(&ptd->key_stats_unmatcher,
                                  key, key_len, false) == false;

                if (do_key_stats) {
                    touch_key_stats(ptd, key, key_len,
                                    msec_current_time_snapshot,
                                    STATS_CMD_TYPE_REGULAR,
                                    STATS_CMD_GET_KEY,
                                    1, 0, 0,
                                    key_len, 0);
                }

                // Handle a front cache hit by queuing response.
                //
                // Note, front cache stats are part of mcache.
                //
                if (!cas_emit) {
                    item *it = mcache_get(front_cache, key, key_len,
                                          msec_current_time_snapshot);
                    if (it != NULL) {
                        assert(it->nkey == key_len);
                        assert(strncmp(ITEM_key(it), key, it->nkey) == 0);

                        cproxy_upstream_ascii_item_response(it, uc_cur, 0);

                        psc_get_key->hits++;
                        psc_get_key->write_bytes += it->nbytes;

                        if (do_key_stats) {
                            touch_key_stats(ptd, key, key_len,
                                            msec_current_time_snapshot,
                                            STATS_CMD_TYPE_REGULAR,
                                            STATS_CMD_GET_KEY,
                                            0, 1, 0,
                                            0, it->nbytes);
                        }

                        // The refcount was inc'ed by mcache_get() for us.
                        //
                        item_remove(it);

                        goto loop_next;
                    }
                }

                bool self = false;

                conn *c = cproxy_find_downstream_conn(d, key, key_len,
                                                      &self);
                if (c != NULL) {
                    if (self) {
                        // Optimization for talking with ourselves,
                        // to avoid extra network hop.
                        //
                        ptd->stats.stats.tot_optimize_self++;

                        item *it = item_get(key, key_len);
                        if (it != NULL) {
                            cproxy_upstream_ascii_item_response(it, uc_cur,
                                                                cas_emit);

                            psc_get_key->hits++;
                            psc_get_key->write_bytes += it->nbytes;

                            if (do_key_stats) {
                                touch_key_stats(ptd, key, key_len,
                                                msec_current_time_snapshot,
                                                STATS_CMD_TYPE_REGULAR,
                                                STATS_CMD_GET_KEY,
                                                0, 1, 0,
                                                0, it->nbytes);
                            }

                            // The refcount was inc'ed by item_get() for us.
                            //
                            item_remove(it);

                            if (settings.verbose > 1) {
                                // The key is not NUL terminated here, so
                                // bound the output with its length.
                                //
                                fprintf(stderr,
                                        "optimize self multiget hit: %.*s\n",
                                        key_len, key);
                            }
                        } else {
                            psc_get_key->misses++;

                            if (do_key_stats) {
                                touch_key_stats(ptd, key, key_len,
                                                msec_current_time_snapshot,
                                                STATS_CMD_TYPE_REGULAR,
                                                STATS_CMD_GET_KEY,
                                                0, 0, 1,
                                                0, 0);
                            }

                            if (settings.verbose > 1) {
                                fprintf(stderr,
                                        "optimize self multiget miss: %.*s\n",
                                        key_len, key);
                            }
                        }

                        goto loop_next;
                    }

                    // See if we've already requested this key via
                    // the multiget hash table, in order to
                    // de-duplicate repeated keys.
                    //
                    bool first_request = true;

                    if (d->multiget != NULL) {
                        // TODO: Use Trond's allocator here.
                        //
                        multiget_entry *entry =
                            calloc(1, sizeof(multiget_entry));
                        if (entry != NULL) {
                            entry->upstream_conn = uc_cur;
                            entry->opaque = 0;
                            entry->hits = 0;

                            // The new entry becomes the head of the chain
                            // of requests already recorded for this key.
                            //
                            entry->next = genhash_find(d->multiget, key);

                            genhash_update(d->multiget, key, entry);

                            if (entry->next != NULL) {
                                first_request = false;
                            }
                        } else {
                            // TODO: Handle out of multiget entry memory.
                        }
                    }

                    if (first_request) {
                        assert(c->item == NULL);
                        assert(c->state == conn_pause);
                        assert(IS_PROXY(c->protocol));
                        assert(c->ilist != NULL);
                        assert(c->isize > 0);

                        // Nothing queued on this conn yet, so the
                        // command start has to go out first.
                        //
                        if (c->msgused <= 1 &&
                            c->msgbytes <= 0) {
                            emit_start(c, command, cmd_len);
                        }

                        // Provide the preceding space as optimization
                        // for ascii-to-ascii configuration.
                        //
                        emit_skey(c, key - 1, key_len + 1);
                    } else {
                        ptd->stats.stats.tot_multiget_keys_dedupe++;

                        if (settings.verbose > 1) {
                            // Print straight from the command text with a
                            // length bound; no fixed-size copy is needed.
                            //
                            fprintf(stderr,
                                    "%d cproxy multiget dedpue: %.*s\n",
                                    uc_cur->sfd, key_len, key);
                        }
                    }
                } else {
                    // TODO: Handle when downstream conn is down.
                }
            }

        loop_next:
            space = next_space;
        }

        uc_num++;
        uc_cur = uc_cur->next;
    }

    for (int i = 0; i < nconns; i++) {
        conn *c = d->downstream_conns[i];
        if (c != NULL &&
            (c->msgused > 1 ||
             c->msgbytes > 0)) {
            emit_end(c);

            conn_set_state(c, conn_mwrite);
            c->write_and_go = conn_new_cmd;

            if (update_event(c, EV_WRITE | EV_PERSIST)) {
                nwrite++;

                if (uc->noreply) {
                    c->write_and_go = conn_pause;
                }
            } else {
                if (settings.verbose > 1) {
                    fprintf(stderr,
                            "Couldn't update cproxy write event\n");
                }

                d->ptd->stats.stats.err_oom++;
                cproxy_close_conn(c);
            }
        }
    }

    if (settings.verbose > 1) {
        fprintf(stderr, "forward multiget nwrite %d out of %d\n",
                nwrite, nconns);
    }

    d->downstream_used_start = nwrite;
    d->downstream_used = nwrite;

    if (cproxy_dettach_if_noreply(d, uc) == false) {
        d->upstream_suffix = "END\r\n";

        cproxy_start_downstream_timeout(d, NULL);
    }

    return nwrite > 0;
}
void multiget_ascii_downstream_response(downstream *d, item *it) { assert(d); assert(it); assert(it->nkey > 0); assert(ITEM_key(it) != NULL); proxy_td *ptd = d->ptd; assert(ptd); proxy_stats_cmd *psc_get_key = &ptd->stats.stats_cmd[STATS_CMD_TYPE_REGULAR][STATS_CMD_GET_KEY]; proxy *p = ptd->proxy; assert(p); uint32_t front_cache_lifespan = ptd->behavior_pool.base.front_cache_lifespan; if (front_cache_lifespan > 0) { if (matcher_check(&p->front_cache_matcher, ITEM_key(it), it->nkey, true) == true && matcher_check(&p->front_cache_unmatcher, ITEM_key(it), it->nkey, false) == false) { mcache_set(&p->front_cache, it, front_cache_lifespan + msec_current_time, true, false); } } if (d->multiget != NULL) { // The ITEM_key is not NULL or space terminated. // char key_buf[KEY_MAX_LENGTH + 10]; assert(it->nkey <= KEY_MAX_LENGTH); memcpy(key_buf, ITEM_key(it), it->nkey); key_buf[it->nkey] = '\0'; multiget_entry *entry_first = genhash_find(d->multiget, key_buf); if (entry_first != NULL) { entry_first->hits++; multiget_entry *entry = entry_first; while (entry != NULL) { // The upstream might have been closed mid-request. // // TODO: Revisit the -1 cas_emit parameter. // conn *uc = entry->upstream_conn; if (uc != NULL) { cproxy_upstream_ascii_item_response(it, uc, -1); psc_get_key->hits++; psc_get_key->write_bytes += it->nbytes; if (matcher_check(&ptd->key_stats_matcher, ITEM_key(it), it->nkey, true) == true && matcher_check(&ptd->key_stats_unmatcher, ITEM_key(it), it->nkey, false) == false) { touch_key_stats(ptd, ITEM_key(it), it->nkey, msec_current_time, STATS_CMD_TYPE_REGULAR, STATS_CMD_GET_KEY, 0, 1, 0, 0, it->nbytes); } if (entry != entry_first) { ptd->stats.stats.tot_multiget_bytes_dedupe += it->nbytes; } } entry = entry->next; } } } else { // TODO: We're not tracking miss stats in the simple case. // Do we always need to use a multiget hashtable? // Or, perhaps misses equals number of requests - number of hits. 
// conn *uc = d->upstream_conn; while (uc != NULL) { // TODO: Revisit the -1 cas_emit parameter. // cproxy_upstream_ascii_item_response(it, uc, -1); psc_get_key->hits++; psc_get_key->write_bytes += it->nbytes; if (matcher_check(&ptd->key_stats_matcher, ITEM_key(it), it->nkey, true) == true && matcher_check(&ptd->key_stats_unmatcher, ITEM_key(it), it->nkey, false) == false) { touch_key_stats(ptd, ITEM_key(it), it->nkey, msec_current_time, STATS_CMD_TYPE_REGULAR, STATS_CMD_GET_KEY, 0, 1, 0, 0, it->nbytes); } uc = uc->next; } } }
/*
 * Insert item it into the cache with the given expiration time.
 *
 * Up to 20 items are evicted from the LRU tail while the map holds
 * m->max or more entries. If the map is still full afterwards, or the
 * key copy cannot be allocated, the add is counted in tot_add_fails.
 *
 * m                     - the cache; NULL is ignored.
 * it                    - the item to add; must not be on an item list
 *                         (its next/prev must be NULL).
 * exptime               - expiration time stored on the item.
 * add_only              - when true, an existing entry for the key is
 *                         kept (and touched) and it is not stored.
 * mod_exptime_if_exists - when the add is skipped, also set exptime on
 *                         the existing entry.
 */
void mcache_set(mcache *m, void *it, uint64_t exptime, bool add_only, bool mod_exptime_if_exists) {
    assert(it);

    /* The NULL check has to come before anything reads m->funcs. */
    if (m == NULL) {
        return;
    }

    assert(m->funcs);
    assert(m->funcs->item_get_next(it) == NULL);
    assert(m->funcs->item_get_prev(it) == NULL);

    /* TODO: Our lock areas are possibly too wide. */

    if (m->lock) {
        cb_mutex_enter(m->lock);
    }

    if (m->map != NULL) {
        /* Evict some items if necessary. */
        int i;
        for (i = 0; m->lru_tail != NULL && i < 20; i++) {
            void *last_it;

            if ((uint32_t)genhash_size(m->map) < m->max) {
                break;
            }

            last_it = m->lru_tail;

            mcache_item_unlink(m, last_it);

            if (m->key_alloc) {
                /* The map key is a NUL terminated copy, so build one */
                /* for the lookup as well. */
                int len = m->funcs->item_key_len(last_it);
                char buf[KEY_MAX_LENGTH + 10];
                memcpy(buf, m->funcs->item_key(last_it), len);
                buf[len] = '\0';

                genhash_delete(m->map, buf);
            } else {
                genhash_delete(m->map, m->funcs->item_key(last_it));
            }

            m->tot_evictions++;
        }

        if ((uint32_t)genhash_size(m->map) < m->max) {
            char *key = m->funcs->item_key(it);
            int key_len = m->funcs->item_key_len(it);
            char *key_buf = NULL;

            if (m->key_alloc) {
                /* The ITEM_key is not NULL or space terminated, */
                /* and we need a copy, too, for hashtable ownership. */

                /* TODO: Move this outside the lock area? */

                key_buf = malloc(key_len + 1);
                if (key_buf != NULL) {
                    memcpy(key_buf, key, key_len);
                    key_buf[key_len] = '\0';
                    key = key_buf;
                } else {
                    key = NULL;
                }
            }

            if (key != NULL) {
                void *existing =
                    add_only ? genhash_find(m->map, key) : NULL;
                if (existing != NULL) {
                    /* Keep the existing entry; re-link it via */
                    /* unlink + touch. */
                    mcache_item_unlink(m, existing);
                    mcache_item_touch(m, existing);

                    if (mod_exptime_if_exists) {
                        m->funcs->item_set_exptime(existing, exptime);
                    }

                    m->tot_add_skips++;

                    if (settings.verbose > 1) {
                        moxi_log_write("mcache add-skip: %s\n", key);
                    }

                    /* The key copy was not handed to the map. */
                    /* free(NULL) is a no-op when no copy was made. */
                    free(key_buf);
                } else {
                    m->funcs->item_set_exptime(it, exptime);
                    m->funcs->item_add_ref(it);

                    genhash_update(m->map, key, it);

                    m->tot_adds++;
                    m->tot_add_bytes += m->funcs->item_len(it);

                    if (settings.verbose > 1) {
                        moxi_log_write("mcache add: %s\n", key);
                    }
                }
            } else {
                m->tot_add_fails++;
            }
        } else {
            m->tot_add_fails++;
        }
    }

    if (m->lock) {
        cb_mutex_exit(m->lock);
    }
}
/*
 * Look up key in the cache.
 *
 * Returns the item with an extra reference taken via item_add_ref()
 * when it is present and not expired, NULL otherwise (including when
 * m is NULL). An item counts as live when its exptime is 0, or when
 * its exptime is at least curr_time and at least m->oldest_live.
 * An expired item is removed from the map. key_len is not used.
 */
void *mcache_get(mcache *m, char *key, int key_len, uint64_t curr_time) {
    void *hit = NULL;

    (void)key_len;
    assert(key);

    if (m == NULL) {
        return NULL;
    }

    assert(m->funcs);

    if (m->lock) {
        cb_mutex_enter(m->lock);
    }

    if (m->map != NULL) {
        void *candidate = genhash_find(m->map, key);

        if (candidate == NULL) {
            m->tot_get_misses++;

            if (settings.verbose > 1) {
                moxi_log_write("mcache miss: %s\n", key);
            }
        } else {
            /* Unlinked in both outcomes; a live item is touched again below. */
            mcache_item_unlink(m, candidate);

            uint64_t expires_at = m->funcs->item_get_exptime(candidate);
            int live = (expires_at == 0) ||
                       (expires_at >= curr_time &&
                        expires_at >= m->oldest_live);

            if (live) {
                mcache_item_touch(m, candidate);

                m->funcs->item_add_ref(candidate); /* TODO: Need lock here? */

                m->tot_get_hits++;
                m->tot_get_bytes += m->funcs->item_len(candidate);

                hit = candidate;
            } else {
                /* Handle item expiration. */
                m->tot_get_expires++;

                if (settings.verbose > 1) {
                    moxi_log_write("mcache expire: %s\n", key);
                }

                genhash_delete(m->map, key);
            }
        }
    }

    if (m->lock) {
        cb_mutex_exit(m->lock);
    }

    /* The hit is logged only after the lock has been released. */
    if (hit != NULL && settings.verbose > 1) {
        moxi_log_write("mcache hit: %s\n", key);
    }

    return hit;
}
/*
 * Allocate the "<prefix> <name> <val>" string stored as a merger value.
 * Returns NULL on a negative length or when the allocation fails.
 */
static char *protocol_stats_build_hval(const char *prefix, int prefix_len,
                                       const char *name, int name_len,
                                       const char *val, int val_len) {
    char *hval;

    if (prefix_len < 0 || name_len < 0 || val_len < 0) {
        return NULL;
    }

    hval = malloc((size_t) prefix_len + 1 +
                  (size_t) name_len + 1 +
                  (size_t) val_len + 1);
    if (hval != NULL) {
        memcpy(hval, prefix, prefix_len);
        hval[prefix_len] = ' ';
        memcpy(hval + prefix_len + 1, name, name_len);
        hval[prefix_len + 1 + name_len] = ' ';
        memcpy(hval + prefix_len + 1 + name_len + 1, val, val_len);
        hval[prefix_len + 1 + name_len + 1 + val_len] = '\0';
    }
    return hval;
}

/*
 * Merge one "name val" stat into the merger table.
 *
 * The first value seen for a name is stored as "<prefix> <name> <val>".
 * For a name that is already present, the merge rule is selected by the
 * key part of the name (from its last ':' to the end, or the whole
 * name when it has no ':'):
 *  - keys listed in protocol_stats_keys_first keep the stored value;
 *  - keys listed in protocol_stats_keys_smallest are merged with
 *    protocol_stats_merge_smallest();
 *  - everything else is merged with protocol_stats_merge_sum().
 * When a merge is not possible the previous value is kept.
 *
 * Returns false when the name is empty or does not fit in a
 * MERGE_BUF_SIZE buffer, true otherwise.
 */
bool protocol_stats_merge_name_val(genhash_t *merger, char *prefix, int prefix_len, char *name, int name_len, char *val, int val_len) {
    int colon;
    char *key;
    int key_len;

    cb_assert(merger);
    cb_assert(name);
    cb_assert(val);

    /* Key part for merge rule lookup: scan for last colon. An index */
    /* is used so that no pointer before the start of name is formed. */
    colon = name_len - 1;
    while (colon >= 0 && name[colon] != ':') {
        colon--;
    }
    if (colon < 0) {
        colon = 0;
    }
    key = name + colon;
    key_len = name_len - colon;

    /* key_len never exceeds name_len, so bounding name_len covers */
    /* both buf_name and buf_key below. */
    if (key_len > 0 && name_len < MERGE_BUF_SIZE) {
        char buf_name[MERGE_BUF_SIZE];
        char buf_key[MERGE_BUF_SIZE];
        char buf_val[MERGE_BUF_SIZE];
        char *prev;
        token_t prev_tokens[MAX_TOKENS];
        size_t prev_ntokens;
        bool ok;

        strncpy(buf_name, name, name_len);
        buf_name[name_len] = '\0';

        prev = (char *) genhash_find(merger, buf_name);
        if (prev == NULL) {
            /* First value for this name. The map key points at the */
            /* name inside the stored value. */
            char *hval = protocol_stats_build_hval(prefix, prefix_len,
                                                   name, name_len,
                                                   val, val_len);
            if (hval != NULL) {
                genhash_update(merger, hval + prefix_len + 1, hval);
            }

            return true;
        }

        strncpy(buf_key, key, key_len);
        buf_key[key_len] = '\0';

        if (strstr(protocol_stats_keys_first, buf_key) != NULL) {
            return true;
        }

        prev_ntokens = scan_tokens(prev, prev_tokens, MAX_TOKENS, NULL);
        if (prev_ntokens != 4) {
            return true;
        }

        /* A value that does not fit in buf_val cannot be merged; */
        /* keep the previous value, as for any other failed merge. */
        if (val_len < 0 || val_len >= MERGE_BUF_SIZE) {
            return true;
        }

        strncpy(buf_val, val, val_len);
        buf_val[val_len] = '\0';

        if (strstr(protocol_stats_keys_smallest, buf_key) != NULL) {
            ok = protocol_stats_merge_smallest(prev_tokens[VALUE_TOKEN].value,
                                               prev_tokens[VALUE_TOKEN].length,
                                               buf_val, val_len,
                                               buf_val, MERGE_BUF_SIZE);
        } else {
            ok = protocol_stats_merge_sum(prev_tokens[VALUE_TOKEN].value,
                                          prev_tokens[VALUE_TOKEN].length,
                                          buf_val, val_len,
                                          buf_val, MERGE_BUF_SIZE);
        }

        if (ok) {
            int vlen = strlen(buf_val);
            char *hval = protocol_stats_build_hval(prefix, prefix_len,
                                                   name, name_len,
                                                   buf_val, vlen);
            if (hval != NULL) {
                /* NOTE(review): the map key points into the value; */
                /* this assumes genhash_update() stops referencing the */
                /* key inside prev before prev is freed - confirm. */
                genhash_update(merger, hval + prefix_len + 1, hval);

                free(prev);
            }
        }

        /* Note, if we couldn't merge, then just keep */
        /* the previous value. */

        return true;
    }

    return false;
}
/**
 * Observe callback. Called internally by libcouchbase's observe handlers
 *
 * Updates the durability item matching the key in resp:
 *  - a response with a NULL key only signals dset_done_waiting();
 *  - an item already marked done is left untouched;
 *  - an error is recorded on the item, and LCB_SCHEDFAIL_INTERNAL
 *    additionally completes it;
 *  - otherwise the response counter is bumped, a CAS reported by the
 *    master is recorded (a mismatch with the requested CAS completes
 *    the item with LCB_KEY_EEXISTS), and the positive or negative
 *    durability check is run depending on the check_delete option.
 *
 * @param instance unused
 * @param dset     the durability set the response belongs to
 * @param err      status delivered together with the response
 * @param resp     the observe response
 */
void lcb_durability_dset_update(lcb_t instance, lcb_DURSET *dset, lcb_error_t err, const lcb_RESPOBSERVE *resp)
{
    lcb_DURITEM *ent;

    (void)instance;

    /**
     * So we have two counters to decrement. One is the global 'done' counter
     * and the other is the iteration counter.
     *
     * The iteration counter is only decremented when we receive a NULL signal
     * in the callback, whereas the global counter is decremented once, whenever
     * the entry's criteria have been satisfied
     */
    if (resp->key == NULL) {
        dset_done_waiting(dset);
        return;
    }

    if (dset->nentries == 1) {
        /* a single-entry set keeps its entry inline; no table lookup */
        ent = &dset->single.ent;
    } else {
        ent = genhash_find(dset->ht, resp->key, resp->nkey);
    }

    if (ent == NULL) {
        /* the lookup found no entry for this key; there is nothing to update */
        return;
    }

    if (ent->done) {
        /* ignore subsequent errors */
        return;
    }

    if (err != LCB_SUCCESS) {
        RESFLD(ent, rc) = err;

        /* If it's a non-scheduling error then the item will be retried in the
         * next iteration */
        if (err == LCB_SCHEDFAIL_INTERNAL) {
            ent_set_resdone(ent);
        }
        return;
    }

    RESFLD(ent, nresponses)++;

    if (resp->cas && resp->ismaster) {
        RESFLD(ent, cas) = resp->cas;

        /* a requested CAS that differs from the master's CAS ends the item */
        if (ent->reqcas && ent->reqcas != resp->cas) {
            RESFLD(ent, rc) = LCB_KEY_EEXISTS;
            ent_set_resdone(ent);
            return;
        }
    }

    if (DSET_OPTFLD(ent->parent, check_delete)) {
        check_negative_durability(ent, resp);
    } else {
        check_positive_durability(ent, resp);
    }

    if (ent_is_complete(ent)) {
        /* clear any transient errors */
        RESFLD(ent, rc) = LCB_SUCCESS;
        ent_set_resdone(ent);
    }
}