/**
 * Decide whether an entry has met every durability requirement configured
 * on its parent set. Only meaningful for entries that have not failed.
 *
 * @param ent the entry to inspect
 * @return 1 when every configured requirement is met, 0 otherwise
 */
static int ent_is_complete(lcb_durability_entry_t *ent)
{
    lcb_durability_opts_t *opts = &ent->parent->opts;

    /* Nothing counts until the master holds the expected version. */
    if (!RESFLD(ent, exists_master)) {
        return 0;
    }

    /*
     * Persistence requirement: the master itself must have persisted the
     * item, and the total persisted count must reach persist_to.
     */
    if (OPTFLD(opts, persist_to) &&
            (!RESFLD(ent, persisted_master) ||
             RESFLD(ent, npersisted) < OPTFLD(opts, persist_to))) {
        return 0;
    }

    /* Replication requirement: enough replicas must have the item. */
    if (OPTFLD(opts, replicate_to) &&
            RESFLD(ent, nreplicated) < OPTFLD(opts, replicate_to)) {
        return 0;
    }

    return 1;
}
/**
 * Finalize a durability set that was filled through the multi-command
 * context and kick off the first poll.
 *
 * The key (and optional hashkey) bytes of every entry live back-to-back in
 * dset->kvbufs; this walks that buffer and points each entry at its own
 * slice. When the set has a hashtable, each entry is also inserted into it
 * under its key.
 *
 * @param mctx   the multi-command context belonging to the durability set
 * @param cookie opaque pointer stored on the set
 * @return LCB_DUPLICATE_COMMANDS (after destroying the set) when
 *         genhash_update() reports anything other than NEW for a key;
 *         otherwise the result of poll_once()
 */
static lcb_error_t dset_ctx_schedule(lcb_MULTICMD_CTX *mctx, const void *cookie)
{
    lcb_DURSET *dset = CTX_FROM_MULTI(mctx);
    char *cursor = dset->kvbufs.base;
    unsigned idx;

    for (idx = 0; idx < dset->nentries; idx++) {
        lcb_DURITEM *item = dset->entries + idx;

        /* The key comes first in the shared buffer ... */
        RESFLD(item, key) = cursor;
        cursor += RESFLD(item, nkey);

        /* ... optionally followed by the hashkey. */
        if (item->hashkey.contig.nbytes) {
            item->hashkey.contig.bytes = cursor;
            cursor += item->hashkey.contig.nbytes;
        }

        if (!dset->ht) {
            continue;
        }

        if (genhash_update(dset->ht, RESFLD(item, key), RESFLD(item, nkey),
                           item, 0) != NEW) {
            /* The same key was already present in the table. */
            lcb_durability_dset_destroy(dset);
            return LCB_DUPLICATE_COMMANDS;
        }
    }

    dset_ref(dset);
    dset->cookie = cookie;
    dset->nremaining = dset->nentries;
    lcb_aspend_add(&dset->instance->pendops, LCB_PENDTYPE_DURABILITY, dset);

    return poll_once(dset, 1);
}
/**
 * Check whether an item has met the durability requirements of its parent
 * set.
 *
 * @param ent the item to inspect
 * @return 1 when the master has the item and the configured persist_to /
 *         replicate_to thresholds are met, 0 otherwise
 */
int lcbdur_ent_check_done(lcb_DURITEM *ent)
{
    lcb_DURABILITYOPTSv0 *opts = &ent->parent->opts;
    int persist_ok;
    int replicate_ok;

    if (!RESFLD(ent, exists_master)) {
        /* The master does not have the expected version yet. */
        return 0;
    }

    /* A threshold of zero means "no requirement". */
    persist_ok = !opts->persist_to ||
                 (RESFLD(ent, persisted_master) &&
                  !(RESFLD(ent, npersisted) < opts->persist_to));
    if (!persist_ok) {
        return 0;
    }

    replicate_ok = !opts->replicate_to ||
                   !(RESFLD(ent, nreplicated) < opts->replicate_to);

    return replicate_ok ? 1 : 0;
}
/**
 * Reset an item's per-iteration counters, rebuild them from the cached
 * per-server information, and collect the servers that are not yet known to
 * satisfy the criteria.
 *
 * @param ent     the item being prepared
 * @param ixarray output array; receives the pipeline index of every server
 *                whose cached state is missing, stale, or not satisfactory
 * @param nitems  output; number of indices written to ixarray
 */
void lcbdur_prepare_item(lcb_DURITEM *ent, lcb_U16 *ixarray, size_t *nitems)
{
    lcb_DURSET *dset = ent->parent;
    lcb_t instance = dset->instance;
    lcbvb_CONFIG *vbc = LCBT_VBCONFIG(instance);
    size_t pos, n_out = 0, limit = 0;

    RESFLD(ent, rc) = LCB_SUCCESS;
    RESFLD(ent, cas) = 0;
    RESFLD(ent, nreplicated) = 0;
    RESFLD(ent, npersisted) = 0;
    RESFLD(ent, exists_master) = 0;
    RESFLD(ent, persisted_master) = 0;

    if (DSET_OPTFLD(dset, persist_to) != 1 || DSET_OPTFLD(dset, replicate_to) != 0) {
        /* Master plus every replica. */
        limit = LCBT_NREPLICAS(instance) + 1;
    } else {
        /* persist_to == 1 with no replication: only the master matters. */
        limit = 1;
    }

    for (pos = 0; pos < limit; pos++) {
        const int is_master = (pos == 0);
        lcbdur_SERVINFO *info = &ent->sinfo[pos];
        const mc_SERVER *expected;
        int same_server;
        int cur_ix = lcbvb_vbserver(vbc, ent->vbid, pos);

        if (cur_ix < 0) {
            /* No server is mapped at this position; drop any cached state. */
            memset(info, 0, sizeof(*info));
            continue;
        }

        expected = LCBT_GET_SERVER(instance, cur_ix);
        same_server = (expected == info->server);

        if (same_server && server_criteria_satisfied(ent, info, is_master)) {
            /* Cached state is still valid: fold it into the counters. */
            if (is_master) {
                RESFLD(ent, exists_master) = 1;
            } else {
                RESFLD(ent, nreplicated)++;
            }
            if (info->persisted) {
                RESFLD(ent, npersisted)++;
                if (is_master) {
                    RESFLD(ent, persisted_master) = 1;
                }
            }
            continue;
        }

        if (!same_server) {
            /* The cached info belongs to a different server; discard it. */
            memset(info, 0, sizeof(*info));
        }

        /* Otherwise, write the expected server out */
        ixarray[n_out++] = expected->pipeline.index;
    }

    *nitems = n_out;
}
/**
 * Timer callback driving the durability set's state machine.
 *
 * If the set has a deadline (ns_timeout) and it has passed, the pending
 * state is forced to LCBDUR_STATE_TIMEOUT before dispatching.
 *
 * @param sock  unused
 * @param which unused
 * @param arg   the lcb_DURSET this timer belongs to
 */
static void timer_callback(lcb_socket_t sock, short which, void *arg)
{
    lcb_DURSET *dset = arg;
    hrtime_t now = gethrtime();

    (void)sock;
    (void)which;

    if (dset->ns_timeout && dset->ns_timeout < now) {
        dset->next_state = LCBDUR_STATE_TIMEOUT;
    }

    switch (dset->next_state) {
    case LCBDUR_STATE_IGNORE:
        break;

    case LCBDUR_STATE_INIT:
    case LCBDUR_STATE_OBSPOLL:
        poll_once(dset);
        break;

    case LCBDUR_STATE_TIMEOUT: {
        lcb_size_t idx;
        /* Prefer the last recorded error over a generic timeout. */
        lcb_error_t err = LCB_ETIMEDOUT;
        if (dset->lasterr) {
            err = dset->lasterr;
        }

        dset->ns_timeout = 0;
        dset->next_state = LCBDUR_STATE_IGNORE;
        lcb_log(LOGARGS(dset, WARN), "Polling durability timed out!");

        /* Hold a reference while the remaining items are being finished. */
        lcbdur_ref(dset);
        for (idx = 0; idx < DSET_COUNT(dset); idx++) {
            lcb_DURITEM *ent = DSET_ENTRIES(dset) + idx;
            if (!ent->done) {
                /* Do not overwrite an error already stored on the item. */
                if (RESFLD(ent, rc) == LCB_SUCCESS) {
                    RESFLD(ent, rc) = err;
                }
                lcbdur_ent_finish(ent);
            }
        }
        lcbdur_unref(dset);
        break;
    }

    default:
        lcb_assert("unexpected state" && 0);
        break;
    }
}
/**
 * Add one command to the durability set behind a multi-command context.
 *
 * A new item is allocated from the set's entry buffer, zeroed and filled
 * from the command; the key bytes are appended to dset->kvbufs. If the set's
 * procedure table provides an ent_add hook, its result is returned.
 *
 * @param mctx the multi-command context belonging to the durability set
 * @param cmd  the command describing the key to check
 * @return LCB_EMPTY_KEY for an empty key, LCB_CLIENT_ENOMEM if no item could
 *         be allocated, the ent_add hook's result if there is one, otherwise
 *         LCB_SUCCESS
 */
static lcb_error_t dset_ctx_add(lcb_MULTICMD_CTX *mctx, const lcb_CMDBASE *cmd)
{
    lcb_DURSET *dset = CTX_FROM_MULTI(mctx);
    lcb_DURITEM *item;
    int vbucket, server_index;

    if (LCB_KEYBUF_IS_EMPTY(&cmd->key)) {
        return LCB_EMPTY_KEY;
    }

    LCB_SSOBUF_ALLOC(&item, &dset->entries_, lcb_DURITEM);
    if (item == NULL) {
        return LCB_CLIENT_ENOMEM;
    }

    /* Only the vBucket is used below; the server index is not needed here. */
    mcreq_map_key(&dset->instance->cmdq, &cmd->key, &cmd->_hashkey,
                  MCREQ_PKT_BASESIZE, &vbucket, &server_index);

    /* Start from a clean slate, then fill in the request details. */
    memset(item, 0, sizeof(*item));
    item->parent = dset;
    item->vbid = vbucket;
    item->reqcas = cmd->cas;
    RESFLD(item, nkey) = cmd->key.contig.nbytes;

    lcb_string_append(&dset->kvbufs, cmd->key.contig.bytes, cmd->key.contig.nbytes);

    if (!DSET_PROCS(dset)->ent_add) {
        return LCB_SUCCESS;
    }
    return DSET_PROCS(dset)->ent_add(dset, item, (lcb_CMDENDURE*)cmd);
}
/**
 * Finish every entry that is not yet done, stamping it with the given error
 * code, and put the set into the "ignore" state with its timeout cleared.
 *
 * @param dset the durability set to purge
 * @param err  error code stored on each unfinished entry
 */
static void purge_entries(lcb_durability_set_t *dset, lcb_error_t err)
{
    lcb_size_t idx;

    dset->next_state = STATE_IGNORE;
    dset->us_timeout = 0;

    /*
     * ent_set_resdone() may release a reference on the set. Hold our own
     * reference for the duration of the loop so 'dset' cannot be freed
     * while it is still being iterated; any free is deferred to the
     * dset_unref() below.
     */
    dset_ref(dset);

    for (idx = 0; idx < dset->nentries; idx++) {
        lcb_durability_entry_t *ent = &dset->entries[idx];

        if (!ent->done) {
            RESFLD(ent, err) = err;
            ent_set_resdone(ent);
        }
    }

    dset_unref(dset);
}
/**
 * Add one command to the durability set behind a multi-command context.
 *
 * Storage strategy:
 *  - the first entry lives in the embedded dset->single.ent;
 *  - on the second entry a two-element heap array and the key hashtable are
 *    created, and the first entry is copied into the array;
 *  - after that the array grows by a factor of 1.5 when full.
 *
 * The set is only modified once every allocation needed for the new entry
 * has succeeded, so a failed call leaves the set exactly as it was and can
 * be retried.
 *
 * The key bytes, followed by the hashkey bytes if any, are appended to
 * dset->kvbufs.
 * NOTE(review): the result of lcb_string_append() is not checked here;
 * confirm whether it can fail.
 *
 * @param mctx the multi-command context belonging to the durability set
 * @param cmd  the command describing the key to check
 * @return LCB_EMPTY_KEY for an empty key, LCB_CLIENT_ENOMEM on allocation
 *         failure, LCB_SUCCESS otherwise
 */
static lcb_error_t dset_ctx_add(lcb_MULTICMD_CTX *mctx, const lcb_CMDBASE *cmd)
{
    lcb_DURSET *dset = CTX_FROM_MULTI(mctx);
    lcb_DURITEM *ent;

    if (LCB_KEYBUF_IS_EMPTY(&cmd->key)) {
        return LCB_EMPTY_KEY;
    }

    /* ensure we have enough space first */
    if (dset->nentries == 0) {
        /* First entry. Optimize */
        ent = &dset->single.ent;
        dset->entries = &dset->single.ent;

    } else if (dset->nentries == 1) {
        /* More than a single entry: move to heap storage + hashtable. */
        lcb_DURITEM *arr = malloc(2 * sizeof(*arr));
        if (!arr) {
            /* dset->entries still points at the embedded entry. */
            return LCB_CLIENT_ENOMEM;
        }

        dset->ht = lcb_hashtable_nc_new(16);
        if (!dset->ht) {
            /* Do not leak the array or leave a half-upgraded set behind. */
            free(arr);
            return LCB_CLIENT_ENOMEM;
        }

        arr[0] = dset->single.ent;
        dset->entries = arr;
        dset->ents_alloced = 2;
        ent = &arr[1];

    } else if (dset->nentries < dset->ents_alloced) {
        ent = &dset->entries[dset->nentries];

    } else {
        lcb_DURITEM *newarr;
        /* Grow by 1.5x using integer arithmetic (same result as n * 1.5). */
        lcb_SIZE newsize = dset->ents_alloced + dset->ents_alloced / 2;

        newarr = realloc(dset->entries, sizeof(*ent) * newsize);
        if (!newarr) {
            /* The old array is still valid and still owned by the set. */
            return LCB_CLIENT_ENOMEM;
        }
        dset->entries = newarr;
        dset->ents_alloced = newsize;
        ent = &dset->entries[dset->nentries];
    }

    /* ok. now let's initialize the entry..*/
    memset(ent, 0, sizeof (*ent));
    RESFLD(ent, nkey) = cmd->key.contig.nbytes;
    ent->hashkey = cmd->hashkey;
    ent->reqcas = cmd->cas;
    ent->parent = dset;

    lcb_string_append(&dset->kvbufs, cmd->key.contig.bytes, cmd->key.contig.nbytes);
    if (cmd->hashkey.contig.nbytes) {
        lcb_string_append(&dset->kvbufs, cmd->hashkey.contig.bytes, cmd->hashkey.contig.nbytes);
    }

    dset->nentries++;
    return LCB_SUCCESS;
}
/**
 * Record a server's report about an item and finish the item if its
 * durability requirements are now met.
 *
 * @param item  the item being updated
 * @param flags bitmask of LCBDUR_UPDATE_PERSISTED / LCBDUR_UPDATE_REPLICATED;
 *              a value of 0 makes the call a no-op
 * @param srvix index of the server that produced the report
 */
void lcbdur_update_item(lcb_DURITEM *item, int flags, int srvix)
{
    lcb_t instance;
    lcbdur_SERVINFO *sinfo;
    int from_master;

    /* Nothing to record, or the item has already been finished. */
    if (item->done || !flags) {
        return;
    }

    sinfo = lcbdur_ent_getinfo(item, srvix);
    if (sinfo == NULL) {
        lcb_log(LOGARGS(item->parent, DEBUG), "Ignoring response from server %d. Not a master or replica for vBucket %d", srvix, item->vbid);
        return;
    }

    instance = item->parent->instance;
    from_master = (lcbvb_vbmaster(LCBT_VBCONFIG(instance), item->vbid) == srvix);

    /* Replace whatever was cached for this slot with the fresh report. */
    memset(sinfo, 0, sizeof(*sinfo));
    sinfo->server = LCBT_GET_SERVER(instance, srvix);

    if (flags & LCBDUR_UPDATE_PERSISTED) {
        sinfo->persisted = 1;
        RESFLD(item, npersisted)++;
        if (from_master) {
            RESFLD(item, persisted_master) = 1;
        }
    }

    if (flags & LCBDUR_UPDATE_REPLICATED) {
        sinfo->exists = 1;
        /* The master counts as "exists", not as a replica. */
        if (!from_master) {
            RESFLD(item, nreplicated)++;
        } else {
            RESFLD(item, exists_master) = 1;
        }
    }

    if (lcbdur_ent_check_done(item)) {
        RESFLD(item, rc) = LCB_SUCCESS;
        lcbdur_ent_finish(item);
    }
}
/**
 * Populate an entry from a user-supplied durability command.
 *
 * The key is copied into a freshly allocated buffer which is shared by the
 * request and response views of the entry, so it is only allocated once.
 * A hashkey, when supplied, gets its own allocated copy.
 *
 * NOTE(review): the malloc() results are used without a NULL check and this
 * function has no way to report failure; confirm how callers are expected
 * to cope with allocation failure.
 *
 * @param cmd the API command to copy from
 * @param ent the entry to initialize
 */
static void ent_init(const lcb_durability_cmd_t *cmd, lcb_durability_entry_t *ent)
{
    void *keybuf;

    REQFLD(ent, cas) = cmd->v.v0.cas;
    REQFLD(ent, nkey) = cmd->v.v0.nkey;

    keybuf = malloc(REQFLD(ent, nkey));
    REQFLD(ent, key) = keybuf;

    /* The response fields alias the request's key buffer. */
    RESFLD(ent, key) = REQFLD(ent, key);
    RESFLD(ent, nkey) = REQFLD(ent, nkey);

    memcpy(keybuf, cmd->v.v0.key, REQFLD(ent, nkey));

    if (cmd->v.v0.nhashkey) {
        void *hkbuf;

        REQFLD(ent, nhashkey) = cmd->v.v0.nhashkey;
        hkbuf = malloc(cmd->v.v0.nhashkey);
        REQFLD(ent, hashkey) = hkbuf;
        memcpy(hkbuf, cmd->v.v0.hashkey, REQFLD(ent, nhashkey));
    }
}
/**
 * Finalize a durability set that was filled through the multi-command
 * context and switch it into its initial state.
 *
 * Each item is pointed at its own key inside dset->kvbufs (keys were
 * appended back-to-back), the optional 'schedule' hook of the set's
 * procedure table is run, and the set is registered as a pending operation
 * with its deadline computed from the configured timeout.
 *
 * @param mctx   the multi-command context belonging to the durability set
 * @param cookie opaque pointer stored on the set
 * @return LCB_EINVAL if the set is empty, the schedule hook's error if it
 *         fails (the set is destroyed in both cases), otherwise LCB_SUCCESS
 */
static lcb_error_t dset_ctx_schedule(lcb_MULTICMD_CTX *mctx, const void *cookie)
{
    lcb_DURSET *dset = CTX_FROM_MULTI(mctx);
    char *cursor = dset->kvbufs.base;
    size_t idx;

    if (DSET_COUNT(dset) == 0) {
        lcbdur_destroy(dset);
        return LCB_EINVAL;
    }

    for (idx = 0; idx < DSET_COUNT(dset); idx++) {
        lcb_DURITEM *item = DSET_ENTRIES(dset) + idx;

        RESFLD(item, key) = cursor;
        cursor += RESFLD(item, nkey);
    }

    if (DSET_PROCS(dset)->schedule) {
        lcb_error_t rc = DSET_PROCS(dset)->schedule(dset);

        if (rc != LCB_SUCCESS) {
            lcbdur_destroy(dset);
            return rc;
        }
    }

    lcbdur_ref(dset);
    dset->cookie = cookie;
    dset->nremaining = DSET_COUNT(dset);
    /* The configured timeout is converted from microseconds by LCB_US2NS. */
    dset->ns_timeout = gethrtime() + LCB_US2NS(DSET_OPTFLD(dset, timeout));

    lcb_aspend_add(&dset->instance->pendops, LCB_PENDTYPE_DURABILITY, dset);
    lcbdur_switch_state(dset, LCBDUR_STATE_INIT);

    return LCB_SUCCESS;
}
/**
 * Fold one observe response into an entry's counters when the goal is to
 * confirm that the key exists.
 *
 * @param ent the entry the response belongs to
 * @param res the observe response
 */
static void check_positive_durability(lcb_durability_entry_t *ent, const lcb_observe_resp_t *res)
{
    const int from_master = res->v.v0.from_master ? 1 : 0;

    switch (res->v.v0.status) {
    case LCB_OBSERVE_FOUND:
        /* Present but not reported as persisted. */
        if (from_master) {
            RESFLD(ent, exists_master) = 1;
        } else {
            RESFLD(ent, nreplicated)++;
        }
        break;

    case LCB_OBSERVE_PERSISTED:
        RESFLD(ent, npersisted)++;
        if (from_master) {
            RESFLD(ent, exists_master) = 1;
            RESFLD(ent, persisted_master) = 1;
        } else {
            RESFLD(ent, nreplicated)++;
        }
        break;

    case LCB_OBSERVE_NOT_FOUND:
    case LCB_OBSERVE_LOGICALLY_DELETED:
        /*
         * When the master itself reports the key as missing, the key simply
         * does not exist and there is no point in polling further. The same
         * answer from a replica is ignored.
         */
        if (from_master) {
            RESFLD(ent, err) = LCB_KEY_ENOENT;
            ent_set_resdone(ent);
        }
        return;

    default:
        /* Unrecognized status: fail the entry. */
        RESFLD(ent, err) = LCB_EINTERNAL;
        ent_set_resdone(ent);
        break;
    }
}
/**
 * Fold one observe response into an entry's counters when the goal is to
 * confirm that the key has been deleted.
 *
 * @param ent the entry the response belongs to
 * @param res the observe response
 */
static void check_negative_durability(lcb_durability_entry_t *ent, const lcb_observe_resp_t *res)
{
    const int from_master = res->v.v0.from_master ? 1 : 0;

    switch (res->v.v0.status) {
    case LCB_OBSERVE_NOT_FOUND:
        /* The node has no knowledge of the key at all. */
        RESFLD(ent, npersisted)++;
        if (!from_master) {
            RESFLD(ent, nreplicated)++;
        } else {
            RESFLD(ent, exists_master) = 1;
            RESFLD(ent, persisted_master) = 1;
        }
        break;

    case LCB_OBSERVE_LOGICALLY_DELETED:
        /*
         * Removed from cache but not yet deleted from disk.
         * NOTE(review): nreplicated is incremented here even when the
         * response comes from the master; confirm that is intended.
         */
        RESFLD(ent, nreplicated)++;
        if (from_master) {
            RESFLD(ent, exists_master) = 1;
        }
        break;

    case LCB_OBSERVE_FOUND:
    case LCB_OBSERVE_PERSISTED:
        /* The key is still there on this node; nothing to count. */
        return;

    default:
        /* Unrecognized status: fail the entry. */
        RESFLD(ent, err) = LCB_EINTERNAL;
        ent_set_resdone(ent);
        break;
    }
}
/**
 * Observe callback. Called internally by libcouchbase's observe handlers.
 *
 * Two counters are involved. The per-iteration state is only closed when a
 * response with a NULL key arrives (dset_done_waiting() is called), whereas
 * an individual entry is finished once, as soon as its criteria are met or
 * it fails permanently.
 *
 * @param instance unused
 * @param dset     the durability set the response belongs to
 * @param err      status of the observe operation for this response
 * @param resp     the observe response; a NULL key marks the end of the
 *                 current iteration
 */
void lcb_durability_dset_update(lcb_t instance, lcb_DURSET *dset, lcb_error_t err, const lcb_RESPOBSERVE *resp)
{
    lcb_DURITEM *ent;

    (void)instance;

    if (resp->key == NULL) {
        dset_done_waiting(dset);
        return;
    }

    if (dset->nentries == 1) {
        ent = &dset->single.ent;
    } else {
        ent = genhash_find(dset->ht, resp->key, resp->nkey);
    }

    if (ent == NULL) {
        /* The key is not part of this set; nothing to update. */
        return;
    }

    if (ent->done) {
        /* ignore subsequent errors */
        return;
    }

    if (err != LCB_SUCCESS) {
        RESFLD(ent, rc) = err;
        /*
         * LCB_SCHEDFAIL_INTERNAL finishes the entry right away; for any
         * other error the entry stays pending so that it is retried in the
         * next iteration.
         */
        if (err == LCB_SCHEDFAIL_INTERNAL) {
            ent_set_resdone(ent);
        }
        return;
    }

    RESFLD(ent, nresponses)++;

    if (resp->cas && resp->ismaster) {
        RESFLD(ent, cas) = resp->cas;

        /* A requested CAS that differs from the master's is a hard failure. */
        if (ent->reqcas && ent->reqcas != resp->cas) {
            RESFLD(ent, rc) = LCB_KEY_EEXISTS;
            ent_set_resdone(ent);
            return;
        }
    }

    if (DSET_OPTFLD(ent->parent, check_delete)) {
        check_negative_durability(ent, resp);
    } else {
        check_positive_durability(ent, resp);
    }

    if (ent_is_complete(ent)) {
        /* clear any transient errors */
        RESFLD(ent, rc) = LCB_SUCCESS;
        ent_set_resdone(ent);
    }
}
/**
 * Schedule a single sweep of observe requests for every entry of the set
 * that is not yet done.
 *
 * Must not be called while a previous sweep is still in progress
 * (dset->waiting must be 0).
 *
 * @param dset the durability set to poll
 */
static void poll_once(lcb_durability_set_t *dset)
{
    lcb_size_t idx, n_valid = 0;
    lcb_error_t err;

    lcb_assert(dset->waiting == 0);

    /* Keep the set alive for the whole function. */
    dset_ref(dset);

    /* Collect the unfinished entries, clearing their per-iteration state. */
    for (idx = 0; idx < dset->nentries; idx++) {
        struct lcb_durability_entry_st *ent = &dset->entries[idx];

        if (!ent->done) {
            RESFLD(ent, err) = LCB_SUCCESS;
            RESFLD(ent, cas) = 0;
            RESFLD(ent, nreplicated) = 0;
            RESFLD(ent, npersisted) = 0;
            RESFLD(ent, exists_master) = 0;
            RESFLD(ent, persisted_master) = 0;
            dset->valid_entries[n_valid++] = ent;
        }
    }

    lcb_assert(n_valid == dset->nremaining);

    err = lcb_observe_ex(dset->instance, dset, dset->nremaining,
                         (const void * const *)dset->valid_entries,
                         LCB_OBSERVE_TYPE_DURABILITY);

    if (err == LCB_SUCCESS) {
        /* A sweep is now in flight; it holds its own reference. */
        dset->waiting = 1;
        dset_ref(dset);
    } else {
        /* Scheduling failed: finish every pending entry with that error. */
        for (idx = 0; idx < dset->nentries; idx++) {
            lcb_durability_entry_t *ent = &dset->entries[idx];

            if (!ent->done) {
                RESFLD(ent, err) = err;
                ent_set_resdone(ent);
            }
        }
    }

    if (!dset->waiting || !n_valid) {
        purge_entries(dset, LCB_ERROR);
    } else {
        /* Arm the timer for whatever is left until the deadline. */
        lcb_uint32_t us_now = (lcb_uint32_t)(gethrtime() / 1000);
        lcb_uint32_t us_tmo = 1;

        if (dset->us_timeout > us_now) {
            us_tmo = dset->us_timeout - us_now;
        }
        timer_schedule(dset, us_tmo, STATE_TIMEOUT);
    }

    dset_unref(dset);
}
/**
 * Schedule a single sweep of observe requests for every entry of the set
 * that is not yet done.
 *
 * Must not be called while a previous sweep is still in progress
 * (dset->waiting must be 0).
 *
 * If creating the observe context or adding a command fails, the context is
 * discarded and every pending entry is finished with that error.
 *
 * @param dset the durability set to poll
 */
static void poll_once(lcb_DURSET *dset)
{
    unsigned ii, n_added = 0;
    /*
     * Must start out as success: when no entry is added, nothing below
     * assigns 'err' before it is inspected at GT_ERR.
     */
    lcb_error_t err = LCB_SUCCESS;
    lcb_MULTICMD_CTX *mctx = NULL;

    /**
     * We should never be called while an 'iter' operation is still
     * in progress
     */
    lcb_assert(dset->waiting == 0);
    dset_ref(dset);

    mctx = lcb_observe_ctx_dur_new(dset->instance);
    if (!mctx) {
        err = LCB_CLIENT_ENOMEM;
        goto GT_ERR;
    }

    for (ii = 0; ii < dset->nentries; ii++) {
        lcb_CMDOBSERVE cmd = { 0 };
        struct lcb_durability_entry_st *ent = dset->entries + ii;

        if (ent->done) {
            continue;
        }

        /* reset all the per-iteration fields */
        RESFLD(ent, persisted_master) = 0;
        RESFLD(ent, exists_master) = 0;
        RESFLD(ent, npersisted) = 0;
        RESFLD(ent, nreplicated) = 0;
        RESFLD(ent, cas) = 0;
        RESFLD(ent, rc) = LCB_SUCCESS;

        LCB_KREQ_SIMPLE(&cmd.key, RESFLD(ent, key), RESFLD(ent, nkey));
        cmd.hashkey = ent->hashkey;

        err = mctx->addcmd(mctx, (lcb_CMDBASE *)&cmd);
        if (err != LCB_SUCCESS) {
            goto GT_ERR;
        }
        n_added ++;
    }

    lcb_assert(n_added == dset->nremaining);

    if (n_added) {
        lcb_sched_enter(dset->instance);
        mctx->done(mctx, dset);
        lcb_sched_leave(dset->instance);
    } else {
        /* Nothing was queued: discard the unused context instead of leaking it. */
        mctx->fail(mctx);
    }
    /* The context has been handed off or discarded; do not touch it again. */
    mctx = NULL;

    GT_ERR:
    if (err != LCB_SUCCESS) {
        if (mctx) {
            mctx->fail(mctx);
        }

        for (ii = 0; ii < dset->nentries; ii++) {
            lcb_DURITEM *ent = dset->entries + ii;
            if (ent->done) {
                continue;
            }
            RESFLD(ent, rc) = err;
            ent_set_resdone(ent);
        }

    } else {
        dset->waiting = 1;
        dset_ref(dset);
    }

    if (dset->waiting && n_added) {
        /* Arm the timer for whatever is left until the deadline. */
        lcb_uint32_t us_now = (lcb_uint32_t)(gethrtime() / 1000);
        lcb_uint32_t us_tmo;

        if (dset->us_timeout > us_now) {
            us_tmo = dset->us_timeout - us_now;
        } else {
            us_tmo = 1;
        }

        timer_schedule(dset, us_tmo, STATE_TIMEOUT);
    } else {
        purge_entries(dset, LCB_ERROR);
    }

    dset_unref(dset);
}
/**
 * Observe callback. Called internally by libcouchbase's observe handlers.
 *
 * Two counters are involved. The per-iteration state is only closed when a
 * response with a NULL key arrives (dset_done_waiting() is called), whereas
 * an individual entry is finished once, as soon as its criteria are met or
 * it fails permanently.
 *
 * @param instance unused
 * @param dset     the durability set the response belongs to
 * @param err      status of the observe operation for this response
 * @param resp     the observe response; a NULL key marks the end of the
 *                 current iteration
 */
void lcb_durability_dset_update(lcb_t instance, lcb_durability_set_t *dset, lcb_error_t err, const lcb_observe_resp_t *resp)
{
    lcb_durability_entry_t *ent;

    (void)instance;

    if (resp->v.v0.key == NULL) {
        dset_done_waiting(dset);
        return;
    }

    if (dset->nentries == 1) {
        ent = &dset->single.ent;
    } else {
        ent = genhash_find(dset->ht, resp->v.v0.key, resp->v.v0.nkey);
    }

    if (ent == NULL) {
        /* The key is not part of this set; nothing to update. */
        return;
    }

    if (ent->done) {
        /* ignore subsequent errors */
        return;
    }

    if (err != LCB_SUCCESS) {
        /* Remember the error; the entry itself stays pending. */
        RESFLD(ent, err) = err;
        return;
    }

    RESFLD(ent, nresponses)++;

    if (resp->v.v0.cas && resp->v.v0.from_master) {
        RESFLD(ent, cas) = resp->v.v0.cas;

        /* A requested CAS that differs from the master's is a hard failure. */
        if (REQFLD(ent, cas) && REQFLD(ent, cas) != resp->v.v0.cas) {
            RESFLD(ent, err) = LCB_KEY_EEXISTS;
            ent_set_resdone(ent);
            return;
        }
    }

    if (DSET_OPTFLD(ent->parent, check_delete)) {
        check_negative_durability(ent, resp);
    } else {
        check_positive_durability(ent, resp);
    }

    if (ent_is_complete(ent)) {
        /* clear any transient errors */
        RESFLD(ent, err) = LCB_SUCCESS;
        ent_set_resdone(ent);
    }
}