static GCS_BACKEND_OPEN_FN(dummy_open) { long ret = -ENOMEM; dummy_t* dummy = backend->conn; gcs_comp_msg_t* comp; if (!dummy) { gu_debug ("Backend not initialized"); return -EBADFD; } comp = gcs_comp_msg_new (true, false, 0, 1); if (comp) { ret = gcs_comp_msg_add (comp, "11111111-2222-3333-4444-555555555555"); assert (0 == ret); // we have only one member, index = 0 dummy->state = DUMMY_TRANS; // required by gcs_dummy_set_component() ret = gcs_dummy_set_component (backend, comp); // install new component if (ret >= 0) { // queue the message ret = gcs_comp_msg_size(comp); ret = gcs_dummy_inject_msg (backend, comp, ret, GCS_MSG_COMPONENT, GCS_SENDER_NONE); if (ret > 0) ret = 0; } gcs_comp_msg_delete (comp); } gu_debug ("Opened backend connection: %d (%s)", ret, strerror(-ret)); return ret; }
/*!
 * Picks an IST donor candidate purely by node state and cached seqno.
 *
 * A node qualifies when it is not the joiner, its status is at least
 * @p status, group_node_is_stateful() accepts it, its cached seqno is not
 * GCS_SEQNO_ILL and that cached seqno does not exceed ist_seqno + 1.
 * Among qualifying nodes the one with the highest cached seqno wins (on a
 * tie the later index wins). A candidate from the joiner's own segment is
 * always preferred over one from another segment.
 *
 * @return index of the chosen node or -1 if none qualifies.
 */
static int
group_find_ist_donor_by_state (gcs_group_t* const group,
                               int                joiner_idx,
                               gcs_seqno_t        ist_seqno,
                               gcs_node_state_t   status)
{
    gcs_segment_t const home_segment = group->nodes[joiner_idx].segment;

    int best_local  = -1; /* best candidate in the joiner's segment */
    int best_remote = -1; /* best candidate in any other segment    */
    int i;

    for (i = 0; i < group->num; i++)
    {
        gcs_node_t* cand;
        gcs_seqno_t cand_cached;
        int*        best;

        if (i == joiner_idx) continue;

        cand        = &group->nodes[i];
        cand_cached = gcs_node_cached(cand);

        /* eligibility filter */
        if (cand->status < status)                continue;
        if (!group_node_is_stateful(group, cand)) continue;
        if (GCS_SEQNO_ILL == cand_cached)         continue;
        if (cand_cached > (ist_seqno + 1))        continue;

        best = (home_segment == cand->segment) ? &best_local : &best_remote;

        if (-1 == *best ||
            cand_cached >= gcs_node_cached(&group->nodes[*best]))
        {
            *best = i;
        }
    }

    if (best_local >= 0)
    {
        gu_debug("local found. name[%s], seqno[%lld]",
                 group->nodes[best_local].name,
                 (long long)gcs_node_cached(&group->nodes[best_local]));
        return best_local;
    }

    if (best_remote >= 0)
    {
        gu_debug("remote found. name[%s], seqno[%lld]",
                 group->nodes[best_remote].name,
                 (long long)gcs_node_cached(&group->nodes[best_remote]));
        return best_remote;
    }

    gu_debug("not found.");
    return -1;
}
/*!
 * Interrupts the waiter registered for @p seqno in the TO monitor.
 *
 * Runs under to->lock; aborts the process if the lock cannot be taken.
 *
 * Outcome by waiter state:
 *  - WAIT:        waiter is woken with to_wake_waiter() and marked
 *                 INTERRUPTED;
 *  - RELEASED:    waiter is marked INTERRUPTED;
 *  - INTERRUPTED: nothing to do;
 *  - HOLDER, CANCELED: refused with -ERANGE.
 *
 * @return 0 (or the result of to_wake_waiter()) on success,
 *         -EAGAIN if to_get_waiter() yields no waiter for @p seqno,
 *         -ERANGE if @p seqno is below the current TO seqno or the waiter
 *                 is in HOLDER/CANCELED state.
 */
long gu_to_interrupt (gu_to_t *to, gu_seqno_t seqno)
{
    long         rcode = 0;
    long         err;
    to_waiter_t *w;

    assert (seqno >= 0);

    if ((err = gu_mutex_lock (&to->lock))) {
        /* err is long: match the conversion and narrow for strerror() */
        gu_fatal("Mutex lock failed (%ld): %s", err, strerror((int)err));
        abort();
    }

    if (seqno >= to->seqno) {
        if ((w = to_get_waiter (to, seqno)) == NULL) {
            gu_mutex_unlock(&to->lock);
            return -EAGAIN;
        }

        /* we have a valid waiter now */
        switch (w->state) {
        case HOLDER:
            gu_debug ("trying to interrupt in use seqno: seqno = %llu, "
                      "TO seqno = %llu",
                      (unsigned long long)seqno,
                      (unsigned long long)to->seqno);
            rcode = -ERANGE;
            break;
        case CANCELED:
            gu_debug ("trying to interrupt canceled seqno: seqno = %llu, "
                      "TO seqno = %llu",
                      (unsigned long long)seqno,
                      (unsigned long long)to->seqno);
            rcode = -ERANGE;
            break;
        case WAIT:
            gu_debug ("signaling to interrupt wait seqno: seqno = %llu, "
                      "TO seqno = %llu",
                      (unsigned long long)seqno,
                      (unsigned long long)to->seqno);
            rcode = to_wake_waiter (w);
            /* fallthrough: a woken waiter is marked INTERRUPTED as well */
        case RELEASED:
            w->state = INTERRUPTED;
            break;
        case INTERRUPTED:
            gu_debug ("TO waiter interrupt already seqno: seqno = %llu, "
                      "TO seqno = %llu",
                      (unsigned long long)seqno,
                      (unsigned long long)to->seqno);
            break;
        }
    }
    else {
        gu_debug ("trying to interrupt used seqno: cancel seqno = %llu, "
                  "TO seqno = %llu",
                  (unsigned long long)seqno,
                  (unsigned long long)to->seqno);
        rcode = -ERANGE;
    }

    gu_mutex_unlock (&to->lock);
    return rcode;
}
/*!
 * Installs the logging callback.
 *
 * @param callback  function to install; NULL restores gu_log_cb_default.
 * @return always 0.
 */
int
gu_conf_set_log_callback (gu_log_cb_t callback)
{
    if (!callback) {
        gu_debug ("Logging function restored to default");
        gu_log_cb = gu_log_cb_default;
        return 0;
    }

    gu_debug ("Logging function changed by application");
    gu_log_cb = callback;
    return 0;
}
/*!
 * Scans a comma-separated list of donor names and returns the best IST
 * donor among them.
 *
 * Each name is looked up with group_find_ist_donor_by_name(); of all names
 * that resolve to a node, the one with the highest cached seqno is kept
 * (on a tie the later name wins).
 *
 * Scanning stops at the end of the list or at the first empty name (e.g.
 * two consecutive commas or a trailing comma).
 *
 * @param str      donor list, must not be NULL
 * @param str_len  length of the list, used to size the last name
 * @return node index or -1 if no listed name yields a donor.
 */
static int
group_find_ist_donor_by_name_in_string (gcs_group_t* const group,
                                        int                joiner_idx,
                                        const char*        str,
                                        int                str_len,
                                        gcs_seqno_t        ist_seqno,
                                        gcs_node_state_t   status)
{
    assert (str != NULL);

    const char* begin = str;
    const char* end;

    gu_debug("ist_seqno[%lld]", (long long)ist_seqno);

    // return the highest cached seqno node.
    int ret = -1;

    do {
        end = strchr(begin, ',');

        int len = 0;
        if (end == NULL) {
            len = str_len - (int)(begin - str);
        }
        else {
            len = (int)(end - begin);
        }

        assert (len >= 0);
        /* An empty name ends the list. A negative length (a separator
         * found beyond str_len) must never be handed down as a name
         * length, so it is treated the same way in release builds. */
        if (len <= 0) break;

        int idx = group_find_ist_donor_by_name(
            group, joiner_idx, begin, len, ist_seqno, status);

        if (idx >= 0) {
            if (ret == -1 ||
                gcs_node_cached(&group->nodes[idx]) >=
                gcs_node_cached(&group->nodes[ret])) {
                ret = idx;
            }
        }

        /* Advance only when a separator was found: computing end + 1 on a
         * NULL pointer is undefined behaviour. */
        if (end != NULL) begin = end + 1;
    } while (end != NULL);

    if (ret == -1) {
        gu_debug("not found");
    }
    else {
        gu_debug("found. name[%s], seqno[%lld]",
                 group->nodes[ret].name,
                 (long long)gcs_node_cached(&group->nodes[ret]));
    }

    return ret;
}
/*!
 * Handles a GCS_MSG_LAST message: records the sender's last applied seqno
 * and, if the sender is the node that currently defines the group-wide
 * value and it reported something higher, recomputes that value.
 *
 * @return the new group last applied value if it has grown, 0 otherwise.
 */
gcs_seqno_t
gcs_group_handle_last_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    assert (GCS_MSG_LAST        == msg->type);
    assert (sizeof(gcs_seqno_t) == msg->size);

    gcs_seqno_t const reported = gcs_seqno_gtoh(*(gcs_seqno_t*)(msg->buf));

    /* No assert on reported >= group->last_applied here: that would require
     * the application to send last applied messages while holding TO,
     * otherwise threads race. */

    gcs_node_set_last_applied (&group->nodes[msg->sender_idx], reported);

    /* only the node responsible for the current value can move it */
    if (msg->sender_idx != group->last_node) return 0;
    if (reported <= group->last_applied)     return 0;

    gcs_seqno_t const previous = group->last_applied;

    group_redo_last_applied (group);

    if (previous >= group->last_applied) return 0;

    gu_debug ("New COMMIT CUT %lld after %lld from %d",
              (long long)group->last_applied,
              (long long)reported,
              msg->sender_idx);

    return group->last_applied;
}
/*!
 * Raises the maximum log level to GU_LOG_DEBUG.
 * The level is switched before the notice is logged.
 *
 * @return always 0.
 */
int
gu_conf_debug_on ()
{
    gu_log_max_level = GU_LOG_DEBUG;

    gu_debug ("Turning debug on");

    return 0;
}
/*!
 * Sets the new component view.
 *
 * The same component message should be injected in the queue separately
 * (see gcs_dummy_inject_msg()) in order to model different race conditions.
 *
 * The member array is resized when the member count changes, every member
 * ID from @p comp is copied into it, and the backend state becomes
 * DUMMY_PRIM or DUMMY_NON_PRIM depending on gcs_comp_msg_primary().
 *
 * Member IDs longer than a gcs_comp_memb_t slot are truncated to fit
 * (the slot is always NUL-terminated).
 *
 * @return 0 on success, -ENOMEM if the member array could not be resized.
 */
long
gcs_dummy_set_component (gcs_backend_t*        backend,
                         const gcs_comp_msg_t* comp)
{
    dummy_t* dummy   = backend->conn;
    long     new_num = gcs_comp_msg_num (comp);
    long     i;

    assert (dummy->state > DUMMY_CLOSED);

    if (dummy->memb_num != new_num) {
        void* tmp = gu_realloc (dummy->memb, new_num * sizeof(gcs_comp_memb_t));

        if (NULL == tmp) return -ENOMEM;

        dummy->memb     = tmp;
        dummy->memb_num = new_num;
    }

    for (i = 0; i < dummy->memb_num; i++) {
        /* Bounded copy: each slot is exactly sizeof(gcs_comp_memb_t) bytes
         * (see the gu_realloc() above), so never write past it. */
        char* const       slot = (char*)&dummy->memb[i];
        const char* const id   = gcs_comp_msg_id (comp, i);
        size_t            len  = strlen (id);

        if (len >= sizeof(gcs_comp_memb_t)) len = sizeof(gcs_comp_memb_t) - 1;

        memcpy (slot, id, len);
        slot[len] = '\0';
    }

    dummy->my_idx = gcs_comp_msg_self(comp);
    dummy->state  = gcs_comp_msg_primary(comp) ? DUMMY_PRIM : DUMMY_NON_PRIM;

    gu_debug ("Setting state to %s",
              DUMMY_PRIM == dummy->state ? "DUMMY_PRIM" : "DUMMY_NON_PRIM");

    return 0;
}
/* Captures the current path position of `ch` in a mark that can later be
 * handed to gu_choice_reset(). */
GuChoiceMark
gu_choice_mark(GuChoice* ch)
{
	gu_assert(ch->path_idx <= gu_buf_length(ch->path));
	gu_debug("%p@%d: mark", ch, ch->path_idx);

	GuChoiceMark mark = { ch->path_idx };
	return mark;
}
/*!
 * Handles a GCS_MSG_SYNC message.
 *
 * The sender is promoted to GCS_NODE_STATE_SYNCED if it is currently JOINED,
 * or if it is DONOR and last_applied_proto_ver is 0 (#454 - at this layer
 * we jump directly from DONOR to SYNCED). Any other sender state leaves the
 * group untouched and is only logged.
 *
 * @return non-zero if the sender was promoted and is this node itself,
 *         0 otherwise.
 */
int
gcs_group_handle_sync_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    int const   sender_idx = msg->sender_idx;
    gcs_node_t* sender     = &group->nodes[sender_idx];

    assert (GCS_MSG_SYNC == msg->type);

    int may_sync = (GCS_NODE_STATE_JOINED == sender->status);

    if (!may_sync) {
        /* #454 - at this layer we jump directly from DONOR to SYNCED */
        may_sync = (0 == group->last_applied_proto_ver &&
                    GCS_NODE_STATE_DONOR == sender->status);
    }

    if (!may_sync) {
        if (GCS_NODE_STATE_SYNCED == sender->status) {
            gu_debug ("Redundant SYNC message from %d.%d (%s).",
                      sender_idx, sender->segment, sender->name);
        }
        else {
            gu_warn ("SYNC message sender from non-JOINED %d.%d (%s). Ignored.",
                     sender_idx, sender->segment, sender->name);
        }
        return 0;
    }

    sender->status             = GCS_NODE_STATE_SYNCED;
    sender->count_last_applied = true;

    /* from now on this node must be counted */
    group_redo_last_applied (group);

    gu_info ("Member %d.%d (%s) synced with group.",
             sender_idx, sender->segment, sender->name);

    return (sender_idx == group->my_idx);
}
/*!
 * Enables self timestamping of log messages (sets gu_log_self_tstamp).
 *
 * @return always 0.
 */
int
gu_conf_self_tstamp_on ()
{
    gu_debug ("Turning self timestamping on");

    gu_log_self_tstamp = true;

    return 0;
}
/*!
 * Disables self timestamping of log messages (clears gu_log_self_tstamp).
 *
 * @return always 0.
 */
int
gu_conf_self_tstamp_off ()
{
    gu_debug ("Turning self timestamping off");

    gu_log_self_tstamp = false;

    return 0;
}
/*!
 * Lowers the maximum log level to GU_LOG_INFO.
 * The notice is logged before the level is switched.
 *
 * @return always 0.
 */
int
gu_conf_debug_off ()
{
    gu_debug ("Turning debug off");

    gu_log_max_level = GU_LOG_INFO;

    return 0;
}
/* Rewinds `ch` to a position previously captured in `mark`.
 * The mark must not lie ahead of the current position (gu_require). */
void
gu_choice_reset(GuChoice* ch, GuChoiceMark mark)
{
	gu_assert(ch->path_idx <= gu_buf_length(ch->path));

	gu_debug("%p@%d: reset %d", ch, ch->path_idx, mark.path_idx);

	gu_require(mark.path_idx <= ch->path_idx );

	ch->path_idx = mark.path_idx;
}
/*!
 * Finds a donor able to serve IST to the joiner.
 *
 * IST is only attempted when @p ist_seqno is at least the lowest cached
 * seqno in the group plus a safety gap. The gap is 1/128 of the range
 * between the lowest cached seqno and quorum.act_id, capped at 1M (1 << 20).
 * When IST looks safe, the donor is looked up first by the names in @p str
 * (if str_len is non-zero) and then by node state.
 *
 * @p str_version is not used by this function.
 *
 * @return donor index, or -1 to fall back to SST.
 */
static int
group_find_ist_donor (gcs_group_t* const group,
                      int                str_version,
                      int                joiner_idx,
                      const char*        str,
                      int                str_len,
                      gcs_seqno_t        ist_seqno,
                      gcs_node_state_t   status)
{
    int donor = -1;

    gcs_seqno_t conf_seqno          = group->quorum.act_id;
    gcs_seqno_t lowest_cached_seqno = group_lowest_cached_seqno(group);

    if (GCS_SEQNO_ILL == lowest_cached_seqno)
    {
        gu_debug("fallback to sst. lowest_cached_seqno == GCS_SEQNO_ILL");
        return -1;
    }

    gcs_seqno_t const cached_span = conf_seqno - lowest_cached_seqno;

    /* 1.0 / 128 ~= 0.008 */
    gcs_seqno_t gap = cached_span >> 7;

    /* Be sensible and don't reserve more than 1M */
    if (gap >= (1 << 20)) gap = (1 << 20);

    gcs_seqno_t safe_ist_seqno = lowest_cached_seqno + gap;

    gu_debug("ist_seqno[%lld], lowest_cached_seqno[%lld],"
             "conf_seqno[%lld], safe_ist_seqno[%lld]",
             (long long)ist_seqno, (long long)lowest_cached_seqno,
             (long long)conf_seqno, (long long)safe_ist_seqno);

    if (ist_seqno < safe_ist_seqno)
    {
        // unsafe to perform ist.
        gu_debug("fallback to sst. ist_seqno < safe_ist_seqno");
        return -1;
    }

    if (str_len)
    {
        // find ist donor by name.
        donor = group_find_ist_donor_by_name_in_string(
            group, joiner_idx, str, str_len, ist_seqno, status);

        if (donor >= 0) return donor;
    }

    // find ist donor by status.
    donor = group_find_ist_donor_by_state(
        group, joiner_idx, ist_seqno, status);

    return (donor >= 0) ? donor : -1;
}
/*!
 * Redirects log output.
 *
 * @param file  stream to log to; NULL selects stderr.
 * @return always 0.
 */
int
gu_conf_set_log_file (FILE *file)
{
    gu_debug ("Log file changed by application");

    gu_log_file = file ? file : stderr;

    return 0;
}
static GCS_BACKEND_RECV_FN(dummy_recv) { long ret = 0; dummy_t* conn = backend->conn; msg->sender_idx = GCS_SENDER_NONE; msg->type = GCS_MSG_ERROR; assert (conn); /* skip it if we already have popped a message from the queue * in the previous call */ if (gu_likely(DUMMY_CLOSED <= conn->state)) { int err; dummy_msg_t** ptr = gu_fifo_get_head (conn->gc_q, &err); if (gu_likely(ptr != NULL)) { dummy_msg_t* dmsg = *ptr; assert (NULL != dmsg); msg->type = dmsg->type; msg->sender_idx = dmsg->sender_idx; ret = dmsg->len; msg->size = ret; if (gu_likely(dmsg->len <= msg->buf_len)) { gu_fifo_pop_head (conn->gc_q); memcpy (msg->buf, dmsg->buf, dmsg->len); dummy_msg_destroy (dmsg); } else { // supplied recv buffer too short, leave the message in queue memcpy (msg->buf, dmsg->buf, msg->buf_len); gu_fifo_release (conn->gc_q); } } else { ret = -EBADFD; // closing gu_debug ("Returning %d: %s", ret, strerror(-ret)); } } else { ret = -EBADFD; } return ret; }
/*!
 * Handles a state exchange message.
 *
 * Messages are only processed while the group is in
 * GCS_GROUP_WAIT_STATE_MSG. A parsed message whose UUID matches the current
 * state exchange UUID is passed to gcs_node_record_state() for the sender
 * and group_post_state_exchange() is run; a message with a different UUID
 * is considered stray and destroyed. Unparseable messages are logged and
 * dropped.
 *
 * @return the group state after processing.
 */
gcs_group_state_t
gcs_group_handle_state_msg (gcs_group_t* group, const gcs_recv_msg_t* msg)
{
    if (GCS_GROUP_WAIT_STATE_MSG == group->state) {

        gcs_state_msg_t* state = gcs_state_msg_read (msg->buf, msg->size);

        if (state) {

            const gu_uuid_t* state_uuid = gcs_state_msg_uuid (state);

            if (!gu_uuid_compare(&group->state_uuid, state_uuid)) {

                gu_info ("STATE EXCHANGE: got state msg: "GU_UUID_FORMAT
                         " from %d (%s)", GU_UUID_ARGS(state_uuid),
                         msg->sender_idx, gcs_state_msg_name(state));

                if (gu_log_debug) group_print_state_debug(state);

                gcs_node_record_state (&group->nodes[msg->sender_idx], state);
                group_post_state_exchange (group);
            }
            else {
                /* %ld needs a long argument: cast explicitly so the call is
                 * correct whatever integer type sender_idx has */
                gu_debug ("STATE EXCHANGE: stray state msg: "GU_UUID_FORMAT
                          " from node %ld (%s), current state UUID: "
                          GU_UUID_FORMAT,
                          GU_UUID_ARGS(state_uuid),
                          (long)msg->sender_idx, gcs_state_msg_name(state),
                          GU_UUID_ARGS(&group->state_uuid));

                if (gu_log_debug) group_print_state_debug(state);

                gcs_state_msg_destroy (state);
            }
        }
        else {
            /* the node name was passed but never printed: add the missing
             * conversion */
            gu_warn ("Could not parse state message from node %d (%s)",
                     msg->sender_idx, group->nodes[msg->sender_idx].name);
        }
    }

    return group->state;
}
/* Produces the next choice among `n_choices` alternatives and advances the
 * path position by one.
 *
 * If the path already holds an entry at the current position, that entry is
 * used; otherwise `n_choices` is pushed onto the path and used. With the
 * entry value i, the result is n_choices - i, or -1 when i is 0.
 *
 * Returns -1 immediately (without advancing) when n_choices is 0. */
int
gu_choice_next(GuChoice* ch, int n_choices)
{
	gu_assert(n_choices >= 0);
	gu_assert(ch->path_idx <= gu_buf_length(ch->path));

	if (n_choices == 0) {
		return -1;
	}

	int i;
	if (gu_buf_length(ch->path) <= ch->path_idx) {
		/* nothing recorded at this depth yet: start a new entry */
		gu_buf_push(ch->path, size_t, n_choices);
		i = n_choices;
	} else {
		/* replay the recorded entry */
		i = (int) gu_buf_get(ch->path, size_t, ch->path_idx);
		gu_assert(i <= n_choices);
	}

	int ret = -1;
	if (i != 0) {
		ret = n_choices - i;
	}

	gu_debug("%p@%d: %d", ch, ch->path_idx, ret);
	ch->path_idx++;
	return ret;
}
/*!
 * Handles a component (membership) message.
 *
 * Steps, in order:
 *  1. sanity-check the message (group_check_comp_msg());
 *  2. unless this is a self-leave message (negative own index), allocate the
 *     new node array and, if the group is currently PRIMARY, save own node
 *     status in group->prim_state;
 *  3. for a primary component: either keep going from the previous primary
 *     configuration, mark own node as bootstrapping, or - for the very first
 *     single-node component - bootstrap a brand new primary configuration;
 *     for a non-primary component call group_go_non_primary();
 *  4. move the context of nodes known from the old configuration into the
 *     new node array, free the old array and install the new one;
 *  5. for a primary component or bootstrap: reset nodes and wait for the
 *     state exchange UUID.
 *
 * @return the resulting group state; (gcs_group_state_t)-ENOMEM if the new
 *         node array could not be allocated.
 */
gcs_group_state_t
gcs_group_handle_comp_msg (gcs_group_t* group, const gcs_comp_msg_t* comp)
{
    long        new_idx, old_idx;
    gcs_node_t* new_nodes = NULL;
    ulong       new_memb  = 0;

    const bool prim_comp     = gcs_comp_msg_primary  (comp);
    const bool bootstrap     = gcs_comp_msg_bootstrap(comp);
    const long new_my_idx    = gcs_comp_msg_self     (comp);
    const long new_nodes_num = gcs_comp_msg_num      (comp);

    group_check_comp_msg (prim_comp, new_my_idx, new_nodes_num);

    if (new_my_idx >= 0) {
        gu_info ("New COMPONENT: primary = %s, bootstrap = %s, my_idx = %ld, "
                 "memb_num = %ld",
                 prim_comp ? "yes" : "no", bootstrap ? "yes" : "no",
                 new_my_idx, new_nodes_num);

        new_nodes = group_nodes_init (group, comp);

        if (!new_nodes) {
            gu_fatal ("Could not allocate memory for %ld-node component.",
                      gcs_comp_msg_num (comp));
            assert(0);
            return (gcs_group_state_t)-ENOMEM;
        }

        if (GCS_GROUP_PRIMARY == group->state) {
            gu_debug ("#281: Saving %s over %s",
                      gcs_node_state_to_str(group->nodes[group->my_idx].status),
                      gcs_node_state_to_str(group->prim_state));
            group->prim_state = group->nodes[group->my_idx].status;
        }
    }
    else {
        // Self-leave message
        gu_info ("Received self-leave message.");
        assert (0 == new_nodes_num);
        assert (!prim_comp);
    }

    if (prim_comp) {
        /* Got PRIMARY COMPONENT - Hooray! */
        assert (new_my_idx >= 0);

        if (group->state == GCS_GROUP_PRIMARY) {
            /* we come from previous primary configuration, relax */
        }
        else if (bootstrap) {
            /* Is there need to initialize something else in this case? */
            group->nodes[group->my_idx].bootstrap = true;
        }
        else {
            const bool first_component =
#ifndef GCS_CORE_TESTING
            (1 == group->num) && !strcmp (NODE_NO_ID, group->nodes[0].id);
#else
            (1 == group->num);
#endif

            if (1 == new_nodes_num && first_component) {
                /* bootstrap new configuration */
                assert (GCS_GROUP_NON_PRIMARY == group->state);
                assert (1 == group->num);
                assert (0 == group->my_idx);

                // This bootstraps initial primary component for state exchange
                gu_uuid_generate (&group->prim_uuid, NULL, 0);
                group->prim_seqno = 0;
                group->prim_num   = 1;
                group->state      = GCS_GROUP_PRIMARY;

                if (group->act_id < 0) {
                    // no history provided: start a new one
                    group->act_id = GCS_SEQNO_NIL;
                    gu_uuid_generate (&group->group_uuid, NULL, 0);
                    gu_info ("Starting new group from scratch: "GU_UUID_FORMAT,
                             GU_UUID_ARGS(&group->group_uuid));
                }

                // the following should be removed under #474
                group->nodes[0].status = GCS_NODE_STATE_JOINED;

                /* initialize node ID to the one given by the backend - this
                 * way we'll be recognized as coming from prev. conf. in node
                 * array remap below */
                strncpy ((char*)group->nodes[0].id, new_nodes[0].id,
                         sizeof (new_nodes[0].id) - 1);
                /* strncpy() never touches the last byte here, so terminate
                 * explicitly: the remap below runs strcmp() on this ID.
                 * Both IDs belong to gcs_node_t, so the sizes are equal. */
                ((char*)group->nodes[0].id)[sizeof (new_nodes[0].id) - 1] =
                    '\0';

                group->nodes[0].segment = new_nodes[0].segment;
            }
        }
    }
    else {
        group_go_non_primary (group);
    }

    /* Remap old node array to new one to preserve action continuity */
    for (new_idx = 0; new_idx < new_nodes_num; new_idx++) {
        /* find member index in old component by unique member id */
        for (old_idx = 0; old_idx < group->num; old_idx++) {
            // just scan through old group
            if (!strcmp(group->nodes[old_idx].id, new_nodes[new_idx].id)) {
                /* the node was in previous configuration with us */
                /* move node context to new node array */
                gcs_node_move (&new_nodes[new_idx], &group->nodes[old_idx]);
                break;
            }
        }
        /* if wasn't found in new configuration, new member -
         * need to do state exchange */
        new_memb |= (old_idx == group->num);
    }

    /* free old nodes array */
    group_nodes_free (group);

    group->my_idx = new_my_idx;
    group->num    = new_nodes_num;
    group->nodes  = new_nodes;

    if (gcs_comp_msg_primary(comp) || bootstrap) {
        /* TODO: for now pretend that we always have new nodes and perform
         * state exchange because old states can carry outdated node status.
         * (also protocol voting needs to be redone)
         * However this means aborting ongoing actions. Find a way to avoid
         * this extra state exchange. Generate new state messages on behalf
         * of other nodes? see #238 */
        new_memb = true;

        /* if new nodes joined, reset ongoing actions and state messages */
        if (new_memb) {
            group_nodes_reset (group);
            group->state      = GCS_GROUP_WAIT_STATE_UUID;
            group->state_uuid = GU_UUID_NIL; // prepare for state exchange
        }
        else {
            if (GCS_GROUP_PRIMARY == group->state) {
                /* since we don't have any new nodes since last PRIMARY,
                   we skip state exchange */
                group_post_state_exchange (group);
            }
        }

        group_redo_last_applied (group);
    }

    return group->state;
}
/* Rebuilds the map's storage with room for more entries.
 *
 * New key (and, if the map stores values, value) buffers are allocated for
 * at least n_occupied * 4 / 3 + 1 entries (minimum 11), all slots are marked
 * empty, and every occupied entry of the old storage is re-inserted with
 * gu_map_insert(). The old buffers are freed afterwards. */
static void
gu_map_resize(GuMap* map)
{
	GuMapData* data = &map->data;
	GuMapData prev = *data;

	size_t const key_size = map->key_size;
	size_t const value_size = map->value_size;

	size_t wanted =
		gu_twin_prime_sup(GU_MAX(11, map->data.n_occupied * 4 / 3 + 1));

	/* fresh key storage */
	size_t key_bytes = 0;
	data->keys = gu_mem_buf_alloc(wanted * key_size, &key_bytes);

	/* fresh, zeroed value storage (only if the map has values at all) */
	size_t value_bytes = 0;
	if (value_size) {
		data->values = gu_mem_buf_alloc(wanted * value_size,
						&value_bytes);
		memset(data->values, 0, value_bytes);
	}

	/* the allocator may hand out more than requested: use what fits */
	size_t fits;
	if (value_size) {
		fits = GU_MIN(key_bytes / key_size, value_bytes / value_size);
	} else {
		fits = key_bytes / key_size;
	}
	data->n_entries = gu_twin_prime_inf(fits);

	/* mark every key slot as empty */
	switch (map->kind) {
	case GU_MAP_GENERIC:
	case GU_MAP_WORD:
		memset(data->keys, 0, key_bytes);
		break;
	case GU_MAP_ADDR:
		for (size_t k = 0; k < data->n_entries; k++) {
			((const void**)data->keys)[k] = NULL;
		}
		break;
	default:
		gu_impossible();
	}

	gu_assert(data->n_entries > data->n_occupied);
	gu_debug("Resized to %d entries", data->n_entries);

	data->n_occupied = 0;
	data->zero_idx = SIZE_MAX;

	/* re-insert everything that lived in the old storage */
	for (size_t k = 0; k < prev.n_entries; k++) {
		if (gu_map_entry_is_free(map, &prev, k)) {
			continue;
		}

		void* key = &prev.keys[k * key_size];
		if (map->kind == GU_MAP_ADDR) {
			/* address maps store the key pointer itself */
			key = *(void**)key;
		}
		void* value = &prev.values[k * value_size];

		memcpy(gu_map_insert(map, key), value, map->value_size);
	}

	gu_mem_buf_free(prev.keys);
	if (value_size) {
		gu_mem_buf_free(prev.values);
	}
}
/*!
 * Processes state messages and sets group parameters accordingly.
 *
 * Returns without doing anything until every node has a state message (and,
 * if a state exchange is in progress, one that carries the current state
 * exchange UUID). Then the quorum is computed, node statuses are updated,
 * the group becomes PRIMARY or NON-PRIMARY according to the quorum, the
 * result is logged and group_check_donor() is run.
 *
 * Aborts the process on a negative quorum version.
 */
static void
group_post_state_exchange (gcs_group_t* group)
{
    const gcs_state_msg_t* states[group->num];
    gcs_state_quorum_t*    quorum = &group->quorum;
    bool new_exchange = gu_uuid_compare (&group->state_uuid, &GU_UUID_NIL);
    long i;

    /* Collect state messages from nodes. */
    /* Looping here every time is suboptimal, but simply counting state
     * messages is not straightforward too: nodes may disappear, so the final
     * count may include messages from the disappeared nodes.
     * Let's put it this way: looping here is reliable and not that
     * expensive. */
    for (i = 0; i < group->num; i++) {
        states[i] = group->nodes[i].state_msg;
        if (NULL == states[i] ||
            (new_exchange &&
             gu_uuid_compare (&group->state_uuid,
                              gcs_state_msg_uuid(states[i]))))
            return; // not all states from THIS state exch. received, wait
    }

    gu_debug ("STATE EXCHANGE: "GU_UUID_FORMAT" complete.",
              GU_UUID_ARGS(&group->state_uuid));

    gcs_state_msg_get_quorum (states, group->num, quorum);

    if (quorum->version >= 0) {
        if (quorum->version < 2) {
            group->last_applied_proto_ver = 0;
        }
        else {
            group->last_applied_proto_ver = 1;
        }
    }
    else {
        gu_fatal ("Negative quorum version: %d", quorum->version);
        abort();
    }

    // Update each node state based on quorum outcome:
    // is it up to date, does it need SST and stuff
    for (i = 0; i < group->num; i++) {
        gcs_node_update_status (&group->nodes[i], quorum);
    }

    if (quorum->primary) {
        // primary configuration
        if (new_exchange) {
            // new state exchange happened
            group->state      = GCS_GROUP_PRIMARY;
            group->act_id     = quorum->act_id;
            group->conf_id    = quorum->conf_id + 1;
            group->group_uuid = quorum->group_uuid;
            group->prim_uuid  = group->state_uuid;
            group->state_uuid = GU_UUID_NIL;
        }
        else {
            // no state exchange happened, processing old state messages
            assert (GCS_GROUP_PRIMARY == group->state);
            group->conf_id++;
        }

        group->prim_seqno = group->conf_id;
        group->prim_num   = 0;

        for (i = 0; i < group->num; i++) {
            group->prim_num += gcs_node_is_joined (group->nodes[i].status);
        }

        assert (group->prim_num > 0);
    }
    else {
        // non-primary configuration
        group_go_non_primary (group);
    }

    /* Every variadic argument is cast to exactly the type its conversion
     * expects; the format string itself is unchanged. quorum->version is
     * known to be non-negative at this point (see the check above). */
    gu_info ("Quorum results:"
             "\n\tversion    = %u,"
             "\n\tcomponent  = %s,"
             "\n\tconf_id    = %lld,"
             "\n\tmembers    = %d/%d (joined/total),"
             "\n\tact_id     = %lld,"
             "\n\tlast_appl. = %lld,"
             "\n\tprotocols  = %d/%d/%d (gcs/repl/appl),"
             "\n\tgroup UUID = "GU_UUID_FORMAT,
             (unsigned int)quorum->version,
             quorum->primary ? "PRIMARY" : "NON-PRIMARY",
             (long long)quorum->conf_id,
             (int)group->prim_num, (int)group->num,
             (long long)quorum->act_id,
             (long long)group->last_applied,
             (int)quorum->gcs_proto_ver, (int)quorum->repl_proto_ver,
             (int)quorum->appl_proto_ver,
             GU_UUID_ARGS(&quorum->group_uuid));

    group_check_donor(group);
}
/*!
 * Handle action fragment
 *
 * Unless a whole action is returned, contents of act is undefined
 *
 * In order to optimize branch prediction, gu_likely macros are used and
 * if/else blocks are ordered and nested according to branch probability.
 *
 * The fragment passed in is never modified, not even on the error paths.
 *
 * @return 0              - success,
 *         size of action - success, full action received,
 *         negative       - error (-EPROTO on an out-of-order fragment).
 *
 * TODO: this function is too long, figure out a way to factor it into several
 *       smaller ones. Note that it is called for every GCS_MSG_ACTION message
 *       so it should be optimal.
 */
ssize_t
gcs_defrag_handle_frag (gcs_defrag_t*         df,
                        const gcs_act_frag_t* frg,
                        struct gcs_act*       act,
                        bool                  local)
{
    if (df->received) {
        /* another fragment of existing action */

        df->frag_no++;

        /* detect possible error condition */
        if (gu_unlikely((df->sent_id != frg->act_id) ||
                        (df->frag_no != frg->frag_no))) {
            if (local && df->reset &&
                (df->sent_id == frg->act_id) && (0 == frg->frag_no)) {
                /* df->sent_id was aborted halfway and is being taken care of
                 * by the sender thread. Forget about it.
                 * Reinit counters and continue with the new action.
                 * Note that for local actions no memory allocation is made.*/
                gu_debug ("Local action %lld reset.", frg->act_id);
                df->frag_no  = 0;
                df->received = 0;
                df->tail     = df->head;
                df->reset    = false;

                if (df->size != frg->act_size) {

                    df->size = frg->act_size;

#ifndef GCS_FOR_GARB
                    if (df->cache !=NULL) {
                        gcache_free (df->cache, df->head);
                    }
                    else {
                        free ((void*)df->head);
                    }

                    DF_ALLOC();
#endif /* GCS_FOR_GARB */
                }
            }
            else {
                gu_error ("Unordered fragment received. Protocol error.");
                gu_error ("Expected: %llu:%ld, received: %llu:%ld",
                          df->sent_id, df->frag_no, frg->act_id, frg->frag_no);
                /* the precision argument of %.*s must be an int */
                gu_error ("Contents: '%.*s'", (int)frg->frag_len,
                          (const char*)frg->frag);
                df->frag_no--; // revert counter in hope that we get good frag
                assert(0);
                return -EPROTO;
            }
        }
    }
    else {
        /* new action */
        if (gu_likely(0 == frg->frag_no)) {

            df->size    = frg->act_size;
            df->sent_id = frg->act_id;
            df->reset   = false;

#ifndef GCS_FOR_GARB
            DF_ALLOC();
#else
            /* we don't store actions locally at all */
            df->head = NULL;
            df->tail = df->head;
#endif
        }
        else {
            /* not a first fragment */
            if (!local && df->reset) {
                /* can happen after configuration change,
                   just ignore this message calmly */
                gu_debug ("Ignoring fragment %lld:%ld after action reset",
                          frg->act_id, frg->frag_no);
                return 0;
            }
            else {
                /* Print the fragment with a bounded %.*s instead of writing
                 * a terminator into the caller's const buffer (which also
                 * wrote out of bounds for a zero-length fragment). */
                gu_error ("Unordered fragment received. Protocol error.");
                gu_error ("Expected: any:0(first), received: %lld:%ld",
                          frg->act_id, frg->frag_no);
                gu_error ("Contents: '%.*s', local: %s, reset: %s",
                          (int)frg->frag_len, (const char*)frg->frag,
                          local ? "yes" : "no", df->reset ? "yes" : "no");
                assert(0);
                return -EPROTO;
            }
        }
    }

    df->received += frg->frag_len;
    assert (df->received <= df->size);

#ifndef GCS_FOR_GARB
    assert (df->tail);
    memcpy (df->tail, frg->frag, frg->frag_len);
    df->tail += frg->frag_len;
#else
    /* we skip memcpy since have not allocated any buffer */
    assert (NULL == df->tail);
    assert (NULL == df->head);
#endif

    if (df->received == df->size) {
        /* whole action assembled: hand it over and start afresh */
        act->buf     = df->head;
        act->buf_len = df->received;
        gcs_defrag_init (df, df->cache);
        return act->buf_len;
    }
    else {
        return 0;
    }
}