/*
 * Handle a MISSING_ACK message, i.e. a peer supplying a record for a view
 * stamp this node was missing.
 *
 * comp: consensus component of the local node.
 * data: the received message; read as a missing_ack whose payload
 *       (msg->data) is interpreted as a request_record.
 *
 * The message is ignored when the view stamp is not above
 * highest_committed_vs, or when a record already exists under the
 * corresponding key. Otherwise a fresh copy of the record (with a new
 * created_time) is stored and try_to_execute() is invoked.
 */
static void handle_missing_ack(consensus_component* comp,void* data){
    missing_ack* msg = data;
    request_record* origin = (request_record*)msg->data;
    SYS_LOG(comp,"Node %d Handle Missing Ack From Node %d.\n",
            comp->node_id,msg->node_id);

    // Nothing to fill in if we have already committed up to this view stamp.
    if(view_stamp_comp(comp->highest_committed_vs,&msg->missing_vs)>=0){
        return;
    }

    db_key_type record_no = vstol(&msg->missing_vs);
    request_record* record_data = NULL;
    size_t data_size;
    retrieve_record(comp->db_ptr,sizeof(record_no),&record_no,&data_size,(void**)&record_data);
    // The record is already present locally: do not overwrite it.
    if(record_data!=NULL){
        return;
    }

    // Zero-initialised so that header fields not set below are persisted with
    // a defined value instead of uninitialised heap bytes.
    record_data = calloc(1,REQ_RECORD_SIZE(origin));
    if(record_data==NULL){
        return;
    }
    gettimeofday(&record_data->created_time,NULL);
    record_data->data_size = origin->data_size;
    memcpy(record_data->data,origin->data,origin->data_size);
    store_record(comp->db_ptr,sizeof(record_no),&record_no,REQ_RECORD_SIZE(record_data),record_data);
    // The staging buffer is ours to release; handle_accept_req in this file
    // likewise frees its buffer right after store_record().
    // NOTE(review): this assumes store_record() copies the bytes - confirm.
    free(record_data);

    try_to_execute(comp);
}
/*
 * Deliver the request identified by view stamp `vs` to the user callback
 * (comp->ucb).
 *
 * Two delivery modes exist. When comp->deliver_mode is 1 the record is looked
 * up in the database and its payload is handed to the callback. In any other
 * mode only the record key is handed over, which speeds up delivery: the
 * receiving proxy then bears the cost of the database lookup itself.
 *
 * A missing callback is logged. In mode 1, a missing record is silently
 * skipped.
 */
static void deliver_msg_data(consensus_component* comp,view_stamp* vs){
    db_key_type key = vstol(vs);

    if(comp->deliver_mode!=1){
        // Key-only delivery.
        STAT_LOG(comp,"Request %lu.\n",key);
        if(comp->ucb==NULL){
            SYS_LOG(comp,"No Such Call Back Func.\n");
            return;
        }
        comp->ucb(sizeof(db_key_type),&key,comp->up_para);
        return;
    }

    // Payload delivery: fetch the record first.
    request_record* record = NULL;
    size_t record_len = 0;
    retrieve_record(comp->db_ptr,sizeof(db_key_type),&key,&record_len,(void**)&record);
    SYS_LOG(comp,"Node %d Deliver View Stamp %u : %u To The User.\n",
            comp->node_id,vs->view_id,vs->req_id);
    STAT_LOG(comp,"Request:%lu\n",key);

    if(record==NULL){
        return;
    }
    if(comp->ucb==NULL){
        SYS_LOG(comp,"No Such Call Back Func.\n");
        return;
    }
    comp->ucb(record->data_size,record->data,comp->up_para);
}
/*
 * Handle an ACCEPT_ACK message sent by a peer.
 *
 * comp: consensus component of the local node.
 * data: the received message, read as an accept_ack.
 *
 * Only a leader processes acks; on any other node the ack is ignored, since
 * it must stem from a previous view. Acks whose view stamp is not above
 * highest_committed_vs are ignored as well. try_to_execute() runs in every
 * case, including the ignored ones.
 */
static void handle_accept_ack(consensus_component* comp,void* data){
    accept_ack* ack = data;
    SYS_LOG(comp,"Node %d Handle Accept Ack From Node %u.\n",
            comp->node_id,ack->node_id);

    if(comp->my_role==LEADER &&
       view_stamp_comp(&ack->msg_vs,comp->highest_committed_vs)>0){
        db_key_type key = vstol(&ack->msg_vs);
        request_record* record = NULL;
        size_t record_len;
        retrieve_record(comp->db_ptr,sizeof(key),&key,&record_len,(void**)&record);
        if(record==NULL){
            SYS_LOG(comp,"Received Ack To Non-Exist Record %lu.\n",
                    key);
        }else{
            update_record(record,ack->node_id);
            // Whether the write-back succeeds is deliberately not checked: a
            // failed update is equivalent to the ack having been lost.
            store_record(comp->db_ptr,sizeof(key),&key,REQ_RECORD_SIZE(record),record);
        }
    }

    try_to_execute(comp);
}
/*
 * Called when a connection with descriptor `fd` has been accepted.
 *
 * fd:     descriptor of the accepted connection.
 * ev_mgr: event manager of the local node.
 *
 * Calls made from threads listed in ev_mgr->excluded_threads are ignored.
 *
 * On the leader a new leader_tcp_pair keyed by `fd` is added to
 * leader_tcp_map and a P_TCP_CONNECT operation is submitted through rsm_op().
 * On any other node the record at ev_mgr->cur_rec is fetched and the matching
 * replica_tcp_pair (looked up by the record's clt_id) is bound to `fd` and
 * marked accepted.
 */
void mgr_on_accept(int fd, event_manager* ev_mgr)
{
    if (internal_threads(ev_mgr->excluded_threads, pthread_self())) {
        return;
    }

    uint32_t leader_id = get_leader_id(ev_mgr->con_node);
    if (ev_mgr->node_id == leader_id) {
        // calloc gives the zero-filled pair the original built with
        // malloc+memset, and lets us bail out on allocation failure instead of
        // handing NULL to memset.
        leader_tcp_pair* new_conn = calloc(1, sizeof(leader_tcp_pair));
        if (new_conn == NULL) {
            return;
        }
        new_conn->key = fd;
        HASH_ADD_INT(ev_mgr->leader_tcp_map, key, new_conn);
        rsm_op(ev_mgr->con_node, 0, NULL, P_TCP_CONNECT, &new_conn->vs);
        return;
    }

    request_record* retrieve_data = NULL;
    size_t data_size;
    // Spin until the record at cur_rec can be read.
    // NOTE(review): this loop never terminates if the record never appears.
    while (retrieve_data == NULL){
        retrieve_record(ev_mgr->db_ptr, sizeof(db_key_type), &ev_mgr->cur_rec, &data_size, (void**)&retrieve_data);
    }

    replica_tcp_pair* ret = NULL;
    HASH_FIND(hh, ev_mgr->replica_tcp_map, &retrieve_data->clt_id, sizeof(view_stamp), ret);
    // HASH_FIND leaves ret NULL when no entry matches; there is then nothing
    // to bind this descriptor to.
    if (ret == NULL) {
        return;
    }
    ret->s_p = fd;
    ret->accepted = 1;
}
// leader has another responsibility to update the highest request that can be executed, // and if the leader is also synchronous, it can execute the record in this stage static void leader_try_to_execute(consensus_component* comp){ SYS_LOG(comp, "highest_seen_req_id %lu.\n", comp->highest_seen_vs->req_id); SYS_LOG(comp, "highest_seen_view_id %lu.\n", comp->highest_seen_vs->view_id); SYS_LOG(comp, "highest_to_commit_vs_req_id %lu.\n", comp->highest_to_commit_vs->req_id); SYS_LOG(comp, "highest_to_commit_vs_view_id %lu.\n", comp->highest_to_commit_vs->view_id); db_key_type start; db_key_type end = vstol(comp->highest_seen_vs);; size_t data_size; view_stamp temp_boundary; view_boundary* boundary_record = NULL; if(comp->highest_seen_vs->view_id != comp->highest_to_commit_vs->view_id){ // address the boundary assert(comp->highest_to_commit_vs->view_id + 1 == comp->highest_seen_vs->view_id); comp->highest_to_commit_vs->view_id += 1; comp->highest_to_commit_vs->req_id = 0; comp->highest_committed_vs->view_id = comp->highest_to_commit_vs->view_id; comp->highest_committed_vs->req_id = comp->highest_to_commit_vs->req_id; start = vstol(comp->highest_to_commit_vs); } else{ start = vstol(comp->highest_to_commit_vs)+1; } int exec_flag = (!view_stamp_comp(comp->highest_committed_vs,comp->highest_to_commit_vs)); request_record* record_data = NULL; SYS_LOG(comp,"The Leader Tries To Execute.\n"); SYS_LOG(comp,"The Start Value Is %lu.\n",start); SYS_LOG(comp,"The End Value Is %lu.\n",end); for(db_key_type index=start;index<=end;index++){ retrieve_record(comp->db_ptr,sizeof(index),&index,&data_size,(void**)&record_data); assert(record_data!=NULL && "The Record Should Be Inserted By The Node Itself!"); if(reached_quorum(record_data,comp->group_size)){ view_stamp temp = ltovs(index); SYS_LOG(comp,"Node %d : View Stamp %u : %u Has Reached Quorum.\n", comp->node_id,temp.view_id,temp.req_id); SYS_LOG(comp,"Before Node %d Inc Execute %u : %u.\n", comp->node_id, 
comp->highest_to_commit_vs->view_id, comp->highest_to_commit_vs->req_id); view_stamp_inc(comp->highest_to_commit_vs); SYS_LOG(comp,"After Node %d Inc Execute %u : %u.\n", comp->node_id, comp->highest_to_commit_vs->view_id, comp->highest_to_commit_vs->req_id); if(exec_flag){ view_stamp vs = ltovs(index); deliver_msg_data(comp,&vs); view_stamp_inc(comp->highest_committed_vs); } }else{ return; } } }
/*
 * Apply the record stored under key `index` to the local state.
 *
 * index: database key of the record to apply.
 * arg:   the event_manager, passed as void* and forwarded unchanged to the
 *        do_action_* handlers.
 *
 * ev_mgr->cur_rec is set to `index`, then the handler matching the record
 * type is run. When ev_mgr->req_log is set, a one-line description of the
 * operation is written to ev_mgr->req_log_file first.
 */
static void update_state(db_key_type index,void* arg){
    event_manager* ev_mgr = arg;

    request_record* retrieve_data = NULL;
    size_t data_size;
    retrieve_record(ev_mgr->db_ptr, sizeof(index), &index, &data_size, (void**)&retrieve_data);
    ev_mgr->cur_rec = index;

    // Without a record there is no type to dispatch on; returning here avoids
    // dereferencing NULL below.
    if(retrieve_data==NULL){
        return;
    }

    FILE* output = NULL;
    if(ev_mgr->req_log){
        output = ev_mgr->req_log_file;
    }

    switch(retrieve_data->type){
        case P_TCP_CONNECT:
            if(output!=NULL){
                fprintf(output,"Operation: Connects.\n");
            }
            do_action_tcp_connect(retrieve_data->clt_id,arg);
            break;
        case P_UDP_CONNECT:
            if(output!=NULL){
                fprintf(output,"Operation: Connects.\n");
            }
            do_action_udp_connect(retrieve_data->clt_id,arg);
            break;
        case P_SEND:
            if(output!=NULL){
                fprintf(output,"Operation: Sends data.\n");
            }
            do_action_send(retrieve_data,arg);
            break;
        case P_CLOSE:
            if(output!=NULL){
                fprintf(output,"Operation: Closes.\n");
            }
            do_action_close(retrieve_data->clt_id,arg);
            break;
        case P_NOP:
            // A NOP exists only to carry the close() consensus result to the
            // replicas; there is no action to perform.
            if(output!=NULL){
                fprintf(output,"Operation: NOP.\n");
            }
            break;
        default:
            break;
    }
}
/*
 * Build a MISSING_ACK message carrying the locally stored record for `vs`.
 *
 * comp: consensus component of the local node.
 * vs:   view stamp of the record a peer reported missing.
 *
 * Returns a heap-allocated missing_ack whose payload (msg->data) holds a copy
 * of the whole request_record, which is how handle_missing_ack reads it. The
 * result is returned to the caller as-is.
 * NOTE(review): the caller is presumably expected to free it - confirm.
 * Returns NULL when no record exists for `vs` or the allocation fails.
 */
static void* build_missing_ack(consensus_component* comp,view_stamp* vs){
    missing_ack* msg = NULL;
    SYS_LOG(comp,"In Missing Ack, The View Stamp Is %u : %u.\n",
            vs->view_id,vs->req_id);

    db_key_type record_no = vstol(vs);
    request_record* record_data = NULL;
    size_t data_size;
    retrieve_record(comp->db_ptr,sizeof(record_no),&record_no,&data_size,(void**)&record_data);
    if(NULL==record_data){
        return NULL;
    }

    // Number of bytes the record itself occupies (the size it is stored with).
    size_t record_size = REQ_RECORD_SIZE(record_data);
    int memsize = MISSING_ACK_SIZE(record_data);

    // The payload starts inside the message, after the fixed fields, so the
    // buffer must hold those fields plus the record. Allocate at least that
    // much even if the size macro yields less.
    size_t alloc_size = sizeof(missing_ack) + record_size;
    if(memsize > 0 && (size_t)memsize > alloc_size){
        alloc_size = (size_t)memsize;
    }

    // Zero-filled so no uninitialised heap bytes can end up on the wire.
    msg = calloc(1,alloc_size);
    if(NULL!=msg){
        msg->node_id = comp->node_id;
        msg->data_size = memsize;
        msg->header.msg_type = MISSING_ACK;
        // Copy exactly the record. The previous length (memsize, the size of
        // the whole message) wrote past the end of the allocation and read
        // past the end of the record.
        memcpy(msg->data,record_data,record_size);
        msg->missing_vs = *vs;
    }
    return msg;
}
void consensus_make_progress(struct consensus_component_t* comp){ if(LEADER!=comp->my_role){ goto make_progress_exit; } leader_try_to_execute(comp); SYS_LOG(comp,"Let's Make Progress.\n"); if((view_stamp_comp(comp->highest_committed_vs,comp->highest_seen_vs)<0)&& (comp->highest_seen_vs->view_id==comp->cur_view->view_id)){ view_stamp temp; temp.view_id = comp->cur_view->view_id; temp.req_id = 0; if(view_stamp_comp(&temp,comp->highest_committed_vs)<0){ temp = *(comp->highest_committed_vs); } temp.req_id++; record_index_type start = vstol(&temp); record_index_type end = vstol(comp->highest_seen_vs); for(record_index_type index = start;index<=end;index++){ request_record* record_data = NULL; size_t data_size=0; view_stamp temp_vs = ltovs(index); retrieve_record(comp->db_ptr,sizeof(db_key_type),&index,&data_size,(void**)&record_data); if(!reached_quorum(record_data,comp->group_size)){ accept_req* msg = build_accept_req(comp,REQ_RECORD_SIZE(record_data),record_data,&temp_vs); if(NULL==msg){ continue; }else{ comp->uc(comp->my_node,ACCEPT_REQ_SIZE(msg),msg,-1); free(msg); } } } } force_exec* msg = build_force_exec(comp); if(NULL==msg){goto make_progress_exit;} comp->uc(comp->my_node,FORCE_EXEC_SIZE,msg,-1); free(msg); make_progress_exit: return; };
void *handle_accept_req(void* arg) { consensus_component* comp = arg; db_key_type start; db_key_type end; db_key_type index; size_t data_size; request_record* retrieve_data = NULL; int sock; struct timeval start_time; struct timeval end_time; unsigned long e_usec; while (1) { log_entry* new_entry = (log_entry*)((char*)shared_memory.shm[comp->node_id] + shared_memory.log->tail); if (new_entry->req_canbe_exed.view_id != 0)//TODO atmoic opeartion { gettimeofday(&start, 0); if(new_entry->msg_vs.view_id < comp->cur_view.view_id){ // TODO //goto reloop; } // if we this message is not from the current leader if(new_entry->msg_vs.view_id == comp->cur_view.view_id && new_entry->node_id != comp->cur_view.leader_id){ // TODO //goto reloop; } // update highest seen request if(view_stamp_comp(new_entry->msg_vs, comp->highest_seen_vs) > 0){ comp->highest_seen_vs = new_entry->msg_vs; } db_key_type record_no = vstol(new_entry->msg_vs); request_record* record_data = (request_record*)malloc(new_entry->data_size + sizeof(request_record)); gettimeofday(&record_data->created_time, NULL); record_data->data_size = new_entry->data_size; memcpy(record_data->data, new_entry->data, new_entry->data_size); // record the data persistently store_record(comp->db_ptr, sizeof(record_no), &record_no, REQ_RECORD_SIZE(record_data), record_data); uint64_t offset = shared_memory.tail + sizeof(accept_ack) * comp->node_id; shared_memory.tail = shared_memory.tail + log_entry_len(new_entry); accept_ack* reply = (accept_ack*)((char*)new_entry + ACCEPT_ACK_SIZE * comp->node_id); reply->node_id = comp->node_id; reply->msg_vs.view_id = new_entry->msg_vs.view_id; reply->msg_vs.req_id = new_entry->msg_vs.req_id; memcpy((void*)((char*)shared_memory.shm[new_entry->node_id] + offset), reply, ACCEPT_ACK_SIZE); free(record_data); if(view_stamp_comp(new_entry->req_canbe_exed, comp->committed) > 0) { sock = socket(AF_INET, SOCK_STREAM, 0); connect(sock, (struct sockaddr*)&comp->sys_addr.c_addr, 
comp->sys_addr.c_sock_len); //TODO: why? Broken pipe. Maybe the server closes the socket start = vstol(comp->committed)+1; end = vstol(new_entry->req_canbe_exed); for(index = start; index <= end; index++) { retrieve_record(comp->db_ptr, sizeof(index), &index, &data_size, (void**)&retrieve_data); send(sock, retrieve_data->data, retrieve_data->data_size, 0); } comp->committed = new_entry->req_canbe_exed; } gettimeofday(&end, 0); e_usec = ((end.tv_sec * 1000000) + end.tv_usec) - ((start.tv_sec * 1000000) + start.tv_usec); CON_LOG(comp, "%lu\n", e_usec); } } };
/*
 * Deliver, in order, every record between highest_committed_vs and the
 * current execution bound.
 *
 * Assumption stated by the original author: for the current leader, the
 * committed request and the highest request to execute are in the same view;
 * otherwise that node could not be the leader.
 *
 * Nothing is done while cur_view->view_id is 0. On the leader,
 * leader_try_to_execute() runs first. The bound is highest_to_commit_vs when
 * it lies in the same view as highest_committed_vs. Otherwise it is taken
 * from the boundary record stored under request 0 of the next view. If that
 * record is absent it is requested from peers and the function returns.
 *
 * Records are delivered strictly in sequence: the first one that cannot be
 * retrieved stops delivery for the rest of the pass, and every record that
 * cannot be retrieved is requested from peers.
 */
static void try_to_execute(consensus_component* comp){
    SYS_LOG(comp,"Node %d Try To Execute.\n",
            comp->node_id);
    if(comp->cur_view->view_id==0){
        SYS_LOG(comp,"Node %d Currently Is A NULL Node\n",
                comp->node_id);
        return;
    }
    if(LEADER==comp->my_role){
        leader_try_to_execute(comp);
    }

    db_key_type start = vstol(comp->highest_committed_vs)+1;
    db_key_type end;
    view_stamp temp_boundary;
    view_boundary* boundary_record = NULL;
    size_t data_size;

    if(comp->highest_committed_vs->view_id!=comp->highest_to_commit_vs->view_id){
        // The casts make each argument match its %lu specifier whatever the
        // width of the field is; the emitted text is unchanged.
        SYS_LOG(comp, "highest_to_commit_vs_req_id %lu.\n", (unsigned long)comp->highest_to_commit_vs->req_id);
        SYS_LOG(comp, "highest_to_commit_vs_view_id %lu.\n", (unsigned long)comp->highest_to_commit_vs->view_id);
        SYS_LOG(comp, "highest_committed_vs_req_id %lu.\n", (unsigned long)comp->highest_committed_vs->req_id);
        SYS_LOG(comp, "highest_committed_vs_view_id %lu.\n", (unsigned long)comp->highest_committed_vs->view_id);

        // address the boundary
        view_stamp bound;
        bound.view_id = comp->highest_committed_vs->view_id+1;
        bound.req_id = 0;
        db_key_type bound_record_no = vstol(&bound);
        retrieve_record(comp->db_ptr,sizeof(bound_record_no),&bound_record_no,&data_size,(void**)&boundary_record);
        if(NULL==boundary_record){
            SYS_LOG(comp, "Missing bounday_record.\n");
            send_missing_req(comp,&bound);
            return;
        }
        temp_boundary.view_id = boundary_record->view_id;
        temp_boundary.req_id = boundary_record->req_id;
        SYS_LOG(comp, "boundary_record_req_id %lu.\n", (unsigned long)boundary_record->req_id);
        SYS_LOG(comp, "boundary_record_view_id %lu.\n", (unsigned long)boundary_record->view_id);
        end = vstol(&temp_boundary);
    }else{
        end = vstol(comp->highest_to_commit_vs);
    }

    SYS_LOG(comp,"The End Value Is %lu.\n",
            end);

    request_record* record_data = NULL;
    // we can only execute things in sequence
    int exec_flag = 1;
    view_stamp missing_vs;
    for(db_key_type index = start;index<=end;index++){
        missing_vs = ltovs(index);
        if(0!=retrieve_record(comp->db_ptr,sizeof(index),&index,
                    &data_size,(void**)&record_data)){
            // Once a record is missing, nothing later may be delivered in
            // this pass; missing records are still requested.
            exec_flag = 0;
            send_missing_req(comp,&missing_vs);
        }
        if(exec_flag){
            deliver_msg_data(comp,&missing_vs);
            view_stamp_inc(comp->highest_committed_vs);
        }
        record_data = NULL;
    }

    // If everything up to the view boundary has now been committed, move
    // highest_committed_vs across into the next view.
    if(NULL!=boundary_record){
        temp_boundary.view_id = boundary_record->view_id;
        temp_boundary.req_id = boundary_record->req_id;
        db_key_type op1 = vstol(comp->highest_committed_vs);
        db_key_type op2 = vstol(&temp_boundary);
        if(op1==op2){
            cross_view(comp->highest_committed_vs);
        }
    }
}