示例#1
0
/*
 * Handle a missing_ack message.
 *
 * comp: the consensus component receiving the message.
 * data: pointer to a missing_ack; its `data` payload is interpreted as a
 *       request_record.
 *
 * If msg->missing_vs is not newer than comp->highest_committed_vs, or a
 * record is already stored under that view stamp, nothing is done.
 * Otherwise the payload is copied into a fresh record stamped with the
 * current time, stored under the view stamp's key, and try_to_execute() is
 * called.
 */
static void handle_missing_ack(consensus_component* comp,void* data){
    missing_ack* msg = data;
    request_record* origin = (request_record*)msg->data;
    SYS_LOG(comp,"Node %d Handle Missing Ack From Node %d.\n",
            comp->node_id,msg->node_id);
    if(view_stamp_comp(comp->highest_committed_vs,&msg->missing_vs)>=0){
        goto handle_missing_ack_exit;
    }else{
        db_key_type record_no = vstol(&msg->missing_vs);
        request_record* record_data = NULL;
        size_t data_size;
        retrieve_record(comp->db_ptr,sizeof(record_no),&record_no,&data_size,(void**)&record_data);

        // a record is already stored under this key: nothing to fill in
        if(record_data!=NULL){
            goto handle_missing_ack_exit;
        }

        record_data =(request_record*)malloc(REQ_RECORD_SIZE(origin));

        if(record_data==NULL){
            goto handle_missing_ack_exit;
        }

        // NOTE(review): only created_time, data_size and data are filled in;
        // any other request_record fields are stored with indeterminate
        // values - confirm whether they need initialising.
        gettimeofday(&record_data->created_time,NULL);
        record_data->data_size = origin->data_size;
        memcpy(record_data->data,origin->data,origin->data_size);
        store_record(comp->db_ptr,sizeof(record_no),&record_no,REQ_RECORD_SIZE(record_data),record_data);
        // The store is handed a (size, pointer) pair and other callers in
        // this file free their buffer right after store_record(), so the
        // temporary copy is released here instead of being leaked.
        free(record_data);
    }
    try_to_execute(comp);
handle_missing_ack_exit:
    return;
}
示例#2
0
/*
 * Handle an accept_ack message.
 *
 * The ack is only applied when this node is the LEADER and the acked view
 * stamp is newer than comp->highest_committed_vs. In that case the record
 * stored under the view stamp's key is fetched, updated with the sender's
 * node id and written back. try_to_execute() is called on every path.
 */
static void handle_accept_ack(consensus_component* comp,void* data){
    accept_ack* ack = data;
    SYS_LOG(comp,"Node %d Handle Accept Ack From Node %u.\n",
            comp->node_id,ack->node_id);

    // A non-leader ignores every accept ack (such acks must come from a
    // previous view); acks for already committed requests are ignored too.
    if(LEADER==comp->my_role &&
            view_stamp_comp(&ack->msg_vs,comp->highest_committed_vs)>0){
        db_key_type key = vstol(&ack->msg_vs);
        size_t stored_size;
        request_record* stored = NULL;
        retrieve_record(comp->db_ptr,sizeof(key),&key,&stored_size,(void**)&stored);
        if(NULL==stored){
            SYS_LOG(comp,"Received Ack To Non-Exist Record %lu.\n",
                    key);
        }else{
            update_record(stored,ack->node_id);
            // Whether the write-back succeeds is deliberately not checked:
            // a failed update is treated like a lost message.
            store_record(comp->db_ptr,sizeof(key),&key,REQ_RECORD_SIZE(stored),stored);
        }
    }

    try_to_execute(comp);
    return;
};
示例#3
0
// The leader has the additional responsibility of advancing the highest
// request that can be executed; when its committed stamp equals its
// to-commit stamp on entry, it also delivers each record in this stage.
//
// Walks the record keys from just after highest_to_commit_vs (or from the
// start of the next view when highest_seen_vs is one view ahead) up to
// highest_seen_vs. Each record that has reached quorum advances
// highest_to_commit_vs; the walk stops at the first record that has not.
static void leader_try_to_execute(consensus_component* comp){
    // The casts make every argument match its conversion specifier exactly,
    // whatever the underlying width of the view stamp fields is.
    SYS_LOG(comp, "highest_seen_req_id %lu.\n", (unsigned long)comp->highest_seen_vs->req_id);
    SYS_LOG(comp, "highest_seen_view_id %lu.\n", (unsigned long)comp->highest_seen_vs->view_id);
    SYS_LOG(comp, "highest_to_commit_vs_req_id %lu.\n", (unsigned long)comp->highest_to_commit_vs->req_id);
    SYS_LOG(comp, "highest_to_commit_vs_view_id %lu.\n", (unsigned long)comp->highest_to_commit_vs->view_id);
    db_key_type start;
    db_key_type end = vstol(comp->highest_seen_vs);

    size_t data_size;
    if(comp->highest_seen_vs->view_id != comp->highest_to_commit_vs->view_id){
        // address the view boundary: move both stamps to request 0 of the
        // next view and start the walk from that key
        assert(comp->highest_to_commit_vs->view_id + 1 == comp->highest_seen_vs->view_id);
        comp->highest_to_commit_vs->view_id += 1;
        comp->highest_to_commit_vs->req_id = 0;
        comp->highest_committed_vs->view_id = comp->highest_to_commit_vs->view_id;
        comp->highest_committed_vs->req_id = comp->highest_to_commit_vs->req_id;
        start = vstol(comp->highest_to_commit_vs);
    } else{
        start = vstol(comp->highest_to_commit_vs)+1;
    }

    // deliver in this stage only if nothing is pending between the two stamps
    int exec_flag = (!view_stamp_comp(comp->highest_committed_vs,comp->highest_to_commit_vs));
    SYS_LOG(comp,"The Leader Tries To Execute.\n");
    SYS_LOG(comp,"The Start Value Is %lu.\n",(unsigned long)start);
    SYS_LOG(comp,"The End Value Is %lu.\n",(unsigned long)end);
    for(db_key_type index=start;index<=end;index++){
        // Reset per iteration so a failed lookup cannot silently reuse the
        // record fetched for the previous key.
        request_record* record_data = NULL;
        retrieve_record(comp->db_ptr,sizeof(index),&index,&data_size,(void**)&record_data);
        assert(record_data!=NULL && "The Record Should Be Inserted By The Node Itself!");
        if(record_data==NULL){
            // only reachable when asserts are compiled out (NDEBUG)
            return;
        }
        if(!reached_quorum(record_data,comp->group_size)){
            return;
        }

        view_stamp temp = ltovs(index);
        SYS_LOG(comp,"Node %d : View Stamp %u : %u Has Reached Quorum.\n",
                comp->node_id,(unsigned)temp.view_id,(unsigned)temp.req_id);

        SYS_LOG(comp,"Before Node %d Inc Execute  %u : %u.\n",
                comp->node_id,
                (unsigned)comp->highest_to_commit_vs->view_id,
                (unsigned)comp->highest_to_commit_vs->req_id);
        view_stamp_inc(comp->highest_to_commit_vs);
        SYS_LOG(comp,"After Node %d Inc Execute  %u : %u.\n",
                comp->node_id,
                (unsigned)comp->highest_to_commit_vs->view_id,
                (unsigned)comp->highest_to_commit_vs->req_id);

        if(exec_flag){
            deliver_msg_data(comp,&temp);
            view_stamp_inc(comp->highest_committed_vs);
        }
    }
}
示例#4
0
/*
 * Handle a force_exec message.
 *
 * Messages whose sender is not the leader of the current view are ignored.
 * Otherwise, if the carried highest_committed_op is newer than
 * comp->highest_to_commit_vs, the latter is overwritten with it and
 * try_to_execute() is called.
 */
static void handle_force_exec(consensus_component* comp,void* data){
    force_exec* req = data;

    if(req->node_id!=comp->cur_view->leader_id){
        return;
    }

    int is_newer =
        (view_stamp_comp(comp->highest_to_commit_vs,&req->highest_committed_op)<0);
    if(is_newer){
        *(comp->highest_to_commit_vs)=req->highest_committed_op;
        try_to_execute(comp);
    }
    return;
};
示例#5
0
/*
 * Periodic progress step; does nothing unless this node is the LEADER.
 *
 * 1. Calls leader_try_to_execute().
 * 2. If highest_committed_vs is still behind highest_seen_vs and the highest
 *    seen request belongs to the current view, walks the record keys from
 *    just after max(highest_committed_vs, <current view, 0>) up to
 *    highest_seen_vs and re-sends an accept request (target -1) for every
 *    stored record that has not reached quorum.
 * 3. Builds and sends a force_exec message (target -1).
 */
void consensus_make_progress(struct consensus_component_t* comp){
    if(LEADER!=comp->my_role){
        goto make_progress_exit;
    }
    leader_try_to_execute(comp);
    SYS_LOG(comp,"Let's Make Progress.\n");
    if((view_stamp_comp(comp->highest_committed_vs,comp->highest_seen_vs)<0)&& (comp->highest_seen_vs->view_id==comp->cur_view->view_id)){
        // start right after whichever is later: the last committed request
        // or request 0 of the current view
        view_stamp temp;
        temp.view_id = comp->cur_view->view_id;
        temp.req_id = 0;
        if(view_stamp_comp(&temp,comp->highest_committed_vs)<0){
            temp = *(comp->highest_committed_vs);
        }
        temp.req_id++;
        record_index_type start =  vstol(&temp);
        record_index_type end = vstol(comp->highest_seen_vs);
        for(record_index_type index = start;index<=end;index++){
            request_record* record_data = NULL;
            size_t data_size=0;
            view_stamp temp_vs = ltovs(index);
            retrieve_record(comp->db_ptr,sizeof(db_key_type),&index,&data_size,(void**)&record_data);
            // Other callers in this file treat a NULL result as "no record
            // under this key"; skip it rather than dereferencing NULL in
            // reached_quorum()/REQ_RECORD_SIZE().
            if(NULL==record_data){
                continue;
            }
            if(!reached_quorum(record_data,comp->group_size)){
                accept_req* msg = build_accept_req(comp,REQ_RECORD_SIZE(record_data),record_data,&temp_vs);
                if(NULL==msg){
                    continue;
                }
                comp->uc(comp->my_node,ACCEPT_REQ_SIZE(msg),msg,-1);
                free(msg);
            }
        }
    }
    force_exec* msg = build_force_exec(comp);
    if(NULL==msg){goto make_progress_exit;}
    comp->uc(comp->my_node,FORCE_EXEC_SIZE,msg,-1);
    free(msg);
make_progress_exit:
    return;
}
示例#6
0
void *handle_accept_req(void* arg)
{
    consensus_component* comp = arg;

    db_key_type start;
    db_key_type end;
    db_key_type index;

    size_t data_size;
    request_record* retrieve_data = NULL;

    int sock;

    struct timeval start_time;
    struct timeval end_time;
    unsigned long e_usec;
    
    while (1)
    {
        log_entry* new_entry = (log_entry*)((char*)shared_memory.shm[comp->node_id] + shared_memory.log->tail);
        
        if (new_entry->req_canbe_exed.view_id != 0)//TODO atmoic opeartion
        {
            gettimeofday(&start, 0);
            if(new_entry->msg_vs.view_id < comp->cur_view.view_id){
                // TODO
                //goto reloop;
            }
            // if we this message is not from the current leader
            if(new_entry->msg_vs.view_id == comp->cur_view.view_id && new_entry->node_id != comp->cur_view.leader_id){
                // TODO
                //goto reloop;
            }

            // update highest seen request
            if(view_stamp_comp(new_entry->msg_vs, comp->highest_seen_vs) > 0){
                comp->highest_seen_vs = new_entry->msg_vs;
            }

            db_key_type record_no = vstol(new_entry->msg_vs);
            request_record* record_data = (request_record*)malloc(new_entry->data_size + sizeof(request_record));

            gettimeofday(&record_data->created_time, NULL);
            record_data->data_size = new_entry->data_size;
            memcpy(record_data->data, new_entry->data, new_entry->data_size);

            // record the data persistently 
            store_record(comp->db_ptr, sizeof(record_no), &record_no, REQ_RECORD_SIZE(record_data), record_data);
            uint64_t offset = shared_memory.tail + sizeof(accept_ack) * comp->node_id;
            shared_memory.tail = shared_memory.tail + log_entry_len(new_entry);

            accept_ack* reply = (accept_ack*)((char*)new_entry + ACCEPT_ACK_SIZE * comp->node_id);
            reply->node_id = comp->node_id;
            reply->msg_vs.view_id = new_entry->msg_vs.view_id;
            reply->msg_vs.req_id = new_entry->msg_vs.req_id;

            memcpy((void*)((char*)shared_memory.shm[new_entry->node_id] + offset), reply, ACCEPT_ACK_SIZE);

            free(record_data);
            if(view_stamp_comp(new_entry->req_canbe_exed, comp->committed) > 0)
            {
                sock = socket(AF_INET, SOCK_STREAM, 0);
                connect(sock, (struct sockaddr*)&comp->sys_addr.c_addr, comp->sys_addr.c_sock_len); //TODO: why? Broken pipe. Maybe the server closes the socket
                start = vstol(comp->committed)+1;
                end = vstol(new_entry->req_canbe_exed);
                for(index = start; index <= end; index++)
                {
                    retrieve_record(comp->db_ptr, sizeof(index), &index, &data_size, (void**)&retrieve_data);
                    send(sock, retrieve_data->data, retrieve_data->data_size, 0);
                }
                comp->committed = new_entry->req_canbe_exed;
            }
            gettimeofday(&end, 0);
            e_usec = ((end.tv_sec * 1000000) + end.tv_usec) - ((start.tv_sec * 1000000) + start.tv_usec);
            CON_LOG(comp, "%lu\n", e_usec);
        }
    }
};
示例#7
0
/*
 * Handle an accept_req message.
 *
 * comp: the consensus component receiving the message.
 * data: pointer to an accept_req; its `data` payload is interpreted as a
 *       request_record.
 *
 * The request is ignored when it belongs to an older view, when it claims
 * the current view but does not come from the current leader, or when its
 * view stamp is not newer than comp->highest_committed_vs. Otherwise the
 * handler raises highest_seen_vs / highest_to_commit_vs as needed, stores a
 * timestamped copy of the record under the view stamp's key and, if the
 * store succeeded, sends an accept_ack to the sender.
 * try_to_execute() is called on every path.
 */
static void handle_accept_req(consensus_component* comp,void* data){
    SYS_LOG(comp,"Node %d Handle Accept Req.\n",
            comp->node_id);
    accept_req* msg = data;
    if(msg->msg_vs.view_id< comp->cur_view->view_id){
        goto handle_accept_req_exit;
    }
    // if this message is not from the current leader
    if(msg->msg_vs.view_id == comp->cur_view->view_id &&
            msg->node_id!=comp->cur_view->leader_id){
        // casts keep the arguments in step with the %ld specifiers
        SYS_LOG(comp, "Msg come from node %ld, which is not the current leader %ld.\n",
                (long)msg->node_id, (long)comp->cur_view->leader_id);
        goto handle_accept_req_exit;
    }
    // if we have committed the operation, then safely ignore it
    if(view_stamp_comp(&msg->msg_vs,comp->highest_committed_vs)<=0){
        SYS_LOG(comp, "I've already committed the operation. I'll ignore this one.\n");
        goto handle_accept_req_exit;
    }else{
        // update highest seen request
        if(view_stamp_comp(&msg->msg_vs,comp->highest_seen_vs)>0){
            *(comp->highest_seen_vs) = msg->msg_vs;
        }
        // update highest request that can be executed
        SYS_LOG(comp,"Now Node %d Sees Request %u : %u .\n",
                comp->node_id,
                msg->req_canbe_exed.view_id,
                msg->req_canbe_exed.req_id);

        if(view_stamp_comp(&msg->req_canbe_exed,
                    comp->highest_to_commit_vs)>0){

            *(comp->highest_to_commit_vs) = msg->req_canbe_exed;
            SYS_LOG(comp,"Now Node %d Can Execute Request %u : %u .\n",
                    comp->node_id,
                    comp->highest_to_commit_vs->view_id,
                    comp->highest_to_commit_vs->req_id);
        }

        db_key_type record_no = vstol(&msg->msg_vs);
        request_record* origin_data = (request_record*)msg->data;
        request_record* record_data = (request_record*)malloc(
                REQ_RECORD_SIZE(origin_data));
        if(record_data==NULL){
            goto handle_accept_req_exit;
        }
        gettimeofday(&record_data->created_time,NULL);
        record_data->is_closed = origin_data->is_closed;
        record_data->data_size = origin_data->data_size;
        memcpy(record_data->data,origin_data->data,
                origin_data->data_size);

        // record the data persistently
        int store_failed = (store_record(comp->db_ptr,sizeof(record_no),&record_no,
                    REQ_RECORD_SIZE(record_data),record_data)!=0);
        // The temporary copy is no longer needed once the store call has
        // returned; release it on both the success and the failure path
        // instead of leaking it.
        free(record_data);
        if(store_failed){
            goto handle_accept_req_exit;
        }
        // build the reply to the leader
        accept_ack* reply = build_accept_ack(comp,&msg->msg_vs);
        if(NULL==reply){
            goto handle_accept_req_exit;
        }
        comp->uc(comp->my_node,ACCEPT_ACK_SIZE,reply,msg->node_id);
        free(reply);
    }
handle_accept_req_exit:
    try_to_execute(comp);
    return;
}
示例#8
0
/*
 * Thread entry point that polls the local log for accept requests and
 * answers them with an RDMA write.
 *
 * arg: the consensus_component this thread serves.
 *
 * Loops forever. While this node is not the leader of the current view it
 * calls comp->uc(comp->up_para) and inspects the log entry at log->end. An
 * entry is processed once its data_size is non-zero and its last byte equals
 * DUMMY_END. Processing:
 *   - raises comp->highest_seen_vs if the entry's view stamp is newer,
 *   - stores the entry's record under the view stamp's key,
 *   - advances log->tail / log->end past the entry,
 *   - builds an accept_ack inside the entry and posts it to the entry's
 *     sender via post_send(..., IBV_WR_RDMA_WRITE, ...),
 *   - if req_canbe_exed is newer than comp->highest_committed_vs, calls
 *     comp->ucb for every key in (highest_committed_vs, req_canbe_exed] and
 *     then updates highest_committed_vs.
 *
 * Never returns.
 */
void *handle_accept_req(void* arg)
{
    consensus_component* comp = arg;

    db_key_type start;
    db_key_type end;
    db_key_type index;
    
    dare_log_entry_t* entry;

    set_affinity(1);

    for (;;)
    {
        // only non-leader nodes consume accept requests from the log
        if (comp->cur_view->leader_id != *comp->node_id)
        {
            comp->uc(comp->up_para);

            entry = log_get_entry(SRV_DATA->log, &SRV_DATA->log->end);

            if (entry->data_size != 0)
            {
                // the last byte of the entry is checked against DUMMY_END
                // before the entry is treated as complete
                char* dummy = (char*)((char*)entry + log_entry_len(entry) - 1);
                if (*dummy == DUMMY_END) // atomic operation
                {
#ifdef MEASURE_LATENCY
                    clock_handler c_k;
                    clock_init(&c_k);
                    clock_add(&c_k);
#endif
                    if(entry->msg_vs.view_id < comp->cur_view->view_id){
                    // TODO
                    //goto reloop;
                    }
                    // if this message is not from the current leader
                    if(entry->msg_vs.view_id == comp->cur_view->view_id && entry->node_id != comp->cur_view->leader_id){
                    // TODO
                    //goto reloop;
                    }

                    // update highest seen request
                    if(view_stamp_comp(&entry->msg_vs, comp->highest_seen_vs) > 0){
                        *(comp->highest_seen_vs) = entry->msg_vs;
                    }

                    db_key_type record_no = vstol(&entry->msg_vs);
                    // record the data persistently
                    // The record is not copied: it aliases the entry starting
                    // at its data_size field.
                    request_record* record_data = (request_record*)((char*)entry + offsetof(dare_log_entry_t, data_size));

                    // NOTE(review): the "- 1" presumably excludes the trailing
                    // DUMMY_END byte from the stored size - confirm.
                    store_record(comp->db_ptr, sizeof(record_no), &record_no, REQ_RECORD_SIZE(record_data) - 1, record_data);

#ifdef MEASURE_LATENCY
                    clock_add(&c_k);
#endif
                    // tail now holds the log offset of this entry, end the
                    // offset just past it
                    SRV_DATA->log->tail = SRV_DATA->log->end;
                    SRV_DATA->log->end += log_entry_len(entry);
                    uint32_t my_id = *comp->node_id;
                    // byte offset, within the remote memory region, of this
                    // node's ack slot for this entry
                    uint32_t offset = (uint32_t)(offsetof(dare_log_t, entries) + SRV_DATA->log->tail + ACCEPT_ACK_SIZE * my_id);

                    // the ack is built in place inside the entry, at this
                    // node's slot (ACCEPT_ACK_SIZE * my_id bytes from its start)
                    accept_ack* reply = (accept_ack*)((char*)entry + ACCEPT_ACK_SIZE * my_id);
                    reply->node_id = my_id;
                    reply->msg_vs.view_id = entry->msg_vs.view_id;
                    reply->msg_vs.req_id = entry->msg_vs.req_id;
                    
                    if (entry->type == P_OUTPUT)
                    {
                        // up = get_mapping_fd() is defined in ev_mgr.c
                        int fd = comp->ug(entry->clt_id, comp->up_para);
                        // consider entry->data as a pointer.
                        uint64_t hash = get_output_hash(fd, *(long*)entry->data);
                        reply->hash = hash;    
                    }

                    rem_mem_t rm;
                    dare_ib_ep_t *ep = (dare_ib_ep_t*)SRV_DATA->config.servers[entry->node_id].ep;
                    memset(&rm, 0, sizeof(rem_mem_t));
                    uint32_t *send_count_ptr = &(ep->rc_ep.rc_qp.send_count);
                    int send_flags, poll_completion = 0;

                    // request a signaled send when the send counter masked
                    // with S_DEPTH_ is zero ...
                    if((*send_count_ptr & S_DEPTH_) == 0)
                        send_flags = IBV_SEND_SIGNALED;
                    else
                        send_flags = 0;

                    // ... and ask post_send to poll for a completion when the
                    // masked counter equals S_DEPTH_
                    if ((*send_count_ptr & S_DEPTH_) == S_DEPTH_)
                        poll_completion = 1;

                    (*send_count_ptr)++;

                    rm.raddr = ep->rc_ep.rmt_mr.raddr + offset;
                    rm.rkey = ep->rc_ep.rmt_mr.rkey;

                    // write the ack into the memory of the node that sent the entry
                    post_send(entry->node_id, reply, ACCEPT_ACK_SIZE, IBDEV->lcl_mr, IBV_WR_RDMA_WRITE, &rm, send_flags, poll_completion);

                    // run the upcall for every request that has become
                    // executable since the last committed view stamp
                    if(view_stamp_comp(&entry->req_canbe_exed, comp->highest_committed_vs) > 0)
                    {
                        start = vstol(comp->highest_committed_vs)+1;
                        end = vstol(&entry->req_canbe_exed);
                        for(index = start; index <= end; index++)
                        {
                            comp->ucb(index,comp->up_para);
                        }
                        *(comp->highest_committed_vs) = entry->req_canbe_exed;
                    }
#ifdef MEASURE_LATENCY
                    clock_add(&c_k);
                    clock_display(comp->sys_log_file, &c_k);
#endif
                }   
            }
        }
    }
};