/** * callback for the internal read request triggered by a truncate potential failure case: - socket_ref is out of range - connection is down @param buffer : pointer to the ruc_buffer that cointains the response @param socket_ref : non significant @param user_param_p : pointer to the root context @retval 0 : successfully submitted to the transport layer @retval < 0 error, the caller is intended to release the buffer */ int rozofs_storcli_internal_read_before_truncate_rsp_cbk(void *buffer,uint32_t socket_ref,void *user_param) { int errcode = 0; rozofs_storcli_ctx_t *working_ctx_p = (rozofs_storcli_ctx_t*)user_param; storcli_truncate_arg_t * storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg; XDR xdrs; uint8_t *payload; char *data; int position; int bufsize; struct rpc_msg rpc_reply; storcli_status_ret_t rozofs_status; int data_len; int error; rpc_reply.acpted_rply.ar_results.proc = NULL; /* ** decode the read internal read reply */ payload = (uint8_t*) ruc_buf_getPayload(buffer); payload += sizeof(uint32_t); /* skip length*/ /* ** OK now decode the received message */ bufsize = ruc_buf_getPayloadLen(buffer); bufsize -= sizeof(uint32_t); /* skip length*/ xdrmem_create(&xdrs,(char*)payload,bufsize,XDR_DECODE); error = 0; while (1) { /* ** decode the rpc part */ if (rozofs_xdr_replymsg(&xdrs,&rpc_reply) != TRUE) { errno = EPROTO; error = 1; break; } /* ** decode the status of the operation */ if (xdr_storcli_status_ret_t(&xdrs,&rozofs_status)!= TRUE) { errno = EPROTO; error = 1; break; } /* ** check th estatus of the operation */ if ( rozofs_status.status != STORCLI_SUCCESS ) { error = 0; break; } { int alignment; /* ** skip the alignment */ if (xdr_int(&xdrs, &alignment) != TRUE) { errno = EPROTO; STORCLI_ERR_PROF(read_prj_err); error = 1; break; } } /* ** Now get the length of the part that has been read */ if (xdr_int(&xdrs, &data_len) != TRUE) { errno = EPROTO; error = 1; break; } break; } if (error) { severe("error while 
decoding rpc reply"); goto failure; } position = XDR_GETPOS(&xdrs); data = (char*)(payload+position); /* ** check the status of the read operation */ if (rozofs_status.status != STORCLI_SUCCESS) { data = NULL; } else { /*, ** No data returned */ if (data_len == 0) { data = NULL; } else if (storcli_truncate_rq_p->last_seg <= data_len) { memset(data+storcli_truncate_rq_p->last_seg, 0, ROZOFS_BSIZE-storcli_truncate_rq_p->last_seg); } else { memset(data+data_len, 0, ROZOFS_BSIZE-data_len); } } rozofs_storcli_truncate_req_processing_exec(working_ctx_p, data); ruc_buf_freeBuffer(buffer); return 0 ; failure: ruc_buf_freeBuffer(buffer); /* ** check if the lock is asserted to prevent direct call to callback */ if (working_ctx_p->write_ctx_lock == 1) return 0; /* ** write failure */ rozofs_storcli_write_reply_error(working_ctx_p,errcode); /* ** release the transaction root context */ working_ctx_p->xmitBuf = NULL; STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0); rozofs_storcli_release_context(working_ctx_p); return 0 ; }
/*
** That function is called when all the projections (repaired blocks) are ready to be sent.
**
** For each projection of the optimal distribution that shows CRC errors, it builds and
** sends a repair request towards the storage owning that projection:
**  - SP_WRITE_REPAIR2 (3 x 64-bit error bitmap) when storcli_storage_supports_repair2 is set,
**  - legacy SP_WRITE_REPAIR (single 64-bit bitmap) otherwise.
** The read response is sent back to rozofsmount first; the repair itself is best-effort
** (a projection whose send fails is skipped, not retried).
**
** @param working_ctx_p: pointer to the root context associated with the top level write request
*/
void rozofs_storcli_write_repair_req_processing(rozofs_storcli_ctx_t *working_ctx_p)
{
  storcli_read_arg_t *storcli_read_rq_p = (storcli_read_arg_t*)&working_ctx_p->storcli_read_arg;
  uint8_t layout = storcli_read_rq_p->layout;
  uint8_t rozofs_forward;
  uint8_t projection_id;
  int error=0;
  int ret;
  rozofs_storcli_projection_ctx_t *prj_cxt_p = working_ctx_p->prj_ctx;
  uint8_t bsize = storcli_read_rq_p->bsize;
  int prj_size_in_msg = rozofs_get_max_psize_in_msg(layout,bsize);
  sp_write_repair_arg_no_bins_t  *request;
  sp_write_repair_arg_no_bins_t   repair_prj_args;
  sp_write_repair2_arg_no_bins_t *request2;
  sp_write_repair2_arg_no_bins_t  repair2_prj_args;

  rozofs_forward = rozofs_get_rozofs_forward(layout);
  /*
  ** check if the buffer is still valid: we might face the situation where the rozofsmount
  ** times out and re-allocates the write buffer located in shared memory for another
  ** transaction (either read or write):
  ** the control must take place only when a shared memory is present for the write
  */
  error = 0;
  if (working_ctx_p->shared_mem_p != NULL)
  {
    uint32_t *xid_p = (uint32_t*)working_ctx_p->shared_mem_p;
    if (*xid_p != working_ctx_p->src_transaction_id)
    {
      /*
      ** the source has aborted the request
      */
      error = EPROTO;
    }
  }
  /*
  ** send back the response of the read request towards rozofsmount
  */
  rozofs_storcli_read_reply_success(working_ctx_p);
  /*
  ** allocate a sequence number for the working context:
  ** this is mandatory to avoid any confusion with a late response of the previous read request
  */
  working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
  /*
  ** check if it makes sense to send the repaired blocks
  */
  if (error)
  {
    /*
    ** the requester has released the buffer, and the rozofsmount might already be using it
    ** for another purpose, so the repaired data might be wrong: do not take the risk of
    ** writing wrong data for which we would compute a good crc !!
    */
    goto fail;
  }
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     void *xmit_buf;
     int ret;    /* NOTE(review): shadows the function-scope ret on purpose; the outer ret
                 ** is only used after the loop for the pending-repair check */
     /*
     ** skip the projections for which no error has been detected
     */
     if (storcli_storage_supports_repair2)
     {
       if (ROZOFS_BITMAP64_TEST_ALL0(working_ctx_p->prj_ctx[projection_id].crc_err_bitmap)) continue;
     }
     else
     {
       if (working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0] == 0) continue;
     }
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the resource control already took place
       */
       error = EIO;
       goto fail;
     }
     /*
     ** fill partially the common header
     */
     if (storcli_storage_supports_repair2)
     {
       /* repair2 variant: carries the full 3 x 64-bit block error bitmap */
       request2 = &repair2_prj_args;
       request2->cid    = storcli_read_rq_p->cid;
       request2->sid    = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request2->layout = storcli_read_rq_p->layout;
       request2->bsize  = storcli_read_rq_p->bsize;
       /*
       ** the case spare==1 must not occur because repair is done for the optimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request2->spare = 1;
       else request2->spare = 0;
       memcpy(request2->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request2->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
       //CRCrequest->proj_id = projection_id;
       request2->proj_id   = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request2->sid,rozofs_forward);
       request2->bid       = storcli_read_rq_p->bid;
       request2->bitmap[0] = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];
       request2->bitmap[1] = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[1];
       request2->bitmap[2] = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[2];
       int nb_blocks = ROZOFS_BITMAP64_NB_BIT1(request2->bitmap);
       request2->nb_proj = nb_blocks;
       /*
       ** set the length of the bins part: need to compute the number of blocks
       */
       int bins_len = (prj_size_in_msg * nb_blocks);
       request2->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution: we might get a direct reply if there is an immediate error at the load
       ** balancing group while attempting to send the RPC message -> typically a disconnection
       ** of the TCP connection. As a consequence the response fct
       ** (rozofs_storcli_write_repair_req_processing_cbk) can be called prior to returning from
       ** rozofs_sorcli_send_rq_common(). Anticipate the xmit state of the projection and lock
       ** the section to avoid a reply error before returning from rozofs_sorcli_send_rq_common()
       ** --> need to take care because the write context is released after the reply error is
       ** sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
       ret = rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR2,
                                          (xdrproc_t) xdr_sp_write_repair2_arg_no_bins_t, (caddr_t) request2,
                                          xmit_buf,
                                          working_ctx_p->read_seqnum,
                                          (uint32_t) projection_id,
                                          bins_len,
                                          rozofs_storcli_write_repair_req_processing_cbk,
                                          (void*)working_ctx_p);
     }
     else
     {
       /* legacy repair variant: single 64-bit block error bitmap */
       request = &repair_prj_args;
       request->cid    = storcli_read_rq_p->cid;
       request->sid    = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request->layout = storcli_read_rq_p->layout;
       request->bsize  = storcli_read_rq_p->bsize;
       /*
       ** the case spare==1 must not occur because repair is done for the optimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
       else request->spare = 0;
       memcpy(request->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
       //CRCrequest->proj_id = projection_id;
       request->proj_id = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request->sid,rozofs_forward);
       request->bid     = storcli_read_rq_p->bid;
       request->bitmap  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];
       int nb_blocks = ROZOFS_BITMAP64_NB_BIT1_FUNC((uint8_t*)&request->bitmap,8);
       request->nb_proj = nb_blocks;
       /*
       ** set the length of the bins part: need to compute the number of blocks
       */
       int bins_len = (prj_size_in_msg * nb_blocks);
       request->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution: we might get a direct reply if there is an immediate error at the load
       ** balancing group while attempting to send the RPC message -> typically a disconnection
       ** of the TCP connection. As a consequence the response fct
       ** (rozofs_storcli_write_repair_req_processing_cbk) can be called prior to returning from
       ** rozofs_sorcli_send_rq_common(). Anticipate the xmit state of the projection and lock
       ** the section to avoid a reply error before returning from rozofs_sorcli_send_rq_common()
       ** --> need to take care because the write context is released after the reply error is
       ** sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
       ret = rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR,
                                          (xdrproc_t) xdr_sp_write_repair_arg_no_bins_t, (caddr_t) request,
                                          xmit_buf,
                                          working_ctx_p->read_seqnum,
                                          (uint32_t) projection_id,
                                          bins_len,
                                          rozofs_storcli_write_repair_req_processing_cbk,
                                          (void*)working_ctx_p);
     }
     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
       /*
       ** there is no retry, just keep on with a potential other projection to repair
       */
       STORCLI_ERR_PROF(repair_prj_err);
       STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_ERROR;
       continue;
     }
     else
     {
       /*
       ** check if the state has not been changed -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
         /*
         ** it looks like we cannot repair that projection, check if there is some other
         */
         STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);
       }
     }
  }
  /*
  ** check if some write repair requests are pending; in such a case we wait for the end of
  ** the repair (answer from the storage node)
  */
  ret = rozofs_storcli_all_prj_write_repair_check(storcli_read_rq_p->layout, working_ctx_p->prj_ctx);
  if (ret == 0)
  {
    /*
    ** there is some pending write
    */
    return;
  }

fail:
  /*
  ** nothing pending (or repair aborted): release the root transaction context
  */
  STORCLI_STOP_NORTH_PROF(working_ctx_p,repair,0);
  rozofs_storcli_release_context(working_ctx_p);
  return;
}
/*
** That function is called when all the projections are ready to be sent.
**
** It selects a storage (load balancing group) for each projection of the optimal
** distribution, transforms the data of the last block when present, then sends one
** SP_TRUNCATE request per projection. On a send failure it retries the projection
** with another selectable storage.
**
** @param working_ctx_p: pointer to the root context associated with the top level write request
** @param data : pointer to the data of the last block to truncate (NULL when there is none)
*/
void rozofs_storcli_truncate_req_processing_exec(rozofs_storcli_ctx_t *working_ctx_p, char * data)
{
  storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
  uint8_t layout = storcli_truncate_rq_p->layout;
  uint32_t bsize = storcli_truncate_rq_p->bsize;
  uint8_t rozofs_forward;
  uint8_t rozofs_safe;
  uint8_t projection_id;
  int storage_idx;
  int error=0;
  rozofs_storcli_lbg_prj_assoc_t *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;
  rozofs_storcli_projection_ctx_t *prj_cxt_p = working_ctx_p->prj_ctx;

  rozofs_forward = rozofs_get_rozofs_forward(layout);
  rozofs_safe    = rozofs_get_rozofs_safe(layout);

  /*
  ** set the current state of each load balancing group belonging to the rozofs_safe group
  */
  for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++)
  {
    /*
    ** check the state of the load balancing group
    */
    rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                            storage_idx,
                                            NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));
  }
  /*
  ** now find out a selectable lbg_id for each projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
    if (rozofs_storcli_select_storage_idx_for_write ( working_ctx_p,rozofs_forward, rozofs_safe,projection_id) < 0)
    {
      /*
      ** there are not enough valid storages !!
      */
      STORCLI_ERR_PROF(truncate_sid_miss);
      error = EIO;
      goto fail;
    }
  }
  /*
  ** let's transform the data to write (forward transform of the last block only:
  ** first block index 0, 1 block, bounded by last_seg)
  */
  working_ctx_p->truncate_bins_len = 0;
  if (data != NULL)
  {
    STORCLI_START_KPI(storcli_kpi_transform_forward);
    rozofs_storcli_transform_forward(working_ctx_p->prj_ctx,
                                     layout, bsize,
                                     0,
                                     1,
                                     working_ctx_p->timestamp,
                                     storcli_truncate_rq_p->last_seg,
                                     data);
    STORCLI_STOP_KPI(storcli_kpi_transform_forward,0);
    working_ctx_p->truncate_bins_len = rozofs_get_max_psize_in_msg(layout,bsize); ;
  }
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     sp_truncate_arg_no_bins_t *request;
     sp_truncate_arg_no_bins_t  truncate_prj_args;
     void *xmit_buf;
     int ret;

     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the resource control already took place
       */
       error = EIO;
       goto fatal;
     }
     /*
     ** fill partially the common header
     */
retry:
     request = &truncate_prj_args;
     request->cid = storcli_truncate_rq_p->cid;
     request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     request->layout = layout;
     request->bsize  = bsize;
     if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
     else request->spare = 0;
     memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
     memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
     request->proj_id        = projection_id;
     request->bid            = storcli_truncate_rq_p->bid;
     request->last_seg       = storcli_truncate_rq_p->last_seg;
     request->last_timestamp = working_ctx_p->timestamp;
     request->len            = working_ctx_p->truncate_bins_len;

     uint32_t lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
     /*
     ** caution: we might get a direct reply if there is an immediate error at the load
     ** balancing group while attempting to send the RPC message -> typically a disconnection
     ** of the TCP connection. As a consequence the response fct
     ** (rozofs_storcli_truncate_req_processing_cbk) can be called prior to returning from
     ** rozofs_sorcli_send_rq_common(). Anticipate the xmit state of the projection and lock
     ** the section to avoid a reply error before returning from rozofs_sorcli_send_rq_common()
     ** --> need to take care because the write context is released after the reply error is
     ** sent to rozofsmount
     */
     working_ctx_p->write_ctx_lock = 1;
     prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
     ret = rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                        (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                        xmit_buf,
                                        working_ctx_p->read_seqnum,
                                        (uint32_t) projection_id,
                                        working_ctx_p->truncate_bins_len,
                                        rozofs_storcli_truncate_req_processing_cbk,
                                        (void*)working_ctx_p);
     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
       /*
       ** the communication with the storage seems to be wrong (more than a TCP connection
       ** temporarily down): attempt to select a new storage
       */
       if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
       {
         /*
         ** out of storage !! -> too many storages are down
         ** NOTE(review): 'error' is still 0 on this path, so the fatal reply carries
         ** error code 0 — looks unintended; confirm whether EIO should be set here
         */
         goto fatal;
       }
       /*
       ** retry for that projection with a new storage index
       ** WARNING: we assume that the xmit buffer has not been released !!!
       */
       //#warning: it is assumed that xmit buffer has not been release, need to double check!!
       goto retry;
     }
     else
     {
       /*
       ** check if the state has not been changed -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
         error = prj_cxt_p[projection_id].errcode;
         goto fatal;
       }
     }
  }
  return;

fail:
  /*
  ** not enough selectable storages before anything was sent: reply with the error
  */
  rozofs_storcli_write_reply_error(working_ctx_p,error);
  /*
  ** release the root transaction context
  */
  STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
  rozofs_storcli_release_context(working_ctx_p);
  return;

fatal:
  /*
  ** we fall in that case when we run out of resources -> that case is a BUG !!
  */
  rozofs_storcli_write_reply_error(working_ctx_p,error);
  /*
  ** release the root transaction context
  */
  STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
  rozofs_storcli_release_context(working_ctx_p);
  return;
}