/**
 * Format one synchro file entry as a single text row.
 *
 * The row holds, in order: layout, FID (printed as 16 hex bytes grouped
 * 4-2-2-2-6), start offset, end offset, cluster id and the dash-separated
 * list of sids, followed by a newline.
 *
 * The number of sids printed is rozofs_get_rozofs_safe(layout); sids[0] is
 * always printed, whatever that count is.
 *
 * @param p      output position in the caller's buffer (no bound is checked)
 * @param info_p entry to display
 *
 * @return the position just after the last character written
 */
char *geocli_display_one_file_info(char *p,geo_fid_entry_t *info_p)
{
  const uint8_t  sid_count = rozofs_get_rozofs_safe(info_p->layout);
  uint8_t       *fid_bytes = (uint8_t *) info_p->fid;
  char          *cursor    = p;
  int            sid_pos   = 1;

  /* layout */
  cursor += sprintf(cursor," %d |",info_p->layout);

  /* fid, byte per byte */
  cursor += sprintf(cursor,"%2.2x%2.2x%2.2x%2.2x-%2.2x%2.2x-%2.2x%2.2x-%2.2x%2.2x-%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x |",
                    fid_bytes[0], fid_bytes[1], fid_bytes[2], fid_bytes[3],
                    fid_bytes[4], fid_bytes[5], fid_bytes[6], fid_bytes[7],
                    fid_bytes[8], fid_bytes[9], fid_bytes[10],fid_bytes[11],
                    fid_bytes[12],fid_bytes[13],fid_bytes[14],fid_bytes[15]);

  /* first and last offsets */
  cursor += sprintf(cursor,"%10"PRIu64" |",info_p->off_start);
  cursor += sprintf(cursor,"%10"PRIu64" |",info_p->off_end);

  /* cluster id, then the distribution: first sid alone, the others dash-prefixed */
  cursor += sprintf(cursor,"%3d |",info_p->cid);
  cursor += sprintf(cursor, "%3.3d", info_p->sids[0]);
  while (sid_pos < sid_count)
  {
    cursor += sprintf(cursor,"-%3.3d", info_p->sids[sid_pos]);
    sid_pos++;
  }

  cursor += sprintf(cursor,"\n");
  return cursor;
}
/**
 * Tell whether every projection of the distribution has been answered.
 *
 * The first rozofs_safe entries of the projection context array are scanned;
 * an entry counts as answered when its state is ROZOFS_PRJ_WR_DONE or
 * ROZOFS_PRJ_WR_ERROR.
 *
 * @param layout     layout from which the rozofs_safe count is derived
 * @param prj_cxt_p  first entry of the projection context array
 * @param errcode_p  cleared on entry, then overwritten with the errcode of
 *                   each entry found in ROZOFS_PRJ_WR_ERROR (the last one wins)
 *
 * @retval 1 all the rozofs_safe entries have been answered
 * @retval 0 at least one entry has not been answered yet
 */
static inline int rozofs_storcli_all_prj_delete_check(uint8_t layout,rozofs_storcli_projection_ctx_t *prj_cxt_p,int *errcode_p)
{
  const uint8_t expected = rozofs_get_rozofs_safe(layout);
  int           answered = 0;
  int           slot;

  *errcode_p = 0;

  for (slot = 0; slot < expected; slot++)
  {
    rozofs_storcli_projection_ctx_t *entry = &prj_cxt_p[slot];

    if (entry->prj_state == ROZOFS_PRJ_WR_DONE)
    {
      answered++;
    }
    else if (entry->prj_state == ROZOFS_PRJ_WR_ERROR)
    {
      /* an error is still a response: remember its code */
      *errcode_p = entry->errcode;
      answered++;
    }
  }

  return (answered == expected) ? 1 : 0;
}
/**
 * Build the directory path associated with a distribution:
 *
 *   <st->root>/layout_<layout>/spare_<spare>/<sid>-<sid>-...-<sid>/
 *
 * where each sid is printed on at least 3 digits and the number of sids is
 * rozofs_get_rozofs_safe(layout).
 *
 * Every write is bounded to FILENAME_MAX bytes and the result is always
 * NUL-terminated. A path that does not fit is truncated instead of
 * overflowing the buffer.
 *
 * @param st        storage whose root directory starts the path
 * @param layout    layout of the file
 * @param dist_set  sids of the distribution
 * @param spare     spare indicator, printed as is in the path
 * @param path      output buffer; assumed to be at least FILENAME_MAX bytes
 *                  (the historical code copied up to FILENAME_MAX bytes into it)
 *
 * @return path
 */
char *storage_map_distribution(storage_t * st, uint8_t layout,
        sid_t dist_set[ROZOFS_SAFE_MAX], uint8_t spare, char *path) {
    int i;
    int written;
    size_t used;
    uint8_t rozofs_safe;

    DEBUG_FUNCTION;

    /* root, layout and spare levels in one bounded write */
    written = snprintf(path, FILENAME_MAX, "%s/layout_%u/spare_%u/",
            st->root, (unsigned) layout, (unsigned) spare);
    if (written < 0) {
        path[0] = '\0';
        return path;
    }
    if (written >= FILENAME_MAX) {
        /* truncated: snprintf already terminated the string */
        return path;
    }
    used = (size_t) written;

    rozofs_safe = rozofs_get_rozofs_safe(layout);

    for (i = 0; i < rozofs_safe; i++) {
        /* sids are dash separated, no dash after the last one */
        const char *separator = (i != (rozofs_safe - 1)) ? "-" : "";

        written = snprintf(path + used, FILENAME_MAX - used, "%.3u%s",
                (unsigned) dist_set[i], separator);
        if (written < 0) {
            path[used] = '\0';
            return path;
        }
        if ((size_t) written >= FILENAME_MAX - used) {
            /* truncated: stop appending */
            return path;
        }
        used += (size_t) written;
    }

    /* closing slash; silently dropped when there is no room left for it */
    snprintf(path + used, FILENAME_MAX - used, "/");

    return path;
}
/**
 * Retry the transmission of the truncate request of one projection.
 *
 * The state of every load balancing group of the distribution is refreshed,
 * then a new storage is selected for the projection. When no other storage
 * can be selected, the request is sent again towards the same storage
 * provided the caller accepts it and the retry counter of the projection has
 * not reached ROZOFS_STORCLI_MAX_RETRY.
 *
 * On failure an error reply is sent and the working context is released,
 * unless write_ctx_lock is not 0, in which case the function just returns.
 *
 * @param working_ctx_p                  root context of the truncate request
 * @param projection_id                  index of the projection to send again
 * @param same_storage_retry_acceptable  0 when a retry on the same storage
 *                                       must not be attempted
 */
void rozofs_storcli_truncate_projection_retry(rozofs_storcli_ctx_t *working_ctx_p,uint8_t projection_id,int same_storage_retry_acceptable)
{
    uint8_t   rozofs_safe;
    uint8_t   rozofs_forward;
    uint8_t   layout;
    storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
    int error=0;
    int storage_idx;

    rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;
    rozofs_storcli_lbg_prj_assoc_t  *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;

    layout         = storcli_truncate_rq_p->layout;
    rozofs_safe    = rozofs_get_rozofs_safe(layout);
    rozofs_forward = rozofs_get_rozofs_forward(layout);
    /*
    ** Refresh the state of each load balancing group since some of them
    ** might have experienced a state change
    */
    for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++)
    {
      rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                              storage_idx,
                                              NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));
    }
    /*
    ** attempt to select a new storage
    */
    if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
    {
      /*
      ** Cannot select a new storage: check whether a retry on the same
      ** storage is acceptable and, when it is, whether the max retry count
      ** has been reached. Otherwise the request is rejected.
      */
      if (same_storage_retry_acceptable == 0)
      {
        error = EIO;
        prj_cxt_p[projection_id].errcode = error;
        goto reject;
      }
      if (++prj_cxt_p[projection_id].retry_cpt >= ROZOFS_STORCLI_MAX_RETRY)
      {
        error = EIO;
        prj_cxt_p[projection_id].errcode = error;
        goto reject;
      }
    }
    /*
    ** either a new storage has been selected or the retry counter is not exhausted
    */
    sp_truncate_arg_no_bins_t *request;
    sp_truncate_arg_no_bins_t  truncate_prj_args;
    void  *xmit_buf;
    int ret;

    xmit_buf = prj_cxt_p[projection_id].prj_buf;
    if (xmit_buf == NULL)
    {
      /*
      ** fatal error since the resource control already took place
      */
      error = EFAULT;
      prj_cxt_p[projection_id].errcode = error;
      goto fatal;
    }
    /*
    ** fill partially the common header
    */
retry:
    request         = &truncate_prj_args;
    request->cid    = storcli_truncate_rq_p->cid;
    request->sid    = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
    request->layout = layout;
    /* storage indexes from rozofs_forward upwards are flagged as spare */
    if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
    else request->spare = 0;
    memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX*sizeof (uint8_t));
    memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
    request->proj_id        = projection_id;
    request->bid            = storcli_truncate_rq_p->bid;
    request->last_seg       = storcli_truncate_rq_p->last_seg;
    request->last_timestamp = working_ctx_p->timestamp;
    /*
    ** Bins len has been saved in the working context
    */
    request->len = working_ctx_p->truncate_bins_len;

    uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
    /*
    ** The lock is incremented (not just set) since this procedure may be
    ** called after a synchronous transaction failure while the system is
    ** still in the initial procedure that triggered the sending of the
    ** projection, so the lock may already be asserted.
    ** As for the initial case, the xmit state of the projection is set
    ** before sending: the ERROR status might be set on a synchronous
    ** transaction failure, and setting IN_PRG after a positive submission
    ** towards the lbg would overwrite it.
    */
    working_ctx_p->write_ctx_lock++;
    prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

    STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
    ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                        (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                        xmit_buf,
                                        working_ctx_p->read_seqnum,
                                        (uint32_t) projection_id,
                                        working_ctx_p->truncate_bins_len,
                                        rozofs_storcli_truncate_req_processing_cbk,
                                        (void*)working_ctx_p);
    working_ctx_p->write_ctx_lock--;
    if (ret < 0)
    {
      /*
      ** the communication with the storage seems to be wrong (more than a
      ** TCP connection temporary down): attempt to select a new storage
      */
      STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
      if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
      {
        /*
        ** Out of storage: too many storages are down.
        ** Report EIO rather than leaving the error code at 0, as done for
        ** the other "no storage available" exits of this function.
        */
        error = EIO;
        prj_cxt_p[projection_id].errcode = error;
        goto fatal;
      }
      /*
      ** retry for that projection with a new storage index.
      ** WARNING: it is assumed that the xmit buffer has not been released
      */
      goto retry;
    }
    /*
    ** The buffer has been accepted by the load balancing group: check
    ** whether there was a direct failure for that transaction
    */
    if ( prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
    {
      error = prj_cxt_p[projection_id].errcode;
      goto fatal;
    }
    return;
    /*
    **_____________________________________________
    **  Exception cases (reject and fatal share the same processing)
    **_____________________________________________
    */
reject:
fatal:
    /*
    ** caution -> the error reply is only generated if the ctx_lock is 0
    */
    if (working_ctx_p->write_ctx_lock != 0) return;

    rozofs_storcli_write_reply_error(working_ctx_p,error);
    /*
    ** release the root transaction context
    */
    STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
    rozofs_storcli_release_context(working_ctx_p);
    return;
}
/**
 * Send the truncate request of every forward projection.
 *
 * Steps:
 *  - refresh the state of each load balancing group of the distribution,
 *  - select a storage for each of the rozofs_forward projections,
 *  - when data is provided, apply the forward transform on it and compute
 *    the length of the bins to send (0 when there is no data),
 *  - send one SP_TRUNCATE request per projection, selecting another storage
 *    when the submission fails.
 *
 * On any failure an error reply is sent and the working context is released.
 *
 * @param working_ctx_p  root context associated with the truncate request
 * @param data           data of the last block to truncate, may be NULL
 */
void rozofs_storcli_truncate_req_processing_exec(rozofs_storcli_ctx_t *working_ctx_p, char * data)
{
  storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
  uint8_t layout = storcli_truncate_rq_p->layout;
  uint8_t   rozofs_forward;
  uint8_t   rozofs_safe;
  uint8_t   projection_id;
  int       storage_idx;
  int       error=0;
  rozofs_storcli_lbg_prj_assoc_t  *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;
  rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;

  rozofs_forward = rozofs_get_rozofs_forward(layout);
  rozofs_safe    = rozofs_get_rozofs_safe(layout);

  /*
  ** set the current state of each load balancing group belonging to the rozofs_safe group
  */
  for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++)
  {
    rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                            storage_idx,
                                            NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));
  }
  /*
  ** Now find out a selectable lbg_id for each projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
    if (rozofs_storcli_select_storage_idx_for_write ( working_ctx_p,rozofs_forward, rozofs_safe,projection_id) < 0)
    {
      /*
      ** there are not enough valid storages
      */
      error = EIO;
      goto fail;
    }
  }
  /*
  ** Transform the data to write, when there is some
  */
  working_ctx_p->truncate_bins_len = 0;
  if (data != NULL) {
    STORCLI_START_KPI(storcli_kpi_transform_forward);

    rozofs_storcli_transform_forward(working_ctx_p->prj_ctx,
                                     layout,
                                     0,
                                     1,
                                     working_ctx_p->timestamp,
                                     storcli_truncate_rq_p->last_seg,
                                     data);
    STORCLI_STOP_KPI(storcli_kpi_transform_forward,0);
    working_ctx_p->truncate_bins_len = rozofs_get_max_psize(layout)*sizeof(bin_t) + sizeof(rozofs_stor_bins_hdr_t);
  }
  /*
  ** We have enough storages, so initiate the transaction towards the storage
  ** for each projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
    sp_truncate_arg_no_bins_t *request;
    sp_truncate_arg_no_bins_t  truncate_prj_args;
    void  *xmit_buf;
    int ret;

    xmit_buf = prj_cxt_p[projection_id].prj_buf;
    if (xmit_buf == NULL)
    {
      /*
      ** fatal error since the resource control already took place
      */
      error = EIO;
      goto fatal;
    }
    /*
    ** fill partially the common header
    */
retry:
    request         = &truncate_prj_args;
    request->cid    = storcli_truncate_rq_p->cid;
    request->sid    = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
    request->layout = layout;
    /* storage indexes from rozofs_forward upwards are flagged as spare */
    if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
    else request->spare = 0;
    memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX*sizeof (uint8_t));
    memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
    request->proj_id        = projection_id;
    request->bid            = storcli_truncate_rq_p->bid;
    request->last_seg       = storcli_truncate_rq_p->last_seg;
    request->last_timestamp = working_ctx_p->timestamp;
    request->len            = working_ctx_p->truncate_bins_len;

    uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
    /*
    ** NOTE(review): unlike rozofs_storcli_truncate_projection_retry(), no
    ** STORCLI_STOP_NORTH_PROF is issued before jumping back to retry, so the
    ** START below may run twice for the same projection - confirm this is
    ** harmless for the profiler.
    */
    STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
    /*
    ** Caution: there might be a direct reply if there is a direct error at
    ** load balancing group level while attempting to send the RPC message
    ** (typically a disconnection of the TCP connection). As a consequence
    ** the response function rozofs_storcli_truncate_req_processing_cbk() can
    ** be called before rozofs_sorcli_send_rq_common() returns.
    ** So the xmit state of the projection is anticipated and the section is
    ** locked to avoid an error reply before returning from
    ** rozofs_sorcli_send_rq_common(): the context is released once the
    ** error reply has been sent.
    */
    working_ctx_p->write_ctx_lock = 1;
    prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

    ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                        (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                        xmit_buf,
                                        working_ctx_p->read_seqnum,
                                        (uint32_t) projection_id,
                                        working_ctx_p->truncate_bins_len,
                                        rozofs_storcli_truncate_req_processing_cbk,
                                        (void*)working_ctx_p);
    working_ctx_p->write_ctx_lock = 0;
    if (ret < 0)
    {
      /*
      ** the communication with the storage seems to be wrong (more than a
      ** TCP connection temporary down): attempt to select a new storage
      */
      if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
      {
        /*
        ** Out of storage: too many storages are down.
        ** Report EIO rather than leaving the error code at 0.
        */
        error = EIO;
        goto fatal;
      }
      /*
      ** retry for that projection with a new storage index.
      ** WARNING: it is assumed that the xmit buffer has not been released
      */
      goto retry;
    }
    else
    {
      /*
      ** check if the state has not been changed: it might be possible to get a direct error
      */
      if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
      {
        error = prj_cxt_p[projection_id].errcode;
        goto fatal;
      }
    }
  }
  return;

  /*
  ** Exception cases (fail and fatal share the same processing):
  ** send the error reply and release the root transaction context
  */
fail:
fatal:
  rozofs_storcli_write_reply_error(working_ctx_p,error);
  STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
  rozofs_storcli_release_context(working_ctx_p);
  return;
}
/**
 * Initial processing of a truncate request.
 *
 * A working context is allocated, the RPC message found in recv_buf is
 * decoded into it, the load balancing group table of the distribution is
 * built, the response buffer and one buffer per forward projection are
 * allocated, then the request is inserted in the serialization table. When
 * stc_rng_insert() returns 0 the function returns without processing the
 * request; otherwise rozofs_storcli_truncate_req_processing() is called
 * right away.
 *
 * On error, a reply carrying the error code is sent with recv_buf and the
 * working context (when allocated) is released.
 *
 * @param socket_ctx_idx                 reference of the socket the request came from
 * @param recv_buf                       buffer that contains the received RPC message
 * @param rozofs_storcli_remote_rsp_cbk  callback for sending out the response
 */
void rozofs_storcli_truncate_req_init(uint32_t  socket_ctx_idx, void *recv_buf,rozofs_storcli_resp_pf_t rozofs_storcli_remote_rsp_cbk)
{
   rozofs_rpc_call_hdr_with_sz_t    *com_hdr_p;
   rozofs_storcli_ctx_t *working_ctx_p = NULL;
   int i;
   uint32_t  msg_len;  /* length of the rpc message including the header length */
   storcli_truncate_arg_t *storcli_truncate_rq_p = NULL;
   rozofs_rpc_call_hdr_t   hdr;   /* structure that contains the rpc header in host format */
   int      len;       /* effective length of application message               */
   uint8_t  *pmsg;     /* pointer to the first available byte in the application message */
   uint32_t header_len;
   XDR xdrs;
   int errcode = EINVAL;
   /*
   ** allocate a context for the duration of the truncate
   */
   working_ctx_p = rozofs_storcli_alloc_context();
   if (working_ctx_p == NULL)
   {
     /*
     ** that situation MUST not occur since there is the same number of
     ** receive buffers and working contexts
     */
     severe("out of working read/write saved context");
     goto failure;
   }
   storcli_truncate_rq_p = &working_ctx_p->storcli_truncate_arg;
   STORCLI_START_NORTH_PROF(working_ctx_p,truncate,0);

   /*
   ** Get the full length of the message and adjust it to the length of the
   ** applicative part (RPC header + application msg)
   */
   msg_len = ruc_buf_getPayloadLen(recv_buf);
   msg_len -=sizeof(uint32_t);

   /*
   ** save the reference of the received socket since it will be needed for
   ** sending back the response
   */
   working_ctx_p->socketRef    = socket_ctx_idx;
   working_ctx_p->user_param   = NULL;
   working_ctx_p->recv_buf     = recv_buf;
   working_ctx_p->response_cbk = rozofs_storcli_remote_rsp_cbk;
   /*
   ** Get the payload of the receive buffer and copy the RPC header
   */
   com_hdr_p  = (rozofs_rpc_call_hdr_with_sz_t*) ruc_buf_getPayload(recv_buf);
   memcpy(&hdr,&com_hdr_p->hdr,sizeof(rozofs_rpc_call_hdr_t));
   /*
   ** swap the rpc header
   */
   scv_call_hdr_ntoh(&hdr);
   pmsg = rozofs_storcli_set_ptr_on_nfs_call_msg((char*)&com_hdr_p->hdr,&header_len);
   if (pmsg == NULL)
   {
     errcode = EFAULT;
     goto failure;
   }
   /*
   ** map the memory on the first applicative RPC byte available and prepare
   ** to decode: notice that XDR_FREE will not be called since the
   ** application MUST provide a pointer for storing the file handle
   */
   len = msg_len - header_len;
   xdrmem_create(&xdrs,(char*)pmsg,len,XDR_DECODE);
   /*
   ** store the source transaction id needed for the reply
   */
   working_ctx_p->src_transaction_id =  hdr.hdr.xid;
   /*
   ** decode the RPC message of the truncate request
   */
   if (xdr_storcli_truncate_arg_t(&xdrs,storcli_truncate_rq_p) == FALSE)
   {
      /*
      ** decoding error
      */
      errcode = EFAULT;
      severe("rpc trucnate request decoding error");
      goto failure;
   }
   /*
   ** init of the load balancing group/projection association table:
   ** the table is ordered: the first entry corresponds to the storage
   ** associated with projection 0, the second with 1, etc.
   ** Unknown sids are skipped, so the table may hold fewer than rozofs_safe
   ** entries.
   */
   uint8_t   rozofs_safe = rozofs_get_rozofs_safe(storcli_truncate_rq_p->layout);
   int lbg_in_distribution = 0;
   for (i = 0; i  <rozofs_safe ; i ++)
   {
     /*
     ** Get the load balancing group associated with the sid
     */
     int lbg_id = rozofs_storcli_get_lbg_for_sid(storcli_truncate_rq_p->cid,storcli_truncate_rq_p->dist_set[i]);
     if (lbg_id < 0)
     {
       /*
       ** there is no association between the sid and the lbg. It is
       ** typically the case when a new cluster has been added to the
       ** configuration and the client does not know yet the configuration
       ** change
       */
       severe("sid is unknown !! %d\n",storcli_truncate_rq_p->dist_set[i]);
       continue;
     }
     rozofs_storcli_lbg_prj_insert_lbg_and_sid(working_ctx_p->lbg_assoc_tb,lbg_in_distribution,
                                                lbg_id,
                                                storcli_truncate_rq_p->dist_set[i]);

     rozofs_storcli_lbg_prj_insert_lbg_state(working_ctx_p->lbg_assoc_tb,
                                             lbg_in_distribution,
                                             NORTH_LBG_GET_STATE(working_ctx_p->lbg_assoc_tb[lbg_in_distribution].lbg_id));
     lbg_in_distribution++;
     if (lbg_in_distribution == rozofs_safe) break;
   }
   /*
   ** allocate a small buffer that will be used for sending the response to
   ** the truncate request
   */
   working_ctx_p->xmitBuf = ruc_buf_getBuffer(ROZOFS_STORCLI_NORTH_SMALL_POOL);
   if (working_ctx_p->xmitBuf == NULL)
   {
     /*
     ** the buffer just requested is the one to check: the context itself
     ** has already been validated above
     */
     errcode = ENOMEM;
     severe("out of small buffer");
     goto failure;
   }
   /*
   ** allocate a sequence number for the working context (same as for read)
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
   /*
   ** set now the working variables specific to the truncate:
   ** the structure used for writing is re-used even if nothing is written
   */
   uint8_t forward_projection = rozofs_get_rozofs_forward(storcli_truncate_rq_p->layout);
   for (i = 0; i < forward_projection; i++)
   {
     working_ctx_p->prj_ctx[i].prj_state = ROZOFS_PRJ_READ_IDLE;
     working_ctx_p->prj_ctx[i].prj_buf   = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
     if (working_ctx_p->prj_ctx[i].prj_buf == NULL)
     {
       errcode = ENOMEM;
       severe("out of large buffer");
       goto failure;
     }
     /*
     ** increment the inuse counter of each buffer since the packet might be
     ** needed again in case of retransmission
     */
     working_ctx_p->prj_ctx[i].inuse_valid = 1;
     ruc_buf_inuse_increment(working_ctx_p->prj_ctx[i].prj_buf);
     /*
     ** set the pointer to the bins
     */
     int position = rozofs_storcli_get_position_of_first_byte2write_in_truncate();
     uint8_t *pbuf = (uint8_t*)ruc_buf_getPayload(working_ctx_p->prj_ctx[i].prj_buf);

     working_ctx_p->prj_ctx[i].bins = (bin_t*)(pbuf+position);
   }
   /*
   ** Prepare for request serialization
   */
   memcpy(working_ctx_p->fid_key, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
   working_ctx_p->opcode_key = STORCLI_TRUNCATE;
   {
     /*
     ** lock the whole file for a truncate: the range goes from block 0 up to
     ** the largest 64 bits value
     */
     uint64_t nb_blocks = UINT64_MAX;
     int ret;
     /*
     ** NOTE(review): the entry is inserted with STORCLI_READ while
     ** opcode_key is STORCLI_TRUNCATE - confirm this is intended.
     */
     ret = stc_rng_insert((void*)working_ctx_p,
                           STORCLI_READ,working_ctx_p->fid_key,
                           0,nb_blocks,
                           &working_ctx_p->sched_idx);
     if (ret == 0)
     {
       /*
       ** there is a current request that is processed with the same fid and
       ** there is a collision: nothing more to do here
       */
       return;
     }
     /*
     ** no request pending with that fid, so it can be processed right away
     */
     rozofs_storcli_truncate_req_processing(working_ctx_p);
     return;
   }

   /*
   **_____________________________________________
   **  Exception cases
   **_____________________________________________
   */
failure:
   /*
   ** send back the response with the appropriate error code.
   ** note: the received buffer (recv_buf) is intended to be released by this
   ** service in case of error or by the TCP transmitter once it has been
   ** passed to the TCP stack.
   */
   rozofs_storcli_reply_error_with_recv_buf(socket_ctx_idx,recv_buf,NULL,rozofs_storcli_remote_rsp_cbk,errcode);
   /*
   ** free the root context when it has been allocated
   */
   if (working_ctx_p != NULL)
   {
     STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
     /*
     ** remove the reference to the recv_buf to avoid releasing it twice
     */
     working_ctx_p->recv_buf = NULL;
     rozofs_storcli_release_context(working_ctx_p);
   }
   return;
}
/**
 * Apply the inverse transform on a set of blocks.
 *
 * Blocks already in ROZOFS_BLK_TRANSFORM_DONE state are skipped. For the
 * others:
 *  - timestamp 0 with a full effective length: the block is a hole and the
 *    output block is zeroed,
 *  - timestamp 0 with an effective length of 0: end of file, the function
 *    returns at once and reports the index of that block as block count,
 *  - otherwise transform_inverse_proc() is called and, when the effective
 *    length is shorter than a block, the end of the output block is zeroed.
 *
 * When all the blocks have been processed, rozofs_storcli_check_block_2_repair()
 * is called; it is not called when the end of file is met.
 *
 * The first_block_idx offset applies to the output buffer pointed by "data".
 *
 * @param prj_ctx_p                     working array of the projections
 * @param layout                        layout of the file
 * @param bsize                         block size indicator (converted with ROZOFS_BSIZE_BYTES)
 * @param first_block_idx               index of the first block in the output buffer
 * @param number_of_blocks              number of blocks to process
 * @param block_ctx_p                   per block context (state, timestamp, effective length)
 * @param data                          output buffer
 * @param number_of_blocks_p            returned number of blocks
 * @param rozofs_storcli_prj_idx_table  projection indexes to use, ROZOFS_SAFE_MAX_STORCLI entries per block
 *
 * @return always 0
 */
int rozofs_storcli_transform_inverse(rozofs_storcli_projection_ctx_t *prj_ctx_p,
                                     uint8_t layout, uint32_t bsize,
                                     uint32_t first_block_idx,
                                     uint32_t number_of_blocks,
                                     rozofs_storcli_inverse_block_t *block_ctx_p,
                                     char *data,
                                     uint32_t *number_of_blocks_p,
                                     uint8_t  *rozofs_storcli_prj_idx_table)
{
    int block_idx;

    *number_of_blocks_p = 0;

    uint8_t  rozofs_inverse  = rozofs_get_rozofs_inverse(layout);
    uint8_t  rozofs_forward  = rozofs_get_rozofs_forward(layout);
    uint8_t  rozofs_safe     = rozofs_get_rozofs_safe(layout);
    uint32_t bbytes          = ROZOFS_BSIZE_BYTES(bsize);
    int      prj_size_in_msg = rozofs_get_max_psize_in_msg(layout,bsize);

    for (block_idx = 0; block_idx < number_of_blocks; block_idx++)
    {
      rozofs_storcli_inverse_block_t *blk = &block_ctx_p[block_idx];

      /* nothing to do for a block that has already been transformed */
      if (blk->state == ROZOFS_BLK_TRANSFORM_DONE)
      {
        continue;
      }

      /* a null timestamp identifies either a hole or the end of file */
      if (blk->timestamp == 0)
      {
        if (blk->effective_length == bbytes)
        {
          /* hole in the file: the whole block reads as zeros */
          ROZOFS_STORCLI_STATS(ROZOFS_STORCLI_EMPTY_READ);
          memset( data + (bbytes * (first_block_idx + block_idx)),0,bbytes);
          blk->state = ROZOFS_BLK_TRANSFORM_DONE;
          continue;
        }
        if (blk->effective_length == 0)
        {
          /* end of file: report the number of blocks that precede it */
          blk->state = ROZOFS_BLK_TRANSFORM_DONE;
          *number_of_blocks_p = block_idx;
          return 0;
        }
      }

      /*
      ** The index used to address prj_ctx_p is NOT the real projection id:
      ** the projection id is found in the header of each bins, so the table
      ** of projection indexes selected for that block is handed over to the
      ** inverse procedure.
      */
      transform_inverse_proc(&rozofs_storcli_prj_idx_table[ROZOFS_SAFE_MAX_STORCLI*block_idx],
                             prj_ctx_p,
                             prj_size_in_msg,
                             layout,
                             bbytes,
                             first_block_idx,
                             block_idx,
                             data);

      blk->state = ROZOFS_BLK_TRANSFORM_DONE;

      /* partial block: zero what follows the effective data */
      if (blk->effective_length < bbytes)
      {
        char *raz_p = data + (bbytes * (first_block_idx + block_idx)) + blk->effective_length;
        memset( raz_p,0,(bbytes-blk->effective_length) );
      }
    }

    /* every block has been processed */
    *number_of_blocks_p = number_of_blocks;

    /* check whether a block should be repaired */
    rozofs_storcli_check_block_2_repair(prj_ctx_p,
                                        rozofs_inverse,
                                        rozofs_forward,
                                        rozofs_safe,
                                        prj_size_in_msg,
                                        number_of_blocks,
                                        block_ctx_p);
    return 0;
}
/**
 * Check whether rozofs_inverse projections with the same timestamp and
 * effective length are available for one block (fast path).
 *
 * The first valid block header found among the projections in
 * ROZOFS_PRJ_READ_DONE state is taken as reference. Each further projection
 * either matches the reference (its index is appended to prj_idx_tb_p) or is
 * logged in entry 0 of a local timestamp table. As soon as rozofs_inverse
 * matching projections are found, the logged projections (if any) are handed
 * to rozofs_storcli_mark_projection2rebuild() and the function returns.
 *
 * When the reference does not gather rozofs_inverse projections, the
 * decision is delegated to rozofs_storcli_transform_inverse_check_timestamp_tb().
 *
 * @param prj_ctx_p        working array of the projections
 * @param layout           layout of the file
 * @param block_idx        index of the block to check
 * @param prj_idx_tb_p     returned indexes of the projections to use
 * @param timestamp_p      returned timestamp (0 on entry)
 * @param effective_len_p  returned effective length (0 on entry)
 *
 * @return rozofs_inverse when the reference gathers enough projections,
 *         0 when no projection holds a valid header for that block,
 *         otherwise the value returned by
 *         rozofs_storcli_transform_inverse_check_timestamp_tb()
 */
inline int rozofs_storcli_transform_inverse_check(rozofs_storcli_projection_ctx_t *prj_ctx_p,
                                                  uint8_t layout,
                                                  uint32_t block_idx,
                                                  uint8_t *prj_idx_tb_p,
                                                  uint64_t *timestamp_p,
                                                  uint16_t *effective_len_p)
{
    uint8_t prj_pos;
    uint8_t matching_count = 0;
    uint8_t rozofs_inverse = rozofs_get_rozofs_inverse(layout);
    uint8_t rozofs_safe    = rozofs_get_rozofs_safe(layout);
    int     nothing_valid  = 1;

    *timestamp_p     = 0;
    *effective_len_p = 0;

    rozofs_storcli_timestamp_ctx_t reference;
    rozofs_storcli_timestamp_ctx_t mismatch_tb[ROZOFS_SAFE_MAX];
    uint8_t mismatch_entries = 0;

    reference.count = 0;

    /* entry 0 of the table collects every projection that does not match the reference */
    rozofs_storcli_timestamp_ctx_t *mismatch_p = &mismatch_tb[mismatch_entries];
    mismatch_p->timestamp = 0;
    mismatch_p->count     = 0;

    for (prj_pos = 0; prj_pos < rozofs_safe; prj_pos++)
    {
      rozofs_stor_bins_hdr_t *hdr_p;

      /* skip the projection contexts that do not contain valid data */
      if (prj_ctx_p[prj_pos].prj_state != ROZOFS_PRJ_READ_DONE)
      {
        continue;
      }
      hdr_p = (rozofs_stor_bins_hdr_t*)&prj_ctx_p[prj_pos].block_hdr_tab[block_idx];

      /* skip the invalid blocks (null timestamp and null length) */
      if ((hdr_p->s.timestamp == 0) && (hdr_p->s.effective_length == 0))
      {
        continue;
      }

      if (reference.count == 0)
      {
        /* first valid projection: it becomes the reference */
        nothing_valid              = 0;
        reference.timestamp        = hdr_p->s.timestamp;
        reference.effective_length = hdr_p->s.effective_length;
        reference.count++;
        prj_idx_tb_p[matching_count++] = prj_pos;
        continue;
      }

      if ((hdr_p->s.timestamp != reference.timestamp) ||
          (hdr_p->s.effective_length != reference.effective_length))
      {
        /*
        ** timestamp or length differs from the reference: log the
        ** projection index to address a potential rebuild
        */
        mismatch_p->prj_idx_tb[mismatch_p->count] = prj_pos;
        mismatch_p->count++;
        if (mismatch_entries == 0)
        {
          mismatch_entries = 1;
        }
        continue;
      }

      /* same timestamp and length as the reference */
      reference.count++;
      prj_idx_tb_p[matching_count++] = prj_pos;
      if (matching_count != rozofs_inverse)
      {
        continue;
      }

      /* enough projections: report the common timestamp and length */
      *timestamp_p     = reference.timestamp;
      *effective_len_p = reference.effective_length;

      /* mark the projections that MUST be rebuilt, when some have been logged */
      if (mismatch_entries)
      {
        rozofs_storcli_mark_projection2rebuild(prj_ctx_p,
                                               mismatch_tb,
                                               mismatch_entries+1,
                                               mismatch_entries);
      }
      return (int)rozofs_inverse;
    }

    /* no valid header at all for that block: end of file case */
    if (nothing_valid) return 0;

    /*
    ** the reference did not gather rozofs_inverse projections: search among
    ** all the timestamps that have been read
    */
    return rozofs_storcli_transform_inverse_check_timestamp_tb(
                                      prj_ctx_p,
                                      layout,
                                      block_idx,
                                      prj_idx_tb_p,
                                      timestamp_p,
                                      effective_len_p);
}
/**
 * Search, for one block, a set of rozofs_inverse projections that share the
 * same timestamp and effective length.
 *
 * The valid block headers of the projections in ROZOFS_PRJ_READ_DONE state
 * are sorted in a local table holding one entry per (timestamp, effective
 * length) pair. As soon as an entry gathers rozofs_inverse projections, their
 * indexes are copied to prj_idx_tb_p, rozofs_storcli_mark_projection2rebuild()
 * is called with the table and the index of the winning entry, and the
 * function returns 1.
 *
 * A table entry is created only for a pair that is not yet in the table, so
 * the table never holds two entries for the same pair.
 *
 * @param prj_ctx_p        working array of the projections
 * @param layout           layout of the file
 * @param block_idx        index of the block to check
 * @param prj_idx_tb_p     returned indexes of the projections to use (on success only)
 * @param timestamp_p      returned timestamp (cleared on entry)
 * @param effective_len_p  returned effective length (written on success only)
 *
 * @retval 1  rozofs_inverse projections with the same timestamp have been found
 * @retval 0  no projection holds a valid header for that block (read after end of file)
 * @retval -1 not enough projections with the same timestamp
 */
inline int rozofs_storcli_transform_inverse_check_timestamp_tb(rozofs_storcli_projection_ctx_t *prj_ctx_p,
                                                               uint8_t layout,
                                                               uint32_t block_idx,
                                                               uint8_t *prj_idx_tb_p,
                                                               uint64_t *timestamp_p,
                                                               uint16_t *effective_len_p)
{
    uint8_t prj_ctx_idx;
    uint8_t timestamp_entry;
    *timestamp_p = 0;
    uint8_t rozofs_inverse = rozofs_get_rozofs_inverse(layout);
    uint8_t rozofs_safe    = rozofs_get_rozofs_safe(layout);
    rozofs_storcli_timestamp_ctx_t *p;
    int eof = 1;
    rozofs_storcli_timestamp_ctx_t rozofs_storcli_timestamp_tb[ROZOFS_SAFE_MAX];
    uint8_t rozofs_storcli_timestamp_next_free_idx=0;

    for (prj_ctx_idx = 0; prj_ctx_idx < rozofs_safe; prj_ctx_idx++)
    {
      int known_timestamp = 0;

      if (prj_ctx_p[prj_ctx_idx].prj_state != ROZOFS_PRJ_READ_DONE)
      {
        /*
        ** that projection context does not contain valid data, so skip it
        */
        continue;
      }
      /*
      ** Get the pointer to the projection header
      */
      rozofs_stor_bins_hdr_t *rozofs_bins_hdr_p = (rozofs_stor_bins_hdr_t*)&prj_ctx_p[prj_ctx_idx].block_hdr_tab[block_idx];
      /*
      ** The block is invalid when the timestamp and the effective length are
      ** 0. That situation can occur when a storage was in fault at writing
      ** time, so the projections read on the different storages may not
      ** return the same number of blocks.
      */
      if ((rozofs_bins_hdr_p->s.timestamp == 0)&&(rozofs_bins_hdr_p->s.effective_length == 0)) continue;

      /*
      ** look for an entry with the same timestamp and length
      */
      for (timestamp_entry = 0; timestamp_entry < rozofs_storcli_timestamp_next_free_idx; timestamp_entry++)
      {
        p = &rozofs_storcli_timestamp_tb[timestamp_entry];
        if ((rozofs_bins_hdr_p->s.timestamp != p->timestamp) || (rozofs_bins_hdr_p->s.effective_length != p->effective_length)) continue;
        /*
        ** same timestamp and length: register the projection index and check
        ** if rozofs_inverse projections have been reached
        */
        p->prj_idx_tb[p->count]= prj_ctx_idx;
        p->count++;
        if (p->count == rozofs_inverse)
        {
          /*
          ** the right number of projections is available
          */
          memcpy(prj_idx_tb_p,p->prj_idx_tb,rozofs_inverse);
          /*
          ** report the timestamp and length common to all the projections
          ** used to rebuild that block
          */
          *timestamp_p     = p->timestamp;
          *effective_len_p = p->effective_length;
          /*
          ** Mark the projections that MUST be rebuilt
          */
          rozofs_storcli_mark_projection2rebuild(prj_ctx_p,rozofs_storcli_timestamp_tb,timestamp_entry,rozofs_storcli_timestamp_next_free_idx);
          return 1;
        }
        /*
        ** the projection is registered: entries are unique, so no other
        ** entry can match and no new entry must be created for it
        */
        known_timestamp = 1;
        break;
      }
      if (known_timestamp) continue;
      /*
      ** that timestamp does not exist yet (this includes the very first
      ** valid projection): create an entry for it
      */
      eof = 0;
      p = &rozofs_storcli_timestamp_tb[rozofs_storcli_timestamp_next_free_idx];
      p->timestamp        = rozofs_bins_hdr_p->s.timestamp;
      p->effective_length = rozofs_bins_hdr_p->s.effective_length;
      p->count            = 0;
      p->prj_idx_tb[p->count]= prj_ctx_idx;
      p->count++;
      rozofs_storcli_timestamp_next_free_idx++;
    }
    /*
    ** take care of the case where the read takes place after the end of file
    */
    if (eof) return 0;
    /*
    ** rozofs_inverse projections with the same timestamp have not been found:
    ** one more projection must be read unless rozofs_safe projections have
    ** already been attempted or no more storage is up among the rozofs_safe
    ** set
    */
    return -1;
}