/**
* The purpose of that function is to tell whether all the write repair requests
  have been answered: the first rozofs_forward projection contexts are scanned and
  the scan stops on the first one found in the ROZOFS_PRJ_WR_IN_PRG state.

  @param layout : layout association with the file
  @param prj_cxt_p: pointer to the projection context (working array)

  @retval 1 when no projection is in the ROZOFS_PRJ_WR_IN_PRG state
  @retval 0 when at least one projection is still in the ROZOFS_PRJ_WR_IN_PRG state
*/
static inline int rozofs_storcli_all_prj_write_repair_check(uint8_t layout,rozofs_storcli_projection_ctx_t *prj_cxt_p)
{
  /*
  ** the number of contexts to scan is the rozofs_forward value of the layout
  */
  const uint8_t nb_forward = rozofs_get_rozofs_forward(layout);
  int           idx        = 0;

  while (idx < nb_forward)
  {
    /*
    ** one pending write repair is enough to conclude
    */
    if (prj_cxt_p[idx].prj_state == ROZOFS_PRJ_WR_IN_PRG) return 0;
    idx++;
  }
  return 1;
}
/* Example no. 2 */
/* 0 */
/**
* The purpose of that function is to tell whether the truncate has been acknowledged
  for every forward projection: the projection contexts found in the
  ROZOFS_PRJ_WR_DONE state are counted among the first rozofs_forward contexts.

  @param layout : layout association with the file
  @param prj_cxt_p: pointer to the projection context (working array)
  @param *distribution: obsolete, not used by the function

  @retval 1 when rozofs_forward projections are in the ROZOFS_PRJ_WR_DONE state
  @retval 0 otherwise (including the case where rozofs_forward is 0)
*/
static inline int rozofs_storcli_all_prj_truncate_check(uint8_t layout,rozofs_storcli_projection_ctx_t *prj_cxt_p,dist_t *distribution)
{
  /*
  ** the number of contexts to scan is the rozofs_forward value of the layout
  */
  const uint8_t nb_forward = rozofs_get_rozofs_forward(layout);
  int           nb_done    = 0;
  int           idx;

  (void) distribution;

  for (idx = 0; idx < nb_forward; idx++)
  {
    if (prj_cxt_p[idx].prj_state == ROZOFS_PRJ_WR_DONE) nb_done++;
    /*
    ** the target can only be reached while scanning: nothing is reported
    ** when there is no context to scan
    */
    if (nb_done == nb_forward) return 1;
  }
  return 0;
}
/*
** That function is called when all the projections are ready to be sent.
**
** Processing order:
**  1- when a shared memory buffer is attached to the context, compare its first 32 bits
**     with the source transaction id: a mismatch is recorded as EPROTO
**  2- the read reply is sent back (rozofs_storcli_read_reply_success) in every case
**  3- a new read sequence number is allocated for the context
**  4- on EPROTO nothing is repaired and the context is released
**  5- otherwise one write repair request is sent for each of the first rozofs_forward
**     projections that has a non empty crc_err_bitmap (SP_WRITE_REPAIR2 when
**     storcli_storage_supports_repair2 is set, SP_WRITE_REPAIR otherwise)
**  6- when no projection is left in the ROZOFS_PRJ_WR_IN_PRG state the context is released,
**     otherwise the function returns and the context stays allocated
**     (NOTE(review): it is presumably released from the response callback, not visible here)

 @param working_ctx_p: pointer to the root context associated with the top level write request

*/
void rozofs_storcli_write_repair_req_processing(rozofs_storcli_ctx_t *working_ctx_p)
{

  storcli_read_arg_t *storcli_read_rq_p = (storcli_read_arg_t*)&working_ctx_p->storcli_read_arg;
  uint8_t layout = storcli_read_rq_p->layout;
  uint8_t   rozofs_forward;
  uint8_t   projection_id;
  int       error=0;
  int       ret;
  rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
  uint8_t  bsize  = storcli_read_rq_p->bsize;
  int prj_size_in_msg = rozofs_get_max_psize_in_msg(layout,bsize);
  sp_write_repair_arg_no_bins_t  *request; 
  sp_write_repair_arg_no_bins_t   repair_prj_args;
  sp_write_repair2_arg_no_bins_t *request2; 
  sp_write_repair2_arg_no_bins_t  repair2_prj_args;
      
  rozofs_forward = rozofs_get_rozofs_forward(layout);
  
  /*
  ** check if the buffer is still valid: we might face the situation where the rozofsmount
  ** times out and re-allocates the write buffer located in shared memory for another
  ** transaction (either read or write).
  ** The control takes place only when a shared memory buffer is present.
  */
  error  = 0;
  if (working_ctx_p->shared_mem_p!= NULL)
  {
      /*
      ** the first 32 bits of the shared buffer are compared with the transaction id of the source
      */
      uint32_t *xid_p = (uint32_t*)working_ctx_p->shared_mem_p;
      if (*xid_p !=  working_ctx_p->src_transaction_id)
      {
        /*
        ** the source has aborted the request
        */
        error = EPROTO;
      }      
  } 
  /*
  ** send back the response of the read request towards rozofsmount
  ** (done whatever the result of the control above is)
  */
  rozofs_storcli_read_reply_success(working_ctx_p);
   /*
   ** allocate a sequence number for the working context:
   **   This is mandatory to avoid any confusion with a late response of the previous read request
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
  /*
  ** check if it makes sense to send the repaired blocks
  */
  if (error)
  {
    /*
    ** the requester has released the buffer and it could be possible that the
    ** rozofsmount uses it for another purpose, so the data that have been repaired
    ** might be wrong: do not write wrong data for which a good crc would be computed !!
    */
    goto fail;
  }
  
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     void  *xmit_buf;  
     /*
     ** caution: that declaration shadows the 'ret' of the function scope, the outer one
     ** is only written after the loop
     */
     int ret;  
	 
     /*
     ** skip the projections for which no error has been detected:
     ** the whole bitmap is tested with repair2, only its first word otherwise
     */
     if (storcli_storage_supports_repair2) {
	   if (ROZOFS_BITMAP64_TEST_ALL0(working_ctx_p->prj_ctx[projection_id].crc_err_bitmap)) continue;
	 }
	 else {
	   if (working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0] == 0)  continue;
	 } 
	 
	 
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the resource control already took place.
       ** Notice that the error code is not consumed at the 'fail' label: the context is
       ** just released.
       */       
       error = EIO;
       goto fail;     
     }
     /*
     ** fill partially the common header
     */
	 if (storcli_storage_supports_repair2) {
       request2   = &repair2_prj_args;
       request2->cid = storcli_read_rq_p->cid;
       request2->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request2->layout        = storcli_read_rq_p->layout;
       request2->bsize         = storcli_read_rq_p->bsize;
       /*
       ** the case of spare 1 must not occur because repair is done for the optimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request2->spare = 1;
       else request2->spare = 0;
       memcpy(request2->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request2->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
       /*
       ** the projection id put in the request is the Mojette projection id found from the
       ** distribution set and the sid, not the index of the loop
       */
       request2->proj_id = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request2->sid,rozofs_forward);
       request2->bid     = storcli_read_rq_p->bid;
       /*
       ** the 3 words of the crc error bitmap are copied in the request
       */
       request2->bitmap[0]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];     
       request2->bitmap[1]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[1];     
       request2->bitmap[2]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[2];     
       int nb_blocks       = ROZOFS_BITMAP64_NB_BIT1(request2->bitmap);
       request2->nb_proj    = nb_blocks;     

       /*
       ** set the length of the bins part: one projection of prj_size_in_msg bytes per block to repair
       */

       int bins_len = (prj_size_in_msg * nb_blocks);
       request2->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution we might have a direct reply if there is a direct error at load balancing group while
       ** attempting to send the RPC message-> typically a disconnection of the TCP connection 
       ** As a consequence the response fct 'rozofs_storcli_write_repair_req_processing_cbk' can be called
       ** prior returning from 'rozofs_sorcli_send_rq_common'.
       ** Anticipate the status of the xmit state of the projection and lock the section to
       ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
       ** --> need to take care because the write context is released after the reply error sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

       ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR2,
                                           (xdrproc_t) xdr_sp_write_repair2_arg_no_bins_t, (caddr_t) request2,
                                        	xmit_buf,
                                        	working_ctx_p->read_seqnum,
                                        	(uint32_t) projection_id,
                                        	bins_len,
                                        	rozofs_storcli_write_repair_req_processing_cbk,
                                           (void*)working_ctx_p);
     }
	 else {
	 
       /*
       ** same processing with the legacy request: the bitmap is limited to one 64 bits word
       */
       request   = &repair_prj_args;
       request->cid = storcli_read_rq_p->cid;
       request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request->layout        = storcli_read_rq_p->layout;
       request->bsize         = storcli_read_rq_p->bsize;
       /*
       ** the case of spare 1 must not occur because repair is done for the optimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
       else request->spare = 0;
       memcpy(request->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
       /*
       ** the projection id put in the request is the Mojette projection id found from the
       ** distribution set and the sid, not the index of the loop
       */
       request->proj_id = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request->sid,rozofs_forward);
       request->bid     = storcli_read_rq_p->bid;
       request->bitmap  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];     
       int nb_blocks       = ROZOFS_BITMAP64_NB_BIT1_FUNC((uint8_t*)&request->bitmap,8);
       request->nb_proj    = nb_blocks;     

       /*
       ** set the length of the bins part: one projection of prj_size_in_msg bytes per block to repair
       */

       int bins_len = (prj_size_in_msg * nb_blocks);
       request->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution we might have a direct reply if there is a direct error at load balancing group while
       ** attempting to send the RPC message-> typically a disconnection of the TCP connection 
       ** As a consequence the response fct 'rozofs_storcli_write_repair_req_processing_cbk' can be called
       ** prior returning from 'rozofs_sorcli_send_rq_common'.
       ** Anticipate the status of the xmit state of the projection and lock the section to
       ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
       ** --> need to take care because the write context is released after the reply error sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

       ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR,
                                           (xdrproc_t) xdr_sp_write_repair_arg_no_bins_t, (caddr_t) request,
                                        	xmit_buf,
                                        	working_ctx_p->read_seqnum,
                                        	(uint32_t) projection_id,
                                        	bins_len,
                                        	rozofs_storcli_write_repair_req_processing_cbk,
                                           (void*)working_ctx_p);	   
	 }										   

     /*
     ** the request has been submitted: release the lock
     */
     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
        /*
	** there is no retry, just keep on with a potential other projection to repair
	*/
        STORCLI_ERR_PROF(repair_prj_err);
        STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);
	prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_ERROR;
	continue;
     } 
     else
     {
       /*
       ** check if the state has not been changed: -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
          /*
	  ** it looks like we cannot repair that projection, check if there is some other
	  */
          STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);

       }      
     }
   }
   /*
   ** check if there is some write repair request pending, in such a case we wait for the end of the repair
   ** (answer from the storage node)
   */
    ret = rozofs_storcli_all_prj_write_repair_check(storcli_read_rq_p->layout,
                                                    working_ctx_p->prj_ctx);
    if (ret == 0)
    {
       /*
       ** there is some pending write: the context must not be released here
       */
       return;
    }   
  
fail:
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,repair,0);
     rozofs_storcli_release_context(working_ctx_p);  
  return;

}
/**
  Initial write repair request

  Prepare the working context of a read transaction for the repair of the projections
  that were in error, regenerate them and start their sending:
   - the decoded data of the read become the data to write
   - each of the first rozofs_forward projection contexts is set to ROZOFS_PRJ_WR_IDLE,
     gets a large buffer when it has none, and its "bins" pointer is set in the payload
     of that buffer
   - the projections are regenerated and the write repair processing is called

  When a large buffer cannot be allocated, the read reply is sent and the context is
  released without any repair.

  @param  working_ctx_p: pointer to the working context of a read transaction

  @retval none
*/
void rozofs_storcli_repair_req_init(rozofs_storcli_ctx_t *working_ctx_p)
{
   storcli_read_arg_t *read_rq_p = (storcli_read_arg_t*)&working_ctx_p->storcli_read_arg;
   int prj_idx;

   STORCLI_START_NORTH_PROF(working_ctx_p,repair,0);

   /*
   ** the data to encode are the data that have been decoded by the read
   */
   working_ctx_p->data_write_p  = working_ctx_p->data_read_p;

   /*
   ** One large buffer is needed per projection to write on storage: the buffers
   ** that have been allocated for the read are kept.
   */
   uint8_t nb_forward = rozofs_get_rozofs_forward(read_rq_p->layout);
   for (prj_idx = 0; prj_idx < nb_forward; prj_idx++)
   {
     rozofs_storcli_projection_ctx_t *cur_prj_p = &working_ctx_p->prj_ctx[prj_idx];
     uint8_t *payload_p;
     int      first_byte;

     cur_prj_p->prj_state = ROZOFS_PRJ_WR_IDLE;
     if (cur_prj_p->prj_buf == NULL)
     {
       cur_prj_p->prj_buf = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
     }
     if (cur_prj_p->prj_buf == NULL)
     {
       /*
       ** that situation MUST not occur since there is the same number of receive buffers and working contexts!!
       */
       severe("out of large buffer");
       goto out_of_buffer;
     }
     /*
     ** offset of the bins in the payload: it depends on the request that is used,
     ** for compatibility between new clients and old storages
     */
     if (storcli_storage_supports_repair2)
     {
       first_byte = rozofs_storcli_repair2_get_position_of_first_byte2write();
     }
     else
     {
       first_byte = rozofs_storcli_repair_get_position_of_first_byte2write();
     }
     payload_p = (uint8_t*)ruc_buf_getPayload(cur_prj_p->prj_buf);
     cur_prj_p->bins = (bin_t*)(payload_p+first_byte);
   }
   /*
   ** regenerate the projections that were in error, then start their sending
   */
   rozofs_storcli_transform_forward_repair(working_ctx_p,
                                           read_rq_p->layout,
                                           read_rq_p->nb_proj,
                                           (char *)working_ctx_p->data_write_p);
   rozofs_storcli_write_repair_req_processing(working_ctx_p);
   return;

out_of_buffer:
   /*
   ** no repair: send back the response of the read request towards rozofsmount
   ** and release the root transaction context
   */
   rozofs_storcli_read_reply_success(working_ctx_p);
   STORCLI_STOP_NORTH_PROF(working_ctx_p,repair,0);
   rozofs_storcli_release_context(working_ctx_p);
}
/**
  Regenerate the projections for which a crc error has been detected.

  The first rozofs_forward projection contexts of the working context are scanned. For
  each context that has a non empty crc_err_bitmap and whose sid is not the one of a
  spare storage, every block flagged in the bitmap is encoded again from the buffer
  pointed by "data". The regenerated blocks are stored one after the other in the
  "bins" area of the projection context, each one in a slot of
  rozofs_get_max_psize_in_msg() bytes:
   - empty data block: only a header filled with 0 is written
   - otherwise: header, bins of the projection and footer (timestamp)

  Notice that the slot index applies to the output buffer only: the block index "i"
  of the bitmap is the one used for the input buffer pointed by "data".

 *
 * @param *working_ctx_p: storcli working context
 * @param layout: layout association with the file
 * @param number_of_blocks: number of blocks of the input buffer to consider
 * @param *data: pointer to the source data that must be transformed
 *
 * @return: none
 */
void rozofs_storcli_transform_forward_repair(rozofs_storcli_ctx_t *working_ctx_p,
                                             uint8_t layout,
                                             uint32_t number_of_blocks,
                                             char *data)
{
    projection_t fwd_prj_tb[ROZOFS_SAFE_MAX_STORCLI]; // Table of projections used to transform data
    storcli_read_arg_t *read_rq_p = (storcli_read_arg_t*)&working_ctx_p->storcli_read_arg;
    rozofs_storcli_projection_ctx_t *ctx_tb = &working_ctx_p->prj_ctx[0];
    uint8_t  nb_forward  = rozofs_get_rozofs_forward(layout);
    /*
    ** the number of projection contexts to scan is also the forward value of the layout
    */
    uint8_t  nb_ctx      = rozofs_get_rozofs_forward(layout);
    uint8_t  nb_inverse  = rozofs_get_rozofs_inverse(layout);
    uint8_t  bsize       = read_rq_p->bsize;
    uint32_t block_bytes = ROZOFS_BSIZE_BYTES(bsize);
    int      slot_bytes  = rozofs_get_max_psize_in_msg(layout,bsize);
    uint16_t prj;
    int      ctx_idx;

    /*
    ** describe each forward projection: angles and size
    */
    for (prj = 0; prj < nb_forward; prj++)
    {
        projection_t *desc_p = &fwd_prj_tb[prj];

        desc_p->angle.p = rozofs_get_angles_p(layout,prj);
        desc_p->angle.q = rozofs_get_angles_q(layout,prj);
        desc_p->size    = rozofs_get_128bits_psizes(layout,bsize,prj);
    }

    /*
    ** now go through the projection contexts to find out if there is something to regenerate
    */
    for (ctx_idx = 0; ctx_idx < nb_ctx; ctx_idx++)
    {
        rozofs_storcli_projection_ctx_t *ctx_p = &ctx_tb[ctx_idx];
        int      out_slot = 0;   /* next free slot in the output buffer of that context */
        uint8_t  sid;
        int      moj_id;
        uint32_t blk;

        if (ROZOFS_BITMAP64_TEST_ALL0(ctx_p->crc_err_bitmap)) continue;

        /*
        ** Get the sid associated with the projection context, then the reference of
        ** the Mojette projection
        */
        sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,
                                                       ctx_p->stor_idx);
        moj_id = rozofs_storcli_get_mojette_proj_id(read_rq_p->dist_set,sid,nb_forward);
        if (moj_id < 0)
        {
            /*
            ** it is the reference of a spare sid, so go to the next projection context
            */
            continue;
        }

        for (blk = 0; blk < number_of_blocks; blk++)
        {
            char                   *src_p;
            int                     is_empty;
            rozofs_stor_bins_hdr_t *hdr_p;

            /*
            ** nothing to generate for a block without crc error
            */
            if (ROZOFS_BITMAP64_TEST0(blk,ctx_p->crc_err_bitmap)) continue;

            src_p    = data + (blk * block_bytes);
            is_empty = rozofs_data_block_check_empty(src_p, block_bytes);

            /*
            ** the regenerated projection goes in the next free slot of the context
            */
            fwd_prj_tb[moj_id].bins = ctx_p->bins +
                                      (slot_bytes/sizeof(bin_t)* out_slot);
            hdr_p = (rozofs_stor_bins_hdr_t*)fwd_prj_tb[moj_id].bins;
            out_slot++;

            if (is_empty)
            {
                /*
                ** empty user data block: no transform, the header is cleared
                */
                hdr_p->s.projection_id    = 0;
                hdr_p->s.timestamp        = 0;
                hdr_p->s.effective_length = 0;
                hdr_p->s.filler           = 0;
                hdr_p->s.version          = 0;
                continue;
            }

            /*
            ** fill the header of the projection: timestamp and length come from the
            ** block context of the source block
            */
            hdr_p->s.projection_id    = moj_id;
            hdr_p->s.timestamp        = working_ctx_p->block_ctx_table[blk].timestamp;
            hdr_p->s.effective_length = working_ctx_p->block_ctx_table[blk].effective_length;
            hdr_p->s.filler           = 0;
            hdr_p->s.version          = 0;

            /*
            ** update the pointer to point out the first bins
            */
            fwd_prj_tb[moj_id].bins += sizeof(rozofs_stor_bins_hdr_t)/sizeof(bin_t);

            /*
            ** Apply the erasure code transform for the block
            */
            transform128_forward_one_proj((pxl_t *) src_p,
                    nb_inverse,
                    block_bytes / nb_inverse / sizeof (pxl_t),
                    moj_id, fwd_prj_tb);

            /*
            ** add the footer at the end of the repaired projection
            */
            rozofs_stor_bins_footer_t *foot_p = (rozofs_stor_bins_footer_t*) (fwd_prj_tb[moj_id].bins
                                                      + rozofs_get_psizes(layout,bsize,moj_id));
            foot_p->timestamp = hdr_p->s.timestamp;
        }
    }
}
/* Example no. 6 */
/* 0 */
/**
  Retry the sending of the truncate request of one projection.

  The state of the load balancing groups is refreshed, then a new storage is selected
  for the projection. When no new storage can be selected, the request is sent again on
  the current storage provided that it is acceptable and that the retry counter of the
  projection is below ROZOFS_STORCLI_MAX_RETRY.

  When the retry is not possible, an error reply is sent (rozofs_storcli_write_reply_error)
  and the root context is released, unless write_ctx_lock is asserted: in that case the
  function just returns.

  @param working_ctx_p: pointer to the root context of the truncate transaction
  @param projection_id: index of the projection context for which the request is sent again
  @param same_storage_retry_acceptable: 0 when a retry on the same storage must not be attempted

  @retval none
*/
void rozofs_storcli_truncate_projection_retry(rozofs_storcli_ctx_t *working_ctx_p,uint8_t projection_id,int same_storage_retry_acceptable)
{
    uint8_t   rozofs_safe;
    uint8_t   rozofs_forward;
    uint8_t   layout;
    storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
    int error=0;
    int storage_idx;

    rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
    rozofs_storcli_lbg_prj_assoc_t  *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;

    layout         = storcli_truncate_rq_p->layout;
    rozofs_safe    = rozofs_get_rozofs_safe(layout);
    rozofs_forward = rozofs_get_rozofs_forward(layout);
    /*
    ** Now update the state of each load balancing group since it might be possible
    ** that some experience a state change
    */
    for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++) 
    {
      /*
      ** Check the state of the load Balancing group
      */
      rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                              storage_idx,
                                              NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));      
    }    
    /**
    * attempt to select a new storage
    */
    if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
    {
      /*
      ** Cannot select a new storage: double check if the retry on the same storage is
      ** acceptable. When it is the case, check if the max retry has not been reached yet.
      ** Otherwise the truncate request is rejected.
      */
      if (same_storage_retry_acceptable == 0) 
      {
        error = EIO;
        prj_cxt_p[projection_id].errcode = error;
        goto reject;      
      }
      if (++prj_cxt_p[projection_id].retry_cpt >= ROZOFS_STORCLI_MAX_RETRY)
      {
        error = EIO;
        prj_cxt_p[projection_id].errcode = error;
        goto reject;          
      }
    } 
    /*
    ** we are lucky since either we get a new storage or the retry counter is not exhausted
    */
     sp_truncate_arg_no_bins_t *request; 
     sp_truncate_arg_no_bins_t  truncate_prj_args;
     void  *xmit_buf;  
     int ret;  
      
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the resource control already took place
       */
       error = EFAULT;
       prj_cxt_p[projection_id].errcode = error;
       goto fatal;     
     }
     /*
     ** fill partially the common header: that part is done again each time
     ** a new storage is selected after a submission failure
     */
retry:
     request   = &truncate_prj_args;
     request->cid = storcli_truncate_rq_p->cid;
     request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     request->layout        = layout;
     if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
     else request->spare = 0;
     memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX*sizeof (uint8_t));
     memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
     request->proj_id        = projection_id;
     request->bid            = storcli_truncate_rq_p->bid;
     request->last_seg       = storcli_truncate_rq_p->last_seg;
     request->last_timestamp = working_ctx_p->timestamp;


     /*
     ** Bins len has been saved in the working context
     */
     request->len = working_ctx_p->truncate_bins_len;

     uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     /*
     ** increment the lock since it might be possible that this procedure is called after a synchronous
     ** transaction failure while the system is still in the initial procedure that triggers the writing
     ** of the projection. So it might be possible that the lock is already asserted.
     ** As for the initial case, we need to anticipate the xmit state of the projection since the ERROR
     ** status might be set on a synchronous transaction failure. If that state was set after a positive
     ** submission towards the lbg, we might overwrite the ERROR state with the IN_PRG state.
     */
     working_ctx_p->write_ctx_lock++;
     prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
     
     STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
     ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                         (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                          xmit_buf,
                                          working_ctx_p->read_seqnum,
                                          (uint32_t) projection_id,
                                          working_ctx_p->truncate_bins_len,
                                          rozofs_storcli_truncate_req_processing_cbk,
                                         (void*)working_ctx_p);
     working_ctx_p->write_ctx_lock--;
     if (ret < 0)
     {
       /*
       ** the communication with the storage seems to be wrong (more than a TCP connection temporary down):
       ** attempt to select a new storage
       */
       STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
       if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
       {
         /*
         ** Out of storage !!-> too many storages are down.
         ** 'error' is still 0 here: set an explicit error code, as it is done for the
         ** other out of storage cases, so that the error reply does not carry 0.
         */
         error = EIO;
         prj_cxt_p[projection_id].errcode = error;
         goto fatal;
       } 
       /*
       ** retry for that projection with a new storage index: WARNING: we assume that xmit buffer has not been released !!!
       */
       goto retry;
     }
     /*
     ** OK, the buffer has been accepted by the load balancing group, check if there was a direct failure for
     ** that transaction
     */
     if ( prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
     {
        error = prj_cxt_p[projection_id].errcode;
        goto fatal;     
     }    
    return;
    /*
    **_____________________________________________
    **  Exception cases
    **_____________________________________________
    */      
    
reject:  
     /*
     ** caution -> reply error is only generated if the ctx_lock is 0
     */
     if (working_ctx_p->write_ctx_lock != 0) return;
     /*
     ** we fall in that case when we run out of storage
     */
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
    STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
    rozofs_storcli_release_context(working_ctx_p);  
     return; 
      
fatal:
     /*
     ** caution -> reply error is only generated if the ctx_lock is 0
     */
     if (working_ctx_p->write_ctx_lock != 0) return;
     /*
     ** we fall in that case when we run out of resource (that case is a BUG !!), when no
     ** storage is left after a submission failure, or on a direct failure of the transaction
     */
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
     rozofs_storcli_release_context(working_ctx_p);  
     return; 

}
/* Example no. 7 */
/* 0 */
/**
  Initial truncate request

  Allocates a working context for the request, decodes the RPC truncate
  arguments found in recv_buf, fills the load balancing group/projection
  association table, allocates the response buffer and one projection buffer
  per forward projection, and finally submits the request to the FID based
  serializer (stc_rng_insert). When the serializer reports no collision the
  truncate is processed right away.

  On any failure an error reply is sent with
  rozofs_storcli_reply_error_with_recv_buf() and the working context (when it
  has been allocated) is released.
    
  @param socket_ctx_idx: reference of the socket the request was received on (saved for the reply)
  @param recv_buf: receive buffer that contains the RPC truncate request
  @param rozofs_storcli_remote_rsp_cbk: callback for sending out the response
 
  @retval none
*/
void rozofs_storcli_truncate_req_init(uint32_t  socket_ctx_idx, void *recv_buf,rozofs_storcli_resp_pf_t rozofs_storcli_remote_rsp_cbk)
{
   rozofs_rpc_call_hdr_with_sz_t    *com_hdr_p;
   rozofs_storcli_ctx_t *working_ctx_p = NULL;
   int i;
   uint32_t  msg_len;  /* length of the rpc messsage including the header length */
   storcli_truncate_arg_t *storcli_truncate_rq_p = NULL;
   rozofs_rpc_call_hdr_t   hdr;   /* structure that contains the rpc header in host format */
   int      len;       /* effective length of application message               */
   uint8_t  *pmsg;     /* pointer to the first available byte in the application message */
   uint32_t header_len;
   XDR xdrs;
   int errcode = EINVAL;
   /*
   ** allocate a context for the duration of the truncate
   */
   working_ctx_p = rozofs_storcli_alloc_context();
   if (working_ctx_p == NULL)
   {
     /*
     ** that situation MUST not occur since there the same number of receive buffer and working context!!
     */
     severe("out of working read/write saved context");
     goto failure;
   }
   storcli_truncate_rq_p = &working_ctx_p->storcli_truncate_arg;
   STORCLI_START_NORTH_PROF(working_ctx_p,truncate,0);

   
   /*
   ** Get the full length of the message and adjust it to the length of the applicative part (RPC header+application msg)
   */
   msg_len = ruc_buf_getPayloadLen(recv_buf);
   msg_len -=sizeof(uint32_t);

   /*
   ** save the reference of the received socket since it will be needed for sending back the
   ** response
   */
   working_ctx_p->socketRef    = socket_ctx_idx;
   working_ctx_p->user_param   = NULL;
   working_ctx_p->recv_buf     = recv_buf;
   working_ctx_p->response_cbk = rozofs_storcli_remote_rsp_cbk;
   /*
   ** Get the payload of the receive buffer and work on a local copy of the RPC header
   */
   com_hdr_p  = (rozofs_rpc_call_hdr_with_sz_t*) ruc_buf_getPayload(recv_buf);   
   memcpy(&hdr,&com_hdr_p->hdr,sizeof(rozofs_rpc_call_hdr_t));
   /*
   ** swap the rpc header
   */
   scv_call_hdr_ntoh(&hdr);
   pmsg = rozofs_storcli_set_ptr_on_nfs_call_msg((char*)&com_hdr_p->hdr,&header_len);
   if (pmsg == NULL)
   {
     errcode = EFAULT;
     goto failure;
   }
   /*
   ** map the memory on the first applicative RPC byte available and prepare to decode:
   ** notice that we will not call XDR_FREE since the application MUST
   ** provide a pointer for storing the file handle
   */
   len = msg_len - header_len;    
   xdrmem_create(&xdrs,(char*)pmsg,len,XDR_DECODE); 
   /*
   ** store the source transaction id needed for the reply
   */
   working_ctx_p->src_transaction_id =  hdr.hdr.xid;
   /*
   ** decode the RPC message of the truncate request
   */
   if (xdr_storcli_truncate_arg_t(&xdrs,storcli_truncate_rq_p) == FALSE)
   {
      /*
      ** decoding error
      */
      errcode = EFAULT;
      severe("rpc trucnate request decoding error");
      goto failure;
      
   }   
   /*
   ** init of the load balancing group/ projection association table:
   ** That table is ordered: the first corresponds to the storage associated with projection 0, second with 1, etc..
   ** When build that table, we MUST consider the value of the base which is associated with the distribution
   */

   
   uint8_t   rozofs_safe = rozofs_get_rozofs_safe(storcli_truncate_rq_p->layout);
   int lbg_in_distribution = 0;
   for (i = 0; i  <rozofs_safe ; i ++)
   {
    /*
    ** Get the load balancing group associated with the sid
    */
    int lbg_id = rozofs_storcli_get_lbg_for_sid(storcli_truncate_rq_p->cid,storcli_truncate_rq_p->dist_set[i]);
    if (lbg_id < 0)
    {
      /*
      ** there is no associated between the sid and the lbg. It is typically the case
      ** when a new cluster has been added to the configuration and the client does not
      ** know yet the configuration change
      */
      severe("sid is unknown !! %d\n",storcli_truncate_rq_p->dist_set[i]);
      continue;    
    }
     rozofs_storcli_lbg_prj_insert_lbg_and_sid(working_ctx_p->lbg_assoc_tb,lbg_in_distribution,
                                                lbg_id,
                                                storcli_truncate_rq_p->dist_set[i]);  

     rozofs_storcli_lbg_prj_insert_lbg_state(working_ctx_p->lbg_assoc_tb,
                                             lbg_in_distribution,
                                             NORTH_LBG_GET_STATE(working_ctx_p->lbg_assoc_tb[lbg_in_distribution].lbg_id));    
     lbg_in_distribution++;
     if (lbg_in_distribution == rozofs_safe) break;

   }
   /*
   ** allocate a small buffer that will be used for sending the response to the truncate request
   */
   working_ctx_p->xmitBuf = ruc_buf_getBuffer(ROZOFS_STORCLI_NORTH_SMALL_POOL);
   if (working_ctx_p->xmitBuf == NULL)
   {
     /*
     ** the small buffer pool is exhausted: check the buffer that has just been
     ** requested (and not the working context, which is known to be valid here)
     */
     errcode = ENOMEM;
     severe("out of small buffer");
     goto failure;
   }
   /*
   ** allocate a sequence number for the working context (same as for read)
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
   /*
   ** set now the working variable specific for handling the truncate
   ** we re-use the structure used for writing even if nothing is written
   */
   uint8_t forward_projection = rozofs_get_rozofs_forward(storcli_truncate_rq_p->layout);
   for (i = 0; i < forward_projection; i++)
   {
     working_ctx_p->prj_ctx[i].prj_state = ROZOFS_PRJ_READ_IDLE;
     working_ctx_p->prj_ctx[i].prj_buf   = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
     if (working_ctx_p->prj_ctx[i].prj_buf == NULL)
     {
       /*
       ** that situation MUST not occur since there the same number of receive buffer and working context!!
       */
       errcode = ENOMEM;
       severe("out of large buffer");
       goto failure;
     }
     /*
     ** increment inuse counter on each buffer since we might need to re-use that packet in case
     ** of retransmission
     */
     working_ctx_p->prj_ctx[i].inuse_valid = 1;
     ruc_buf_inuse_increment(working_ctx_p->prj_ctx[i].prj_buf);
     /*
     ** set the pointer to the bins
     */
     int position = rozofs_storcli_get_position_of_first_byte2write_in_truncate();
     uint8_t *pbuf = (uint8_t*)ruc_buf_getPayload(working_ctx_p->prj_ctx[i].prj_buf); 

     working_ctx_p->prj_ctx[i].bins       = (bin_t*)(pbuf+position); 
   }
   		
   /*
   ** Prepare for request serialization
   */
   memcpy(working_ctx_p->fid_key, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
   working_ctx_p->opcode_key = STORCLI_TRUNCATE;
   {
       /**
        * lock all the file for a truncate: the range covers block 0 up to the
        * largest 64 bits value
        */
       uint64_t nb_blocks = ~(uint64_t)0;
       int ret;
       ret = stc_rng_insert((void*)working_ctx_p,
               STORCLI_READ,working_ctx_p->fid_key,
               0,nb_blocks,
               &working_ctx_p->sched_idx);
       if (ret == 0)
       {
           /*
            ** there is a current request that is processed with the same fid and there is a collision
            */
           return;
       }
       /*
        ** no request pending with that fid, so we can process it right away
        */
       rozofs_storcli_truncate_req_processing(working_ctx_p);
       return;
   }

    /*
    **_____________________________________________
    **  Exception cases
    **_____________________________________________
    */      
       

    /*
    ** there was a failure while attempting to allocate a memory ressource
    ** or while decoding the request.
    */
failure:
     /*
     ** send back the response with the appropriated error code. 
     ** note: The received buffer (rev_buf)  is
     ** intended to be released by this service in case of error or the TCP transmitter
     ** once it has been passed to the TCP stack.
     */
     rozofs_storcli_reply_error_with_recv_buf(socket_ctx_idx,recv_buf,NULL,rozofs_storcli_remote_rsp_cbk,errcode);
     /*
     ** check if the root context was allocated. Free it if is exist
     */
     if (working_ctx_p != NULL) 
     {
        /*
        ** remove the reference to the recvbuf to avoid releasing it twice
        */
       STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
       working_ctx_p->recv_buf   = NULL;
       rozofs_storcli_release_context(working_ctx_p);
     }
     return;
}
Esempio n. 8
0
/*
** That function is called when all the projection are ready to be sent

 It refreshes the state of every load balancing group of the distribution,
 selects a storage for each forward projection, optionally transforms the
 last block ("data") into projections, and then sends one SP_TRUNCATE
 request per projection. When the send towards a storage fails, another
 storage is selected and the same projection is sent again.

 On error, an error reply is sent with rozofs_storcli_write_reply_error()
 and the working context is released.

 @param working_ctx_p: pointer to the root context associated with the top level write request
 @param data         : pointer to the data of the last block to truncate (NULL when there is no data to transform)

*/
void rozofs_storcli_truncate_req_processing_exec(rozofs_storcli_ctx_t *working_ctx_p, char * data)
{

  storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
  uint8_t layout = storcli_truncate_rq_p->layout;
  uint8_t   rozofs_forward;
  uint8_t   rozofs_safe;
  uint8_t   projection_id;
  int       storage_idx;
  int       error=0;
  rozofs_storcli_lbg_prj_assoc_t  *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;
  rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
  
  rozofs_forward = rozofs_get_rozofs_forward(layout);
  rozofs_safe    = rozofs_get_rozofs_safe(layout);
  

  /*
  ** set the current state of each load balancing group belonging to the rozofs_safe group
  */
  for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++) 
  {
    /*
    ** Check the state of the load Balancing group
    */
    rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                            storage_idx,
                                            NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));      
  }
  /*
  ** Now find out a selectable lbg_id for each projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
    if (rozofs_storcli_select_storage_idx_for_write ( working_ctx_p,rozofs_forward, rozofs_safe,projection_id) < 0)
    {
       /*
       ** there is no enough valid storage !!
       */
       error = EIO;
       goto fail;
    }
  }  
  
  
  /*
  ** Let's transform the data to write
  */
  working_ctx_p->truncate_bins_len = 0;
  if (data != NULL) {
    STORCLI_START_KPI(storcli_kpi_transform_forward);

    rozofs_storcli_transform_forward(working_ctx_p->prj_ctx,  
                                     layout,
                                     0, 
                                     1, 
                                     working_ctx_p->timestamp,
                                     storcli_truncate_rq_p->last_seg,
                                     data);  
    STORCLI_STOP_KPI(storcli_kpi_transform_forward,0);
    working_ctx_p->truncate_bins_len = rozofs_get_max_psize(layout)*sizeof(bin_t) + sizeof(rozofs_stor_bins_hdr_t);
  } 
  
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     sp_truncate_arg_no_bins_t *request; 
     sp_truncate_arg_no_bins_t  truncate_prj_args;
     void  *xmit_buf;  
     int ret;  
      
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the ressource control already took place
       */       
       error = EIO;
       goto fatal;     
     }
     /*
     ** fill partially the common header
     */
retry:
     request   = &truncate_prj_args;
     request->cid = storcli_truncate_rq_p->cid;
     request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     request->layout        = layout;
     /*
     ** a storage index beyond the forward range designates a spare storage
     */
     if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
     else request->spare = 0;
     memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX*sizeof (uint8_t));
     memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
     request->proj_id        = projection_id;
     request->bid            = storcli_truncate_rq_p->bid;
     request->last_seg       = storcli_truncate_rq_p->last_seg;
     request->last_timestamp = working_ctx_p->timestamp;

     request->len = working_ctx_p->truncate_bins_len;

     uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
     /*
     ** caution we might have a direct reply if there is a direct error at load balancing group while
     ** ateempting to send the RPC message-> typically a disconnection of the TCP connection 
     ** As a consequence the response fct 'rozofs_storcli_truncate_req_processing_cbk) can be called
     ** prior returning from rozofs_sorcli_send_rq_common')
     ** anticipate the status of the xmit state of the projection and lock the section to
     ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
     ** --> need to take care because the write context is released after the reply error sent to rozofsmount
     */
     working_ctx_p->write_ctx_lock = 1;
     prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
     
     ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                         (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                          xmit_buf,
                                          working_ctx_p->read_seqnum,
                                          (uint32_t) projection_id,
                                          working_ctx_p->truncate_bins_len,
                                          rozofs_storcli_truncate_req_processing_cbk,
                                         (void*)working_ctx_p);
     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
       /*
       ** the communication with the storage seems to be wrong (more than TCP connection temporary down
       ** attempt to select a new storage
       **
       */
       if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
       {
         /*
         ** Out of storage !!-> too many storages are down.
         ** Report EIO as done for the initial storage selection: without it
         ** the error reply would be sent with an error code of 0
         */
         error = EIO;
         goto fatal;
       } 
       /*
       ** retry for that projection with a new storage index: WARNING: we assume that xmit buffer has not been released !!!
       */
       goto retry;
     } 
     else
     {
       /*
       ** check if the state has not been changed: -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
          error = prj_cxt_p[projection_id].errcode;
          goto fatal;       
       }
     }

   }

  return;
  
  /*
  ** Both error exits perform the same processing: reply with the error code
  ** and release the root transaction context
  */
fail:
fatal:
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
     rozofs_storcli_release_context(working_ctx_p);  
     return;

}
Esempio n. 9
0
/** 
  Apply the inverse transform on a set of blocks in order to rebuild the user
  data in the buffer starting at "data".
  The first_block_idx is the index of the first block in the output buffer:
  block "block_idx" is rebuilt at data + block size * (first_block_idx + block_idx).

  For each block that is not yet in the ROZOFS_BLK_TRANSFORM_DONE state:
  - timestamp 0 with a full effective length: the block is a hole, it is zeroed
  - timestamp 0 with an effective length of 0: end of file, the function stops
    and reports the index of that block in number_of_blocks_p
  - otherwise the inverse transform is applied and the part of the block
    beyond the effective length is zeroed
  
 * 
 * @param *prj_ctx_p: pointer to the working array of the projection
 * @param layout: layout of the file
 * @param bsize: block size as defined in the layout (converted in bytes with ROZOFS_BSIZE_BYTES)
 * @param first_block_idx: index of the first block in the output buffer
 * @param number_of_blocks: number of blocks to rebuild
 * @param *block_ctx_p: pointer to the per block context array (state, timestamp, effective length)
 * @param *data: pointer to the output buffer that receives the rebuilt data
   @param *number_of_blocks_p: pointer to the array where the function returns number of blocks on which the transform was applied
  @param *rozofs_storcli_prj_idx_table: pointer to the array used for storing the projections index for inverse process
 *
 * @return: always 0
 */
 int rozofs_storcli_transform_inverse(rozofs_storcli_projection_ctx_t *prj_ctx_p,  
                                       uint8_t layout, uint32_t bsize,
                                       uint32_t first_block_idx, 
                                       uint32_t number_of_blocks, 
                                       rozofs_storcli_inverse_block_t *block_ctx_p,
                                       char *data,
                                       uint32_t *number_of_blocks_p,
				       uint8_t  *rozofs_storcli_prj_idx_table) 

 {

    uint32_t block_idx;
    uint8_t rozofs_inverse = rozofs_get_rozofs_inverse(layout);
    uint8_t rozofs_forward = rozofs_get_rozofs_forward(layout);
    uint8_t rozofs_safe = rozofs_get_rozofs_safe(layout);        
    uint32_t bbytes = ROZOFS_BSIZE_BYTES(bsize);
    int prj_size_in_msg = rozofs_get_max_psize_in_msg(layout,bsize);

    *number_of_blocks_p = 0;    

    /*
    ** Proceed the inverse data transform for the number_of_blocks blocks.
    */
    for (block_idx = 0; block_idx < number_of_blocks; block_idx++) {
        if (block_ctx_p[block_idx].state == ROZOFS_BLK_TRANSFORM_DONE)
        {
          /*
          ** transformation has already been done for that block
          ** check the next one
          */
          continue;        
        }
        if (block_ctx_p[block_idx].timestamp == 0)
        {
          /*
          ** Check the case of the file that has no data (there is a hole in the file), this is indicated by
          ** reporting a timestamp of 0 with a full block length
          */
          if (block_ctx_p[block_idx].effective_length == bbytes)
          {
            /*
            ** clear the memory
            */
            ROZOFS_STORCLI_STATS(ROZOFS_STORCLI_EMPTY_READ);
            memset( data + (bbytes * (first_block_idx + block_idx)),0,bbytes);
            block_ctx_p[block_idx].state = ROZOFS_BLK_TRANSFORM_DONE;
            continue;
          }
          if (block_ctx_p[block_idx].effective_length == 0)
          {
            /*
            ** we have reached end of file: report the index of that block.
            ** Notice that the block repair check is not done on that path.
            */
            block_ctx_p[block_idx].state = ROZOFS_BLK_TRANSFORM_DONE;
            *number_of_blocks_p = block_idx;
            return 0;        
          }
        }
	
        /*
        ** Here we have to take care, since the index of the projection_id use to address
        ** prj_ctx_p is NOT the real projection_id. The projection ID is found in the header of
        ** each bins, so for a set of projections pointed by bins, we might have a different
        ** projection id in the header of the projections contains in the bins array that has
        ** been read!!
        */
	transform_inverse_proc(&rozofs_storcli_prj_idx_table[ROZOFS_SAFE_MAX_STORCLI*block_idx],
			       prj_ctx_p,
			       prj_size_in_msg,
			       layout,
			       bbytes,
			       first_block_idx,
			       block_idx,
			       data);
        /*
        ** indicate that transform has been done for the block
        */
        block_ctx_p[block_idx].state = ROZOFS_BLK_TRANSFORM_DONE;
        /*
        ** check the case of a block that is not full: need to zero's that part
        */
        if (block_ctx_p[block_idx].effective_length < bbytes)
        {
           /*
           ** clear the memory beyond the effective length
           */
           char *raz_p = data + (bbytes * (first_block_idx + block_idx)) + block_ctx_p[block_idx].effective_length;
           memset( raz_p,0,(bbytes-block_ctx_p[block_idx].effective_length) );
        }
    }
    /*
    ** now the inverse transform is finished for all the requested blocks
    */
    *number_of_blocks_p = number_of_blocks;
    
    /*
    ** Check whether a block should be repaired
    */
    rozofs_storcli_check_block_2_repair(prj_ctx_p, rozofs_inverse, rozofs_forward, rozofs_safe, prj_size_in_msg, number_of_blocks, block_ctx_p);
    
    return 0;   
}
Esempio n. 10
0
/** 
  Apply the forward transform to a buffer starting at "data".
  The first_block_idx is the index of the first block in the output buffer
  The number_of_blocks is the number of blocks that must be transformed
  Notice that the first_block_idx offset applies to the output transform buffer only
  not to the input buffer pointed by "data".

  For each block and each forward projection, the projection header is filled
  in the output buffer. A block detected as empty by
  rozofs_data_block_check_empty() gets a zeroed header and is not transformed;
  otherwise the footer timestamp is set and the transform is applied.
  
 * 
 * @param *prj_ctx_p: pointer to the working array of the projection
 * @param layout: layout of the file
 * @param bsize: block size as defined in the layout (converted in bytes with ROZOFS_BSIZE_BYTES)
 * @param first_block_idx: index of the first block to transform
 * @param number_of_blocks: number of blocks to write
 * @param timestamp: date in microseconds
   @param last_block_size: effective length of the last block
 * @param *data: pointer to the source data that must be transformed
 *
 * @return: always 0
 */
 int rozofs_storcli_transform_forward(rozofs_storcli_projection_ctx_t *prj_ctx_p,  
                                       uint8_t layout,uint32_t bsize,
                                       uint32_t first_block_idx, 
                                       uint32_t number_of_blocks,
                                       uint64_t timestamp, 
                                       uint16_t last_block_size,
                                       char *data) 
 {

    projection_t rozofs_fwd_projections[ROZOFS_SAFE_MAX_STORCLI];
    projection_t *projections; // Table of projections used to transform data
    uint16_t projection_id = 0;
    uint32_t i = 0;    
    uint8_t rozofs_forward = rozofs_get_rozofs_forward(layout);
    int empty_block = 0;
    uint32_t bbytes = ROZOFS_BSIZE_BYTES(bsize);

    projections = rozofs_fwd_projections;
    int prj_size_in_msg = rozofs_get_max_psize_in_msg(layout,bsize);
    
    /* Transform the data */
    // For each block to send
    for (i = 0; i < number_of_blocks; i++) 
    {
         empty_block = rozofs_data_block_check_empty(data + (i * bbytes), bbytes);

        // seek bins for each projection
        for (projection_id = 0; projection_id < rozofs_forward; projection_id++) 
        {
          /*
          ** Indicates the memory area where the transformed data must be stored
          */
          projections[projection_id].bins = prj_ctx_p[projection_id].bins 
	                                  + (prj_size_in_msg/sizeof(bin_t)) * (first_block_idx+i);
          rozofs_stor_bins_hdr_t *rozofs_bins_hdr_p = (rozofs_stor_bins_hdr_t*)projections[projection_id].bins;
          /*
          ** check if the user data block is empty: if the data block is empty no need to transform
          ** (only the header is cleared, the footer is left untouched)
          */
          if (empty_block)
          {
            rozofs_bins_hdr_p->s.projection_id = 0;
            rozofs_bins_hdr_p->s.timestamp     = 0;          
            rozofs_bins_hdr_p->s.effective_length = 0;    
            rozofs_bins_hdr_p->s.filler = 0;    
            rozofs_bins_hdr_p->s.version = 0;    
            continue;   
          }
          /*
          ** fill the header of the projection
          */
          rozofs_bins_hdr_p->s.projection_id = projection_id;
          rozofs_bins_hdr_p->s.timestamp     = timestamp;
          rozofs_bins_hdr_p->s.filler = 0;    
          rozofs_bins_hdr_p->s.version = 0;   
	  /*
          ** set the effective size of the block. It is always the block size except for the last block
          */
          if (i == (number_of_blocks-1))
          {
            rozofs_bins_hdr_p->s.effective_length = last_block_size;
          }
          else
          {
            rozofs_bins_hdr_p->s.effective_length = bbytes;          
          } 
          /*
          ** update the pointer to point out the first bins
          */
          projections[projection_id].bins += sizeof(rozofs_stor_bins_hdr_t)/sizeof(bin_t);
          /*
          ** the footer follows the bins of the projection: set its timestamp
          */
	  rozofs_stor_bins_footer_t *rozofs_bins_foot_p = (rozofs_stor_bins_footer_t*) (projections[projection_id].bins
	                                                      + rozofs_get_psizes(layout,bsize,projection_id));
          rozofs_bins_foot_p->timestamp      = timestamp;                                                     
        }
        /*
        ** do not apply transform for empty block
        */
        if (empty_block == 0)
        {
	  transform_forward_proc(layout,data + (i * bbytes),bbytes,projections);
	} 

    }
    return 0;
}
Esempio n. 11
0
/** 
  Forward transform of a buffer starting at "data" (ROZOFS_BSIZE blocks).

  first_block_idx is the index of the first ROZOFS_BSIZE block in the output
  transform buffer; it does not apply to the input buffer pointed by "data".
  number_of_blocks is the number of ROZOFS_BSIZE blocks to transform.

  A block reported as empty by rozofs_data_block_check_empty() only gets a
  cleared projection header and is not transformed.
  
 * 
 * @param *prj_ctx_p: pointer to the working array of the projection
 * @param layout: layout of the file
 * @param first_block_idx: index of the first block to transform
 * @param number_of_blocks: number of blocks to write
 * @param timestamp: date in microseconds
   @param last_block_size: effective length of the last block
 * @param *data: pointer to the source data that must be transformed
 *
 * @return: always 0
 */
 int rozofs_storcli_transform_forward(rozofs_storcli_projection_ctx_t *prj_ctx_p,  
                                       uint8_t layout,
                                       uint32_t first_block_idx, 
                                       uint32_t number_of_blocks,
                                       uint64_t timestamp, 
                                       uint16_t last_block_size,
                                       char *data) 
 {
    projection_t fwd_prj[ROZOFS_SAFE_MAX];   /* projections used to transform data */
    uint8_t  nb_forward = rozofs_get_rozofs_forward(layout);
    uint8_t  nb_inverse = rozofs_get_rozofs_inverse(layout);
    uint16_t pid;
    uint32_t blk;

    /*
    ** Describe each forward projection: angles and size
    */
    for (pid = 0; pid < nb_forward; pid++)
    {
        projection_t *prj = &fwd_prj[pid];

        prj->angle.p = rozofs_get_angles_p(layout,pid);
        prj->angle.q = rozofs_get_angles_q(layout,pid);
        prj->size    = rozofs_get_psizes(layout,pid);
    }

    /*
    ** Process the blocks one after the other
    */
    blk = 0;
    while (blk < number_of_blocks)
    {
        char *src = data + (blk * ROZOFS_BSIZE);
        int   is_empty = rozofs_data_block_check_empty(src, ROZOFS_BSIZE);

        for (pid = 0; pid < nb_forward; pid++)
        {
            projection_t           *prj = &fwd_prj[pid];
            rozofs_stor_bins_hdr_t *hdr_p;

            /*
            ** Locate the area of the output buffer that receives that block
            ** for the current projection; it starts with the header
            */
            prj->bins = prj_ctx_p[pid].bins +
                        ((rozofs_get_max_psize(layout)+(sizeof(rozofs_stor_bins_hdr_t)/sizeof(bin_t)))* (first_block_idx+blk));
            hdr_p = (rozofs_stor_bins_hdr_t*)prj->bins;

            if (is_empty)
            {
                /*
                ** empty user data block: clear the header, nothing to transform
                */
                hdr_p->s.projection_id    = 0;
                hdr_p->s.timestamp        = 0;
                hdr_p->s.effective_length = 0;
            }
            else
            {
                hdr_p->s.projection_id = pid;
                hdr_p->s.timestamp     = timestamp;
                /*
                ** every block is ROZOFS_BSIZE long except the last one
                */
                if (blk == (number_of_blocks-1))
                {
                    hdr_p->s.effective_length = last_block_size;
                }
                else
                {
                    hdr_p->s.effective_length = ROZOFS_BSIZE;
                }
                /*
                ** skip the header: the bins start right after it
                */
                prj->bins += sizeof(rozofs_stor_bins_hdr_t)/sizeof(bin_t);
            }
        }

        /*
        ** Apply the erasure code transform for the block blk+first_block_idx,
        ** unless the block is empty
        */
        if (!is_empty)
        {
            transform_forward((pxl_t *) src,
                    nb_inverse,
                    ROZOFS_BSIZE / nb_inverse / sizeof (pxl_t),
                    nb_forward, fwd_prj);
        }
        blk++;
    }

    return 0;
}
Esempio n. 12
0
/**
 * Display the content of one chunk (bins) file, block after block.
 *
 * Depending on dump_data (declared outside this function) each block is
 * displayed either as one line of a table (dump_data == 0) or as a short
 * header followed by an hexadecimal dump of the whole projection.
 * Only the blocks whose number lies between first and last (both declared
 * outside this function) are displayed.
 *
 * @param fid        FID of the file; used to seed the CRC32 of each block
 * @param path       path of the chunk file to read
 * @param hdr        content of the header file (version, layout, bsize, distribution)
 * @param spare      0 when the on-disk projection size has to be found out
 *                   from the header file, the prjid parameter or the 1rst block;
 *                   else the size returned by rozofs_get_max_psize_in_msg() is used
 * @param firstBlock number of the 1rst block stored in this chunk file
 */
void read_chunk_file(uuid_t fid, char * path, rozofs_stor_bins_file_hdr_vall_t * hdr, int spare, uint64_t firstBlock) {
  uint16_t rozofs_disk_psize;
  int      fd;
  rozofs_stor_bins_hdr_t * pH;
  int      nb_read;
  uint32_t bbytes = ROZOFS_BSIZE_BYTES(hdr->v0.bsize);
  char     crc32_string[32];
  uint64_t offset;
  uint64_t bid;
  int      idx;
  int      done = 0;
  
  if (dump_data == 0) {
    printf ("+------------+------------------+------------+----+------+-------+--------------------------------------------\n");
    printf ("| %10s | %16s | %10s | %2s | %4s | %5s | %s\n", "block#","file offset", "prj offset", "pj", "size", "crc32", "date");
    printf ("+------------+------------------+------------+----+------+-------+--------------------------------------------\n");
  }

  // Open bins file
  fd = open(path, ROZOFS_ST_NO_CREATE_FILE_FLAG, ROZOFS_ST_BINS_FILE_MODE_RO);
  if (fd < 0) {
    printf("open(%s) %s\n",path,strerror(errno));
    return;
  }

  /*
  ** Retrieve the projection size on disk
  */
  rozofs_disk_psize = rozofs_get_max_psize_in_msg(hdr->v0.layout,hdr->v0.bsize);
  if (spare==0) {
  
    /* Header version 2. Find the sid in the distribution */
    if (hdr->v0.version == 2) {
      int fwd = rozofs_get_rozofs_forward(hdr->v2.layout);
      for (idx=0; idx< fwd;idx++) {
        if (hdr->v2.distrib[idx] != hdr->v2.sid) continue;
        rozofs_disk_psize = rozofs_get_psizes_on_disk(hdr->v2.layout,hdr->v2.bsize,idx);
        break;
      }
    }
    /* Header version 1. Find the sid in the current distribution set */
    else if (hdr->v0.version == 1) {
      int fwd = rozofs_get_rozofs_forward(hdr->v1.layout);
      for (idx=0; idx< fwd;idx++) {
        if (hdr->v1.dist_set_current[idx] != hdr->v1.sid) continue;
        rozofs_disk_psize = rozofs_get_psizes_on_disk(hdr->v1.layout,hdr->v1.bsize,idx);
        break;
      }
    }
    /* Projection id given as parameter */
    else if (prjid != -1) {
      rozofs_disk_psize = rozofs_get_psizes_on_disk(hdr->v0.layout,hdr->v0.bsize,prjid);
    }
    
    /* Version 0 without projection given as parameter */
    else {
      // Read the header of the 1rst block to get the projection id
      nb_read = pread(fd, buffer, sizeof(rozofs_stor_bins_hdr_t), 0);
      if (nb_read<0) {
        printf("pread(%s) %s\n",path,strerror(errno));
        goto out;
      }
      pH = (rozofs_stor_bins_hdr_t*)buffer;
      /* A short read leaves the header incomplete: it can not be trusted */
      if ((nb_read < (int)sizeof(rozofs_stor_bins_hdr_t)) || (pH->s.timestamp == 0)) {
        printf("Can not tell projection id\n");
        goto out;
      }
      rozofs_disk_psize = rozofs_get_psizes_on_disk(hdr->v0.layout,hdr->v0.bsize,pH->s.projection_id);
    }
  }
  
  /*
  ** The projection size is used as a divisor below
  */
  if (rozofs_disk_psize == 0) {
    printf("Invalid projection size 0 for %s\n",path);
    goto out;
  }

  /*
  ** Where to start reading from 
  */
  if ((first == 0) || (first <= firstBlock)) {
    offset = 0;
  }
  else {
    offset = (first-firstBlock)*rozofs_disk_psize;
  }
  
  /*
  ** Reading blocks, up to 32 projections per read, until the end of
  ** the file or until a block beyond the last requested one is met
  */  
  nb_read = 1;
  while (nb_read && !done) {
  
    // Read nb_proj * (projection + header)
    nb_read = pread(fd, buffer, rozofs_disk_psize*32, offset);
    if (nb_read<0) {
      printf("pread(%s) %s\n",path,strerror(errno));
      goto out;
    }
    
    /* Only whole projections are processed */
    nb_read = (nb_read / rozofs_disk_psize);
    
    for (idx=0; idx<nb_read; idx++) {
    
      pH = (rozofs_stor_bins_hdr_t*) &buffer[idx*rozofs_disk_psize];
      
      bid = (offset/rozofs_disk_psize)+idx+firstBlock;
      
      if (bid < first) continue;
      if (bid > last) {
        /* Block numbers only grow: nothing more to display */
        done = 1;
        break;
      }
     
      /*
      ** The CRC32 is stored in the filler field of the header, and has
      ** to be computed with this field cleared. 0 means no CRC32 is set.
      */
      uint32_t save_crc32 = pH->s.filler;
      pH->s.filler = 0;
      uint32_t crc32=0;

      if (save_crc32 == 0) {
        sprintf(crc32_string,"NONE");
      }
      else {
        crc32 = fid2crc32((uint32_t *)fid)+bid-firstBlock;
        crc32 = crc32c(crc32,(char *) pH, rozofs_disk_psize);
        if (crc32 != save_crc32) sprintf(crc32_string,"ERROR");
        else                     sprintf(crc32_string,"OK");
      }
      pH->s.filler = save_crc32;
      
      if (dump_data == 0) {
        printf ("| %10llu | %16llu | %10llu | %2d | %4d | %5s | %s\n",
                (long long unsigned int)bid,
                (long long unsigned int)bbytes * bid,
                (long long unsigned int)offset+(idx*rozofs_disk_psize),
                pH->s.projection_id,
                pH->s.effective_length, 
                crc32_string,  
                ts2string(pH->s.timestamp));
      }
      else {
        printf("_________________________________________________________________________________________\n");
        printf("Block# %llu / file offset %llu / projection offset %llu\n", 
               (unsigned long long)bid, (unsigned long long)(bbytes * bid), (unsigned long long)(offset+(idx*rozofs_disk_psize)));
        printf("prj id %d / length %d / CRC %s / time stamp %s\n", 
               pH->s.projection_id,pH->s.effective_length,crc32_string, ts2string(pH->s.timestamp));
        printf("_________________________________________________________________________________________\n");
        /* Do not dump the content of a block that has a null header */
        if ((pH->s.projection_id == 0)&&(pH->s.timestamp==0)) continue;
        hexdump(pH, (offset+(idx*rozofs_disk_psize)), rozofs_disk_psize);
      }
    }
    offset += (nb_read*rozofs_disk_psize);
  }
  if (dump_data == 0) {
    printf ("+------------+------------------+------------+----+------+-------+--------------------------------------------\n");
  }
  
out:
  /* Single exit point once the file is opened, so it is always closed */
  close(fd);
}
Esempio n. 13
0
/**
 * Display side by side, block after block, the projection headers of the
 * files listed in the filename[] table (nb_file entries, "NULL" meaning
 * that the file is absent).
 *
 * For each block, one line is built with the time stamp, effective length
 * and projection id read from each file. Blocks without any valid projection
 * are only counted and reported as "... N blocks...". A line is flagged with
 * "<<<<----" when no time stamp is shared by at least `forward` projections.
 *
 * When block_number is not -1, only that block is displayed, and each
 * projection of the block is dumped in a file named
 * block_<block_number>_dist_<idx>.txt in the current directory.
 *
 * All inputs (layout, bsize, filename, nb_file, fd, block_number, firstBlock,
 * bbytes, LINE) are declared outside this function.
 *
 * @retval 0 on success
 * @retval -1 when one of the files can not be opened
 */
int read_data_file() {
    int status = -1;
    uint64_t size = 0;
    int block_idx = 0;
    int idx =0;
    int count;
    rozofs_stor_bins_hdr_t * rozofs_bins_hdr_p;
    rozofs_stor_bins_footer_t * rozofs_bins_foot_p;
    char * loc_read_bins_p = NULL;
    int      forward = rozofs_get_rozofs_forward(layout);
    uint16_t disk_block_size; 
    uint16_t max_block_size = (rozofs_get_max_psize(layout,bsize)*sizeof (bin_t)) 
                            + sizeof (rozofs_stor_bins_hdr_t) + sizeof (rozofs_stor_bins_footer_t);
    char * p;
    int empty,valid;
    int prj_id;
    int      nb_ts;
    uint64_t ts[32];
    int      ts_count[32];
    const int max_ts = (int)(sizeof(ts)/sizeof(ts[0]));
        
    // Allocate memory for reading
    loc_read_bins_p = xmalloc(max_block_size);   

    /*
    ** Mark every file as closed first, so that the cleanup at the end
    ** never closes a descriptor that has not been opened here
    */
    for (idx=0; idx < nb_file; idx++) {
      fd[idx] = -1;
    }

    for (idx=0; idx < nb_file; idx++) {
      if (strcmp(filename[idx],"NULL") == 0) {
        continue;
      }
      fd[idx] = open(filename[idx],O_RDWR);
      if (fd[idx] < 0) {
        severe("Can not open file %s %s",filename[idx],strerror(errno));
        goto out;
      }
    }
            
    printf (" ______ __________ ");
    for (idx=0; idx < nb_file; idx++) printf (" __________________ ______ ____ ");
    printf ("\n");

    printf("| %4s | %8s |","Blk","Offset");     
    for (idx=0; idx < nb_file; idx++) printf("| %16s | %4s | %2s |", "Time stamp", "lgth", "id");
    printf ("\n");  
    
    printf ("|______|__________|");
    for (idx=0; idx < nb_file; idx++) printf ("|__________________|______|____|");
    printf ("\n"); 
    
    if (block_number == -1) block_idx = 0;
    else                    block_idx = block_number;
    count = 1;
    
    /*
    ** Loop on the blocks until no file is able to provide the block
    */
    empty = 0;
    while ( count ) {

      valid = 0;
      count = 0;
      nb_ts = 0;
      
      p = &LINE[0];
      p += sprintf(p,"| %4d | %8d ",block_idx+firstBlock,(block_idx+firstBlock)*bbytes);

      for (idx=0; idx < nb_file; idx++) {
             
        /* Absent file, or file already read up to its end */
        if (fd[idx] == -1) {
          p += sprintf(p,"%32s"," ");
          continue;
        }
       
        if (idx >= forward)
          disk_block_size = rozofs_get_max_psize_in_msg(layout, bsize);
        else
          disk_block_size = rozofs_get_psizes_on_disk(layout,bsize,idx);          
       
        /* The offset is computed as an off_t to avoid an int overflow on big files */
        size = pread(fd[idx],loc_read_bins_p,disk_block_size,(off_t)block_idx*disk_block_size);
       
        /* Error or end of file: this file is not read any more */
        if (size !=  disk_block_size) {
          p += sprintf(p,"|__________________|______|____|");
          close(fd[idx]);
          fd[idx] = -1;
          continue;
        }
        
        count++;
        rozofs_bins_hdr_p = (rozofs_stor_bins_hdr_t *)loc_read_bins_p;
        prj_id = rozofs_bins_hdr_p->s.projection_id;
	 
        /* Projection id out of the range of the layout */
        if (prj_id >= forward) {
          valid = 1;
          p += sprintf(p,"|| xxxxxxxxxxxxxxxx | xxxx | %2d ",prj_id);	     
          continue;
        }
        
        /*
        ** The footer follows the bins of the projection prj_id
        */
        disk_block_size = (rozofs_get_psizes(layout,bsize,prj_id)*sizeof (bin_t));
        disk_block_size += sizeof (rozofs_stor_bins_hdr_t);
        rozofs_bins_foot_p = (rozofs_stor_bins_footer_t *) 
                 ((char*) rozofs_bins_hdr_p + disk_block_size);
                 
        if (rozofs_bins_hdr_p->s.timestamp == 0) {
          /* Null time stamp in the header */
          p += sprintf(p,"|| %16d | .... | %2d ",0,prj_id);
        }		    
        else if (rozofs_bins_foot_p->timestamp != rozofs_bins_hdr_p->s.timestamp) {
          /* Header and footer time stamps differ */
          valid = 1;
          p += sprintf(p,"|--%16.16llu----------%2d-", 
                       (long long unsigned int)rozofs_bins_hdr_p->s.timestamp, 
                       prj_id);	     
        }
        else {
          int tsidx;
          
          valid = 1;
          p += sprintf(p,"|| %16llu | %4d | %2d ",
                       (unsigned long long)rozofs_bins_hdr_p->s.timestamp,    
                       rozofs_bins_hdr_p->s.effective_length,    
                       rozofs_bins_hdr_p->s.projection_id); 

          /*
          ** Count the number of projections per time stamp
          */
          for (tsidx=0; tsidx< nb_ts; tsidx++) {
            if (ts[tsidx] == rozofs_bins_hdr_p->s.timestamp) {
              ts_count[tsidx]++;
              break;
            }
          }		    
          /* New time stamp: record it as long as the table is not full */
          if ((tsidx == nb_ts) && (nb_ts < max_ts)) {
            ts[tsidx] = rozofs_bins_hdr_p->s.timestamp;
            ts_count[tsidx] = 1;
            nb_ts++;
          }  
        }
      }
     
      if (valid) {
        int best=-1,tsidx;
        
        /* Report the empty blocks skipped before this one */
        if (empty) {
          printf("... %d blocks...\n",empty);
          empty = 0;
        }
        /* Highest number of projections sharing the same time stamp */
        for (tsidx=0; tsidx< nb_ts; tsidx++) {
          if (ts_count[tsidx] > best) best = ts_count[tsidx];
        }       
        printf("%s%s\n",LINE, (best<forward)?"<<<<----":"|");
      }
      else {
        empty++;
      }
      block_idx++;
      if (block_number!=-1) break;
    }  	
    printf ("|______|__________|\n");

    /*
    ** One given block is requested: dump each of its projections in a file
    */
    if (block_number!=-1) {
      for (idx=0; idx < nb_file; idx++) {
        FILE * f;
        char fname[128];

        if (idx < forward) {	 
          disk_block_size = (rozofs_get_psizes(layout,bsize,idx)*sizeof (bin_t)) + sizeof (rozofs_stor_bins_hdr_t) + sizeof (rozofs_stor_bins_footer_t);
        }	 
        else {
          disk_block_size = (rozofs_get_max_psize(layout,bsize)*sizeof (bin_t)) + sizeof (rozofs_stor_bins_hdr_t) + sizeof (rozofs_stor_bins_footer_t);
        }  
        size = pread(fd[idx],loc_read_bins_p,disk_block_size,(off_t)block_number*disk_block_size);
        if (size !=  disk_block_size) {
          printf("Can not read block %d of %s\n", block_number, filename[idx]); 
          continue;      
        }

        sprintf(fname,"block_%d_dist_%d.txt", block_number, idx);
        f = fopen(fname,"w");
        if (f == NULL) {
          printf ("Can not create file %s",fname);
          continue;
        }
        printf("- %s\n",fname);

        fprintf(f,"%s Block %d size %d\n", filename[idx], block_number, disk_block_size);

        rozofs_bins_hdr_p = (rozofs_stor_bins_hdr_t *)loc_read_bins_p;   	 
        fprintf(f,"Block header : TS %llu SZ %d PRJ %d CRC32 0x%x\n", 
                (long long unsigned int)rozofs_bins_hdr_p->s.timestamp, 
                rozofs_bins_hdr_p->s.effective_length,
                rozofs_bins_hdr_p->s.projection_id,
                rozofs_bins_hdr_p->s.filler);

        /* The footer is the last structure of the block that has been read */
        rozofs_bins_foot_p = (rozofs_stor_bins_footer_t *) (loc_read_bins_p + disk_block_size);
        rozofs_bins_foot_p--;
        fprintf(f,"Block footer : TS %llu %s\n", 
                (long long unsigned int)rozofs_bins_foot_p->timestamp,
                (rozofs_bins_hdr_p->s.timestamp==rozofs_bins_foot_p->timestamp)?"":" !!!!!!");	 

        hexdump(f,loc_read_bins_p, 0, disk_block_size);   
        fclose(f);		  
      }   
    }
     
    status = 0;
    			      
out:
    /* Close the files that are still opened */
    for (idx=0; idx < nb_file; idx++) {
      if (fd[idx] != -1) close(fd[idx]);
    }  	
    /*
    ** NOTE(review): the read buffer is deliberately not released here, as in
    ** the original code where the call to free() was commented out. Confirm
    ** how memory returned by xmalloc() has to be released before changing this.
    */
    loc_read_bins_p = NULL;
    return status;
}