/*
** That function is called when all the projection are ready to be sent

 @param working_ctx_p: pointer to the root context associated with the top level write request

*/
void rozofs_storcli_write_repair_req_processing(rozofs_storcli_ctx_t *working_ctx_p)
{

  storcli_read_arg_t *storcli_read_rq_p = (storcli_read_arg_t*)&working_ctx_p->storcli_read_arg;
  uint8_t layout = storcli_read_rq_p->layout;
  uint8_t   rozofs_forward;
  uint8_t   projection_id;
  int       error=0;
  int       ret;
  rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
  uint8_t  bsize  = storcli_read_rq_p->bsize;
  int prj_size_in_msg = rozofs_get_max_psize_in_msg(layout,bsize);
  sp_write_repair_arg_no_bins_t  *request; 
  sp_write_repair_arg_no_bins_t   repair_prj_args;
  sp_write_repair2_arg_no_bins_t *request2; 
  sp_write_repair2_arg_no_bins_t  repair2_prj_args;
      
  rozofs_forward = rozofs_get_rozofs_forward(layout);
  
  /*
  ** check if the buffer is still valid: we might face the situation where the rozofsmount
  ** time-out and re-allocate the write buffer located in shared memory for another
  ** transaction (either read or write:
  ** the control must take place only where here is the presence of a shared memory for the write
  */
  error  = 0;
  if (working_ctx_p->shared_mem_p!= NULL)
  {
      uint32_t *xid_p = (uint32_t*)working_ctx_p->shared_mem_p;
      if (*xid_p !=  working_ctx_p->src_transaction_id)
      {
        /*
        ** the source has aborted the request
        */
        error = EPROTO;
      }      
  } 
  /*
  ** send back the response of the read request towards rozofsmount
  */
  rozofs_storcli_read_reply_success(working_ctx_p);
   /*
   ** allocate a sequence number for the working context:
   **   This is mandatory to avoid any confusion with a late response of the previous read request
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
  /*
  ** check if it make sense to send the repaired blocks
  */
  if (error)
  {
    /*
    ** the requester has released the buffer and it could be possible that the
    ** rozofsmount uses it for another purpose, so the data that have been repaired
    ** might be wrong, so don't take the right to write wrong data for which we can can 
    ** a good crc !!
    */
    goto fail;
  }
  
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     void  *xmit_buf;  
     int ret;  
	 
     /*
     ** skip the projections for which no error has been detected 
     */
     if (storcli_storage_supports_repair2) {
	   if (ROZOFS_BITMAP64_TEST_ALL0(working_ctx_p->prj_ctx[projection_id].crc_err_bitmap)) continue;
	 }
	 else {
	   if (working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0] == 0)  continue;
	 } 
	 
	 
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the ressource control already took place
       */       
       error = EIO;
       goto fail;     
     }
     /*
     ** fill partially the common header
     */
	 if (storcli_storage_supports_repair2) {
       request2   = &repair2_prj_args;
       request2->cid = storcli_read_rq_p->cid;
       request2->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request2->layout        = storcli_read_rq_p->layout;
       request2->bsize         = storcli_read_rq_p->bsize;
       /*
       ** the case of spare 1 must not occur because repair is done for th eoptimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request2->spare = 1;
       else request2->spare = 0;
       memcpy(request2->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request2->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
  //CRCrequest->proj_id = projection_id;
       request2->proj_id = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request2->sid,rozofs_forward);
       request2->bid     = storcli_read_rq_p->bid;
       request2->bitmap[0]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];     
       request2->bitmap[1]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[1];     
       request2->bitmap[2]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[2];     
       int nb_blocks       = ROZOFS_BITMAP64_NB_BIT1(request2->bitmap);
       request2->nb_proj    = nb_blocks;     

       /*
       ** set the length of the bins part: need to compute the number of blocks
       */

       int bins_len = (prj_size_in_msg * nb_blocks);
       request2->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution we might have a direct reply if there is a direct error at load balancing group while
       ** ateempting to send the RPC message-> typically a disconnection of the TCP connection 
       ** As a consequence the response fct 'rozofs_storcli_write_repair_req_processing_cbk) can be called
       ** prior returning from rozofs_sorcli_send_rq_common')
       ** anticipate the status of the xmit state of the projection and lock the section to
       ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
       ** --> need to take care because the write context is released after the reply error sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

       ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR2,
                                           (xdrproc_t) xdr_sp_write_repair2_arg_no_bins_t, (caddr_t) request2,
                                        	xmit_buf,
                                        	working_ctx_p->read_seqnum,
                                        	(uint32_t) projection_id,
                                        	bins_len,
                                        	rozofs_storcli_write_repair_req_processing_cbk,
                                           (void*)working_ctx_p);
     }
	 else {
	 
       request   = &repair_prj_args;
       request->cid = storcli_read_rq_p->cid;
       request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request->layout        = storcli_read_rq_p->layout;
       request->bsize         = storcli_read_rq_p->bsize;
       /*
       ** the case of spare 1 must not occur because repair is done for th eoptimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
       else request->spare = 0;
       memcpy(request->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
  //CRCrequest->proj_id = projection_id;
       request->proj_id = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request->sid,rozofs_forward);
       request->bid     = storcli_read_rq_p->bid;
       request->bitmap  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];     
       int nb_blocks       = ROZOFS_BITMAP64_NB_BIT1_FUNC((uint8_t*)&request->bitmap,8);
       request->nb_proj    = nb_blocks;     

       /*
       ** set the length of the bins part: need to compute the number of blocks
       */

       int bins_len = (prj_size_in_msg * nb_blocks);
       request->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution we might have a direct reply if there is a direct error at load balancing group while
       ** ateempting to send the RPC message-> typically a disconnection of the TCP connection 
       ** As a consequence the response fct 'rozofs_storcli_write_repair_req_processing_cbk) can be called
       ** prior returning from rozofs_sorcli_send_rq_common')
       ** anticipate the status of the xmit state of the projection and lock the section to
       ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
       ** --> need to take care because the write context is released after the reply error sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

       ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR,
                                           (xdrproc_t) xdr_sp_write_repair_arg_no_bins_t, (caddr_t) request,
                                        	xmit_buf,
                                        	working_ctx_p->read_seqnum,
                                        	(uint32_t) projection_id,
                                        	bins_len,
                                        	rozofs_storcli_write_repair_req_processing_cbk,
                                           (void*)working_ctx_p);	   
	 }										   

     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
        /*
	** there is no retry, just keep on with a potential other projection to repair
	*/
        STORCLI_ERR_PROF(repair_prj_err);
        STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);
	prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_ERROR;
	continue;
     } 
     else
     {
       /*
       ** check if the state has not been changed: -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
          /*
	  ** it looks like that we cannot repair that preojection, check if there is some other
	  */
          STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);

       }      
     }
   }
   /*
   ** check if there some write repair request pending, in such a case we wait for the end of the repair
   ** (answer from the storage node
   */
    ret = rozofs_storcli_all_prj_write_repair_check(storcli_read_rq_p->layout,
                                                    working_ctx_p->prj_ctx);
    if (ret == 0)
    {
       /*
       ** there is some pending write
       */
       return;
    }   
  
fail:
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,repair,0);
     rozofs_storcli_release_context(working_ctx_p);  
  return;

}
/**
  Initial write repair request


  Here it is assumed that storclo is working with the context that has been allocated 
  @param  working_ctx_p: pointer to the working context of a read transaction
 
   @retval : TRUE-> xmit ready event expected
  @retval : FALSE-> xmit  ready event not expected
*/
void rozofs_storcli_repair_req_init(rozofs_storcli_ctx_t *working_ctx_p)
{
   int i;
   storcli_read_arg_t *storcli_read_rq_p = (storcli_read_arg_t*)&working_ctx_p->storcli_read_arg;

   STORCLI_START_NORTH_PROF(working_ctx_p,repair,0);

   /*
   ** set the pointer to to first available data (decoded data)
   */
   working_ctx_p->data_write_p  = working_ctx_p->data_read_p; 
   /*
   ** set now the working variable specific for handling the write
   ** We need one large buffer per projection that will be written on storage
   ** we keep the buffer that have been allocated for the read.
   */
   uint8_t forward_projection = rozofs_get_rozofs_forward(storcli_read_rq_p->layout);
   for (i = 0; i < forward_projection; i++)
   {
     working_ctx_p->prj_ctx[i].prj_state = ROZOFS_PRJ_WR_IDLE;
     if (working_ctx_p->prj_ctx[i].prj_buf == NULL)
     {
       working_ctx_p->prj_ctx[i].prj_buf   = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
       if (working_ctx_p->prj_ctx[i].prj_buf == NULL)
       {
	 /*
	 ** that situation MUST not occur since there the same number of receive buffer and working context!!
	 */
	 severe("out of large buffer");
	 goto failure;
       }
     }
     /*
     ** set the pointer to the bins
     */
     int position;
	 // For compatibility between new clients and old storages
	 if (storcli_storage_supports_repair2) {
 	   position = rozofs_storcli_repair2_get_position_of_first_byte2write();
	 }
	 else {
 	   position = rozofs_storcli_repair_get_position_of_first_byte2write();	   
	 }	
     uint8_t *pbuf = (uint8_t*)ruc_buf_getPayload(working_ctx_p->prj_ctx[i].prj_buf); 

     working_ctx_p->prj_ctx[i].bins       = (bin_t*)(pbuf+position);   
   }	
   /*
   **  now regenerate the projections that were in error
   */
   rozofs_storcli_transform_forward_repair(working_ctx_p,
                                           storcli_read_rq_p->layout,
                                           storcli_read_rq_p->nb_proj,
                                           (char *)working_ctx_p->data_write_p);    			
   /*
   ** starts the sending of the repaired projections
   */
   rozofs_storcli_write_repair_req_processing(working_ctx_p);
   return;


failure:
  /*
  ** send back the response of the read request towards rozofsmount
  */
  rozofs_storcli_read_reply_success(working_ctx_p);
  /*
  ** release the root transaction context
  */
  STORCLI_STOP_NORTH_PROF(working_ctx_p,repair,0);
  rozofs_storcli_release_context(working_ctx_p);  
}
示例#3
0
/*
** That function is called when all the projection are ready to be sent

 @param working_ctx_p: pointer to the root context associated with the top level write request
 @param data         : pointer to the data of the last block to truncate

*/
void rozofs_storcli_truncate_req_processing_exec(rozofs_storcli_ctx_t *working_ctx_p, char * data)
{

  storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
  uint8_t layout = storcli_truncate_rq_p->layout;
  uint8_t   rozofs_forward;
  uint8_t   rozofs_safe;
  uint8_t   projection_id;
  int       storage_idx;
  int       error=0;
  rozofs_storcli_lbg_prj_assoc_t  *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;
  rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
  
  rozofs_forward = rozofs_get_rozofs_forward(layout);
  rozofs_safe    = rozofs_get_rozofs_safe(layout);
  

  /*
  ** set the current state of each load balancing group belonging to the rozofs_safe group
  */
  for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++) 
  {
    /*
    ** Check the state of the load Balancing group
    */
    rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                            storage_idx,
                                            NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));      
  }
  /*
  ** Now find out a selectable lbg_id for each projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
    if (rozofs_storcli_select_storage_idx_for_write ( working_ctx_p,rozofs_forward, rozofs_safe,projection_id) < 0)
    {
       /*
       ** there is no enough valid storage !!
       */
       error = EIO;
       goto fail;
    }
  }  
  
  
  /*
  ** Let's transform the data to write
  */
  working_ctx_p->truncate_bins_len = 0;
  if (data != NULL) {
    STORCLI_START_KPI(storcli_kpi_transform_forward);

    rozofs_storcli_transform_forward(working_ctx_p->prj_ctx,  
                                     layout,
                                     0, 
                                     1, 
                                     working_ctx_p->timestamp,
                                     storcli_truncate_rq_p->last_seg,
                                     data);  
    STORCLI_STOP_KPI(storcli_kpi_transform_forward,0);
    working_ctx_p->truncate_bins_len = rozofs_get_max_psize(layout)*sizeof(bin_t) + sizeof(rozofs_stor_bins_hdr_t);
  } 
  
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     sp_truncate_arg_no_bins_t *request; 
     sp_truncate_arg_no_bins_t  truncate_prj_args;
     void  *xmit_buf;  
     int ret;  
      
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the ressource control already took place
       */       
       error = EIO;
       goto fatal;     
     }
     /*
     ** fill partially the common header
     */
retry:
     request   = &truncate_prj_args;
     request->cid = storcli_truncate_rq_p->cid;
     request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     request->layout        = layout;
     if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
     else request->spare = 0;
     memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX*sizeof (uint8_t));
     memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
     request->proj_id        = projection_id;
     request->bid            = storcli_truncate_rq_p->bid;
     request->last_seg       = storcli_truncate_rq_p->last_seg;
     request->last_timestamp = working_ctx_p->timestamp;

     request->len = working_ctx_p->truncate_bins_len;

     uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
     /*
     ** caution we might have a direct reply if there is a direct error at load balancing group while
     ** ateempting to send the RPC message-> typically a disconnection of the TCP connection 
     ** As a consequence the response fct 'rozofs_storcli_truncate_req_processing_cbk) can be called
     ** prior returning from rozofs_sorcli_send_rq_common')
     ** anticipate the status of the xmit state of the projection and lock the section to
     ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
     ** --> need to take care because the write context is released after the reply error sent to rozofsmount
     */
     working_ctx_p->write_ctx_lock = 1;
     prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
     
     ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                         (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                          xmit_buf,
                                          working_ctx_p->read_seqnum,
                                          (uint32_t) projection_id,
                                          working_ctx_p->truncate_bins_len,
                                          rozofs_storcli_truncate_req_processing_cbk,
                                         (void*)working_ctx_p);
     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
       /*
       ** the communication with the storage seems to be wrong (more than TCP connection temporary down
       ** attempt to select a new storage
       **
       */
       if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
       {
         /*
         ** Out of storage !!-> too many storages are down
         */
         goto fatal;
       } 
       /*
       ** retry for that projection with a new storage index: WARNING: we assume that xmit buffer has not been released !!!
       */
//#warning: it is assumed that xmit buffer has not been release, need to double check!!        
       goto retry;
     } 
     else
     {
       /*
       ** check if the state has not been changed: -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
          error = prj_cxt_p[projection_id].errcode;
          goto fatal;       
       }
     }

   }

  return;
  
fail:
     /*
     ** we fall in that case when we run out of  resource-> that case is a BUG !!
     */
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
     rozofs_storcli_release_context(working_ctx_p);  
     return;

fatal:
     /*
     ** we fall in that case when we run out of  resource-> that case is a BUG !!
     */
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
     rozofs_storcli_release_context(working_ctx_p);  

  return;

}
示例#4
0
void rozofs_storcli_truncate_projection_retry(rozofs_storcli_ctx_t *working_ctx_p,uint8_t projection_id,int same_storage_retry_acceptable)
{
    uint8_t   rozofs_safe;
    uint8_t   rozofs_forward;
    uint8_t   layout;
    storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
    int error=0;
    int storage_idx;

    rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
    rozofs_storcli_lbg_prj_assoc_t  *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;

    layout         = storcli_truncate_rq_p->layout;
    rozofs_safe    = rozofs_get_rozofs_safe(layout);
    rozofs_forward = rozofs_get_rozofs_forward(layout);
    /*
    ** Now update the state of each load balancing group since it might be possible
    ** that some experience a state change
    */
    for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++) 
    {
      /*
      ** Check the state of the load Balancing group
      */
      rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                              storage_idx,
                                              NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));      
    }    
    /**
    * attempt to select a new storage
    */
    if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
    {
      /*
      ** Cannot select a new storage: OK so now double check if the retry on the same storage is
      ** acceptable.When it is the case, check if the max retry has not been yet reached
      ** Otherwise, we are in deep shit-> reject the read request
      */
      if (same_storage_retry_acceptable == 0) 
      {
        error = EIO;
        prj_cxt_p[projection_id].errcode = error;
        goto reject;      
      }
      if (++prj_cxt_p[projection_id].retry_cpt >= ROZOFS_STORCLI_MAX_RETRY)
      {
        error = EIO;
        prj_cxt_p[projection_id].errcode = error;
        goto reject;          
      }
    } 
    /*
    ** we are lucky since either a get a new storage or the retry counter is not exhausted
    */
     sp_truncate_arg_no_bins_t *request; 
     sp_truncate_arg_no_bins_t  truncate_prj_args;
     void  *xmit_buf;  
     int ret;  
      
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the ressource control already took place
       */
       error = EFAULT;
       prj_cxt_p[projection_id].errcode = error;
       goto fatal;     
     }
     /*
     ** fill partially the common header
     */
retry:
     request   = &truncate_prj_args;
     request->cid = storcli_truncate_rq_p->cid;
     request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     request->layout        = layout;
     if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
     else request->spare = 0;
     memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX*sizeof (uint8_t));
     memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
     request->proj_id        = projection_id;
     request->bid            = storcli_truncate_rq_p->bid;
     request->last_seg       = storcli_truncate_rq_p->last_seg;
     request->last_timestamp = working_ctx_p->timestamp;


     /*
     ** Bins len has been saved in the working context
     */
     request->len = working_ctx_p->truncate_bins_len;

     uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     /*
     **  increment the lock since it might be possible that this procedure is called after a synchronous transaction failu failure
     ** while the system is still in the initial procedure that triggers the writing of the projection. So it might be possible that
     ** the lock is already asserted
     ** as for the initial case, we need to anticipate the xmit state of the projection since the ERROR status might be set 
     ** on a synchronous transaction failure. If that state is set after a positive submission towards the lbg, we might
     ** overwrite the ERROR state with the IN_PRG state.
     */
     working_ctx_p->write_ctx_lock++;
     prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
     
     STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
     ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                         (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                          xmit_buf,
                                          working_ctx_p->read_seqnum,
                                          (uint32_t) projection_id,
                                          working_ctx_p->truncate_bins_len,
                                          rozofs_storcli_truncate_req_processing_cbk,
                                         (void*)working_ctx_p);
     working_ctx_p->write_ctx_lock--;
     if (ret < 0)
     {
       /*
       ** the communication with the storage seems to be wrong (more than TCP connection temporary down
       ** attempt to select a new storage
       **
       */
       STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
       if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
       {
         /*
         ** Out of storage !!-> too many storages are down
         */
         goto fatal;
       } 
       /*
       ** retry for that projection with a new storage index: WARNING: we assume that xmit buffer has not been released !!!
       */
       goto retry;
     }
     /*
     ** OK, the buffer has been accepted by the load balancing group, check if there was a direct failure for
     ** that transaction
     */
     if ( prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
     {
        error = prj_cxt_p[projection_id].errcode;
        goto fatal;     
     }    
    return;
    /*
    **_____________________________________________
    **  Exception cases
    **_____________________________________________
    */      
    
reject:  
     if (working_ctx_p->write_ctx_lock != 0) return;
     /*
     ** we fall in that case when we run out of  storage
     */
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
    STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
    rozofs_storcli_release_context(working_ctx_p);  
     return; 
      
fatal:
     /*
     ** caution -> reply error is only generated if the ctx_lock is 0
     */
     if (working_ctx_p->write_ctx_lock != 0) return;
     /*
     ** we fall in that case when we run out of  resource-> that case is a BUG !!
     */
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
     rozofs_storcli_release_context(working_ctx_p);  
     return; 

}
示例#5
0
/**
  Initial truncate request
    
  @param socket_ctx_p: pointer to the af unix socket
  @param socketId: reference of the socket (not used)
  @param rozofs_storcli_remote_rsp_cbk: callback for sending out the response
 
   @retval : TRUE-> xmit ready event expected
  @retval : FALSE-> xmit  ready event not expected
*/
void rozofs_storcli_truncate_req_init(uint32_t  socket_ctx_idx, void *recv_buf,rozofs_storcli_resp_pf_t rozofs_storcli_remote_rsp_cbk)
{
   rozofs_rpc_call_hdr_with_sz_t    *com_hdr_p;
   rozofs_storcli_ctx_t *working_ctx_p = NULL;
   int i;
   uint32_t  msg_len;  /* length of the rpc messsage including the header length */
   storcli_truncate_arg_t *storcli_truncate_rq_p = NULL;
   rozofs_rpc_call_hdr_t   hdr;   /* structure that contains the rpc header in host format */
   int      len;       /* effective length of application message               */
   uint8_t  *pmsg;     /* pointer to the first available byte in the application message */
   uint32_t header_len;
   XDR xdrs;
   int errcode = EINVAL;
   /*
   ** allocate a context for the duration of the write
   */
   working_ctx_p = rozofs_storcli_alloc_context();
   if (working_ctx_p == NULL)
   {
     /*
     ** that situation MUST not occur since there the same number of receive buffer and working context!!
     */
     severe("out of working read/write saved context");
     goto failure;
   }
   storcli_truncate_rq_p = &working_ctx_p->storcli_truncate_arg;
   STORCLI_START_NORTH_PROF(working_ctx_p,truncate,0);

   
   /*
   ** Get the full length of the message and adjust it the the length of the applicative part (RPC header+application msg)
   */
   msg_len = ruc_buf_getPayloadLen(recv_buf);
   msg_len -=sizeof(uint32_t);

   /*
   ** save the reference of the received socket since it will be needed for sending back the
   ** response
   */
   working_ctx_p->socketRef    = socket_ctx_idx;
   working_ctx_p->user_param   = NULL;
   working_ctx_p->recv_buf     = recv_buf;
   working_ctx_p->response_cbk = rozofs_storcli_remote_rsp_cbk;
   /*
   ** Get the payload of the receive buffer and set the pointer to the array that describes the write request
   */
   com_hdr_p  = (rozofs_rpc_call_hdr_with_sz_t*) ruc_buf_getPayload(recv_buf);   
   memcpy(&hdr,&com_hdr_p->hdr,sizeof(rozofs_rpc_call_hdr_t));
   /*
   ** swap the rpc header
   */
   scv_call_hdr_ntoh(&hdr);
   pmsg = rozofs_storcli_set_ptr_on_nfs_call_msg((char*)&com_hdr_p->hdr,&header_len);
   if (pmsg == NULL)
   {
     errcode = EFAULT;
     goto failure;
   }
   /*
   ** map the memory on the first applicative RPC byte available and prepare to decode:
   ** notice that we will not call XDR_FREE since the application MUST
   ** provide a pointer for storing the file handle
   */
   len = msg_len - header_len;    
   xdrmem_create(&xdrs,(char*)pmsg,len,XDR_DECODE); 
   /*
   ** store the source transaction id needed for the reply
   */
   working_ctx_p->src_transaction_id =  hdr.hdr.xid;
   /*
   ** decode the RPC message of the truncate request
   */
   if (xdr_storcli_truncate_arg_t(&xdrs,storcli_truncate_rq_p) == FALSE)
   {
      /*
      ** decoding error
      */
      errcode = EFAULT;
      severe("rpc trucnate request decoding error");
      goto failure;
      
   }   
   /*
   ** init of the load balancing group/ projection association table:
   ** That table is ordered: the first corresponds to the storage associated with projection 0, second with 1, etc..
   ** When build that table, we MUST consider the value of the base which is associated with the distribution
   */

   
   uint8_t   rozofs_safe = rozofs_get_rozofs_safe(storcli_truncate_rq_p->layout);
   int lbg_in_distribution = 0;
   for (i = 0; i  <rozofs_safe ; i ++)
   {
    /*
    ** Get the load balancing group associated with the sid
    */
    int lbg_id = rozofs_storcli_get_lbg_for_sid(storcli_truncate_rq_p->cid,storcli_truncate_rq_p->dist_set[i]);
    if (lbg_id < 0)
    {
      /*
      ** there is no associated between the sid and the lbg. It is typically the case
      ** when a new cluster has been added to the configuration and the client does not
      ** know yet the configuration change
      */
      severe("sid is unknown !! %d\n",storcli_truncate_rq_p->dist_set[i]);
      continue;    
    }
     rozofs_storcli_lbg_prj_insert_lbg_and_sid(working_ctx_p->lbg_assoc_tb,lbg_in_distribution,
                                                lbg_id,
                                                storcli_truncate_rq_p->dist_set[i]);  

     rozofs_storcli_lbg_prj_insert_lbg_state(working_ctx_p->lbg_assoc_tb,
                                             lbg_in_distribution,
                                             NORTH_LBG_GET_STATE(working_ctx_p->lbg_assoc_tb[lbg_in_distribution].lbg_id));    
     lbg_in_distribution++;
     if (lbg_in_distribution == rozofs_safe) break;

   }
   /*
   ** allocate a small buffer that will be used for sending the response to the truncate request
   */
   working_ctx_p->xmitBuf = ruc_buf_getBuffer(ROZOFS_STORCLI_NORTH_SMALL_POOL);
   if (working_ctx_p == NULL)
   {
     /*
     ** that situation MUST not occur since there the same number of receive buffer and working context!!
     */
     errcode = ENOMEM;
     severe("out of small buffer");
     goto failure;
   }
   /*
   ** allocate a sequence number for the working context (same aas for read)
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
   /*
   ** set now the working variable specific for handling the truncate
   ** we re-use the structure used for writing even if nothing is written
   */
   uint8_t forward_projection = rozofs_get_rozofs_forward(storcli_truncate_rq_p->layout);
   for (i = 0; i < forward_projection; i++)
   {
     working_ctx_p->prj_ctx[i].prj_state = ROZOFS_PRJ_READ_IDLE;
     working_ctx_p->prj_ctx[i].prj_buf   = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
     if (working_ctx_p->prj_ctx[i].prj_buf == NULL)
     {
       /*
       ** that situation MUST not occur since there the same number of receive buffer and working context!!
       */
       errcode = ENOMEM;
       severe("out of large buffer");
       goto failure;
     }
     /*
     ** increment inuse counter on each buffer since we might need to re-use that packet in case
     ** of retransmission
     */
     working_ctx_p->prj_ctx[i].inuse_valid = 1;
     ruc_buf_inuse_increment(working_ctx_p->prj_ctx[i].prj_buf);
     /*
     ** set the pointer to the bins
     */
     int position = rozofs_storcli_get_position_of_first_byte2write_in_truncate();
     uint8_t *pbuf = (uint8_t*)ruc_buf_getPayload(working_ctx_p->prj_ctx[i].prj_buf); 

     working_ctx_p->prj_ctx[i].bins       = (bin_t*)(pbuf+position); 
   }
   		
   /*
   ** Prepare for request serialization
   */
   memcpy(working_ctx_p->fid_key, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
   working_ctx_p->opcode_key = STORCLI_TRUNCATE;
   {
       /**
        * lock all the file for a truncate
        */
       uint64_t nb_blocks = 0;
       nb_blocks--;
       int ret;
       ret = stc_rng_insert((void*)working_ctx_p,
               STORCLI_READ,working_ctx_p->fid_key,
               0,nb_blocks,
               &working_ctx_p->sched_idx);
       if (ret == 0)
       {
           /*
            ** there is a current request that is processed with the same fid and there is a collision
            */
           return;
       }
       /*
        ** no request pending with that fid, so we can process it right away
        */
       return rozofs_storcli_truncate_req_processing(working_ctx_p);
   }

    /*
    **_____________________________________________
    **  Exception cases
    **_____________________________________________
    */      
       

    /*
    ** there was a failure while attempting to allocate a memory ressource.
    */
failure:
     /*
     ** send back the response with the appropriated error code. 
     ** note: The received buffer (rev_buf)  is
     ** intended to be released by this service in case of error or the TCP transmitter
     ** once it has been passed to the TCP stack.
     */
     rozofs_storcli_reply_error_with_recv_buf(socket_ctx_idx,recv_buf,NULL,rozofs_storcli_remote_rsp_cbk,errcode);
     /*
     ** check if the root context was allocated. Free it if is exist
     */
     if (working_ctx_p != NULL) 
     {
        /*
        ** remove the reference to the recvbuf to avoid releasing it twice
        */
       STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
       working_ctx_p->recv_buf   = NULL;
       rozofs_storcli_release_context(working_ctx_p);
     }
     return;
}