Ejemplo n.º 1
0
/*
** That function is called when all the projection are ready to be sent

 @param working_ctx_p: pointer to the root context associated with the top level write request

*/
void rozofs_storcli_write_repair_req_processing(rozofs_storcli_ctx_t *working_ctx_p)
{

  storcli_read_arg_t *storcli_read_rq_p = (storcli_read_arg_t*)&working_ctx_p->storcli_read_arg;
  uint8_t layout = storcli_read_rq_p->layout;
  uint8_t   rozofs_forward;
  uint8_t   projection_id;
  int       error=0;
  int       ret;
  rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
  uint8_t  bsize  = storcli_read_rq_p->bsize;
  int prj_size_in_msg = rozofs_get_max_psize_in_msg(layout,bsize);
  sp_write_repair_arg_no_bins_t  *request; 
  sp_write_repair_arg_no_bins_t   repair_prj_args;
  sp_write_repair2_arg_no_bins_t *request2; 
  sp_write_repair2_arg_no_bins_t  repair2_prj_args;
      
  rozofs_forward = rozofs_get_rozofs_forward(layout);
  
  /*
  ** check if the buffer is still valid: we might face the situation where the rozofsmount
  ** time-out and re-allocate the write buffer located in shared memory for another
  ** transaction (either read or write:
  ** the control must take place only where here is the presence of a shared memory for the write
  */
  error  = 0;
  if (working_ctx_p->shared_mem_p!= NULL)
  {
      uint32_t *xid_p = (uint32_t*)working_ctx_p->shared_mem_p;
      if (*xid_p !=  working_ctx_p->src_transaction_id)
      {
        /*
        ** the source has aborted the request
        */
        error = EPROTO;
      }      
  } 
  /*
  ** send back the response of the read request towards rozofsmount
  */
  rozofs_storcli_read_reply_success(working_ctx_p);
   /*
   ** allocate a sequence number for the working context:
   **   This is mandatory to avoid any confusion with a late response of the previous read request
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
  /*
  ** check if it make sense to send the repaired blocks
  */
  if (error)
  {
    /*
    ** the requester has released the buffer and it could be possible that the
    ** rozofsmount uses it for another purpose, so the data that have been repaired
    ** might be wrong, so don't take the right to write wrong data for which we can can 
    ** a good crc !!
    */
    goto fail;
  }
  
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     void  *xmit_buf;  
     int ret;  
	 
     /*
     ** skip the projections for which no error has been detected 
     */
     if (storcli_storage_supports_repair2) {
	   if (ROZOFS_BITMAP64_TEST_ALL0(working_ctx_p->prj_ctx[projection_id].crc_err_bitmap)) continue;
	 }
	 else {
	   if (working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0] == 0)  continue;
	 } 
	 
	 
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the ressource control already took place
       */       
       error = EIO;
       goto fail;     
     }
     /*
     ** fill partially the common header
     */
	 if (storcli_storage_supports_repair2) {
       request2   = &repair2_prj_args;
       request2->cid = storcli_read_rq_p->cid;
       request2->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request2->layout        = storcli_read_rq_p->layout;
       request2->bsize         = storcli_read_rq_p->bsize;
       /*
       ** the case of spare 1 must not occur because repair is done for th eoptimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request2->spare = 1;
       else request2->spare = 0;
       memcpy(request2->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request2->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
  //CRCrequest->proj_id = projection_id;
       request2->proj_id = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request2->sid,rozofs_forward);
       request2->bid     = storcli_read_rq_p->bid;
       request2->bitmap[0]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];     
       request2->bitmap[1]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[1];     
       request2->bitmap[2]  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[2];     
       int nb_blocks       = ROZOFS_BITMAP64_NB_BIT1(request2->bitmap);
       request2->nb_proj    = nb_blocks;     

       /*
       ** set the length of the bins part: need to compute the number of blocks
       */

       int bins_len = (prj_size_in_msg * nb_blocks);
       request2->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution we might have a direct reply if there is a direct error at load balancing group while
       ** ateempting to send the RPC message-> typically a disconnection of the TCP connection 
       ** As a consequence the response fct 'rozofs_storcli_write_repair_req_processing_cbk) can be called
       ** prior returning from rozofs_sorcli_send_rq_common')
       ** anticipate the status of the xmit state of the projection and lock the section to
       ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
       ** --> need to take care because the write context is released after the reply error sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

       ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR2,
                                           (xdrproc_t) xdr_sp_write_repair2_arg_no_bins_t, (caddr_t) request2,
                                        	xmit_buf,
                                        	working_ctx_p->read_seqnum,
                                        	(uint32_t) projection_id,
                                        	bins_len,
                                        	rozofs_storcli_write_repair_req_processing_cbk,
                                           (void*)working_ctx_p);
     }
	 else {
	 
       request   = &repair_prj_args;
       request->cid = storcli_read_rq_p->cid;
       request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       request->layout        = storcli_read_rq_p->layout;
       request->bsize         = storcli_read_rq_p->bsize;
       /*
       ** the case of spare 1 must not occur because repair is done for th eoptimal distribution only
       */
       if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
       else request->spare = 0;
       memcpy(request->dist_set, storcli_read_rq_p->dist_set, ROZOFS_SAFE_MAX_STORCLI*sizeof (uint8_t));
       memcpy(request->fid, storcli_read_rq_p->fid, sizeof (sp_uuid_t));
  //CRCrequest->proj_id = projection_id;
       request->proj_id = rozofs_storcli_get_mojette_proj_id(storcli_read_rq_p->dist_set,request->sid,rozofs_forward);
       request->bid     = storcli_read_rq_p->bid;
       request->bitmap  = working_ctx_p->prj_ctx[projection_id].crc_err_bitmap[0];     
       int nb_blocks       = ROZOFS_BITMAP64_NB_BIT1_FUNC((uint8_t*)&request->bitmap,8);
       request->nb_proj    = nb_blocks;     

       /*
       ** set the length of the bins part: need to compute the number of blocks
       */

       int bins_len = (prj_size_in_msg * nb_blocks);
       request->len = bins_len; /**< bins length MUST be in bytes !!! */
       uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
       STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,bins_len);
       /*
       ** caution we might have a direct reply if there is a direct error at load balancing group while
       ** ateempting to send the RPC message-> typically a disconnection of the TCP connection 
       ** As a consequence the response fct 'rozofs_storcli_write_repair_req_processing_cbk) can be called
       ** prior returning from rozofs_sorcli_send_rq_common')
       ** anticipate the status of the xmit state of the projection and lock the section to
       ** avoid a reply error before returning from rozofs_sorcli_send_rq_common() 
       ** --> need to take care because the write context is released after the reply error sent to rozofsmount
       */
       working_ctx_p->write_ctx_lock = 1;
       prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;

       ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_WRITE_REPAIR,
                                           (xdrproc_t) xdr_sp_write_repair_arg_no_bins_t, (caddr_t) request,
                                        	xmit_buf,
                                        	working_ctx_p->read_seqnum,
                                        	(uint32_t) projection_id,
                                        	bins_len,
                                        	rozofs_storcli_write_repair_req_processing_cbk,
                                           (void*)working_ctx_p);	   
	 }										   

     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
        /*
	** there is no retry, just keep on with a potential other projection to repair
	*/
        STORCLI_ERR_PROF(repair_prj_err);
        STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);
	prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_ERROR;
	continue;
     } 
     else
     {
       /*
       ** check if the state has not been changed: -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
          /*
	  ** it looks like that we cannot repair that preojection, check if there is some other
	  */
          STORCLI_STOP_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),repair_prj,0);

       }      
     }
   }
   /*
   ** check if there some write repair request pending, in such a case we wait for the end of the repair
   ** (answer from the storage node
   */
    ret = rozofs_storcli_all_prj_write_repair_check(storcli_read_rq_p->layout,
                                                    working_ctx_p->prj_ctx);
    if (ret == 0)
    {
       /*
       ** there is some pending write
       */
       return;
    }   
  
fail:
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,repair,0);
     rozofs_storcli_release_context(working_ctx_p);  
  return;

}
Ejemplo n.º 2
0
/**
  Initial truncate request
    
  @param socket_ctx_p: pointer to the af unix socket
  @param socketId: reference of the socket (not used)
  @param rozofs_storcli_remote_rsp_cbk: callback for sending out the response
 
   @retval : TRUE-> xmit ready event expected
  @retval : FALSE-> xmit  ready event not expected
*/
void rozofs_storcli_truncate_req_init(uint32_t  socket_ctx_idx, void *recv_buf,rozofs_storcli_resp_pf_t rozofs_storcli_remote_rsp_cbk)
{
   rozofs_rpc_call_hdr_with_sz_t    *com_hdr_p;
   rozofs_storcli_ctx_t *working_ctx_p = NULL;
   int i;
   uint32_t  msg_len;  /* length of the rpc messsage including the header length */
   storcli_truncate_arg_t *storcli_truncate_rq_p = NULL;
   rozofs_rpc_call_hdr_t   hdr;   /* structure that contains the rpc header in host format */
   int      len;       /* effective length of application message               */
   uint8_t  *pmsg;     /* pointer to the first available byte in the application message */
   uint32_t header_len;
   XDR xdrs;
   int errcode = EINVAL;
   /*
   ** allocate a context for the duration of the write
   */
   working_ctx_p = rozofs_storcli_alloc_context();
   if (working_ctx_p == NULL)
   {
     /*
     ** that situation MUST not occur since there the same number of receive buffer and working context!!
     */
     severe("out of working read/write saved context");
     goto failure;
   }
   storcli_truncate_rq_p = &working_ctx_p->storcli_truncate_arg;
   STORCLI_START_NORTH_PROF(working_ctx_p,truncate,0);

   
   /*
   ** Get the full length of the message and adjust it the the length of the applicative part (RPC header+application msg)
   */
   msg_len = ruc_buf_getPayloadLen(recv_buf);
   msg_len -=sizeof(uint32_t);

   /*
   ** save the reference of the received socket since it will be needed for sending back the
   ** response
   */
   working_ctx_p->socketRef    = socket_ctx_idx;
   working_ctx_p->user_param   = NULL;
   working_ctx_p->recv_buf     = recv_buf;
   working_ctx_p->response_cbk = rozofs_storcli_remote_rsp_cbk;
   /*
   ** Get the payload of the receive buffer and set the pointer to the array that describes the write request
   */
   com_hdr_p  = (rozofs_rpc_call_hdr_with_sz_t*) ruc_buf_getPayload(recv_buf);   
   memcpy(&hdr,&com_hdr_p->hdr,sizeof(rozofs_rpc_call_hdr_t));
   /*
   ** swap the rpc header
   */
   scv_call_hdr_ntoh(&hdr);
   pmsg = rozofs_storcli_set_ptr_on_nfs_call_msg((char*)&com_hdr_p->hdr,&header_len);
   if (pmsg == NULL)
   {
     errcode = EFAULT;
     goto failure;
   }
   /*
   ** map the memory on the first applicative RPC byte available and prepare to decode:
   ** notice that we will not call XDR_FREE since the application MUST
   ** provide a pointer for storing the file handle
   */
   len = msg_len - header_len;    
   xdrmem_create(&xdrs,(char*)pmsg,len,XDR_DECODE); 
   /*
   ** store the source transaction id needed for the reply
   */
   working_ctx_p->src_transaction_id =  hdr.hdr.xid;
   /*
   ** decode the RPC message of the truncate request
   */
   if (xdr_storcli_truncate_arg_t(&xdrs,storcli_truncate_rq_p) == FALSE)
   {
      /*
      ** decoding error
      */
      errcode = EFAULT;
      severe("rpc trucnate request decoding error");
      goto failure;
      
   }   
   /*
   ** init of the load balancing group/ projection association table:
   ** That table is ordered: the first corresponds to the storage associated with projection 0, second with 1, etc..
   ** When build that table, we MUST consider the value of the base which is associated with the distribution
   */

   
   uint8_t   rozofs_safe = rozofs_get_rozofs_safe(storcli_truncate_rq_p->layout);
   int lbg_in_distribution = 0;
   for (i = 0; i  <rozofs_safe ; i ++)
   {
    /*
    ** Get the load balancing group associated with the sid
    */
    int lbg_id = rozofs_storcli_get_lbg_for_sid(storcli_truncate_rq_p->cid,storcli_truncate_rq_p->dist_set[i]);
    if (lbg_id < 0)
    {
      /*
      ** there is no associated between the sid and the lbg. It is typically the case
      ** when a new cluster has been added to the configuration and the client does not
      ** know yet the configuration change
      */
      severe("sid is unknown !! %d\n",storcli_truncate_rq_p->dist_set[i]);
      continue;    
    }
     rozofs_storcli_lbg_prj_insert_lbg_and_sid(working_ctx_p->lbg_assoc_tb,lbg_in_distribution,
                                                lbg_id,
                                                storcli_truncate_rq_p->dist_set[i]);  

     rozofs_storcli_lbg_prj_insert_lbg_state(working_ctx_p->lbg_assoc_tb,
                                             lbg_in_distribution,
                                             NORTH_LBG_GET_STATE(working_ctx_p->lbg_assoc_tb[lbg_in_distribution].lbg_id));    
     lbg_in_distribution++;
     if (lbg_in_distribution == rozofs_safe) break;

   }
   /*
   ** allocate a small buffer that will be used for sending the response to the truncate request
   */
   working_ctx_p->xmitBuf = ruc_buf_getBuffer(ROZOFS_STORCLI_NORTH_SMALL_POOL);
   if (working_ctx_p == NULL)
   {
     /*
     ** that situation MUST not occur since there the same number of receive buffer and working context!!
     */
     errcode = ENOMEM;
     severe("out of small buffer");
     goto failure;
   }
   /*
   ** allocate a sequence number for the working context (same aas for read)
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
   /*
   ** set now the working variable specific for handling the truncate
   ** we re-use the structure used for writing even if nothing is written
   */
   uint8_t forward_projection = rozofs_get_rozofs_forward(storcli_truncate_rq_p->layout);
   for (i = 0; i < forward_projection; i++)
   {
     working_ctx_p->prj_ctx[i].prj_state = ROZOFS_PRJ_READ_IDLE;
     working_ctx_p->prj_ctx[i].prj_buf   = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
     if (working_ctx_p->prj_ctx[i].prj_buf == NULL)
     {
       /*
       ** that situation MUST not occur since there the same number of receive buffer and working context!!
       */
       errcode = ENOMEM;
       severe("out of large buffer");
       goto failure;
     }
     /*
     ** increment inuse counter on each buffer since we might need to re-use that packet in case
     ** of retransmission
     */
     working_ctx_p->prj_ctx[i].inuse_valid = 1;
     ruc_buf_inuse_increment(working_ctx_p->prj_ctx[i].prj_buf);
     /*
     ** set the pointer to the bins
     */
     int position = rozofs_storcli_get_position_of_first_byte2write_in_truncate();
     uint8_t *pbuf = (uint8_t*)ruc_buf_getPayload(working_ctx_p->prj_ctx[i].prj_buf); 

     working_ctx_p->prj_ctx[i].bins       = (bin_t*)(pbuf+position); 
   }
   		
   /*
   ** Prepare for request serialization
   */
   memcpy(working_ctx_p->fid_key, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
   working_ctx_p->opcode_key = STORCLI_TRUNCATE;
   {
       /**
        * lock all the file for a truncate
        */
       uint64_t nb_blocks = 0;
       nb_blocks--;
       int ret;
       ret = stc_rng_insert((void*)working_ctx_p,
               STORCLI_READ,working_ctx_p->fid_key,
               0,nb_blocks,
               &working_ctx_p->sched_idx);
       if (ret == 0)
       {
           /*
            ** there is a current request that is processed with the same fid and there is a collision
            */
           return;
       }
       /*
        ** no request pending with that fid, so we can process it right away
        */
       return rozofs_storcli_truncate_req_processing(working_ctx_p);
   }

    /*
    **_____________________________________________
    **  Exception cases
    **_____________________________________________
    */      
       

    /*
    ** there was a failure while attempting to allocate a memory ressource.
    */
failure:
     /*
     ** send back the response with the appropriated error code. 
     ** note: The received buffer (rev_buf)  is
     ** intended to be released by this service in case of error or the TCP transmitter
     ** once it has been passed to the TCP stack.
     */
     rozofs_storcli_reply_error_with_recv_buf(socket_ctx_idx,recv_buf,NULL,rozofs_storcli_remote_rsp_cbk,errcode);
     /*
     ** check if the root context was allocated. Free it if is exist
     */
     if (working_ctx_p != NULL) 
     {
        /*
        ** remove the reference to the recvbuf to avoid releasing it twice
        */
       STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
       working_ctx_p->recv_buf   = NULL;
       rozofs_storcli_release_context(working_ctx_p);
     }
     return;
}