/*
**__________________________________________________________________________
*/
/*
** Transmit state machine of a stream socket.
**
** The machine loops on xmit_p->state until one of the states decides to
** leave (nothing to send, congestion, credit exhausted, disconnection or
** dead transmitter).
**
** @param socket_p : context of the socket (statistics, callbacks, socket reference)
** @param xmit_p   : transmit context (state, current buffer, pending queue, credit)
*/
void af_unix_send_stream_fsm(af_unix_ctx_generic_t *socket_p,com_xmit_template_t *xmit_p)
{
  char *payload;
  int   sent_len;
  int   status;
  int   refcount;

  for (;;)
  {
    switch (xmit_p->state)
    {
      /*
      ** Ready: start the transmission of the current buffer when there is one
      ** (either left over from a congestion or just taken from the pending queue)
      */
      case XMIT_READY:
        if (xmit_p->bufRefCurrent == NULL)
        {
          /*
          ** nothing to send
          */
          return;
        }
        sent_len = (int)ruc_buf_getPayloadLen(xmit_p->bufRefCurrent);
        xmit_p->nbWrite  = 0;
        xmit_p->nb2Write = sent_len;
        xmit_p->state    = XMIT_IN_PRG;
        break;

      /*
      ** In progress: push the bytes of the current buffer that are not yet written
      */
      case XMIT_IN_PRG:
        socket_p->stats.totalXmitAttempts++;
        payload = (char *)ruc_buf_getPayload(xmit_p->bufRefCurrent);
        status  = af_unix_send_stream_generic(socket_p->socketRef,
                                              payload + xmit_p->nbWrite,
                                              xmit_p->nb2Write - xmit_p->nbWrite,
                                              &sent_len);

        if (status == RUC_WOULDBLOCK)
        {
          /*
          ** the socket is congested: arm the end of congestion threshold and leave
          */
          socket_p->stats.totalXmitCongested++;
          xmit_p->congested_flag = 1;
          xmit_p->eoc_flag       = 0;
          xmit_p->eoc_threshold  = AF_UNIX_CONGESTION_DEFAULT_THRESHOLD;
          xmit_p->state          = XMIT_CONGESTED;
          return;
        }

        if (status == RUC_PARTIAL)
        {
          /*
          ** only a part has been written: account for it and re-attempt
          */
          xmit_p->nbWrite                += sent_len;
          socket_p->stats.totalXmitBytes += sent_len;
          break;
        }

        if (status == RUC_OK)
        {
          /*
          ** the whole buffer has been sent: the transmitter drops its reference
          */
          xmit_p->xmit_credit++;
          refcount = ruc_buf_inuse_decrement(xmit_p->bufRefCurrent);
          if (refcount < 0)
          {
            /*
            ** the inuse counter must never be negative
            */
            fatal("Inuse is negative %d",refcount);
          }
          if ((socket_p->userXmitDoneCallBack != NULL) &&
              (ruc_buf_get_opaque_ref(xmit_p->bufRefCurrent) == socket_p))
          {
            /*
            ** the buffer is attached to that socket and the application provides a
            ** callback: it is up to the application to release the xmit buffer
            */
            (socket_p->userXmitDoneCallBack)(socket_p->userRef,socket_p->index,xmit_p->bufRefCurrent);
          }
          else if (refcount == 1)
          {
            /*
            ** need an object remove before the release since that buffer might still
            ** be queued somewhere: typically in the xmit list of a load balancer entry
            */
            ruc_objRemove((ruc_obj_desc_t*)xmit_p->bufRefCurrent);
            ruc_buf_freeBuffer(xmit_p->bufRefCurrent);
          }
          xmit_p->bufRefCurrent = NULL;
          xmit_p->nbWrite       = 0;
          xmit_p->nb2Write      = 0;
          socket_p->stats.totalXmitSuccess++;
          socket_p->stats.totalXmitBytes += sent_len;
          xmit_p->state = XMIT_CHECK_XMITQ;
          break;
        }

        if (status == RUC_DISC)
        {
          /*
          ** error while sending: the transmitter is no more the owner of the buffer
          */
          refcount = ruc_buf_inuse_decrement(xmit_p->bufRefCurrent);
          if (refcount < 0)
          {
            /*
            ** the inuse counter must never be negative
            */
            fatal("Inuse is negative %d",refcount);
          }
          socket_p->stats.totalXmitError++;

          if (socket_p->userDiscCallBack == NULL)
          {
            /*
            ** no application callback: release the buffer when nobody else holds it,
            ** then clean up the socket
            */
            if (refcount == 1)
            {
              ruc_objRemove((ruc_obj_desc_t*)xmit_p->bufRefCurrent);
              ruc_buf_freeBuffer(xmit_p->bufRefCurrent);
            }
            xmit_p->bufRefCurrent = NULL;
            xmit_p->state = XMIT_DEAD;
            af_unix_sock_stream_disconnect_internal(socket_p);
            return;
          }
          else
          {
            void *failed_buf = xmit_p->bufRefCurrent;

            xmit_p->bufRefCurrent = NULL;
            if (ruc_buf_get_opaque_ref(failed_buf) != socket_p)
            {
              /*
              ** the buffer is attached to another socket: release it here when this
              ** was the last user, and do not report it to the application since
              ** the buffer does not belong to that connection
              */
              if (refcount == 1)
              {
                ruc_objRemove((ruc_obj_desc_t*)failed_buf);
                ruc_buf_freeBuffer(failed_buf);
              }
              failed_buf = NULL;
            }
            /*
            ** caution: the internal disconnection MUST be called before the application
            ** callback since the application might attempt a direct re-connection.
            ** When a buffer is passed, its release is up to the application.
            */
            xmit_p->state = XMIT_DEAD;
            af_unix_sock_stream_disconnect_internal(socket_p);
            (socket_p->userDiscCallBack)(socket_p->userRef,socket_p->index,failed_buf,errno);
            return;
          }
        }
        /*
        ** any other status: loop again on the current state
        */
        break;

      /*
      ** Check queue: select the next buffer to send, within the limit of the credit
      */
      case XMIT_CHECK_XMITQ:
        if (xmit_p->xmit_credit >= xmit_p->xmit_credit_conf)
        {
          /*
          ** credit exhausted: assert the flag that requests a re-activation on the
          ** next run of the socket controller
          */
          xmit_p->xmit_credit   = 0;
          xmit_p->xmit_req_flag = 1;
          return;
        }
        if (xmit_p->bufRefCurrent != NULL)
        {
          /*
          ** there is already a current buffer (previous congestion): send it
          */
          xmit_p->state = XMIT_IN_PRG;
          break;
        }
        /*
        ** read the pending xmit queue (only priority 0 is considered)
        */
        xmit_p->bufRefCurrent = com_xmit_pendingQueue_get(xmit_p,0);
        if (xmit_p->bufRefCurrent == NULL)
        {
          /*
          ** the queue is empty
          */
          xmit_p->xmit_credit = 0;
          xmit_p->state       = XMIT_READY;
          return;
        }
        /*
        ** take a reference on the new buffer and go back to send it
        */
        ruc_buf_inuse_increment(xmit_p->bufRefCurrent);
        xmit_p->state = XMIT_READY;
        break;

      /*
      ** Congested: count down the threshold, resume the transmission once it reaches 0
      */
      case XMIT_CONGESTED:
        xmit_p->eoc_threshold--;
        if (xmit_p->eoc_threshold != 0)
        {
          return;
        }
        xmit_p->eoc_flag       = 1;
        xmit_p->congested_flag = 0;
        xmit_p->state          = XMIT_IN_PRG;
        break;

      /*
      ** Dead transmitter: nothing can be sent
      */
      case XMIT_DEAD:
        return;
    }
  }
}
/* Example no. 2 */
/**
  Initial truncate request

  Allocates a working context, decodes the RPC truncate request found in the
  received buffer, builds the load balancing group/projection association
  table, allocates the response buffer and the projection buffers, and then
  either starts the processing of the request or leaves it queued when the
  range lock insertion reports a collision on the same FID.

  On any failure an error reply is sent with the received buffer and the
  working context (when allocated) is released.

  @param socket_ctx_idx: index of the socket context on which the request has been received
                         (saved for sending back the response)
  @param recv_buf: received buffer that contains the RPC truncate request
  @param rozofs_storcli_remote_rsp_cbk: callback for sending out the response

  @retval none
*/
void rozofs_storcli_truncate_req_init(uint32_t  socket_ctx_idx, void *recv_buf,rozofs_storcli_resp_pf_t rozofs_storcli_remote_rsp_cbk)
{
   rozofs_rpc_call_hdr_with_sz_t    *com_hdr_p;
   rozofs_storcli_ctx_t *working_ctx_p = NULL;
   int i;
   uint32_t  msg_len;  /* length of the rpc messsage including the header length */
   storcli_truncate_arg_t *storcli_truncate_rq_p = NULL;
   rozofs_rpc_call_hdr_t   hdr;   /* structure that contains the rpc header in host format */
   int      len;       /* effective length of application message               */
   uint8_t  *pmsg;     /* pointer to the first available byte in the application message */
   uint32_t header_len;
   XDR xdrs;
   int errcode = EINVAL;
   /*
   ** allocate a context for the duration of the truncate
   */
   working_ctx_p = rozofs_storcli_alloc_context();
   if (working_ctx_p == NULL)
   {
     /*
     ** that situation MUST not occur since there the same number of receive buffer and working context!!
     */
     severe("out of working read/write saved context");
     goto failure;
   }
   storcli_truncate_rq_p = &working_ctx_p->storcli_truncate_arg;
   STORCLI_START_NORTH_PROF(working_ctx_p,truncate,0);


   /*
   ** Get the full length of the message and adjust it to the length of the applicative part (RPC header+application msg)
   */
   msg_len = ruc_buf_getPayloadLen(recv_buf);
   msg_len -=sizeof(uint32_t);

   /*
   ** save the reference of the received socket since it will be needed for sending back the
   ** response
   */
   working_ctx_p->socketRef    = socket_ctx_idx;
   working_ctx_p->user_param   = NULL;
   working_ctx_p->recv_buf     = recv_buf;
   working_ctx_p->response_cbk = rozofs_storcli_remote_rsp_cbk;
   /*
   ** Get the payload of the receive buffer and take a local copy of the RPC header
   */
   com_hdr_p  = (rozofs_rpc_call_hdr_with_sz_t*) ruc_buf_getPayload(recv_buf);
   memcpy(&hdr,&com_hdr_p->hdr,sizeof(rozofs_rpc_call_hdr_t));
   /*
   ** swap the rpc header
   */
   scv_call_hdr_ntoh(&hdr);
   pmsg = rozofs_storcli_set_ptr_on_nfs_call_msg((char*)&com_hdr_p->hdr,&header_len);
   if (pmsg == NULL)
   {
     errcode = EFAULT;
     goto failure;
   }
   /*
   ** map the memory on the first applicative RPC byte available and prepare to decode:
   ** notice that we will not call XDR_FREE since the application MUST
   ** provide a pointer for storing the file handle
   */
   len = msg_len - header_len;
   xdrmem_create(&xdrs,(char*)pmsg,len,XDR_DECODE);
   /*
   ** store the source transaction id needed for the reply
   */
   working_ctx_p->src_transaction_id =  hdr.hdr.xid;
   /*
   ** decode the RPC message of the truncate request
   */
   if (xdr_storcli_truncate_arg_t(&xdrs,storcli_truncate_rq_p) == FALSE)
   {
      /*
      ** decoding error
      */
      errcode = EFAULT;
      severe("rpc trucnate request decoding error");
      goto failure;

   }
   /*
   ** init of the load balancing group/ projection association table:
   ** That table is ordered: the first corresponds to the storage associated with projection 0, second with 1, etc..
   ** When build that table, we MUST consider the value of the base which is associated with the distribution
   */


   uint8_t   rozofs_safe = rozofs_get_rozofs_safe(storcli_truncate_rq_p->layout);
   int lbg_in_distribution = 0;
   for (i = 0; i  <rozofs_safe ; i ++)
   {
    /*
    ** Get the load balancing group associated with the sid
    */
    int lbg_id = rozofs_storcli_get_lbg_for_sid(storcli_truncate_rq_p->cid,storcli_truncate_rq_p->dist_set[i]);
    if (lbg_id < 0)
    {
      /*
      ** there is no associated between the sid and the lbg. It is typically the case
      ** when a new cluster has been added to the configuration and the client does not
      ** know yet the configuration change
      */
      severe("sid is unknown !! %d\n",storcli_truncate_rq_p->dist_set[i]);
      continue;
    }
     rozofs_storcli_lbg_prj_insert_lbg_and_sid(working_ctx_p->lbg_assoc_tb,lbg_in_distribution,
                                                lbg_id,
                                                storcli_truncate_rq_p->dist_set[i]);

     rozofs_storcli_lbg_prj_insert_lbg_state(working_ctx_p->lbg_assoc_tb,
                                             lbg_in_distribution,
                                             NORTH_LBG_GET_STATE(working_ctx_p->lbg_assoc_tb[lbg_in_distribution].lbg_id));
     lbg_in_distribution++;
     if (lbg_in_distribution == rozofs_safe) break;

   }
   /*
   ** allocate a small buffer that will be used for sending the response to the truncate request
   */
   working_ctx_p->xmitBuf = ruc_buf_getBuffer(ROZOFS_STORCLI_NORTH_SMALL_POOL);
   if (working_ctx_p->xmitBuf == NULL)
   {
     /*
     ** the small buffer pool is exhausted: the check MUST be done on the buffer
     ** that has just been requested and not on the working context
     */
     errcode = ENOMEM;
     severe("out of small buffer");
     goto failure;
   }
   /*
   ** allocate a sequence number for the working context (same aas for read)
   */
   working_ctx_p->read_seqnum = rozofs_storcli_allocate_read_seqnum();
   /*
   ** set now the working variable specific for handling the truncate
   ** we re-use the structure used for writing even if nothing is written
   */
   uint8_t forward_projection = rozofs_get_rozofs_forward(storcli_truncate_rq_p->layout);
   for (i = 0; i < forward_projection; i++)
   {
     working_ctx_p->prj_ctx[i].prj_state = ROZOFS_PRJ_READ_IDLE;
     working_ctx_p->prj_ctx[i].prj_buf   = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
     if (working_ctx_p->prj_ctx[i].prj_buf == NULL)
     {
       /*
       ** the large buffer pool is exhausted
       */
       errcode = ENOMEM;
       severe("out of large buffer");
       goto failure;
     }
     /*
     ** increment inuse counter on each buffer since we might need to re-use that packet in case
     ** of retransmission
     */
     working_ctx_p->prj_ctx[i].inuse_valid = 1;
     ruc_buf_inuse_increment(working_ctx_p->prj_ctx[i].prj_buf);
     /*
     ** set the pointer to the bins
     */
     int position = rozofs_storcli_get_position_of_first_byte2write_in_truncate();
     uint8_t *pbuf = (uint8_t*)ruc_buf_getPayload(working_ctx_p->prj_ctx[i].prj_buf);

     working_ctx_p->prj_ctx[i].bins       = (bin_t*)(pbuf+position);
   }

   /*
   ** Prepare for request serialization
   */
   memcpy(working_ctx_p->fid_key, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
   working_ctx_p->opcode_key = STORCLI_TRUNCATE;
   {
       /**
        * lock all the file for a truncate: the number of blocks is set to the
        * greatest 64 bits value (0 - 1 on an unsigned)
        */
       uint64_t nb_blocks = 0;
       nb_blocks--;
       int ret;
       ret = stc_rng_insert((void*)working_ctx_p,
               STORCLI_READ,working_ctx_p->fid_key,
               0,nb_blocks,
               &working_ctx_p->sched_idx);
       if (ret == 0)
       {
           /*
            ** there is a current request that is processed with the same fid and there is a collision
            */
           return;
       }
       /*
        ** no request pending with that fid, so we can process it right away.
        ** The call is not placed in the return statement since this function
        ** returns void.
        */
       rozofs_storcli_truncate_req_processing(working_ctx_p);
       return;
   }

    /*
    **_____________________________________________
    **  Exception cases
    **_____________________________________________
    */


    /*
    ** there was a failure while decoding the request or while attempting to
    ** allocate a memory ressource.
    */
failure:
     /*
     ** send back the response with the appropriated error code.
     ** note: The received buffer (rev_buf)  is
     ** intended to be released by this service in case of error or the TCP transmitter
     ** once it has been passed to the TCP stack.
     */
     rozofs_storcli_reply_error_with_recv_buf(socket_ctx_idx,recv_buf,NULL,rozofs_storcli_remote_rsp_cbk,errcode);
     /*
     ** check if the root context was allocated. Free it if is exist
     */
     if (working_ctx_p != NULL)
     {
        /*
        ** remove the reference to the recvbuf to avoid releasing it twice
        */
       STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
       working_ctx_p->recv_buf   = NULL;
       rozofs_storcli_release_context(working_ctx_p);
     }
     return;
}
/**
  Check if a load balancing group can be selected

  A load balancing group is selectable only when its supervision state is
  STORCLI_LBG_RUNNING. Otherwise, when no poll is in progress and the next
  poll date has been reached, the service attempts to send a NULL procedure
  (SP_NULL) request towards the load balancing group.

  @param lbg_id: index of the load balancing group

  @retval 1 the load balancing group is in the running state
  @retval 0 in all the other cases (index out of range, poll in progress,
            poll just triggered, poll date not reached or error)
*/
int storcli_lbg_cnx_sup_is_selectable(int lbg_id)
{
  uint64_t current_date;
  storcli_lbg_cnx_supervision_t *p;
  void *xmit_buf = NULL;
  int ret;

  /*
  ** the index is used for accessing the supervision table: reject any value
  ** that is out of range, negative values included
  */
  if ((lbg_id < 0) || (lbg_id >= STORCLI_MAX_LBG)) return 0;

  p = &storcli_lbg_cnx_supervision_tab[lbg_id];

  if (p->state == STORCLI_LBG_RUNNING) return 1;

  current_date = timer_get_ticker();

  /*
  ** check if poll is active
  */
  if (p->poll_state == STORCLI_POLL_IN_PRG) return 0;
  /*
  ** check the period
  */
  if (current_date > p->next_poll_date)
  {
    /*
    ** attempt to poll
    */
      p->poll_counter++;

      xmit_buf = ruc_buf_getBuffer(ROZOFS_STORCLI_SOUTH_LARGE_POOL);
      if (xmit_buf == NULL)
      {
         return 0;
      }
      p->poll_state = STORCLI_POLL_IN_PRG;
      /*
      ** increment the inuse to avoid a release of the xmit buffer by rozofs_sorcli_send_rq_common()
      */
      ruc_buf_inuse_increment(xmit_buf);

      ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_RPC_NULL_PROC_LBG),STORAGE_PROGRAM,STORAGE_VERSION,SP_NULL,
                                          (xdrproc_t) xdr_void, (caddr_t) NULL,
                                           xmit_buf,
                                           lbg_id,
                                           0,
                                           0,
                                           rozofs_storcli_sp_null_processing_cbk,
                                           (void*)NULL);
      ruc_buf_inuse_decrement(xmit_buf);

     if (ret < 0)
     {
      /*
      ** direct need to free the xmit buffer
      ** NOTE(review): poll_state is left to STORCLI_POLL_IN_PRG on that path; unless
      ** it is reset elsewhere, the next calls will always see a poll in progress
      ** -- to be confirmed
      */
      ruc_buf_freeBuffer(xmit_buf);
      return 0;

     }
     /*
     ** Check if there is direct response from tx module: poll_state has been set
     ** to STORCLI_POLL_IN_PRG before the sending, so finding another value here
     ** means that it has been changed during the call
     */
     if (p->poll_state == STORCLI_POLL_ERR)
     {
       /*
       ** set the next expiration date
       */
       p->next_poll_date = current_date+STORCLI_LBG_SP_NULL_INTERVAL;
       /*
       ** release the xmit buffer since there was a direct reply from the lbg while attempting to send the buffer
       */
      ruc_buf_freeBuffer(xmit_buf);
       return 0;
     }
  }
  return 0;
}