Пример #1
0
/**
 * Read projection blocks from a storage server through the blocking RPC
 * client and copy them into the caller's buffer.
 *
 * @param clt: storage client; clt->rpcclt.client is the RPC handle used
 * @param cid: cluster identifier put in the request
 * @param sid: storage identifier put in the request
 * @param layout: layout of the file
 * @param spare: spare indicator put in the request
 * @param dist_set: distribution set (ROZOFS_SAFE_MAX entries are copied)
 * @param fid: file identifier
 * @param bid: index of the first block to read
 * @param nb_proj: number of projection blocks requested
 * @param nb_proj_recv: receives the number of projection blocks returned
 * @param bins: destination buffer
 *        NOTE(review): assumed to hold nb_proj blocks of
 *        (max_psize * sizeof(bin_t) + sizeof(rozofs_stor_bins_hdr_t)) bytes,
 *        the same sizing sclient_write() uses -- confirm against callers.
 *
 * @return 0 on success, including the case where the storage answers ENOENT
 *         (*nb_proj_recv is then 0); -1 otherwise with errno set.
 */
int sclient_read_rbs(sclient_t * clt, cid_t cid, sid_t sid, uint8_t layout,
        uint8_t spare, sid_t dist_set[ROZOFS_SAFE_MAX], fid_t fid, bid_t bid,
        uint32_t nb_proj, uint32_t * nb_proj_recv, bin_t * bins) {
    int status = -1;
    int err = 0;
    int saved_errno;
    sp_read_ret_t *ret = 0;
    sp_read_arg_t args;
    size_t block_size;
    size_t recv_len;

    DEBUG_FUNCTION;

    // Fill request
    args.cid = cid;
    args.sid = sid;
    args.layout = layout;
    args.spare = spare;
    memcpy(args.dist_set, dist_set, sizeof (sid_t) * ROZOFS_SAFE_MAX);
    memcpy(args.fid, fid, sizeof (fid_t));
    args.bid = bid;
    args.nb_proj = nb_proj;

    if (!(clt->rpcclt.client) ||
            !(ret = sp_read_1(&args, clt->rpcclt.client))) {
        clt->status = 0;
        warning("sclient_read_rbs failed: storage read failed "
                "(no response from storage server: %s)", clt->host);
        errno = EPROTO;
        goto out;
    }
    if (ret->status != 0) {
        err = ret->sp_read_ret_t_u.error;
        if (err == ENOENT) {
            // The storage answered but the file is not there.
            // This is possible when it has just been removed.
            *nb_proj_recv = 0;
            status = 0;
        } else {
            severe("sclient_read_rbs failed (error from %s): (%s)",
                    clt->host, strerror(err));
        }
        // Set errno after logging so the logger cannot overwrite it
        errno = err;
        goto out;
    }

    // Size in bytes of one projection block (header + bins)
    block_size = (rozofs_get_max_psize(layout) * sizeof (bin_t))
            + sizeof (rozofs_stor_bins_hdr_t);
    recv_len = ret->sp_read_ret_t_u.rsp.bins.bins_len;

    // The length comes from the network: never copy more than what was
    // requested, otherwise a bogus reply overflows the caller's buffer.
    if ((uint64_t) recv_len > (uint64_t) nb_proj * block_size) {
        severe("sclient_read_rbs failed (error from %s): (%s)",
                clt->host, strerror(EPROTO));
        errno = EPROTO;
        goto out;
    }

    // XXX could we avoid memcpy ??
    memcpy(bins, ret->sp_read_ret_t_u.rsp.bins.bins_val, recv_len);

    *nb_proj_recv = recv_len / block_size;

    status = 0;
out:
    if (ret) {
        // Releasing the reply must not alter the errno reported to the caller
        saved_errno = errno;
        xdr_free((xdrproc_t) xdr_sp_read_ret_t, (char *) ret);
        errno = saved_errno;
    }
    return status;
}
Пример #2
0
/**
 * Non-blocking service routine for a projection write request.
 *
 * The bins to write are not copied: they are taken directly from the payload
 * of the received buffer. The received buffer is then reused as transmit
 * buffer for the reply.
 *
 * @param pt: decoded request arguments (sp_write_arg_t)
 * @param req_ctx_p: RPC server context of the request; released before return
 */
void sp_write_1_svc_nb(void * pt, rozorpc_srv_ctx_t *req_ctx_p) {
    static sp_write_ret_t ret;
    sp_write_arg_t * args = (sp_write_arg_t *) pt;
    storage_t *st = 0;
    // Variable to be used in a later version.
    uint8_t version = 0;

    /*
    ** locate the bins inside the received buffer
    */
    int bins_offset = storage_get_position_of_first_byte2write_from_write_req();
    char *bins_in_rcv_buf = (char*)ruc_buf_getPayload(req_ctx_p->recv_buf) + bins_offset;

    DEBUG_FUNCTION;

    START_PROFILING_IO(write, args->nb_proj * rozofs_get_max_psize(args->layout)
            * sizeof (bin_t));

    /*
    ** assume failure until the projections are written
    */
    ret.status = SP_FAILURE;

    st = storaged_lookup(args->cid, args->sid);
    if (st == 0) {
        // No storage for the couple (cid;sid)
        ret.sp_write_ret_t_u.error = errno;
    } else if (storage_write(st, args->layout, (sid_t *) args->dist_set,
            args->spare, (unsigned char *) args->fid, args->bid,
            args->nb_proj, version, &ret.sp_write_ret_t_u.file_size,
            (bin_t *) bins_in_rcv_buf) <= 0) {
        // Writing the projections failed
        ret.sp_write_ret_t_u.error = errno;
    } else {
        ret.status = SP_SUCCESS;
    }

    /*
    ** the received buffer becomes the transmit buffer of the reply
    */
    req_ctx_p->xmitBuf  = req_ctx_p->recv_buf;
    req_ctx_p->recv_buf = NULL;
    rozorpc_srv_forward_reply(req_ctx_p,(char*)&ret);
    /*
    ** release the context
    */
    rozorpc_srv_release_context(req_ctx_p);
    STOP_PROFILING(write);
}
Пример #3
0
/**
 * Write projection blocks to a storage server through the blocking RPC
 * client.
 *
 * @param clt: storage client; clt->rpcclt.client is the RPC handle used
 * @param cid: cluster identifier put in the request
 * @param sid: storage identifier put in the request
 * @param layout: layout of the file
 * @param spare: spare indicator put in the request
 * @param dist_set: distribution set (ROZOFS_SAFE_MAX entries are copied)
 * @param fid: file identifier
 * @param bid: index of the first block to write
 * @param nb_proj: number of projection blocks in bins
 * @param bins: nb_proj blocks of
 *        (max_psize * sizeof(bin_t) + sizeof(rozofs_stor_bins_hdr_t)) bytes
 *
 * @return 0 on success, -1 otherwise with errno set (EPROTO when there is
 *         no response, else the error code returned by the storage).
 */
int sclient_write(sclient_t * clt, cid_t cid, sid_t sid, uint8_t layout,
        uint8_t spare, sid_t dist_set[ROZOFS_SAFE_MAX], fid_t fid, bid_t bid,
        uint32_t nb_proj, const bin_t * bins) {
    int status = -1;
    int err = 0;
    int saved_errno;
    sp_write_ret_t *ret = 0;
    sp_write_arg_t args;

    DEBUG_FUNCTION;

    // Fill request
    args.cid = cid;
    args.sid = sid;
    args.layout = layout;
    args.spare = spare;
    memcpy(args.dist_set, dist_set, sizeof (sid_t) * ROZOFS_SAFE_MAX);
    memcpy(args.fid, fid, sizeof (uuid_t));
    args.bid = bid;
    args.nb_proj = nb_proj;
    args.bins.bins_len = nb_proj * (rozofs_get_max_psize(layout)
            * sizeof (bin_t) + sizeof (rozofs_stor_bins_hdr_t));
    // The bins are only read while the request is encoded
    args.bins.bins_val = (char *) bins;

    if (!(clt->rpcclt.client) ||
            !(ret = sp_write_1(&args, clt->rpcclt.client))) {
        clt->status = 0;
        warning("sclient_write failed: no response from storage server"
                " (%s, %u, %u)", clt->host, clt->port, sid);
        errno = EPROTO;
        goto out;
    }
    if (ret->status != 0) {
        // Report the error returned by the storage, not a stale errno
        err = ret->sp_write_ret_t_u.error;
        severe("sclient_write failed: storage write response failure (%s)",
                strerror(err));
        // Set errno after logging so the logger cannot overwrite it
        errno = err;
        goto out;
    }
    status = 0;
out:
    if (ret) {
        // Releasing the reply must not alter the errno reported to the caller
        saved_errno = errno;
        xdr_free((xdrproc_t) xdr_sp_write_ret_t, (char *) ret);
        errno = saved_errno;
    }
    return status;
}
Пример #4
0
/**
*   Refresh the per-block header table of a projection context from the
    headers found in the bins that have just been read.

    A read can return fewer blocks than requested, so the header table must
    be updated for every requested block; otherwise later checks would look
    at entries that were never refreshed.

    Two cases can show a timestamp of 0 and are told apart by the effective
    length stored in the table:
      - a hole (block reserved but never written): effective length is set
        to ROZOFS_BSIZE
      - end of file (block not returned at all): effective length is set to 0

    @param prj_ctx_p : pointer to the projection context
    @param layout : layout associated with the file
    @param number_of_blocks_returned : number of blocks in the projection
    @param number_of_blocks_requested : number of blocks requested
    @param raw_file_size : raw file_size reported from a fstat on the projection file (on storage)

    @retval none
*/
void rozofs_storcli_transform_update_headers(rozofs_storcli_projection_ctx_t *prj_ctx_p,
                                             uint8_t  layout,
                                             uint32_t number_of_blocks_returned,
                                             uint32_t number_of_blocks_requested,
                                             uint64_t raw_file_size)
{
    /*
    ** distance, in bins, between two consecutive blocks (header + projection)
    */
    const size_t blk_stride = rozofs_get_max_psize(layout)
                            + (sizeof(rozofs_stor_bins_hdr_t)/sizeof(bin_t));
    int idx;

    prj_ctx_p->raw_file_size = raw_file_size;

    /*
    ** blocks that have been returned: copy what the header of the block says
    */
    for (idx = 0; idx < number_of_blocks_returned; idx++)
    {
      rozofs_stor_bins_hdr_t *read_hdr_p =
              (rozofs_stor_bins_hdr_t*)(prj_ctx_p->bins + (blk_stride * idx));

      prj_ctx_p->block_hdr_tab[idx].s.timestamp = read_hdr_p->s.timestamp;
      /*
      ** a null timestamp on a returned block is a hole: full block length
      */
      prj_ctx_p->block_hdr_tab[idx].s.effective_length =
              (read_hdr_p->s.timestamp == 0) ? ROZOFS_BSIZE
                                             : read_hdr_p->s.effective_length;
    }
    /*
    ** blocks that have not been returned: assume end of file
    */
    for (idx = number_of_blocks_returned; idx < number_of_blocks_requested; idx++)
    {
      prj_ctx_p->block_hdr_tab[idx].s.timestamp = 0;
      prj_ctx_p->block_hdr_tab[idx].s.effective_length = 0;
    }
}
Пример #5
0
/*
** That function is called when all the projections are ready to be sent.
**
** It performs the following steps:
**  - record the current state of the load balancing group of each storage
**    of the rozofs_safe set
**  - select a storage for each of the rozofs_forward projections
**  - when data is provided, apply the forward transform on that single block
**  - send one SP_TRUNCATE request per projection; when the send fails,
**    another storage is selected and the request is sent again
**
** On any failure an error reply is sent with
** rozofs_storcli_write_reply_error() and the root context is released.

 @param working_ctx_p: pointer to the root context associated with the top level write request
 @param data         : pointer to the data of the last block to truncate (NULL when there is none)

*/
void rozofs_storcli_truncate_req_processing_exec(rozofs_storcli_ctx_t *working_ctx_p, char * data)
{

  storcli_truncate_arg_t *storcli_truncate_rq_p = (storcli_truncate_arg_t*)&working_ctx_p->storcli_truncate_arg;
  uint8_t layout = storcli_truncate_rq_p->layout;
  uint8_t   rozofs_forward;
  uint8_t   rozofs_safe;
  uint8_t   projection_id;
  int       storage_idx;
  int       error=0;
  rozofs_storcli_lbg_prj_assoc_t  *lbg_assoc_p = working_ctx_p->lbg_assoc_tb;
  rozofs_storcli_projection_ctx_t *prj_cxt_p   = working_ctx_p->prj_ctx;   
  
  rozofs_forward = rozofs_get_rozofs_forward(layout);
  rozofs_safe    = rozofs_get_rozofs_safe(layout);
  

  /*
  ** set the current state of each load balancing group belonging to the rozofs_safe group
  */
  for (storage_idx = 0; storage_idx < rozofs_safe; storage_idx++) 
  {
    /*
    ** Check the state of the load Balancing group
    */
    rozofs_storcli_lbg_prj_insert_lbg_state(lbg_assoc_p,
                                            storage_idx,
                                            NORTH_LBG_GET_STATE(lbg_assoc_p[storage_idx].lbg_id));      
  }
  /*
  ** Now find out a selectable lbg_id for each projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
    if (rozofs_storcli_select_storage_idx_for_write ( working_ctx_p,rozofs_forward, rozofs_safe,projection_id) < 0)
    {
       /*
       ** there are not enough valid storages !!
       */
       error = EIO;
       goto fail;
    }
  }  
  
  
  /*
  ** Let's transform the data to write
  */
  working_ctx_p->truncate_bins_len = 0;
  if (data != NULL) {
    STORCLI_START_KPI(storcli_kpi_transform_forward);

    rozofs_storcli_transform_forward(working_ctx_p->prj_ctx,  
                                     layout,
                                     0, 
                                     1, 
                                     working_ctx_p->timestamp,
                                     storcli_truncate_rq_p->last_seg,
                                     data);  
    STORCLI_STOP_KPI(storcli_kpi_transform_forward,0);
    /*
    ** one block is sent: its header followed by the projection bins
    */
    working_ctx_p->truncate_bins_len = rozofs_get_max_psize(layout)*sizeof(bin_t) + sizeof(rozofs_stor_bins_hdr_t);
  } 
  
  /*
  ** We have enough storage, so initiate the transaction towards the storage for each
  ** projection
  */
  for (projection_id = 0; projection_id < rozofs_forward; projection_id++)
  {
     sp_truncate_arg_no_bins_t *request; 
     sp_truncate_arg_no_bins_t  truncate_prj_args;
     void  *xmit_buf;  
     int ret;  
      
     xmit_buf = prj_cxt_p[projection_id].prj_buf;
     if (xmit_buf == NULL)
     {
       /*
       ** fatal error since the resource control already took place
       */       
       error = EIO;
       goto fail;     
     }
     /*
     ** fill partially the common header
     */
retry:
     request   = &truncate_prj_args;
     request->cid = storcli_truncate_rq_p->cid;
     request->sid = (uint8_t) rozofs_storcli_lbg_prj_get_sid(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     request->layout        = layout;
     if (prj_cxt_p[projection_id].stor_idx >= rozofs_forward) request->spare = 1;
     else request->spare = 0;
     memcpy(request->dist_set, storcli_truncate_rq_p->dist_set, ROZOFS_SAFE_MAX*sizeof (uint8_t));
     memcpy(request->fid, storcli_truncate_rq_p->fid, sizeof (sp_uuid_t));
     request->proj_id        = projection_id;
     request->bid            = storcli_truncate_rq_p->bid;
     request->last_seg       = storcli_truncate_rq_p->last_seg;
     request->last_timestamp = working_ctx_p->timestamp;

     request->len = working_ctx_p->truncate_bins_len;

     uint32_t  lbg_id = rozofs_storcli_lbg_prj_get_lbg(working_ctx_p->lbg_assoc_tb,prj_cxt_p[projection_id].stor_idx);
     STORCLI_START_NORTH_PROF((&working_ctx_p->prj_ctx[projection_id]),truncate_prj,0);
     /*
     ** caution: we might have a direct reply if there is a direct error at load balancing group
     ** level while attempting to send the RPC message -> typically a disconnection of the TCP
     ** connection. As a consequence the response function
     ** rozofs_storcli_truncate_req_processing_cbk() can be called before returning from
     ** rozofs_sorcli_send_rq_common().
     ** Anticipate the xmit state of the projection and lock the section to avoid a reply error
     ** before returning from rozofs_sorcli_send_rq_common()
     ** --> need to take care because the write context is released after the reply error sent to rozofsmount
     */
     working_ctx_p->write_ctx_lock = 1;
     prj_cxt_p[projection_id].prj_state = ROZOFS_PRJ_WR_IN_PRG;
     
     ret =  rozofs_sorcli_send_rq_common(lbg_id,ROZOFS_TMR_GET(TMR_STORAGE_PROGRAM),STORAGE_PROGRAM,STORAGE_VERSION,SP_TRUNCATE,
                                         (xdrproc_t) xdr_sp_truncate_arg_no_bins_t, (caddr_t) request,
                                          xmit_buf,
                                          working_ctx_p->read_seqnum,
                                          (uint32_t) projection_id,
                                          working_ctx_p->truncate_bins_len,
                                          rozofs_storcli_truncate_req_processing_cbk,
                                         (void*)working_ctx_p);
     working_ctx_p->write_ctx_lock = 0;
     if (ret < 0)
     {
       /*
       ** the communication with the storage seems to be wrong (more than a TCP connection
       ** temporarily down): attempt to select a new storage
       */
       if (rozofs_storcli_select_storage_idx_for_write (working_ctx_p,rozofs_forward,rozofs_safe,projection_id) < 0)
       {
         /*
         ** Out of storage !!-> too many storages are down.
         ** The error code must be set here, otherwise the reply would carry 0.
         */
         error = EIO;
         goto fail;
       } 
       /*
       ** retry for that projection with a new storage index: WARNING: we assume that xmit buffer has not been released !!!
       */
       goto retry;
     } 
     else
     {
       /*
       ** check if the state has not been changed: -> it might be possible to get a direct error
       */
       if (prj_cxt_p[projection_id].prj_state == ROZOFS_PRJ_WR_ERROR)
       {
          error = prj_cxt_p[projection_id].errcode;
          goto fail;       
       }
     }

   }

  return;
  
fail:
     /*
     ** common error exit: reply with the error code to the requester
     */
     rozofs_storcli_write_reply_error(working_ctx_p,error);
     /*
     ** release the root transaction context
     */
     STORCLI_STOP_NORTH_PROF(working_ctx_p,truncate,0);
     rozofs_storcli_release_context(working_ctx_p);  

  return;

}
Пример #6
0
/** 
  Apply the forward transform to a buffer starting at "data". That buffer is
  processed by chunks of ROZOFS_BSIZE bytes.
  For each block and each projection, a header (projection id, timestamp,
  effective length) is written in front of the bins of the projection.
  A block reported as empty by rozofs_data_block_check_empty() is not
  transformed: only a zeroed header is written for each projection.
  Notice that the first_block_idx offset applies to the output transform buffer only
  not to the input buffer pointed by "data".
  
 * 
 * @param *prj_ctx_p: pointer to the working array of the projection
 * @param layout: layout associated with the file
 * @param first_block_idx: index of the first block in the output buffers
 * @param number_of_blocks: number of blocks to transform
 * @param timestamp: value stored in the header of each non empty block
 * @param last_block_size: effective length of the last block
 * @param *data: pointer to the source data that must be transformed
 *
 * @return: always 0
 */
 int rozofs_storcli_transform_forward(rozofs_storcli_projection_ctx_t *prj_ctx_p,  
                                       uint8_t layout,
                                       uint32_t first_block_idx, 
                                       uint32_t number_of_blocks,
                                       uint64_t timestamp, 
                                       uint16_t last_block_size,
                                       char *data) 
 {
    projection_t fwd_prj[ROZOFS_SAFE_MAX];  // Table of projections used to transform data
    const uint8_t nb_forward = rozofs_get_rozofs_forward(layout);
    const uint8_t nb_inverse = rozofs_get_rozofs_inverse(layout);
    /*
    ** size of a block header and of a whole block, counted in bins
    */
    const size_t hdr_bins   = sizeof(rozofs_stor_bins_hdr_t)/sizeof(bin_t);
    const size_t blk_stride = rozofs_get_max_psize(layout) + hdr_bins;
    uint16_t prj;
    uint32_t blk;

    /*
    ** Describe each projection: angles and size
    */
    for (prj = 0; prj < nb_forward; prj++) {
        fwd_prj[prj].angle.p = rozofs_get_angles_p(layout,prj);
        fwd_prj[prj].angle.q = rozofs_get_angles_q(layout,prj);
        fwd_prj[prj].size    = rozofs_get_psizes(layout,prj);
    }

    /*
    ** Transform the data, one block at a time
    */
    for (blk = 0; blk < number_of_blocks; blk++) 
    {
        const int is_empty = rozofs_data_block_check_empty(data + (blk * ROZOFS_BSIZE), ROZOFS_BSIZE);

        for (prj = 0; prj < nb_forward; prj++) 
        {
          rozofs_stor_bins_hdr_t *hdr_p;

          /*
          ** Location of the block (header first) in the buffer of the projection
          */
          fwd_prj[prj].bins = prj_ctx_p[prj].bins + (blk_stride * (first_block_idx+blk));
          hdr_p = (rozofs_stor_bins_hdr_t*)fwd_prj[prj].bins;

          if (is_empty)
          {
            /*
            ** empty user data block: zeroed header, no transform
            */
            hdr_p->s.projection_id    = 0;
            hdr_p->s.timestamp        = 0;
            hdr_p->s.effective_length = 0;
            continue;
          }

          hdr_p->s.projection_id = prj;
          hdr_p->s.timestamp     = timestamp;
          /*
          ** the effective size is always ROZOFS_BSIZE except for the last block
          */
          hdr_p->s.effective_length = (blk == (number_of_blocks-1)) ? last_block_size
                                                                   : ROZOFS_BSIZE;
          /*
          ** the bins of the projection start right after the header
          */
          fwd_prj[prj].bins += hdr_bins;
        }

        if (!is_empty)
        {
          /*
          ** Apply the erasure code transform for the block blk+first_block_idx
          */
          transform_forward((pxl_t *) (data + (blk * ROZOFS_BSIZE)),
                  nb_inverse,
                  ROZOFS_BSIZE / nb_inverse / sizeof (pxl_t),
                  nb_forward, fwd_prj);
        }
    }

    return 0;
}
Пример #7
0
/** 
  Apply the inverse transform to rebuild blocks of ROZOFS_BSIZE bytes in the
  buffer starting at "data".
  Blocks whose state is already ROZOFS_BLK_TRANSFORM_DONE are skipped.
  A block with a null timestamp is either a hole (effective length equal to
  ROZOFS_BSIZE: the block is zeroed) or the end of file (effective length
  equal to 0: the function stops there).
  Notice that the first_block_idx offset applies to the output buffer
  pointed by "data" only, not to the bins of the projections.

  The set of projections used for each block is taken as is from
  rozofs_storcli_prj_idx_table; no check is made here that the set permits
  to rebuild the block.
  
 * 
 * @param *prj_ctx_p: pointer to the working array of the projection
 * @param layout: layout associated with the file
 * @param first_block_idx: index of the first block in the output buffer
 * @param number_of_blocks: number of blocks to rebuild
 * @param *block_ctx_p: per block context (state, timestamp, effective length)
 * @param *data: pointer to the output buffer
   @param *number_of_blocks_p: pointer to the array where the function returns number of blocks on which the transform was applied
  @param *rozofs_storcli_prj_idx_table: projection context indexes to use, ROZOFS_SAFE_MAX entries per block
 *
 * @return: always 0
 */
 int rozofs_storcli_transform_inverse(rozofs_storcli_projection_ctx_t *prj_ctx_p,  
                                       uint8_t layout,
                                       uint32_t first_block_idx, 
                                       uint32_t number_of_blocks, 
                                       rozofs_storcli_inverse_block_t *block_ctx_p,
                                       char *data,
                                       uint32_t *number_of_blocks_p,
				       uint8_t  *rozofs_storcli_prj_idx_table) 
 {
    projection_t inv_prj[ROZOFS_SAFE_MAX];
    const uint8_t nb_inverse = rozofs_get_rozofs_inverse(layout);
    /*
    ** distance, in bins, between two consecutive blocks (header + projection)
    */
    const size_t blk_stride = rozofs_get_max_psize(layout)
                            + (sizeof(rozofs_stor_bins_hdr_t)/sizeof(bin_t));
    int blk;
    int slot;

    *number_of_blocks_p = 0;

    for (blk = 0; blk < number_of_blocks; blk++) {
        rozofs_storcli_inverse_block_t *blk_ctx_p = &block_ctx_p[blk];
        char *out_p;

        /*
        ** nothing to do when the block has already been rebuilt
        */
        if (blk_ctx_p->state == ROZOFS_BLK_TRANSFORM_DONE) continue;

        if (blk_ctx_p->timestamp == 0)
        {
          if (blk_ctx_p->effective_length == ROZOFS_BSIZE)
          {
            /*
            ** hole in the file: the block reads as zeros
            */
            ROZOFS_STORCLI_STATS(ROZOFS_STORCLI_EMPTY_READ);
            memset( data + (ROZOFS_BSIZE * (first_block_idx + blk)),0,ROZOFS_BSIZE);
            blk_ctx_p->state = ROZOFS_BLK_TRANSFORM_DONE;
            continue;
          }
          if (blk_ctx_p->effective_length == 0)
          {
            /*
            ** end of file: report the number of blocks found before it
            */
            blk_ctx_p->state = ROZOFS_BLK_TRANSFORM_DONE;
            *number_of_blocks_p = blk;
            return 0;
          }
        }

        /*
        ** Take care: the index used to address prj_ctx_p is NOT the real
        ** projection id. The real one is found in the header stored in
        ** front of the bins, so it is read from there for each projection.
        */
        for (slot = 0; slot < nb_inverse; slot++)
        {
           uint16_t real_prj_id;
           int ctx_idx = rozofs_storcli_prj_idx_table[ROZOFS_SAFE_MAX*blk+slot];
           rozofs_stor_bins_hdr_t *hdr_p =
                   (rozofs_stor_bins_hdr_t*)(prj_ctx_p[ctx_idx].bins + (blk_stride * blk));

           real_prj_id = hdr_p->s.projection_id;
           inv_prj[slot].angle.p = rozofs_get_angles_p(layout,real_prj_id);
           inv_prj[slot].angle.q = rozofs_get_angles_q(layout,real_prj_id);
           inv_prj[slot].size    = rozofs_get_psizes(layout,real_prj_id);
           /*
           ** the bins start right after the header
           */
           inv_prj[slot].bins    = (bin_t*)(hdr_p+1);
        }

        // Inverse data for the block (first_block_idx + blk)
        out_p = data + (ROZOFS_BSIZE * (first_block_idx + blk));
        transform_inverse_inline((pxl_t *) out_p,
                nb_inverse,
                ROZOFS_BSIZE / nb_inverse / sizeof (pxl_t),
                nb_inverse, inv_prj);

        blk_ctx_p->state = ROZOFS_BLK_TRANSFORM_DONE;

        /*
        ** block that is not full: zero the part after the effective length
        */
        if (blk_ctx_p->effective_length < ROZOFS_BSIZE)
        {
           memset( out_p + blk_ctx_p->effective_length,0,(ROZOFS_BSIZE-blk_ctx_p->effective_length) );
        }
    }

    /*
    ** all the requested blocks have been processed
    */
    *number_of_blocks_p = number_of_blocks;
    return 0;   
}
Пример #8
0
/**
 * Non-blocking service routine for a projection read request.
 *
 * A transmit buffer is taken from storage_xmit_buffer_pool_p and the
 * projections are read directly into its payload. When no transmit buffer
 * is available, the received buffer is reused to send the error reply.
 *
 * @param pt: decoded request arguments (sp_read_arg_t)
 * @param req_ctx_p: RPC server context of the request; released before return
 */
void sp_read_1_svc_nb(void * pt, rozorpc_srv_ctx_t *req_ctx_p) {
    sp_read_arg_t * args = (sp_read_arg_t *) pt;
    static sp_read_ret_t ret;
    storage_t *st = 0;
    /*
    ** storage_read() reports the length through a size_t pointer: use a real
    ** size_t rather than casting the address of bins_len.
    ** NOTE(review): bins_len is presumably the u_int generated by rpcgen --
    ** confirm. Writing a size_t through a cast pointer would then touch
    ** bytes beyond the field.
    */
    size_t len_read = 0;

    START_PROFILING_IO(read, args->nb_proj * rozofs_get_max_psize(args->layout)
            * sizeof (bin_t));
            
    ret.status = SP_FAILURE;            
    /*
    ** allocate a buffer for the response
    */
    req_ctx_p->xmitBuf = ruc_buf_getBuffer(storage_xmit_buffer_pool_p);
    if (req_ctx_p->xmitBuf == NULL)
    {
      severe("Out of memory STORAGE_NORTH_LARGE_POOL");
      ret.sp_read_ret_t_u.error = ENOMEM;
      /*
      ** fall back on the received buffer to send the error reply
      */
      req_ctx_p->xmitBuf  = req_ctx_p->recv_buf;
      req_ctx_p->recv_buf = NULL;
      goto error;         
    }


    // Get the storage for the couple (cid;sid)
    if ((st = storaged_lookup(args->cid, args->sid)) == 0) {
        ret.sp_read_ret_t_u.error = errno;
        goto error;
    }

    /*
    ** set the pointer to the bins
    */
    int position = storage_get_position_of_first_byte2write_from_read_req();
    uint8_t *pbuf = (uint8_t*)ruc_buf_getPayload(req_ctx_p->xmitBuf);     
    /*
    ** clear the length of the bins and set the pointer where data must be returned
    */  
    ret.sp_read_ret_t_u.rsp.bins.bins_val =(char *)(pbuf+position);
    ret.sp_read_ret_t_u.rsp.bins.bins_len = 0;
#if 0 // for future usage with distributed cache 
    /*
    ** clear the optimization array
    */
    ret.sp_read_ret_t_u.rsp.optim.optim_val = (char*)sp_optim;
    ret.sp_read_ret_t_u.rsp.optim.optim_len = 0;
#endif    
    // Read projections
    if (storage_read(st, args->layout, (sid_t *) args->dist_set, args->spare,
            (unsigned char *) args->fid, args->bid, args->nb_proj,
            (bin_t *) ret.sp_read_ret_t_u.rsp.bins.bins_val,
            &len_read,
            &ret.sp_read_ret_t_u.rsp.file_size) != 0) {
        ret.sp_read_ret_t_u.error = errno;
        goto error;
    }
    ret.sp_read_ret_t_u.rsp.bins.bins_len = len_read;

    ret.status = SP_SUCCESS;
    storaged_srv_forward_read_success(req_ctx_p,&ret);
    /*
    ** check the case of the readahead
    */
    storage_check_readahead();
    goto out;
    
error:
    rozorpc_srv_forward_reply(req_ctx_p,(char*)&ret); 
    /*
    ** release the context
    */
out:
    rozorpc_srv_release_context(req_ctx_p);
    STOP_PROFILING(read);
    return ;
}
Пример #9
0
/**
 * Truncate the bins file of a projection.
 *
 * The directory of the distribution and the bins file are created when they
 * do not exist (the file header is then written). The file is cut after
 * block bid, or after block bid+1 when last_seg is not 0; in that case the
 * header of block bid is rewritten with last_seg as effective length.
 *
 * @param st: storage on which the bins file resides
 * @param layout: layout of the file
 * @param dist_set: distribution set of the file
 * @param spare: spare indicator, used to build the path
 * @param fid: file identifier, used to build the path
 * @param proj_id: projection id stored in the header of the last block
 * @param bid: index of the block where the truncate takes place
 * @param version: version stored in the file and block headers
 * @param last_seg: effective length of the last block (0: none)
 * @param last_timestamp: timestamp stored in the header of the last block
 *
 * @return 0 on success, -1 otherwise (errno is left as set by the failing
 *         call; EIO is used for a short write).
 */
int storage_truncate(storage_t * st, uint8_t layout, sid_t * dist_set,
        uint8_t spare, fid_t fid, tid_t proj_id,bid_t bid,uint8_t version,uint16_t last_seg,uint64_t last_timestamp) {
    int status = -1;
    char path[FILENAME_MAX];
    int fd = -1;
    off_t bins_file_offset = 0;
    uint16_t rozofs_max_psize = 0;
    uint8_t write_file_hdr = 0;
    bid_t bid_truncate;
    ssize_t nb_write = 0;   // pwrite() returns a signed count (-1 on error)
    size_t length_to_write = 0;
    rozofs_stor_bins_hdr_t bins_hdr;
    
    // Build the full path of directory that contains the bins file
    storage_map_distribution(st, layout, dist_set, spare, path);

    // Check that this directory already exists, otherwise it will be created
    if (access(path, F_OK) == -1) {
        if (errno == ENOENT) {
            // If the directory doesn't exist, create it
            if (mkdir(path, ROZOFS_ST_DIR_MODE) != 0) {
                if (errno != EEXIST) {
                    // The directory is not created !!!
                    severe("mkdir failed (%s) : %s", path, strerror(errno));
                    goto out;
                }
                // Well someone else has created the directory in the meantime
            }
        } else {
            goto out;
        }
    }

    // Build the path of bins file
    storage_map_projection(fid, path);

    // Check that this file already exists
    if (access(path, F_OK) == -1)
        write_file_hdr = 1; // We must write the header

    // Open bins file
    fd = open(path, ROZOFS_ST_BINS_FILE_FLAG, ROZOFS_ST_BINS_FILE_MODE);
    if (fd < 0) {
        severe("open failed (%s) : %s", path, strerror(errno));
        goto out;
    }

    // If we write the bins file for the first time, we must write the header
    if (write_file_hdr) {
        // Prepare file header (zeroed first: no uninitialized byte on disk)
        rozofs_stor_bins_file_hdr_t file_hdr;
        memset(&file_hdr, 0, sizeof (file_hdr));
        memcpy(file_hdr.dist_set_current, dist_set,
                ROZOFS_SAFE_MAX * sizeof (sid_t));
        memset(file_hdr.dist_set_next, 0, ROZOFS_SAFE_MAX * sizeof (sid_t));
        file_hdr.layout = layout;
        file_hdr.version = version;

        // Write the header for this bins file
        nb_write = pwrite(fd, &file_hdr, sizeof (file_hdr), 0);
        if (nb_write != (ssize_t) sizeof (file_hdr)) {
            // A short write does not set errno
            if (nb_write >= 0) errno = EIO;
            severe("pwrite failed: %s", strerror(errno));
            goto out;
        }
    }

    // Compute the offset from the truncate
    rozofs_max_psize = rozofs_get_max_psize(layout);
    bid_truncate = bid;
    // The block that holds the last segment is kept
    if (last_seg!= 0) bid_truncate+=1;
    bins_file_offset = ROZOFS_ST_BINS_FILE_HDR_SIZE + (bid_truncate) * (rozofs_max_psize *
            sizeof (bin_t) + sizeof (rozofs_stor_bins_hdr_t));

    if (ftruncate(fd, bins_file_offset) < 0) {
        severe("ftruncate failed (%s) : %s", path, strerror(errno));
        goto out;
    }
    /*
    ** Check the case of the last segment
    */
    if (last_seg!= 0)
    {
      memset(&bins_hdr, 0, sizeof (bins_hdr));
      bins_hdr.s.timestamp        = last_timestamp;
      bins_hdr.s.effective_length = last_seg;
      bins_hdr.s.projection_id    = proj_id;
      bins_hdr.s.version          = version;
      length_to_write = sizeof(rozofs_stor_bins_hdr_t);
      
      bins_file_offset = ROZOFS_ST_BINS_FILE_HDR_SIZE + (bid) * (rozofs_max_psize *
              sizeof (bin_t) + sizeof (rozofs_stor_bins_hdr_t));

      nb_write = pwrite(fd, &bins_hdr, length_to_write, bins_file_offset);
      if (nb_write != (ssize_t) length_to_write) {
          // A short write does not set errno
          if (nb_write >= 0) errno = EIO;
          severe("pwrite failed on last segment: %s", strerror(errno));
          // status is still -1 here: the failure is reported to the caller
          goto out;
      }
      
    }

    status = 0;
out:
    if (fd != -1) {
        // close() must not clobber the errno of the failing call
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
    }
    return status;
}
Пример #10
0
/**
 * Read up to nb_proj consecutive projections (each one preceded by its
 * rozofs_stor_bins_hdr_t header) from the bins file of a FID.
 *
 * The bins file path is built by storage_map_distribution() from the
 * storage, layout, distribution set and spare flag, then completed by
 * storage_map_projection() from the FID.
 *
 * @param st: storage used to build the path of the bins file.
 * @param layout: layout, used for the path and for the projection size.
 * @param dist_set: distribution set, used to build the path.
 * @param spare: spare flag, used to build the path.
 * @param fid: identifier of the file to read.
 * @param bid: index of the first projection to read.
 * @param nb_proj: number of projections requested.
 * @param bins: destination buffer; it must be able to hold nb_proj
 *        projections with their headers.
 * @param len_read: set to the number of bytes actually read. It is written
 *        before the final fstat(), so it may be set even when -1 is returned.
 * @param file_size: set to the size of the bins file after the read.
 *
 * @return 0 on success, -1 on failure. A read shorter than requested is
 *         accepted as long as it is a whole number of projections;
 *         otherwise errno is set to EIO.
 */
int storage_read(storage_t * st, uint8_t layout, sid_t * dist_set,
        uint8_t spare, fid_t fid, bid_t bid, uint32_t nb_proj,
        bin_t * bins, size_t * len_read, uint64_t *file_size) {

    int status = -1;
    char path[FILENAME_MAX];
    int fd = -1;
    // pread() returns a signed count: keep it signed so that the error
    // value (-1) is tested directly instead of through an implicit
    // conversion to an unsigned type.
    ssize_t nb_read = 0;
    size_t proj_size = 0;
    size_t length_to_read = 0;
    off_t bins_file_offset = 0;
    uint16_t rozofs_max_psize = 0;
    struct stat sb;

    // Build the full path of directory that contains the bins file
    storage_map_distribution(st, layout, dist_set, spare, path);

    // Build the path of bins file
    storage_map_projection(fid, path);

    // Open bins file
    fd = open(path, ROZOFS_ST_BINS_FILE_FLAG, ROZOFS_ST_BINS_FILE_MODE);
    if (fd < 0) {
        DEBUG("open failed (%s) : %s", path, strerror(errno));
        goto out;
    }

    // Size of one projection in the file: the bins plus the header
    rozofs_max_psize = rozofs_get_max_psize(layout);
    proj_size = rozofs_max_psize * sizeof (bin_t)
            + sizeof (rozofs_stor_bins_hdr_t);

    // Compute the offset and length to read
    bins_file_offset = ROZOFS_ST_BINS_FILE_HDR_SIZE + bid * (off_t) proj_size;
    length_to_read = nb_proj * proj_size;

    // Read nb_proj * (projection + header)
    nb_read = pread(fd, bins, length_to_read, bins_file_offset);
    if (nb_read < 0) {
        severe("pread failed: %s", strerror(errno));
        goto out;
    }

    // The length read must be a whole number of projections
    if (((size_t) nb_read % proj_size) != 0) {
        char fid_str[37];
        uuid_unparse(fid, fid_str);
        severe("storage_read failed (FID: %s): read inconsistent length",
                fid_str);
        errno = EIO;
        goto out;
    }

    // Update the length read
    *len_read = (size_t) nb_read;

    // Stat file for return the size of bins file after the read operation
    if (fstat(fd, &sb) == -1) {
        severe("fstat failed: %s", strerror(errno));
        goto out;
    }

    *file_size = sb.st_size;

    // Read is successful
    status = 0;

out:
    if (fd != -1) close(fd);
    return status;
}
Пример #11
0
/**
 * Write nb_proj consecutive projections (each one preceded by its
 * rozofs_stor_bins_hdr_t header) into the bins file of a FID.
 *
 * The directory of the bins file is created when it does not exist yet.
 * When the bins file itself does not exist before the open, a file header
 * (current distribution set, zeroed next distribution set, layout and
 * version) is written at offset 0.
 *
 * @param st: storage used to build the path of the bins file.
 * @param layout: layout, used for the path, the file header and the
 *        projection size.
 * @param dist_set: distribution set, used for the path and the file header.
 * @param spare: spare flag, used to build the path.
 * @param fid: identifier of the file to write.
 * @param bid: index of the first projection to write.
 * @param nb_proj: number of projections to write.
 * @param version: version stored in the file header.
 * @param file_size: set to the size of the bins file after the write.
 * @param bins: source buffer holding nb_proj projections with their headers.
 *
 * @return the number of bytes written on success, -1 on failure.
 */
int storage_write(storage_t * st, uint8_t layout, sid_t * dist_set,
        uint8_t spare, fid_t fid, bid_t bid, uint32_t nb_proj, uint8_t version,
        uint64_t *file_size, const bin_t * bins) {
    int status = -1;
    char path[FILENAME_MAX];
    int fd = -1;
    // pwrite() returns a signed count: keep it signed so that an error (-1)
    // is not silently converted to a huge unsigned value.
    ssize_t nb_write = 0;
    size_t proj_size = 0;
    size_t length_to_write = 0;
    off_t bins_file_offset = 0;
    uint16_t rozofs_max_psize = 0;
    uint8_t write_file_hdr = 0;
    struct stat sb;

    // Size of one projection in the file: the bins plus the header
    rozofs_max_psize = rozofs_get_max_psize(layout);
    proj_size = rozofs_max_psize * sizeof (bin_t)
            + sizeof (rozofs_stor_bins_hdr_t);

    // Build the full path of directory that contains the bins file
    storage_map_distribution(st, layout, dist_set, spare, path);

    // Check that this directory already exists, otherwise create it
    if (access(path, F_OK) == -1) {
        if (errno != ENOENT) {
            // Previously this failure was returned without any trace
            severe("access failed (%s) : %s", path, strerror(errno));
            goto out;
        }
        // EEXIST means that someone else has created the directory in the
        // meantime: this is not an error.
        if (mkdir(path, ROZOFS_ST_DIR_MODE) != 0 && errno != EEXIST) {
            severe("mkdir failed (%s) : %s", path, strerror(errno));
            goto out;
        }
    }

    // Build the path of bins file
    storage_map_projection(fid, path);

    // When the file does not exist yet, we must write the header
    if (access(path, F_OK) == -1)
        write_file_hdr = 1;

    // Open bins file
    fd = open(path, ROZOFS_ST_BINS_FILE_FLAG, ROZOFS_ST_BINS_FILE_MODE);
    if (fd < 0) {
        severe("open failed (%s) : %s", path, strerror(errno));
        goto out;
    }

    // If we write the bins file for the first time, we must write the header
    if (write_file_hdr) {
        // Prepare file header
        rozofs_stor_bins_file_hdr_t file_hdr;
        memcpy(file_hdr.dist_set_current, dist_set,
                ROZOFS_SAFE_MAX * sizeof (sid_t));
        memset(file_hdr.dist_set_next, 0, ROZOFS_SAFE_MAX * sizeof (sid_t));
        file_hdr.layout = layout;
        file_hdr.version = version;

        // Write the header for this bins file
        nb_write = pwrite(fd, &file_hdr, sizeof (file_hdr), 0);
        if (nb_write < 0 || (size_t) nb_write != sizeof (file_hdr)) {
            severe("pwrite failed: %s", strerror(errno));
            goto out;
        }
    }

    // Compute the offset and length to write
    bins_file_offset = ROZOFS_ST_BINS_FILE_HDR_SIZE + bid * proj_size;
    length_to_write = nb_proj * proj_size;

    // Write nb_proj * (projection + header)
    nb_write = pwrite(fd, bins, length_to_write, bins_file_offset);
    if (nb_write < 0 || (size_t) nb_write != length_to_write) {
        severe("pwrite failed: %s", strerror(errno));
        goto out;
    }

    // Stat file for return the size of bins file after the write operation
    if (fstat(fd, &sb) == -1) {
        severe("fstat failed: %s", strerror(errno));
        goto out;
    }

    *file_size = sb.st_size;

    // Write is successful
    status = length_to_write;

out:
    if (fd != -1) close(fd);
    return status;
}
Пример #12
0
int read_data_file() {
    int status = -1;
    uint64_t size = 0;
    int block_idx = 0;
    int idx =0;
    int count;
    rozofs_stor_bins_hdr_t * rozofs_bins_hdr_p;
    rozofs_stor_bins_footer_t * rozofs_bins_foot_p;
    char * loc_read_bins_p = NULL;
    int      forward = rozofs_get_rozofs_forward(layout);
//    int      inverse = rozofs_get_rozofs_inverse(layout);
    uint16_t disk_block_size; 
    uint16_t max_block_size = (rozofs_get_max_psize(layout,bsize)*sizeof (bin_t)) 
                            + sizeof (rozofs_stor_bins_hdr_t) + sizeof (rozofs_stor_bins_footer_t);
    char * p;
    int empty,valid;
    int prj_id;
    int      nb_ts;
    uint64_t ts[32];
    int      ts_count[32];
        
    // Allocate memory for reading
    loc_read_bins_p = xmalloc(max_block_size);   

    for (idx=0; idx < nb_file; idx++) {
      if (strcmp(filename[idx],"NULL") == 0) {
        fd[idx] = -1;
      }
      else {
	fd[idx] = open(filename[idx],O_RDWR);
	if (fd < 0) {
	    severe("Can not open file %s %s",filename[idx],strerror(errno));
	    goto out;
	}
      }	
    }
            
    printf (" ______ __________ ");
    for (idx=0; idx < nb_file; idx++) printf (" __________________ ______ ____ ");
    printf ("\n");

    printf("| %4s | %8s |","Blk","Offset");     
    for (idx=0; idx < nb_file; idx++) printf("| %16s | %4s | %2s |", "Time stamp", "lgth", "id");
    printf ("\n");  
    
    printf ("|______|__________|");
    for (idx=0; idx < nb_file; idx++) printf ("|__________________|______|____|");
    printf ("\n"); 
    
    if (block_number == -1) block_idx = 0;
    else                    block_idx = block_number;
    count = 1;
    
    empty = 0;
    while ( count ) {

      valid = 0;
      count = 0;
      
      nb_ts = 0;
      
      p = &LINE[0];
      p += sprintf(p,"| %4d | %8d ",block_idx+firstBlock,(block_idx+firstBlock)*bbytes);

      for (idx=0; idx < nb_file; idx++) {
             
       if (fd[idx] == -1) {
         p += sprintf(p,"%32s"," ");
	 continue;
       }
       
       if (idx >= forward)
          disk_block_size = rozofs_get_max_psize_in_msg(layout, bsize);
       else
          disk_block_size = rozofs_get_psizes_on_disk(layout,bsize,idx);          
       
       size = pread(fd[idx],loc_read_bins_p,disk_block_size,block_idx*disk_block_size);
       
       if (size !=  disk_block_size) {
           p += sprintf(p,"|__________________|______|____|");
	   close(fd[idx]);
	   fd[idx] = -1;        
       }
       else {
         count++;
	 rozofs_bins_hdr_p = (rozofs_stor_bins_hdr_t *)loc_read_bins_p;
	 prj_id = rozofs_bins_hdr_p->s.projection_id;
	 
	 if (prj_id >= forward) {
	   valid = 1;
	   p += sprintf(p,"|| xxxxxxxxxxxxxxxx | xxxx | %2d ",prj_id);	     
	 }
	 else {
           disk_block_size = (rozofs_get_psizes(layout,bsize,prj_id)*sizeof (bin_t));
           disk_block_size += sizeof (rozofs_stor_bins_hdr_t);
	   
	   rozofs_bins_foot_p = (rozofs_stor_bins_footer_t *) 
	            ((char*) rozofs_bins_hdr_p + disk_block_size);
           if (rozofs_bins_hdr_p->s.timestamp == 0) {
	     p += sprintf(p,"|| %16d | .... | %2d ",0,prj_id);
	   }		    
	   else if (rozofs_bins_foot_p->timestamp != rozofs_bins_hdr_p->s.timestamp) {
	     valid = 1;
	     p += sprintf(p,"|--%16.16llu----------%2d-", 
	                  (long long unsigned int)rozofs_bins_hdr_p->s.timestamp, 
			  prj_id);	     
	   }
	   else if (rozofs_bins_hdr_p->s.timestamp == 0) {
	     p += sprintf(p,"|| %16d | .... | %2d ",0,prj_id);
	   }
	   else {
	     valid = 1;
	     p += sprintf(p,"|| %16llu | %4d | %2d ",
        	    (unsigned long long)rozofs_bins_hdr_p->s.timestamp,    
        	    rozofs_bins_hdr_p->s.effective_length,    
        	    rozofs_bins_hdr_p->s.projection_id); 

             int tsidx;
	     for (tsidx=0; tsidx< nb_ts; tsidx++) {
	       if (ts[tsidx] == rozofs_bins_hdr_p->s.timestamp) {
	         ts_count[tsidx]++;
		 break;
	       }
	     }		    
	     if (tsidx == nb_ts) {
	       ts[tsidx] = rozofs_bins_hdr_p->s.timestamp;
	       ts_count[tsidx] = 1;
	       nb_ts++;
	     }  
           }
	 }  		  
       }

     }
     
     if (valid) {
       if (empty) {
         printf("... %d blocks...\n",empty);
	 empty = 0;
       }
       int best=-1,tsidx;
       for (tsidx=0; tsidx< nb_ts; tsidx++) {
	 if (ts_count[tsidx] > best) best = ts_count[tsidx];
       }       
       printf("%s%s\n",LINE, (best<forward)?"<<<<----":"|");
        
     }
     else {
       empty++;
     }
     block_idx++;
     if (block_number!=-1) break;
   }  	
   printf ("|______|__________|\n");

   if (block_number!=-1) {
      for (idx=0; idx < nb_file; idx++) {

       if (idx < forward) {	 
         disk_block_size = (rozofs_get_psizes(layout,bsize,idx)*sizeof (bin_t)) + sizeof (rozofs_stor_bins_hdr_t) + sizeof (rozofs_stor_bins_footer_t);
       }	 
       else {
         disk_block_size = (rozofs_get_max_psize(layout,bsize)*sizeof (bin_t)) + sizeof (rozofs_stor_bins_hdr_t) + sizeof (rozofs_stor_bins_footer_t);
       }  
       size = pread(fd[idx],loc_read_bins_p,disk_block_size,block_number*disk_block_size);
       if (size !=  disk_block_size) {
	   printf("Can not read block %d of %s\n", block_number, filename[idx]); 
	   continue;      
       }

       FILE * f;
       char fname[128];
       sprintf(fname,"block_%d_dist_%d.txt", block_number, idx);
       f = fopen(fname,"w");
       if (f == NULL) {
	 printf ("Can not create file %s",fname);
	 continue;
       }
       printf("- %s\n",fname);


       fprintf(f,"%s Block %d size %d\n", filename[idx], block_number, disk_block_size);

       rozofs_bins_hdr_p = (rozofs_stor_bins_hdr_t *)loc_read_bins_p;   	 
       fprintf(f,"Block header : TS %llu SZ %d PRJ %d CRC32 0x%x\n", 
	       (long long unsigned int)rozofs_bins_hdr_p->s.timestamp, 
	       rozofs_bins_hdr_p->s.effective_length,
	       rozofs_bins_hdr_p->s.projection_id,
	       rozofs_bins_hdr_p->s.filler);

       rozofs_bins_foot_p = (rozofs_stor_bins_footer_t *) (loc_read_bins_p + disk_block_size);
       rozofs_bins_foot_p--;
       fprintf(f,"Block footer : TS %llu %s\n", 
	      (long long unsigned int)rozofs_bins_foot_p->timestamp,
	      (rozofs_bins_hdr_p->s.timestamp==rozofs_bins_foot_p->timestamp)?"":" !!!!!!");	 

       hexdump(f,loc_read_bins_p, 0, disk_block_size);   
       fclose(f);		  
     }   
   }
     
   status = 0;
    			      
out:
    // This spare file used to exist but is not needed any more

    for (idx=0; idx < nb_file; idx++) {
      if (fd[idx] != -1) close(fd[idx]);
    }  	
    if (loc_read_bins_p != NULL) {
      //free(loc_read_bins_p);
      loc_read_bins_p = NULL;
    }
    return status;
}