/*
 * PVFS_Request_free()
 *
 * Drops one reference on the request *req and releases its memory once no
 * references remain.
 *
 * - NULL req or NULL *req: logs an error and returns PVFS_ERR_REQ.
 * - refcount <= 0 on entry: nothing is touched (not even *req) and
 *   PVFS_SUCCESS is returned.
 * - still referenced after the decrement: *req is cleared, nothing is freed.
 * - packed request: released with a single free().
 * - unpacked request: every node on the sreq chain is released (after
 *   recursively releasing that node's ereq), then the head's ereq and the
 *   head itself.
 *
 * Returns PVFS_SUCCESS in every case except the two NULL-argument errors.
 */
int PVFS_Request_free(PVFS_Request * req)
{
    PVFS_Request link;
    PVFS_Request following;

    if (req == NULL)
    {
        gossip_lerr("PVFS_Request_free: NULL pointer argument\n");
        return PVFS_ERR_REQ;
    }

    if (*req == NULL)
    {
        gossip_lerr("PVFS_Request_free: pointer to NULL pointer argument\n");
        return PVFS_ERR_REQ;
    }

    /* a count of 0 means already freed; a negative count marks a request
     * that must never be freed.  Either way the caller's variable is left
     * alone because we cannot tell whether it is the user's own. */
    if ((*req)->refcount <= 0)
    {
        gossip_debug(GOSSIP_REQUEST_DEBUG, "don't free special request\n");
        return PVFS_SUCCESS;
    }

    PINT_REQUEST_REFDEC(*req);

    /* somebody else still holds a reference */
    if ((*req)->refcount > 0)
    {
        *req = NULL;
        gossip_debug(GOSSIP_REQUEST_DEBUG, "don't free referenced request\n");
        return PVFS_SUCCESS;
    }

    /* packed requests are contiguous and have no external refs */
    if (PINT_REQUEST_IS_PACKED(*req))
    {
        free(*req);
        *req = NULL;
        gossip_debug(GOSSIP_REQUEST_DEBUG, "free packed request\n");
        return PVFS_SUCCESS;
    }

    /* walk the sreq chain; the successor is read before the node is freed */
    for (link = (*req)->sreq; link != NULL; link = following)
    {
        PVFS_Request_free(&(link->ereq));
        following = link->sreq;
        free(link);
        gossip_debug(GOSSIP_REQUEST_DEBUG, "free sreq linked request\n");
    }

    /* finally the head structure itself */
    PVFS_Request_free(&((*req)->ereq));
    free(*req);
    *req = NULL;
    gossip_debug(GOSSIP_REQUEST_DEBUG, "free unpacked request\n");
    return PVFS_SUCCESS;
}
/*
 * PVFS_Request_commit()
 *
 * Takes the request *reqp, which points to all of its contained types, and
 * lays those types out in one contiguous, freshly allocated region.  On
 * success *reqp is replaced by a pointer to that region.
 *
 * Returns PVFS_ERR_REQ when reqp or *reqp is NULL, when the request is
 * already packed, or when the region cannot be allocated; PVFS_SUCCESS
 * otherwise.
 *
 * NOTE(review): when PINT_REQUEST_NEST_SIZE() is not positive nothing is
 * packed and *reqp is set to NULL; the incoming request is never released
 * here in either case -- confirm that callers expect this.
 */
int PVFS_Request_commit(PVFS_Request * reqp)
{
    PVFS_Request unpacked;
    PVFS_Request packed;

    /* check pointer to pointer */
    if (reqp == NULL)
    {
        gossip_lerr("PVFS_Request_commit: NULL pointer argument\n");
        return PVFS_ERR_REQ;
    }

    /* now check the pointer */
    unpacked = *reqp;
    if (unpacked == NULL)
    {
        gossip_lerr("PVFS_Request_commit: pointer to NULL pointer argument\n");
        return PVFS_ERR_REQ;
    }

    /* a committed request cannot be committed again */
    if (PINT_REQUEST_IS_PACKED(unpacked))
    {
        gossip_lerr("PVFS_Request_commit: pointer to commited request\n");
        return PVFS_ERR_REQ;
    }

    /* nothing to pack: hand back a NULL region */
    if (!(PINT_REQUEST_NEST_SIZE(unpacked) > 0))
    {
        *reqp = NULL;
        return PVFS_SUCCESS;
    }

    /* allocate the contiguous region */
    packed = malloc(PINT_REQUEST_PACK_SIZE(unpacked));
    if (packed == NULL)
    {
        gossip_lerr("PVFS_Request_commit: Memory cannot be allocated\n");
        return PVFS_ERR_REQ;
    }

    /* pack the request and return the region */
    PINT_request_commit(packed, unpacked);
    *reqp = packed;
    return PVFS_SUCCESS;
}
/*
 * Encode a file handle for @inode (and optionally @parent) into @fh.
 *
 * Layout written, in 32-bit words:
 *   fh[0..3]  16-byte khandle of @inode
 *   fh[4]     fs_id of @inode
 *   fh[5..8]  16-byte khandle of @parent   (only when @parent is set)
 *   fh[9]     fs_id of @parent             (only when @parent is set)
 *
 * @max_len holds the capacity of @fh in words on entry and is always set
 * to the number of words required (5 or 10) on return.
 *
 * Returns 1 when only the inode was encoded, 2 when the parent was encoded
 * as well, and 255 when @fh is too small (nothing is written to @fh).
 */
static int orangefs_encode_fh(struct inode *inode,
			      __u32 *fh,
			      int *max_len,
			      struct inode *parent)
{
	struct orangefs_object_kref refn;
	int needed = parent ? 10 : 5;

	if (*max_len < needed) {
		gossip_lerr("fh buffer is too small for encoding\n");
		*max_len = needed;
		return 255;
	}

	/* the object itself */
	refn = ORANGEFS_I(inode)->refn;
	ORANGEFS_khandle_to(&refn.khandle, fh, 16);
	fh[4] = refn.fs_id;

	gossip_debug(GOSSIP_SUPER_DEBUG,
		     "Encoding fh: handle %pU, fsid %u\n",
		     &refn.khandle,
		     refn.fs_id);

	if (!parent) {
		*max_len = needed;
		return 1;
	}

	/* the parent, starting at byte offset 20 (word 5) */
	refn = ORANGEFS_I(parent)->refn;
	ORANGEFS_khandle_to(&refn.khandle, (char *) fh + 20, 16);
	fh[9] = refn.fs_id;

	gossip_debug(GOSSIP_SUPER_DEBUG,
		     "Encoding parent: handle %pU, fsid %u\n",
		     &refn.khandle,
		     refn.fs_id);

	*max_len = needed;
	return 2;
}
/** lebf_decode_rel() * * releases resources consumed while decoding * * no return value */ static void lebf_decode_rel(struct PINT_decoded_msg *msg, enum PINT_encode_msg_type input_type) { gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_decode_rel\n"); if (input_type == PINT_DECODE_REQ) { struct PVFS_server_req *req = &msg->stub_dec.req; switch (req->op) { case PVFS_SERV_CREATE: if (req->u.create.attr.mask & PVFS_ATTR_META_DIST) decode_free(req->u.create.attr.u.meta.dist); if (req->u.create.layout.server_list.servers) decode_free(req->u.create.layout.server_list.servers); break; case PVFS_SERV_BATCH_CREATE: decode_free( req->u.batch_create.handle_extent_array.extent_array); break; case PVFS_SERV_IO: decode_free(req->u.io.io_dist); decode_free(req->u.io.file_req); break; case PVFS_SERV_SMALL_IO: decode_free(req->u.small_io.dist); decode_free(req->u.small_io.file_req); break; case PVFS_SERV_MIRROR: decode_free(req->u.mirror.dist); decode_free(req->u.mirror.dst_handle); decode_free(req->u.mirror.wcIndex); break; case PVFS_SERV_MKDIR: decode_free(req->u.mkdir.handle_extent_array.extent_array); if (req->u.mkdir.attr.mask & PVFS_ATTR_META_DIST) decode_free(req->u.mkdir.attr.u.meta.dist); if (req->u.mkdir.attr.mask & PVFS_ATTR_META_DFILES) decode_free(req->u.mkdir.attr.u.meta.dfile_array); break; case PVFS_SERV_MGMT_DSPACE_INFO_LIST: decode_free(req->u.mgmt_dspace_info_list.handle_array); break; case PVFS_SERV_SETATTR: if (req->u.setattr.attr.mask & PVFS_ATTR_META_DIST) decode_free(req->u.setattr.attr.u.meta.dist); if (req->u.setattr.attr.mask & PVFS_ATTR_META_DFILES) decode_free(req->u.setattr.attr.u.meta.dfile_array); break; case PVFS_SERV_TREE_REMOVE: decode_free(req->u.tree_remove.handle_array); break; case PVFS_SERV_TREE_GET_FILE_SIZE: decode_free(req->u.tree_get_file_size.handle_array); break; case PVFS_SERV_LISTATTR: if (req->u.listattr.handles) decode_free(req->u.listattr.handles); break; case PVFS_SERV_SETEATTR: decode_free(req->u.seteattr.key); 
decode_free(req->u.seteattr.val); break; case PVFS_SERV_GETEATTR: decode_free(req->u.geteattr.key); decode_free(req->u.geteattr.valsz); break; case PVFS_SERV_GETCONFIG: case PVFS_SERV_LOOKUP_PATH: case PVFS_SERV_REMOVE: case PVFS_SERV_MGMT_REMOVE_OBJECT: case PVFS_SERV_MGMT_REMOVE_DIRENT: case PVFS_SERV_MGMT_GET_DIRDATA_HANDLE: case PVFS_SERV_GETATTR: case PVFS_SERV_CRDIRENT: case PVFS_SERV_RMDIRENT: case PVFS_SERV_CHDIRENT: case PVFS_SERV_TRUNCATE: case PVFS_SERV_READDIR: case PVFS_SERV_FLUSH: case PVFS_SERV_MGMT_SETPARAM: case PVFS_SERV_MGMT_NOOP: case PVFS_SERV_STATFS: case PVFS_SERV_MGMT_ITERATE_HANDLES: case PVFS_SERV_MGMT_PERF_MON: case PVFS_SERV_MGMT_EVENT_MON: case PVFS_SERV_MGMT_GET_UID: case PVFS_SERV_DELEATTR: case PVFS_SERV_LISTEATTR: case PVFS_SERV_BATCH_REMOVE: case PVFS_SERV_UNSTUFF: case PVFS_SERV_IMM_COPIES: /*nothing to free*/ break; case PVFS_SERV_INVALID: case PVFS_SERV_WRITE_COMPLETION: case PVFS_SERV_PERF_UPDATE: case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: case PVFS_SERV_PROTO_ERROR: case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_lerr("%s: invalid request operation %d.\n", __func__, req->op); break; } } else if (input_type == PINT_DECODE_RESP) { struct PVFS_server_resp *resp = &msg->stub_dec.resp; if(resp->status == 0) { switch (resp->op) { case PVFS_SERV_LOOKUP_PATH: { struct PVFS_servresp_lookup_path *lookup = &resp->u.lookup_path; decode_free(lookup->handle_array); decode_free(lookup->attr_array); break; } case PVFS_SERV_READDIR: decode_free(resp->u.readdir.dirent_array); break; case PVFS_SERV_MGMT_PERF_MON: decode_free(resp->u.mgmt_perf_mon.perf_array); break; case PVFS_SERV_MGMT_ITERATE_HANDLES: decode_free(resp->u.mgmt_iterate_handles.handle_array); break; case PVFS_SERV_BATCH_CREATE: decode_free(resp->u.batch_create.handle_array); break; case PVFS_SERV_CREATE: decode_free(resp->u.create.datafile_handles); break; case PVFS_SERV_MGMT_DSPACE_INFO_LIST: decode_free(resp->u.mgmt_dspace_info_list.dspace_info_array); 
break; case PVFS_SERV_GETATTR: if (resp->u.getattr.attr.mask & PVFS_ATTR_META_DIST) decode_free(resp->u.getattr.attr.u.meta.dist); if (resp->u.getattr.attr.mask & PVFS_ATTR_META_DFILES) decode_free(resp->u.getattr.attr.u.meta.dfile_array); if ( resp->u.getattr.attr.mask & PVFS_ATTR_META_MIRROR_DFILES ) decode_free (resp->u.getattr.attr.u.meta.mirror_dfile_array); break; case PVFS_SERV_UNSTUFF: if (resp->u.unstuff.attr.mask & PVFS_ATTR_META_DIST) decode_free(resp->u.unstuff.attr.u.meta.dist); if (resp->u.unstuff.attr.mask & PVFS_ATTR_META_DFILES) { decode_free(resp->u.unstuff.attr.u.meta.dfile_array); } if ( resp->u.unstuff.attr.mask & PVFS_ATTR_META_MIRROR_DFILES ) decode_free (resp->u.unstuff.attr.u.meta.mirror_dfile_array); break; case PVFS_SERV_MGMT_EVENT_MON: decode_free(resp->u.mgmt_event_mon.event_array); break; case PVFS_SERV_GETEATTR: /** need a loop here? WBL */ if (resp->u.geteattr.val) decode_free(resp->u.geteattr.val); break; case PVFS_SERV_LISTEATTR: if (resp->u.listeattr.key) decode_free(resp->u.listeattr.key); break; case PVFS_SERV_LISTATTR: { int i; if (resp->u.listattr.error) decode_free(resp->u.listattr.error); if (resp->u.listattr.attr) { for (i = 0; i < resp->u.listattr.nhandles; i++) { if (resp->u.listattr.attr[i].mask & PVFS_ATTR_META_DIST) decode_free(resp->u.listattr.attr[i].u.meta.dist); if (resp->u.listattr.attr[i].mask & PVFS_ATTR_META_DFILES) { decode_free( resp->u.listattr.attr[i].u.meta.dfile_array); } if( resp->u.listattr.attr[i].mask & PVFS_ATTR_META_MIRROR_DFILES ) decode_free( resp->u.listattr.attr[i].u.meta.mirror_dfile_array); }/*end for*/ decode_free(resp->u.listattr.attr); }/*end if attr*/ break; }/*end case*/ case PVFS_SERV_MIRROR: { decode_free(resp->u.mirror.bytes_written); decode_free(resp->u.mirror.write_status_code); break; } case PVFS_SERV_TREE_GET_FILE_SIZE: { decode_free(resp->u.tree_get_file_size.size); decode_free(resp->u.tree_get_file_size.error); break; } case PVFS_SERV_MGMT_GET_UID: { 
decode_free(resp->u.mgmt_get_uid.uid_info_array); break; } case PVFS_SERV_GETCONFIG: case PVFS_SERV_REMOVE: case PVFS_SERV_MGMT_REMOVE_OBJECT: case PVFS_SERV_MGMT_REMOVE_DIRENT: case PVFS_SERV_MGMT_GET_DIRDATA_HANDLE: case PVFS_SERV_IO: case PVFS_SERV_SMALL_IO: case PVFS_SERV_SETATTR: case PVFS_SERV_SETEATTR: case PVFS_SERV_DELEATTR: case PVFS_SERV_CRDIRENT: case PVFS_SERV_RMDIRENT: case PVFS_SERV_CHDIRENT: case PVFS_SERV_TRUNCATE: case PVFS_SERV_MKDIR: case PVFS_SERV_FLUSH: case PVFS_SERV_MGMT_SETPARAM: case PVFS_SERV_MGMT_NOOP: case PVFS_SERV_STATFS: case PVFS_SERV_WRITE_COMPLETION: case PVFS_SERV_PROTO_ERROR: case PVFS_SERV_BATCH_REMOVE: case PVFS_SERV_IMM_COPIES: case PVFS_SERV_TREE_REMOVE: /*nothing to free */ break; case PVFS_SERV_INVALID: case PVFS_SERV_PERF_UPDATE: case PVFS_SERV_PRECREATE_POOL_REFILLER: case PVFS_SERV_JOB_TIMER: case PVFS_SERV_NUM_OPS: /** sentinel */ gossip_lerr("%s: invalid response operation %d.\n", __func__, resp->op); break; } } } }
/** lebf_decode_resp()
 *
 * Decodes a response structure out of input_buffer into
 * target_msg->stub_dec.resp and points target_msg->buffer at it.
 *
 * The generic header (which carries the op number and status) is always
 * decoded.  The op-specific body is decoded only when the status is 0; a
 * non-zero status returns 0 immediately without looking at the rest of the
 * buffer.
 *
 * target_addr is not used by this function.
 *
 * returns 0 on success, -PVFS_EPROTO when the op number is one of the
 * invalid/sentinel codes or when the number of bytes consumed does not
 * match input_size
 */
static int lebf_decode_resp(
    void *input_buffer,
    int input_size,
    struct PINT_decoded_msg *target_msg,
    PVFS_BMI_addr_t target_addr)
{
    char *cursor = input_buffer;
    char **p = &cursor;
    struct PVFS_server_resp *resp = &target_msg->stub_dec.resp;

    target_msg->buffer = resp;

    /** decode generic part of response (including op number) */
    decode_PVFS_server_resp(p, resp);
    gossip_debug(GOSSIP_ENDECODE_DEBUG,"lebf_decode_resp\n");

    /* an error response carries no op-specific body */
    if (resp->status != 0)
    {
        return 0;
    }

#define CASE(tag,var) \
    case tag: decode_PVFS_servresp_##var(p,&resp->u.var); break

    switch (resp->op)
    {
        /** ops whose body is decoded by the standard generated function */
        CASE(PVFS_SERV_GETCONFIG, getconfig);
        CASE(PVFS_SERV_LOOKUP_PATH, lookup_path);
        CASE(PVFS_SERV_CREATE, create);
        CASE(PVFS_SERV_MIRROR, mirror);
        CASE(PVFS_SERV_UNSTUFF, unstuff);
        CASE(PVFS_SERV_BATCH_CREATE, batch_create);
        CASE(PVFS_SERV_IO, io);
        CASE(PVFS_SERV_SMALL_IO, small_io);
        CASE(PVFS_SERV_GETATTR, getattr);
        CASE(PVFS_SERV_RMDIRENT, rmdirent);
        CASE(PVFS_SERV_CHDIRENT, chdirent);
        CASE(PVFS_SERV_MKDIR, mkdir);
        CASE(PVFS_SERV_READDIR, readdir);
        CASE(PVFS_SERV_STATFS, statfs);
        CASE(PVFS_SERV_MGMT_PERF_MON, mgmt_perf_mon);
        CASE(PVFS_SERV_MGMT_ITERATE_HANDLES, mgmt_iterate_handles);
        CASE(PVFS_SERV_MGMT_DSPACE_INFO_LIST, mgmt_dspace_info_list);
        CASE(PVFS_SERV_MGMT_EVENT_MON, mgmt_event_mon);
        CASE(PVFS_SERV_MGMT_GET_DIRDATA_HANDLE, mgmt_get_dirdata_handle);
        CASE(PVFS_SERV_WRITE_COMPLETION, write_completion);
        CASE(PVFS_SERV_GETEATTR, geteattr);
        CASE(PVFS_SERV_LISTEATTR, listeattr);
        CASE(PVFS_SERV_LISTATTR, listattr);
        CASE(PVFS_SERV_TREE_GET_FILE_SIZE, tree_get_file_size);
        CASE(PVFS_SERV_MGMT_GET_UID, mgmt_get_uid);

        /** ops whose response has no body beyond the generic header */
        case PVFS_SERV_REMOVE:
        case PVFS_SERV_BATCH_REMOVE:
        case PVFS_SERV_MGMT_REMOVE_OBJECT:
        case PVFS_SERV_MGMT_REMOVE_DIRENT:
        case PVFS_SERV_TREE_REMOVE:
        case PVFS_SERV_SETATTR:
        case PVFS_SERV_SETEATTR:
        case PVFS_SERV_DELEATTR:
        case PVFS_SERV_CRDIRENT:
        case PVFS_SERV_TRUNCATE:
        case PVFS_SERV_FLUSH:
        case PVFS_SERV_MGMT_NOOP:
        case PVFS_SERV_PROTO_ERROR:
        case PVFS_SERV_IMM_COPIES:
        case PVFS_SERV_MGMT_SETPARAM:
            break;

        /** codes that must never arrive as a response */
        case PVFS_SERV_INVALID:
        case PVFS_SERV_PERF_UPDATE:
        case PVFS_SERV_PRECREATE_POOL_REFILLER:
        case PVFS_SERV_JOB_TIMER:
        case PVFS_SERV_NUM_OPS:  /** sentinel */
            gossip_lerr("%s: invalid operation %d.\n", __func__, resp->op);
            return -PVFS_EPROTO;
    }

#undef CASE

    /* the decoders must have consumed exactly the whole message */
    if (cursor != (char *) input_buffer + input_size)
    {
        gossip_lerr("%s: op %d consumed %ld bytes, but message was %d bytes.\n",
                    __func__, resp->op,
                    (long)(cursor - (char *) input_buffer), input_size);
        return -PVFS_EPROTO;
    }

    return 0;
}
/** Initiate modification of attributes of a single object. */ PVFS_error PVFS_isys_setattr( PVFS_object_ref ref, PVFS_sys_attr attr, const PVFS_credentials *credentials, PVFS_sys_op_id *op_id, PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; PINT_smcb *smcb = NULL; PINT_client_sm *sm_p = NULL; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_isys_setattr entered\n"); if ((ref.handle == PVFS_HANDLE_NULL) || (ref.fs_id == PVFS_FS_ID_NULL)) { gossip_err("invalid (NULL) required argument\n"); return ret; } /* * make sure the caller didn't set invalid mask bits. * only common attributes can be set. */ if ((attr.mask & ~PVFS_ATTR_SYS_ALL_TIMES) != 0) { gossip_lerr("PVFS_isys_setattr() failure: invalid attributes " "specified\n"); return ret; } /* make sure that the permission bits are acceptable */ if ((attr.mask & PVFS_ATTR_SYS_PERM) && (attr.perms & ~PVFS_PERM_VALID) != 0) { gossip_lerr("PVFS_isys_setattr() failure: invalid or unsupported" "permission bits\n"); return(-PVFS_EINVAL); } PINT_smcb_alloc(&smcb, PVFS_SYS_SETATTR, sizeof(struct PINT_client_sm), client_op_state_get_machine, client_state_machine_terminate, pint_client_sm_context); if (smcb == NULL) { return -PVFS_ENOMEM; } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->object_ref = ref; PVFS_hint_copy(hints, &sm_p->hints); PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &ref.handle); ret = PVFS_util_copy_sys_attr(&sm_p->u.setattr.sys_attr, &attr); if(ret < 0) { gossip_lerr("PVFS_isys_setattr() failure: %s\n", strerror(PVFS_get_errno_mapping(-ret))); return ret; } gossip_debug(GOSSIP_CLIENT_DEBUG, "Doing setattr on handle %llu " "on fs %d\n", llu(ref.handle), ref.fs_id); return PINT_client_state_machine_post( smcb, op_id, user_ptr); }
/*
 * Function: io_send_completion_ack()
 *
 * Params:   struct PINT_smcb *smcb,
 *           job_status_s *js_p
 *
 * Pre:      js_p->error_code holds the status to report
 *
 * Post:     a PVFS_SERV_WRITE_COMPLETION response has been encoded and
 *           its send has been posted via job_bmi_send_list()
 *
 * Returns:  SM_ACTION_COMPLETE (with js_p->error_code set) if encoding
 *           fails, otherwise the return value of job_bmi_send_list()
 *
 * Synopsis: releases the encoding of the previously sent ack, fills in a
 *           write-completion response carrying js_p->error_code as its
 *           status, encodes it, and sends it to the client via BMI.
 *           Note that it may send either positive or negative
 *           acknowledgements.
 */
static PINT_sm_action io_send_completion_ack(
    struct PINT_smcb *smcb, job_status_s *js_p)
{
    struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
    int err = -PVFS_EIO;
    job_id_t tmp_id;
    struct server_configuration_s *user_opts = get_server_config_struct();

    gossip_debug(GOSSIP_SERVER_DEBUG, "send completion ack 1 :%lld\n",
                 lld(s_op->u.migposter.flow_desc->total_transferred));

    /* release encoding of the first ack that we sent */
    PINT_encode_release(&s_op->encoded, PINT_ENCODE_RESP);

    /* zero size for safety */
    s_op->encoded.total_size = 0;

    /* fill in response -- status field is the only generic one we should
     * have to set */
    s_op->resp.op = PVFS_SERV_WRITE_COMPLETION;  /* not IO */
    s_op->resp.status = js_p->error_code;

    /* NOTE(review): a fixed placeholder is reported here; the original
     * source carried
     * s_op->u.migposter.flow_desc->total_transferred commented out next to
     * it -- confirm which value the peer expects. */
    s_op->resp.u.write_completion.total_completed = 1111;

    /* converted with lld() like every other %lld argument in this function
     * so the argument type always matches the format */
    gossip_debug(GOSSIP_LB_DEBUG,
                 "Server->send flow completion ack :%lld\n",
                 lld(s_op->resp.u.write_completion.total_completed));

    err = PINT_encode(
        &s_op->resp,
        PINT_ENCODE_RESP,
        &(s_op->encoded),
        s_op->addr,
        s_op->decoded.enc_type);

    if (err < 0)
    {
        gossip_lerr("Server: IO SM: PINT_encode() failure.\n");
        js_p->error_code = err;
        return SM_ACTION_COMPLETE;
    }

    gossip_debug(GOSSIP_SERVER_DEBUG, "send completion ack 3 :%lld\n",
                 lld(s_op->u.migposter.flow_desc->total_transferred));

    /* NOTE(review): the message tag is the literal 5 here, whereas
     * io_send_ack() in this file passes s_op->tag -- confirm this is
     * intentional. */
    err = job_bmi_send_list(
        s_op->addr,
        s_op->encoded.buffer_list,
        s_op->encoded.size_list,
        s_op->encoded.list_count,
        s_op->encoded.total_size,
        5,
        s_op->encoded.buffer_type,
        0,
        smcb,
        0,
        js_p,
        &tmp_id,
        server_job_context,
        user_opts->client_job_bmi_timeout,
        NULL);

    gossip_debug(GOSSIP_SERVER_DEBUG, "job_bmi_send_list: err=%d\n", err);

    return err;
}
/*
 * Function: io_send_ack()
 *
 * Params:   struct PINT_smcb *smcb,
 *           job_status_s *js_p
 *
 * Pre:      error code has been set in job status for us to
 *           report to client
 *
 * Post:     the response has been encoded and its send has been posted;
 *           mig_datafile_handle has been set to new_handle
 *
 * Returns:  SM_ACTION_COMPLETE (with js_p->error_code set) if encoding
 *           fails, otherwise the return value of job_bmi_send_list()
 *
 * Synopsis: fills in a response carrying js_p->error_code and the new
 *           datafile handle, encodes it, and sends it to the client via
 *           BMI.  Note that it may send either positive or negative
 *           acknowledgements.
 */
static int io_send_ack(
    struct PINT_smcb *smcb, job_status_s *js_p)
{
    struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
    int err = -PVFS_EIO;
    job_id_t tmp_id;
    struct server_configuration_s *user_opts = get_server_config_struct();

    /* handles are converted with llu(), as in the second debug message
     * below, so the arguments always match the format; the message is also
     * newline-terminated like every other log line here */
    gossip_debug(GOSSIP_LB_DEBUG,
                 "new df handle %llu with md handle %llu\n",
                 llu(new_handle), llu(set_df_attr.metafile));

    /* report the job status and the new datafile handle to the client */
    s_op->resp.status = js_p->error_code;
    s_op->resp.u.migposter.handle = new_handle;

    /*
     * record the handle for new data file
     * to escape the load counter for this handle
     */
    mig_datafile_handle = new_handle;

    gossip_debug(GOSSIP_SERVER_DEBUG,
                 "nio_send_ack error_code = %d\n new handle = %llu\n",
                 js_p->error_code, llu(new_handle));

    err = PINT_encode(&s_op->resp, PINT_ENCODE_RESP, &(s_op->encoded),
                      s_op->addr, s_op->decoded.enc_type);
    if (err < 0)
    {
        gossip_lerr("Server: migrate server SM: PINT_encode() failure.\n");
        js_p->error_code = err;
        return SM_ACTION_COMPLETE;
    }

    err = job_bmi_send_list(
        s_op->addr,
        s_op->encoded.buffer_list,
        s_op->encoded.size_list,
        s_op->encoded.list_count,
        s_op->encoded.total_size,
        s_op->tag,
        s_op->encoded.buffer_type,
        0,
        smcb,
        0,
        js_p,
        &tmp_id,
        server_job_context,
        user_opts->server_job_bmi_timeout,
        NULL);

    return err;
}
/*
 * aio_progress_notification()
 *
 * Completion callback for list I/O; this function installs itself as the
 * SIGEV_THREAD notify function of the op's sigevent further down.
 * sig.sival_ptr carries the dbpf_queued_op_t being serviced.
 *
 * Phase 1: collect the result of every aiocb that is not LIO_NOP, adding
 *          each return value to *out_size_p and marking the slot LIO_NOP.
 *          The first aiocb reporting an error jumps straight into the
 *          cleanup code of phase 2.
 * Phase 2: if the op's list_proc_state is LIST_PROC_ALLPOSTED (or an error
 *          occurred), finish the op: for writes, grow the recorded
 *          datafile size when the request ended beyond it, then either
 *          complete the op or turn it into a DSPACE_SETATTR op that is
 *          handed to dbpf_sync_coalesce(), and start any delayed ops.
 * Phase 3: otherwise convert the next batch of list I/O into aiocbs and
 *          post it through issue_or_delay_io_operation().
 *
 * NOTE(review): control flow enters the middle of the phase 2 block via
 * goto, so statement order in this function is significant.
 */
static void aio_progress_notification(union sigval sig)
{
    dbpf_queued_op_t *cur_op = NULL;
    struct dbpf_op *op_p = NULL;
    int ret, i, aiocb_inuse_count, state = 0;
    struct aiocb *aiocb_p = NULL, *aiocb_ptr_array[AIOCB_ARRAY_SZ] = {0};
    PVFS_size eor = -1; /* end of request: largest offset + size seen */
    int j;
    TROVE_ds_attributes attr;
    TROVE_object_ref ref;
    int sync_required = 0; /* set when the size grew and TROVE_SYNC is on */

    cur_op = (dbpf_queued_op_t *)sig.sival_ptr;
    assert(cur_op);

    op_p = &cur_op->op;
    assert(op_p);

    gossip_debug(
        GOSSIP_TROVE_DEBUG," --- aio_progress_notification called "
        "with handle %llu (%p)\n", llu(op_p->handle), cur_op);

    aiocb_p = op_p->u.b_rw_list.aiocb_array;
    assert(aiocb_p);

    /* the op state is sampled under the op mutex; it is only used for the
     * sanity assert below */
    gen_mutex_lock(&cur_op->mutex);
    state = cur_op->op.state;
    gen_mutex_unlock(&cur_op->mutex);

    assert(state != OP_COMPLETED);

    /*
      we should iterate through the ops here to determine the
      error/return value of the op based on individual request
      error/return values.  they're ignored for now, however.
    */
    for (i = 0; i < op_p->u.b_rw_list.aiocb_array_count; i++)
    {
        if (aiocb_p[i].aio_lio_opcode == LIO_NOP)
        {
            continue;
        }

        /* aio_error gets the "errno" value of the individual op */
        ret = op_p->u.b_rw_list.aio_ops->aio_error(&aiocb_p[i]);
        if (ret == 0)
        {
            /* aio_return gets the return value of the individual op */
            ret = op_p->u.b_rw_list.aio_ops->aio_return(&aiocb_p[i]);

            gossip_debug(GOSSIP_TROVE_DEBUG, "%s: %s complete: "
                         "aio_return() says %d [fd = %d]\n",
                         __func__, dbpf_op_type_to_str(op_p->type),
                         ret, op_p->u.b_rw_list.fd);

            /* accumulate the bytes transferred by this aiocb */
            *(op_p->u.b_rw_list.out_size_p) += ret;

            /* mark as a NOP so we ignore it from now on */
            aiocb_p[i].aio_lio_opcode = LIO_NOP;
        }
        else
        {
            gossip_debug(GOSSIP_TROVE_DEBUG, "error %d (%s) from "
                         "aio_error/aio_return on block %d; "
                         "skipping\n", ret, strerror(ret), i);

            /* abandon the remaining aiocbs and go finish the op with the
             * translated error */
            ret = -trove_errno_to_trove_error(ret);
            goto final_threaded_aio_cleanup;
        }
    }

    if (op_p->u.b_rw_list.list_proc_state == LIST_PROC_ALLPOSTED)
    {
        /* everything posted has been collected without error */
        ret = 0;

      final_threaded_aio_cleanup:

        if ((op_p->type == BSTREAM_WRITE_AT) ||
            (op_p->type == BSTREAM_WRITE_LIST))
        {
            DBPF_AIO_SYNC_IF_NECESSARY(
                op_p, op_p->u.b_rw_list.fd, ret);

            /* TODO: need similar logic for non-threaded aio case too */

            /* calculate end of request */
            for(j=0; j<op_p->u.b_rw_list.stream_array_count; j++)
            {
                if(eor < op_p->u.b_rw_list.stream_offset_array[j] +
                    op_p->u.b_rw_list.stream_size_array[j])
                {
                    eor = op_p->u.b_rw_list.stream_offset_array[j] +
                        op_p->u.b_rw_list.stream_size_array[j];
                }
            }

            ref.fs_id = op_p->coll_p->coll_id;
            ref.handle = op_p->handle;

            /* the read-compare-write of the stored size is done under
             * dbpf_update_size_lock */
            gen_mutex_lock(&dbpf_update_size_lock);
            /* NOTE(review): ret is overwritten here, so for write ops an
             * aio error carried in by the goto above is replaced by the
             * result of the attribute lookup -- confirm this is intended */
            ret = dbpf_dspace_attr_get(op_p->coll_p, ref, &attr);
            if(ret != 0)
            {
                gen_mutex_unlock(&dbpf_update_size_lock);
                goto error_in_cleanup;
            }

            if(eor > attr.u.datafile.b_size)
            {
                /* set the size of the file */
                attr.u.datafile.b_size = eor;
                ret = dbpf_dspace_attr_set(op_p->coll_p, ref, &attr);
                if(ret != 0)
                {
                    gen_mutex_unlock(&dbpf_update_size_lock);
                    goto error_in_cleanup;
                }
                if(op_p->flags & TROVE_SYNC)
                {
                    sync_required = 1;
                }
            }
            gen_mutex_unlock(&dbpf_update_size_lock);
        }

error_in_cleanup:

        /* give the file reference back to the open cache and forget the fd */
        dbpf_open_cache_put(&op_p->u.b_rw_list.open_ref);
        op_p->u.b_rw_list.fd = -1;

        /* NOTE(review): this writes cur_op->state, while the rest of the
         * function reads and writes cur_op->op.state -- confirm the two
         * fields are distinct on purpose */
        cur_op->state = ret;
        /* this is a macro defined in dbpf-thread.h */

        if(sync_required)
        {
            int outcount;

            gossip_debug(GOSSIP_TROVE_DEBUG,
                "aio updating size for handle %llu\n", llu(ref.handle));

            /* If we updated the size, then convert cur_op into a setattr.
             * Note that we are not actually going to perform a setattr.
             * We just want the coalescing path to treat it like a setattr
             * so that the size update is synced before we complete.
             */

            /* We need to free the aiocb_array in this case, since the
             * dbpf_queued_op_free function won't know to do that anymore
             */
            free(cur_op->op.u.b_rw_list.aiocb_array);
            cur_op->op.u.b_rw_list.aiocb_array = NULL;

            dbpf_queued_op_init(cur_op,
                                DSPACE_SETATTR,
                                ref.handle,
                                cur_op->op.coll_p,
                                dbpf_dspace_setattr_op_svc,
                                cur_op->op.user_ptr,
                                TROVE_SYNC,
                                cur_op->op.context_id);
            cur_op->op.state = OP_IN_SERVICE;
            dbpf_sync_coalesce(cur_op, 0, &outcount);
        }
        else
        {
            dbpf_queued_op_complete(cur_op, OP_COMPLETED);
        }

        /* NOTE(review): op_p aliases cur_op->op, which was re-initialised
         * above when sync_required was set; the state string printed here
         * is read after that -- confirm the union member is still valid */
        gossip_debug(GOSSIP_TROVE_DEBUG, "*** starting delayed ops if any "
                     "(state is %s)\n",
                     list_proc_state_strings[
                     op_p->u.b_rw_list.list_proc_state]);

        /* this op no longer counts as in progress (dec_first = 1) */
        start_delayed_ops_if_any(1);
    }
    else
    {
        gossip_debug(GOSSIP_TROVE_DEBUG, "*** issuing more aio requests "
                     "(state is %s)\n",
                     list_proc_state_strings[
                     op_p->u.b_rw_list.list_proc_state]);

        /* no operations in progress; convert and post some more */
        op_p->u.b_rw_list.aiocb_array_count = AIOCB_ARRAY_SZ;
        op_p->u.b_rw_list.aiocb_array = aiocb_p;

        /* convert listio arguments into aiocb structures */
        /* aiocb_inuse_count goes in as the array capacity and is passed by
         * address so the converter can update it */
        aiocb_inuse_count = op_p->u.b_rw_list.aiocb_array_count;
        ret = dbpf_bstream_listio_convert(
            op_p->u.b_rw_list.fd,
            op_p->u.b_rw_list.opcode,
            op_p->u.b_rw_list.mem_offset_array,
            op_p->u.b_rw_list.mem_size_array,
            op_p->u.b_rw_list.mem_array_count,
            op_p->u.b_rw_list.stream_offset_array,
            op_p->u.b_rw_list.stream_size_array,
            op_p->u.b_rw_list.stream_array_count,
            aiocb_p,
            &aiocb_inuse_count,
            &op_p->u.b_rw_list.lio_state);

        /* a return of 1 is treated as "the whole list has been converted" */
        if (ret == 1)
        {
            op_p->u.b_rw_list.list_proc_state = LIST_PROC_ALLCONVERTED;
        }

        /* have this function invoked again when the next batch finishes */
        op_p->u.b_rw_list.sigev.sigev_notify = SIGEV_THREAD;
        op_p->u.b_rw_list.sigev.sigev_notify_attributes = NULL;
        op_p->u.b_rw_list.sigev.sigev_notify_function =
            aio_progress_notification;
        op_p->u.b_rw_list.sigev.sigev_value.sival_ptr = (void *)cur_op;

        /* mark the unused with LIO_NOPs */
        for(i = aiocb_inuse_count;
            i < op_p->u.b_rw_list.aiocb_array_count; i++)
        {
            /* mark these as NOPs and we'll ignore them */
            aiocb_p[i].aio_lio_opcode = LIO_NOP;
        }

        /* lio_listio takes an array of pointers to the aiocbs */
        for(i = 0; i < aiocb_inuse_count; i++)
        {
            aiocb_ptr_array[i] = &aiocb_p[i];
        }

        assert(cur_op == op_p->u.b_rw_list.sigev.sigev_value.sival_ptr);

        /* the state advances before the post so the next notification
         * takes the completion branch */
        if (op_p->u.b_rw_list.list_proc_state == LIST_PROC_ALLCONVERTED)
        {
            op_p->u.b_rw_list.list_proc_state = LIST_PROC_ALLPOSTED;
        }

        /* dec_first = 1: the batch that just finished is no longer counted
         * as in progress */
        ret = issue_or_delay_io_operation(
            cur_op, aiocb_ptr_array, aiocb_inuse_count,
            &op_p->u.b_rw_list.sigev, 1);

        if (ret)
        {
            gossip_lerr("issue_or_delay_io_operation() returned "
                        "%d\n", ret);
        }
    }
}
/*
 * issue_or_delay_io_operation()
 *
 * Either posts the given aiocbs right away with lio_listio(LIO_NOWAIT) or,
 * when TROVE_max_concurrent_io operations are already in progress, parks
 * cur_op on s_dbpf_io_ready_queue (created on first use) so that it is
 * started later.
 *
 * cur_op            - queued operation the aiocbs belong to (must not be NULL)
 * aiocb_ptr_array   - pointers to the aiocbs to post
 * aiocb_inuse_count - number of valid entries in aiocb_ptr_array
 * sig               - completion notification handed to lio_listio()
 * dec_first         - non-zero to decrement the in-progress counter before
 *                     deciding
 *
 * The in-progress counter and the ready queue are only touched while
 * s_dbpf_io_mutex is held; the mutex is released on every return path.
 *
 * Returns 0 when the operation was posted or delayed, -TROVE_ENOMEM when
 * the ready queue cannot be created, or the translated errno when
 * lio_listio() fails (in which case the op's open-cache reference is
 * dropped as well).
 */
static int issue_or_delay_io_operation(
    dbpf_queued_op_t *cur_op,
    struct aiocb **aiocb_ptr_array,
    int aiocb_inuse_count,
    struct sigevent *sig,
    int dec_first)
{
    int ret = -TROVE_EINVAL, op_delayed = 0;
    int i;

    assert(cur_op);

    gen_mutex_lock(&s_dbpf_io_mutex);
    if (dec_first)
    {
        s_dbpf_ios_in_progress--;
    }
    if (s_dbpf_ios_in_progress < TROVE_max_concurrent_io)
    {
        s_dbpf_ios_in_progress++;
    }
    else
    {
        if (s_dbpf_io_ready_queue == NULL)
        {
            s_dbpf_io_ready_queue = dbpf_op_queue_new();
            if (!s_dbpf_io_ready_queue)
            {
                /* do not leave with the mutex held */
                gen_mutex_unlock(&s_dbpf_io_mutex);
                return -TROVE_ENOMEM;
            }
        }
        assert(s_dbpf_io_ready_queue);
        dbpf_op_queue_add(s_dbpf_io_ready_queue, cur_op);

        op_delayed = 1;
#ifndef __PVFS2_TROVE_AIO_THREADED__
        /*
          setting this state flag tells the caller not to re-add this
          operation to the normal dbpf-op queue because it will be
          started automatically (internally) on completion of other
          I/O operations
        */
        gen_mutex_lock(&cur_op->mutex);
        cur_op->op.state = OP_INTERNALLY_DELAYED;
        gen_mutex_unlock(&cur_op->mutex);
#endif

        gossip_debug(GOSSIP_TROVE_DEBUG, "delayed I/O operation %p "
                     "(%d already in progress)\n",
                     cur_op, s_dbpf_ios_in_progress);
    }

    gossip_debug(GOSSIP_TROVE_DEBUG, "DBPF I/O ops in progress: %d\n",
                 s_dbpf_ios_in_progress);

    gen_mutex_unlock(&s_dbpf_io_mutex);

    if (!op_delayed)
    {
        if (gossip_debug_enabled(GOSSIP_TROVE_DEBUG))
        {
            gossip_debug(GOSSIP_TROVE_DEBUG,
                         "lio_listio called with the following aiocbs:\n");
            for (i = 0; i < aiocb_inuse_count; i++)
            {
                gossip_debug(GOSSIP_TROVE_DEBUG,
                             "aiocb_ptr_array[%d]: fd: %d, "
                             "off: %lld, bytes: %d, buf: %p, type: %d\n",
                             i, aiocb_ptr_array[i]->aio_fildes,
                             lld(aiocb_ptr_array[i]->aio_offset),
                             (int)aiocb_ptr_array[i]->aio_nbytes,
                             aiocb_ptr_array[i]->aio_buf,
                             (int)aiocb_ptr_array[i]->aio_lio_opcode);
            }
        }

        ret = cur_op->op.u.b_rw_list.aio_ops->lio_listio(
            LIO_NOWAIT, aiocb_ptr_array,
            aiocb_inuse_count, sig);

        if (ret != 0)
        {
            /* capture errno before any other call can overwrite it */
            int saved_errno = errno;

            /* undo the slot reserved above; the counter is shared, so it
             * is only modified under its mutex */
            gen_mutex_lock(&s_dbpf_io_mutex);
            s_dbpf_ios_in_progress--;
            gen_mutex_unlock(&s_dbpf_io_mutex);

            gossip_lerr("lio_listio() returned %d\n", ret);
            dbpf_open_cache_put(&cur_op->op.u.b_rw_list.open_ref);
            return -trove_errno_to_trove_error(saved_errno);
        }

        gossip_debug(GOSSIP_TROVE_DEBUG, "%s: lio_listio posted %p "
                     "(handle %llu, ret %d)\n", __func__, cur_op,
                     llu(cur_op->op.handle), ret);
    }
    return 0;
}
static void start_delayed_ops_if_any(int dec_first) { int ret = 0; dbpf_queued_op_t *cur_op = NULL; int i = 0, aiocb_inuse_count = 0; struct aiocb *aiocbs = NULL, *aiocb_ptr_array[AIOCB_ARRAY_SZ] = {0}; gen_mutex_lock(&s_dbpf_io_mutex); if (dec_first) { s_dbpf_ios_in_progress--; } gossip_debug(GOSSIP_TROVE_DEBUG, "DBPF I/O ops in progress: %d\n", s_dbpf_ios_in_progress); if (s_dbpf_io_ready_queue == NULL) { s_dbpf_io_ready_queue = dbpf_op_queue_new(); } assert(s_dbpf_io_ready_queue); if (!dbpf_op_queue_empty(s_dbpf_io_ready_queue)) { cur_op = dbpf_op_queue_shownext(s_dbpf_io_ready_queue); assert(cur_op); #ifndef __PVFS2_TROVE_AIO_THREADED__ assert(cur_op->op.state == OP_INTERNALLY_DELAYED); #endif assert((cur_op->op.type == BSTREAM_READ_AT) || (cur_op->op.type == BSTREAM_READ_LIST) || (cur_op->op.type == BSTREAM_WRITE_AT) || (cur_op->op.type == BSTREAM_WRITE_LIST)); dbpf_op_queue_remove(cur_op); gossip_debug(GOSSIP_TROVE_DEBUG, "starting delayed I/O " "operation %p (%d in progress)\n", cur_op, s_dbpf_ios_in_progress); aiocbs = cur_op->op.u.b_rw_list.aiocb_array; assert(aiocbs); for(i = 0; i < AIOCB_ARRAY_SZ; i++) { if (aiocbs[i].aio_lio_opcode != LIO_NOP) { aiocb_inuse_count++; } } for(i = 0; i < aiocb_inuse_count; i++) { aiocb_ptr_array[i] = &aiocbs[i]; } if(gossip_debug_enabled(GOSSIP_TROVE_DEBUG)) { gossip_debug(GOSSIP_TROVE_DEBUG, "lio_listio called with %d following aiocbs:\n", aiocb_inuse_count); for(i=0; i<aiocb_inuse_count; i++) { gossip_debug( GOSSIP_TROVE_DEBUG, "aiocb_ptr_array[%d]: fd: %d, off: %lld, " "bytes: %d, buf: %p, type: %d\n", i, aiocb_ptr_array[i]->aio_fildes, lld(aiocb_ptr_array[i]->aio_offset), (int)aiocb_ptr_array[i]->aio_nbytes, aiocb_ptr_array[i]->aio_buf, (int)aiocb_ptr_array[i]->aio_lio_opcode); } } ret = cur_op->op.u.b_rw_list.aio_ops->lio_listio( LIO_NOWAIT, aiocb_ptr_array, aiocb_inuse_count, &cur_op->op.u.b_rw_list.sigev); if (ret != 0) { gossip_lerr("lio_listio() returned %d\n", ret); 
dbpf_open_cache_put(&cur_op->op.u.b_rw_list.open_ref); goto error_exit; } s_dbpf_ios_in_progress++; gossip_debug(GOSSIP_TROVE_DEBUG, "%s: lio_listio posted %p " "(handle %llu, ret %d))\n", __func__, cur_op, llu(cur_op->op.handle), ret); #ifndef __PVFS2_TROVE_AIO_THREADED__ /* to continue making progress on this previously delayed I/O operation, we need to re-add it back to the normal dbpf operation queue so that the calling thread can continue to call the service method (state flag is updated as well) */ dbpf_queued_op_queue_nolock(cur_op); #endif } error_exit: gen_mutex_unlock(&s_dbpf_io_mutex); }
/** Initiate reading of entries from a directory. * * \param token opaque value used to track position in directory * when more than one read is required. * \param pvfs_dirent_incount maximum number of entries to read, if * available, starting from token. */ PVFS_error PVFS_isys_readdir( PVFS_object_ref ref, PVFS_ds_position token, int32_t pvfs_dirent_incount, const PVFS_credentials *credentials, PVFS_sysresp_readdir *resp, PVFS_sys_op_id *op_id, PVFS_hint hints, void *user_ptr) { PVFS_error ret = -PVFS_EINVAL; PINT_smcb *smcb = NULL; PINT_client_sm *sm_p = NULL; gossip_debug(GOSSIP_CLIENT_DEBUG, "PVFS_isys_readdir entered\n"); if ((ref.handle == PVFS_HANDLE_NULL) || (ref.fs_id == PVFS_FS_ID_NULL) || (resp == NULL)) { gossip_err("invalid (NULL) required argument\n"); return ret; } if (pvfs_dirent_incount > PVFS_REQ_LIMIT_DIRENT_COUNT) { gossip_lerr("PVFS_isys_readdir unable to handle request " "for %d entries.\n", pvfs_dirent_incount); return ret; } PINT_smcb_alloc(&smcb, PVFS_SYS_READDIR, sizeof(struct PINT_client_sm), client_op_state_get_machine, client_state_machine_terminate, pint_client_sm_context); if (smcb == NULL) { return -PVFS_ENOMEM; } sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); PINT_init_msgarray_params(sm_p, ref.fs_id); PINT_init_sysint_credentials(sm_p->cred_p, credentials); sm_p->u.readdir.readdir_resp = resp; sm_p->object_ref = ref; PVFS_hint_copy(hints, &sm_p->hints); PVFS_hint_add(&sm_p->hints, PVFS_HINT_HANDLE_NAME, sizeof(PVFS_handle), &ref.handle); /* point the sm dirent array and outcount to the readdir response field */ sm_p->readdir.dirent_array = &resp->dirent_array; sm_p->readdir.dirent_outcount = &resp->pvfs_dirent_outcount; sm_p->readdir.token = &resp->token; sm_p->readdir.directory_version = &resp->directory_version; sm_p->readdir.pos_token = sm_p->u.readdir.pos_token = token; sm_p->readdir.dirent_limit = sm_p->u.readdir.dirent_limit = pvfs_dirent_incount; gossip_debug(GOSSIP_READDIR_DEBUG, "Doing readdir on handle " "%llu on fs 
%d\n", llu(ref.handle), ref.fs_id); return PINT_client_state_machine_post( smcb, op_id, user_ptr); }
/* msgpairarray_post()
 *
 * State action that posts the receive and the send for every message pair
 * in the current frame's msgarray that is not already marked complete.
 *
 * The following elements of the PINT_sm_msgpair_state
 * should be valid prior to this state (for each msgpair in array):
 * - req (unencoded request)
 * - srv_addr of each element in msg array
 *
 * This state performs the following operations for each msgpair,
 * one at a time:
 * (1) encodes request
 * (2) calculates maximum response size
 * (3) allocates BMI memory for response data (encoded)
 * (4) gets a session tag for the pair of messages
 * (5) posts the receive of the response
 * (6) posts the send of the request
 * (7) stores job ids for later matching
 *
 * Steps (1)-(3) are only performed when the pair's encoded_resp_p is NULL;
 * a pair that already carries a response buffer goes straight to (4).
 *
 * smcb - state machine control block; the current frame is read as a
 *        PINT_sm_msgarray_op
 * js_p - job status; error_code is cleared on entry and carries the result
 *
 * Returns SM_ACTION_COMPLETE with:
 *   - js_p->error_code set to the PINT_encode() return value if encoding
 *     fails (after trying further encodings when looping was enabled),
 *   - js_p->error_code set to -PVFS_ENOMEM if BMI_memalloc() returns NULL,
 *   - js_p->error_code set to MSGPAIRS_COMPLETE if comp_ct has dropped to
 *     zero by the end of the loop.
 * Returns SM_ACTION_DEFERRED otherwise (jobs are still outstanding).
 */
static PINT_sm_action msgpairarray_post(
    struct PINT_smcb *smcb, job_status_s *js_p)
{
    PINT_sm_msgarray_op *mop = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
    int ret = -PVFS_EINVAL, i = 0, tmp = 0;
    struct server_configuration_s *server_config = NULL;
    PVFS_msg_tag_t session_tag;
    PINT_sm_msgpair_state *msg_p = NULL;
    struct filesystem_configuration_s *cur_fs = NULL;
    /* set once an unusable encoding was seen; enables the retry loop that
     * walks enc_type upwards when PINT_encode() fails */
    int must_loop_encodings = 0;
    /* set when this call encoded the request and allocated the response
     * buffer, so the recv-failure path knows to release them again.
     * NOTE(review): only cleared in that failure path, never at the top of
     * the loop, so the value carries over to later array entries -- confirm
     * this is intended. */
    int local_enc_and_alloc = 0;

    gossip_debug(
        GOSSIP_MSGPAIR_DEBUG, "%s: sm %p "
        "%d total message(s) with %d incomplete\n", __func__, smcb,
        mop->count * 2, mop->params.comp_ct);

    js_p->error_code = 0;

    /* each pair contributes two completions (one recv, one send) */
    assert(mop->count > 0);
    assert(mop->params.comp_ct >= 2);

    for (i = 0; i < mop->count; i++)
    {
        msg_p = &mop->msgarray[i];
        assert(msg_p);

        /* skip pairs that have already completed; this happens when we
         * come back through here on the retry code path */
        if (msg_p->complete)
        {
            continue;
        }

        msg_p->op_status = 0;

        if (msg_p->encoded_resp_p == NULL)
        {
            /* when a file system id is known, take the encoding from that
             * file system's configuration; otherwise enc_type is used as
             * it was handed in */
            if (msg_p->fs_id != PVFS_FS_ID_NULL)
            {
                server_config = PINT_server_config_mgr_get_config(
                    msg_p->fs_id);
                assert(server_config);

                cur_fs = PINT_config_find_fs_id(
                    server_config, msg_p->fs_id);
                PINT_server_config_mgr_put_config(server_config);
                assert(cur_fs);
                msg_p->enc_type = cur_fs->encoding;
            }

            /* an invalid or unsupported encoding is reported and replaced
             * by the lowest candidate; from there failures step upwards */
            if (!ENCODING_IS_VALID(msg_p->enc_type))
            {
                PRINT_ENCODING_ERROR("supported", msg_p->enc_type);
                must_loop_encodings = 1;
                msg_p->enc_type = (ENCODING_INVALID_MIN + 1);
            }
            else if (!ENCODING_IS_SUPPORTED(msg_p->enc_type))
            {
                PRINT_ENCODING_ERROR("supported", msg_p->enc_type);
                must_loop_encodings = 1;
                msg_p->enc_type = ENCODING_SUPPORTED_MIN;
            }

          /* re-entered via goto below with the next enc_type */
          try_next_encoding:
            assert(ENCODING_IS_VALID(msg_p->enc_type));

            ret = PINT_encode(&msg_p->req, PINT_ENCODE_REQ,
                              &msg_p->encoded_req, msg_p->svr_addr,
                              msg_p->enc_type);
            if (ret != 0)
            {
                if (must_loop_encodings)
                {
                    gossip_debug(GOSSIP_MSGPAIR_DEBUG, "Looping through "
                                 "encodings [%d/%d]\n", msg_p->enc_type,
                                 ENCODING_INVALID_MAX);

                    msg_p->enc_type++;
                    if (ENCODING_IS_VALID(msg_p->enc_type))
                    {
                        goto try_next_encoding;
                    }
                }
                /* no (further) encoding to try: abort the whole state with
                 * the encoder's error; pairs posted earlier in this loop
                 * are left as they are */
                gossip_lerr("msgpairarray_post: PINT_encode failed\n");
                js_p->error_code = ret;
                return SM_ACTION_COMPLETE;
            }

            /* calculate max response msg size and allocate space */
            msg_p->max_resp_sz = PINT_encode_calc_max_size(
                PINT_ENCODE_RESP, msg_p->req.op, msg_p->enc_type);

            msg_p->encoded_resp_p = BMI_memalloc(
                msg_p->svr_addr, msg_p->max_resp_sz, BMI_RECV);

            if (msg_p->encoded_resp_p == NULL)
            {
                /* NOTE(review): encoded_req is not released on this path;
                 * confirm a later state cleans it up */
                js_p->error_code = -PVFS_ENOMEM;
                return SM_ACTION_COMPLETE;
            }
            local_enc_and_alloc = 1;
        }

        /* one tag is shared by the recv and the send of this pair */
        session_tag = PINT_util_get_next_tag();

        gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p msgpair %d: "
                     "posting recv\n", __func__, smcb, i);

        /* post receive of response; job_id stored in recv_id.  The recv is
         * posted with index i; the send below uses mop->count+i */
        ret = job_bmi_recv(msg_p->svr_addr, msg_p->encoded_resp_p,
                           msg_p->max_resp_sz, session_tag,
                           BMI_PRE_ALLOC, smcb, i,
                           &msg_p->recv_status, &msg_p->recv_id,
                           mop->params.job_context,
                           mop->params.job_timeout,
                           msg_p->req.hints);
        if (ret == 0)
        {
            /* perform a quick test to see if the recv failed before posting
             * the send; if it reports an error quickly then we can save the
             * confusion of sending a request for which we can't recv a
             * response
             */
            ret = job_test(msg_p->recv_id, &tmp, NULL,
                           &msg_p->recv_status, 0,
                           mop->params.job_context);
        }

        if ((ret < 0) || (ret == 1))
        {
            /* it is impossible for this recv to complete at this point
             * without errors; we haven't sent the request yet!
             */
            assert(ret < 0 || msg_p->recv_status.error_code != 0);
            if (ret < 0)
            {
                PVFS_perror_gossip("Post of receive failed", ret);
            }
            else
            {
                PVFS_perror_gossip("Receive immediately failed",
                                   msg_p->recv_status.error_code);
            }

            msg_p->recv_id = 0;
            msg_p->send_id = 0;

            /* mark send as bad too and don't post it.
             * NOTE(review): when ret < 0 the status copied here is whatever
             * recv_status.error_code holds, not ret -- confirm the job
             * layer fills it in on that path, otherwise op_status may stay
             * 0 for a failed pair */
            msg_p->send_status.error_code = msg_p->recv_status.error_code;
            msg_p->op_status = msg_p->recv_status.error_code;
            /* neither the recv nor the send of this pair will complete */
            mop->params.comp_ct -= 2;

            if (local_enc_and_alloc)
            {
                /* undo the encode and the response allocation; clearing
                 * encoded_resp_p makes a later pass through this state
                 * encode and allocate again */
                PINT_encode_release(&msg_p->encoded_req, PINT_ENCODE_REQ);
                BMI_memfree(msg_p->svr_addr,msg_p->encoded_resp_p,
                            msg_p->max_resp_sz, BMI_RECV);
                msg_p->encoded_resp_p = NULL;
                local_enc_and_alloc = 0;
            }

            /* continue to send other array entries if possible */
            continue;
        }

        /* if we reach here, the recv has been posted without failure, but
         * has not completed yet
         */
        assert(ret == 0);

        gossip_debug(GOSSIP_MSGPAIR_DEBUG, "%s: sm %p msgpair %d: "
                     "posting send\n", __func__, smcb, i);

        /* post send of request; job_id stored in send_id */
        ret = job_bmi_send_list(msg_p->encoded_req.dest,
                                msg_p->encoded_req.buffer_list,
                                msg_p->encoded_req.size_list,
                                msg_p->encoded_req.list_count,
                                msg_p->encoded_req.total_size,
                                session_tag,
                                msg_p->encoded_req.buffer_type,
                                1, smcb, mop->count+i,
                                &msg_p->send_status,
                                &msg_p->send_id,
                                mop->params.job_context,
                                mop->params.job_timeout,
                                msg_p->req.hints);

        if ((ret < 0) ||
            ((ret == 1) && (msg_p->send_status.error_code != 0)))
        {
            if (ret < 0)
            {
                PVFS_perror_gossip("Post of send failed", ret);
            }
            else
            {
                PVFS_perror_gossip("Send immediately failed",
                                   msg_p->send_status.error_code);
            }

            gossip_err_unless_quiet("Send error: cancelling recv.\n");

            job_bmi_cancel(msg_p->recv_id, mop->params.job_context);

            /* we still have to wait for recv completion, so just decrement
             * comp_ct by one and keep going.
             * NOTE(review): as on the recv path, for ret < 0 the value
             * stored in op_status is send_status.error_code, not ret --
             * confirm it is populated
             */
            msg_p->op_status = msg_p->send_status.error_code;
            msg_p->send_id = 0;
            mop->params.comp_ct--;
        }
        else if (ret == 1)
        {
            /* immediate completion */
            msg_p->send_id = 0;

            /* decrement our count, since send is already done. */
            mop->params.comp_ct--;
        }
        /* else: successful post, no immediate completion */
    }

    if (mop->params.comp_ct == 0)
    {
        /* everything is completed already (could happen in some failure
         * cases); jump straight to final completion function.
         */
        js_p->error_code = MSGPAIRS_COMPLETE;
        return SM_ACTION_COMPLETE;
    }

    /* we are still waiting on operations to complete, next state
     * transition will handle them
     */
    return SM_ACTION_DEFERRED;
}
static struct dentry *pvfs2_lookup( struct inode *dir, struct dentry *dentry, struct nameidata *nd) #endif { int ret = -EINVAL; struct inode *inode = NULL; pvfs2_kernel_op_t *new_op = NULL; pvfs2_inode_t *parent = NULL, *found_pvfs2_inode = NULL; struct super_block *sb = NULL; /* in theory we could skip a lookup here (if the intent is to create) in order to avoid a potentially failed lookup, but leaving it in can skip a valid lookup and try to create a file that already exists (e.g. the vfs already handles checking for -EEXIST on O_EXCL opens, which is broken if we skip this lookup in the create path) */ gossip_debug(GOSSIP_NAME_DEBUG, "pvfs2_lookup called on %s\n", dentry->d_name.name); if (dentry->d_name.len > (PVFS2_NAME_LEN-1)) { return ERR_PTR(-ENAMETOOLONG); } new_op = op_alloc(PVFS2_VFS_OP_LOOKUP); if (!new_op) { return ERR_PTR(-ENOMEM); } #ifdef PVFS2_LINUX_KERNEL_2_4 new_op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW; #else /* if we're at a symlink, should we follow it? never attempt to follow negative dentries */ new_op->upcall.req.lookup.sym_follow = ((nd && (nd->flags & LOOKUP_FOLLOW) && (dentry->d_inode != NULL)) ? 
PVFS2_LOOKUP_LINK_FOLLOW : PVFS2_LOOKUP_LINK_NO_FOLLOW); #endif if (dir) { sb = dir->i_sb; parent = PVFS2_I(dir); if (parent && parent->refn.handle != PVFS_HANDLE_NULL && parent->refn.fs_id != PVFS_FS_ID_NULL) { gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %llu\n", __FILE__, __func__, __LINE__, llu(parent->refn.handle)); new_op->upcall.req.lookup.parent_refn = parent->refn; } else { #if defined(HAVE_IGET4_LOCKED) || defined(HAVE_IGET5_LOCKED) gossip_lerr("Critical error: i_ino cannot be relied on when using iget5/iget4\n"); op_release(new_op); return ERR_PTR(-EINVAL); #endif new_op->upcall.req.lookup.parent_refn.handle = get_handle_from_ino(dir); new_op->upcall.req.lookup.parent_refn.fs_id = PVFS2_SB(sb)->fs_id; } } else { /* if no parent at all was provided, use the root handle and file system id stored in the super block for the specified dentry's inode */ sb = dentry->d_inode->i_sb; new_op->upcall.req.lookup.parent_refn.handle = PVFS2_SB(sb)->root_handle; new_op->upcall.req.lookup.parent_refn.fs_id = PVFS2_SB(sb)->fs_id; } strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, PVFS2_NAME_LEN); gossip_debug(GOSSIP_NAME_DEBUG, "pvfs2_lookup: doing lookup on %s\n under %llu,%d " "(follow=%s)\n", new_op->upcall.req.lookup.d_name, llu(new_op->upcall.req.lookup.parent_refn.handle), new_op->upcall.req.lookup.parent_refn.fs_id, ((new_op->upcall.req.lookup.sym_follow == PVFS2_LOOKUP_LINK_FOLLOW) ? "yes" : "no")); ret = service_operation( new_op, "pvfs2_lookup", get_interruptible_flag(dir)); gossip_debug(GOSSIP_NAME_DEBUG, "Lookup Got %llu, fsid %d (ret=%d)\n", llu(new_op->downcall.resp.lookup.refn.handle), new_op->downcall.resp.lookup.refn.fs_id, ret); if(ret < 0) { if(ret == -ENOENT) { /* * if no inode was found, add a negative dentry to dcache anyway; * if we don't, we don't hold expected lookup semantics and we most * noticeably break during directory renames. * * however, if the operation failed or exited, do not add the * dentry (e.g. 
in the case that a touch is issued on a file that * already exists that was interrupted during this lookup -- no * need to add another negative dentry for an existing file) */ gossip_debug(GOSSIP_NAME_DEBUG, "pvfs2_lookup: Adding *negative* dentry %p\n for %s\n", dentry, dentry->d_name.name); /* * make sure to set the pvfs2 specific dentry operations for * the negative dentry that we're adding now so that a * potential future lookup of this cached negative dentry can * be properly revalidated. */ dentry->d_op = &pvfs2_dentry_operations; d_add(dentry, inode); op_release(new_op); return NULL; } op_release(new_op); /* must be a non-recoverable error */ return ERR_PTR(ret); } inode = pvfs2_iget(sb, &new_op->downcall.resp.lookup.refn); if (inode && !is_bad_inode(inode)) { struct dentry *res; gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d Found good inode [%lu] with count [%d]\n", __FILE__, __func__, __LINE__, inode->i_ino, (int)atomic_read(&inode->i_count)); /* update dentry/inode pair into dcache */ dentry->d_op = &pvfs2_dentry_operations; res = pvfs2_d_splice_alias(dentry, inode); gossip_debug(GOSSIP_NAME_DEBUG, "Lookup success (inode ct = %d)\n", (int)atomic_read(&inode->i_count)); if (res) res->d_op = &pvfs2_dentry_operations; op_release(new_op); #ifdef PVFS2_LINUX_KERNEL_2_4 return NULL; #else return res; #endif } else if (inode && is_bad_inode(inode)) { gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d Found bad inode [%lu] with count [%d]. 
Returning error [%d]", __FILE__, __func__, __LINE__, inode->i_ino, (int)atomic_read(&inode->i_count), ret); ret = -EACCES; found_pvfs2_inode = PVFS2_I(inode); /* look for an error code, possibly set by pvfs2_read_inode(), * otherwise we have to guess EACCES */ if(found_pvfs2_inode->error_code) { ret = found_pvfs2_inode->error_code; } iput(inode); op_release(new_op); return ERR_PTR(ret); } /* no error was returned from service_operation, but the inode * from pvfs2_iget was null...just return EACCESS */ op_release(new_op); gossip_debug(GOSSIP_NAME_DEBUG, "Returning -EACCES for NULL inode\n"); return ERR_PTR(-EACCES); }