/** * Starts bulk transfer for descriptor \a desc on the server. * Returns 0 on success or error code. */ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc) { struct obd_export *exp = desc->bd_export; struct ptlrpc_connection *conn = exp->exp_connection; int rc = 0; __u64 xid; int posted_md; int total_md; lnet_md_t md; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_PUT_NET)) RETURN(0); /* NB no locking required until desc is on the network */ LASSERT(desc->bd_md_count == 0); LASSERT(desc->bd_type == BULK_PUT_SOURCE || desc->bd_type == BULK_GET_SINK); LASSERT(desc->bd_cbid.cbid_fn == server_bulk_callback); LASSERT(desc->bd_cbid.cbid_arg == desc); /* NB total length may be 0 for a read past EOF, so we send 0 * length bulks, since the client expects bulk events. * * The client may not need all of the bulk XIDs for the RPC. The RPC * used the XID of the highest bulk XID needed, and the server masks * off high bits to get bulk count for this RPC. LU-1431 */ xid = desc->bd_req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1); total_md = desc->bd_req->rq_xid - xid + 1; desc->bd_md_count = total_md; desc->bd_failure = 0; md.user_ptr = &desc->bd_cbid; md.eq_handle = ptlrpc_eq_h; md.threshold = 2; /* SENT and ACK/REPLY */ for (posted_md = 0; posted_md < total_md; xid++) { md.options = PTLRPC_MD_OPTIONS; /* NB it's assumed that source and sink buffer frags are * page-aligned. Otherwise we'd have to send client bulk * sizes over and split server buffer accordingly */ ptlrpc_fill_bulk_md(&md, desc, posted_md); rc = LNetMDBind(md, LNET_UNLINK, &desc->bd_mds[posted_md]); if (rc != 0) { CERROR("%s: LNetMDBind failed for MD %u: rc = %d\n", exp->exp_obd->obd_name, posted_md, rc); LASSERT(rc == -ENOMEM); if (posted_md == 0) { desc->bd_md_count = 0; RETURN(-ENOMEM); } break; } /* Network is about to get at the memory */ if (desc->bd_type == BULK_PUT_SOURCE) rc = LNetPut(conn->c_self, desc->bd_mds[posted_md], LNET_ACK_REQ, conn->c_peer, desc->bd_portal, xid, 0, 0); else rc = LNetGet(conn->c_self, desc->bd_mds[posted_md], conn->c_peer, desc->bd_portal, xid, 0); posted_md++; if (rc != 0) { CERROR("%s: failed bulk transfer with %s:%u x"LPU64": " "rc = %d\n", exp->exp_obd->obd_name, libcfs_id2str(conn->c_peer), desc->bd_portal, xid, rc); break; } } if (rc != 0) { /* Can't send, so we unlink the MD bound above. The UNLINK * event this creates will signal completion with failure, * so we return SUCCESS here! */ spin_lock(&desc->bd_lock); desc->bd_md_count -= total_md - posted_md; spin_unlock(&desc->bd_lock); LASSERT(desc->bd_md_count >= 0); mdunlink_iterate_helper(desc->bd_mds, posted_md); RETURN(0); } CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d " "id %s xid "LPX64"-"LPX64"\n", desc->bd_iov_count, desc->bd_nob, desc->bd_portal, libcfs_id2str(conn->c_peer), xid - posted_md, xid - 1); RETURN(0); }
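/*
 * A standalone illustration (not part of the code above) of the LU-1431
 * matchbit arithmetic used by ptlrpc_start_bulk_transfer(): the client puts
 * the highest bulk XID into rq_xid, and the server masks off the low bits
 * (bd_md_max_brw is a power of two) to recover the first XID and the number
 * of MDs to post.  The sample values below are made up for the example.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t rq_xid     = 0x100000000000007bULL;	/* hypothetical */
	uint64_t md_max_brw = 16;			/* per-RPC MD limit */
	uint64_t xid        = rq_xid & ~(md_max_brw - 1);
	uint64_t total_md   = rq_xid - xid + 1;

	/* prints: first bulk xid 0x1000000000000070, total_md 12 */
	printf("first bulk xid %#llx, total_md %llu\n",
	       (unsigned long long)xid, (unsigned long long)total_md);
	return 0;
}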
/** * Register bulk at the sender for later transfer. * Returns 0 on success or error code. */ int ptlrpc_register_bulk(struct ptlrpc_request *req) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; lnet_process_id_t peer; int rc = 0; int rc2; int posted_md; int total_md; __u64 xid; lnet_handle_me_t me_h; lnet_md_t md; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET)) RETURN(0); /* NB no locking required until desc is on the network */ LASSERT(desc->bd_nob > 0); LASSERT(desc->bd_md_count == 0); LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT); LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); LASSERT(desc->bd_req != NULL); LASSERT(desc->bd_type == BULK_PUT_SINK || desc->bd_type == BULK_GET_SOURCE); /* clean up the state of the bulk since it will be reused */ if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY) desc->bd_nob_transferred = 0; else LASSERT(desc->bd_nob_transferred == 0); desc->bd_failure = 0; peer = desc->bd_import->imp_connection->c_peer; LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback); LASSERT(desc->bd_cbid.cbid_arg == desc); /* An XID is only used for a single request from the client. * For retried bulk transfers, a new XID will be allocated * in ptlrpc_check_set() if it needs to be resent, so it is not * using the same RDMA match bits after an error. * * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */ xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1); LASSERTF(!(desc->bd_registered && req->rq_send_state != LUSTRE_IMP_REPLAY) || xid != desc->bd_last_xid, "registered: %d rq_xid: "LPU64" bd_last_xid: "LPU64"\n", desc->bd_registered, xid, desc->bd_last_xid); total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV; desc->bd_registered = 1; desc->bd_last_xid = xid; desc->bd_md_count = total_md; md.user_ptr = &desc->bd_cbid; md.eq_handle = ptlrpc_eq_h; md.threshold = 1; /* PUT or GET */ for (posted_md = 0; posted_md < total_md; posted_md++, xid++) { md.options = PTLRPC_MD_OPTIONS | ((desc->bd_type == BULK_GET_SOURCE) ? LNET_MD_OP_GET : LNET_MD_OP_PUT); ptlrpc_fill_bulk_md(&md, desc, posted_md); rc = LNetMEAttach(desc->bd_portal, peer, xid, 0, LNET_UNLINK, LNET_INS_AFTER, &me_h); if (rc != 0) { CERROR("%s: LNetMEAttach failed x"LPU64"/%d: rc = %d\n", desc->bd_export->exp_obd->obd_name, xid, posted_md, rc); break; } /* About to let the network at it... */ rc = LNetMDAttach(me_h, md, LNET_UNLINK, &desc->bd_mds[posted_md]); if (rc != 0) { CERROR("%s: LNetMDAttach failed x"LPU64"/%d: rc = %d\n", desc->bd_export->exp_obd->obd_name, xid, posted_md, rc); rc2 = LNetMEUnlink(me_h); LASSERT(rc2 == 0); break; } } if (rc != 0) { LASSERT(rc == -ENOMEM); spin_lock(&desc->bd_lock); desc->bd_md_count -= total_md - posted_md; spin_unlock(&desc->bd_lock); LASSERT(desc->bd_md_count >= 0); mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); req->rq_status = -ENOMEM; RETURN(-ENOMEM); } /* Set rq_xid to matchbits of the final bulk so that the server can * infer the number of bulks that were prepared */ req->rq_xid = --xid; LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK), "bd_last_xid = x"LPU64", rq_xid = x"LPU64"\n", desc->bd_last_xid, req->rq_xid); spin_lock(&desc->bd_lock); /* Holler if peer manages to touch buffers before he knows the xid */ if (desc->bd_md_count != total_md) CWARN("%s: Peer %s touched %d buffers while I registered\n", desc->bd_export->exp_obd->obd_name, libcfs_id2str(peer), total_md - desc->bd_md_count); spin_unlock(&desc->bd_lock); CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, " "xid x"LPX64"-"LPX64", portal %u\n", desc->bd_md_count, desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink", desc->bd_iov_count, desc->bd_nob, desc->bd_last_xid, req->rq_xid, desc->bd_portal); RETURN(0); }
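/*
 * A standalone sketch of the client-side bookkeeping in
 * ptlrpc_register_bulk() above: how many MDs a given page count needs, and
 * which matchbit ends up back in rq_xid.  DEMO_LNET_MAX_IOV and the sample
 * values are assumptions used only for this illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_LNET_MAX_IOV 256	/* assumed pages per LNet MD */

int main(void)
{
	unsigned int iov_count  = 700;			/* hypothetical */
	uint64_t     rq_xid     = 0x200000000000004fULL;
	uint64_t     md_max_brw = 4;			/* power of two */
	unsigned int total_md   = (iov_count + DEMO_LNET_MAX_IOV - 1) /
				  DEMO_LNET_MAX_IOV;
	uint64_t     first_xid  = rq_xid & ~(md_max_brw - 1);
	uint64_t     last_xid   = first_xid + total_md - 1;

	/* the function resets rq_xid to the last matchbit it registered */
	printf("total_md %u, xids %#llx..%#llx, new rq_xid %#llx\n",
	       total_md, (unsigned long long)first_xid,
	       (unsigned long long)last_xid, (unsigned long long)last_xid);
	return 0;
}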
static ssize_t osp_md_read(const struct lu_env *env, struct dt_object *dt, struct lu_buf *rbuf, loff_t *pos) { struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev); struct dt_device *dt_dev = &osp->opd_dt_dev; struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2; char *ptr = rbuf->lb_buf; struct osp_update_request *update = NULL; struct ptlrpc_request *req = NULL; struct out_read_reply *orr; struct ptlrpc_bulk_desc *desc; struct object_update_reply *reply; __u32 left_size; int nbufs; int i; int rc; ENTRY; /* Because it needs to send the update buffer right away, * just create an update buffer, instead of attaching the * update_remote list of the thandle. */ update = osp_update_request_create(dt_dev); if (IS_ERR(update)) GOTO(out, rc = PTR_ERR(update)); rc = osp_update_rpc_pack(env, read, update, OUT_READ, lu_object_fid(&dt->do_lu), rbuf->lb_len, *pos); if (rc != 0) { CERROR("%s: cannot insert update: rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, rc); GOTO(out, rc); } rc = osp_prep_update_req(env, osp->opd_obd->u.cli.cl_import, update, &req); if (rc != 0) GOTO(out, rc); nbufs = (rbuf->lb_len + OUT_BULK_BUFFER_SIZE - 1) / OUT_BULK_BUFFER_SIZE; /* allocate bulk descriptor */ desc = ptlrpc_prep_bulk_imp(req, nbufs, 1, PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KVEC, MDS_BULK_PORTAL, &ptlrpc_bulk_kvec_ops); if (desc == NULL) GOTO(out, rc = -ENOMEM); /* split the buffer into smaller chunks */ left_size = rbuf->lb_len; for (i = 0; i < nbufs; i++) { int read_size; read_size = left_size > OUT_BULK_BUFFER_SIZE ? OUT_BULK_BUFFER_SIZE : left_size; desc->bd_frag_ops->add_iov_frag(desc, ptr, read_size); ptr += read_size; left_size -= read_size; } /* This will only be called with read-only updates, and these updates * might be used to retrieve the update log during the recovery process, so * sending them during recovery is allowed */ req->rq_allow_replay = 1; req->rq_bulk_read = 1; /* send request to master and wait for RPC to complete */ rc = ptlrpc_queue_wait(req); if (rc != 0) GOTO(out, rc); rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, req->rq_bulk->bd_nob_transferred); if (rc < 0) GOTO(out, rc); reply = req_capsule_server_sized_get(&req->rq_pill, &RMF_OUT_UPDATE_REPLY, OUT_UPDATE_REPLY_SIZE); if (reply->ourp_magic != UPDATE_REPLY_MAGIC) { CERROR("%s: invalid update reply magic %x expected %x:" " rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO); GOTO(out, rc = -EPROTO); } rc = object_update_result_data_get(reply, lbuf, 0); if (rc < 0) GOTO(out, rc); if (lbuf->lb_len < sizeof(*orr)) GOTO(out, rc = -EPROTO); orr = lbuf->lb_buf; orr_le_to_cpu(orr, orr); rc = orr->orr_size; *pos = orr->orr_offset; out: if (req != NULL) ptlrpc_req_finished(req); if (update != NULL) osp_update_request_destroy(update); RETURN(rc); }
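/*
 * A minimal, self-contained sketch of the chunking loop in osp_md_read()
 * above: the read buffer is split into OUT_BULK_BUFFER_SIZE fragments, the
 * last one possibly short.  The chunk size below is an assumed value, used
 * only for this illustration.
 */
#include <stdio.h>

#define DEMO_BULK_BUFFER_SIZE 4096	/* assumed chunk size */

static void demo_split_into_chunks(unsigned int len)
{
	unsigned int nbufs = (len + DEMO_BULK_BUFFER_SIZE - 1) /
			     DEMO_BULK_BUFFER_SIZE;
	unsigned int left = len;
	unsigned int i;

	for (i = 0; i < nbufs; i++) {
		unsigned int chunk = left > DEMO_BULK_BUFFER_SIZE ?
				     DEMO_BULK_BUFFER_SIZE : left;

		printf("fragment %u: %u bytes\n", i, chunk);
		left -= chunk;
	}
}

int main(void)
{
	demo_split_into_chunks(10000);	/* 4096 + 4096 + 1808 */
	return 0;
}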
/** * Implementation of dt_index_operations::dio_lookup * * Look up a record by key under a remote index object. It packs the lookup update * into an RPC, sends it to the remote OUT service and waits for the lookup result. * * \param[in] env execution environment * \param[in] dt index object to look up * \param[out] rec record in which to return lookup result * \param[in] key key of index which will be looked up * * \retval 1 if the lookup succeeds. * \retval negative errno if the lookup fails. */ static int osp_md_index_lookup(const struct lu_env *env, struct dt_object *dt, struct dt_rec *rec, const struct dt_key *key) { struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2; struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev); struct dt_device *dt_dev = &osp->opd_dt_dev; struct osp_update_request *update; struct object_update_reply *reply; struct ptlrpc_request *req = NULL; struct lu_fid *fid; int rc; ENTRY; /* Because it needs to send the update buffer right away, * just create an update buffer, instead of attaching the * update_remote list of the thandle. */ update = osp_update_request_create(dt_dev); if (IS_ERR(update)) RETURN(PTR_ERR(update)); rc = osp_update_rpc_pack(env, index_lookup, update, OUT_INDEX_LOOKUP, lu_object_fid(&dt->do_lu), rec, key); if (rc != 0) { CERROR("%s: Insert update error: rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, rc); GOTO(out, rc); } rc = osp_remote_sync(env, osp, update, &req); if (rc < 0) GOTO(out, rc); reply = req_capsule_server_sized_get(&req->rq_pill, &RMF_OUT_UPDATE_REPLY, OUT_UPDATE_REPLY_SIZE); if (reply->ourp_magic != UPDATE_REPLY_MAGIC) { CERROR("%s: Wrong version %x expected %x: rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO); GOTO(out, rc = -EPROTO); } rc = object_update_result_data_get(reply, lbuf, 0); if (rc < 0) GOTO(out, rc); if (lbuf->lb_len != sizeof(*fid)) { CERROR("%s: lookup "DFID" %s wrong size %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, PFID(lu_object_fid(&dt->do_lu)), (char *)key, (int)lbuf->lb_len); GOTO(out, rc = -EINVAL); } fid = lbuf->lb_buf; if (ptlrpc_rep_need_swab(req)) lustre_swab_lu_fid(fid); if (!fid_is_sane(fid)) { CERROR("%s: lookup "DFID" %s invalid fid "DFID"\n", dt_dev->dd_lu_dev.ld_obd->obd_name, PFID(lu_object_fid(&dt->do_lu)), (char *)key, PFID(fid)); GOTO(out, rc = -EINVAL); } memcpy(rec, fid, sizeof(*fid)); GOTO(out, rc = 1); out: if (req != NULL) ptlrpc_req_finished(req); osp_update_request_destroy(update); return rc; }
/* * Look-up a slave index file. If the slave index isn't found: * - if local is set to false, we allocate a FID from FID_SEQ_QUOTA sequence and * create the index. * - otherwise, we create the index file with a local reserved FID (see * lquota_local_oid) * * \param env - is the environment passed by the caller * \param dev - is the backend dt_device where to look-up/create the slave index * \param parent - is the parent directory where to create the slave index if * it does not exist already * \param glb_fid - is the fid of the global index file associated with this * slave index. * \param uuid - is the uuid of slave which is (re)connecting to the master * target * \param local - indicate whether to use local reserved FID (LQUOTA_USR_OID * & LQUOTA_GRP_OID) for the slave index creation or to * allocate a new fid from sequence FID_SEQ_QUOTA * * \retval - pointer to the dt_object of the slave index on success, * appropriate error on failure */ struct dt_object *lquota_disk_slv_find_create(const struct lu_env *env, struct dt_device *dev, struct dt_object *parent, struct lu_fid *glb_fid, struct obd_uuid *uuid, bool local) { struct lquota_thread_info *qti = lquota_info(env); struct dt_object *slv_idx; int rc; ENTRY; LASSERT(uuid != NULL); CDEBUG(D_QUOTA, "lookup/create slave index file for %s\n", obd_uuid2str(uuid)); /* generate filename associated with the slave */ rc = lquota_disk_slv_filename(glb_fid, uuid, qti->qti_buf); if (rc) RETURN(ERR_PTR(rc)); /* Slave indexes uses the FID_SEQ_QUOTA sequence since they can be read * through the network */ qti->qti_fid.f_seq = FID_SEQ_QUOTA; qti->qti_fid.f_ver = 0; if (local) { int type; rc = lquota_extract_fid(glb_fid, NULL, NULL, &type); if (rc) RETURN(ERR_PTR(rc)); /* use predefined fid in the reserved oid list */ qti->qti_fid.f_oid = qtype2slv_oid(type); slv_idx = local_index_find_or_create_with_fid(env, dev, &qti->qti_fid, parent, qti->qti_buf, LQUOTA_MODE, &dt_quota_slv_features); } else { /* allocate fid dynamically if index does not exist already */ qti->qti_fid.f_oid = LQUOTA_GENERATED_OID; /* lookup/create slave index file */ slv_idx = lquota_disk_find_create(env, dev, parent, &qti->qti_fid, &dt_quota_slv_features, qti->qti_buf); } if (IS_ERR(slv_idx)) RETURN(slv_idx); /* install index operation vector */ if (slv_idx->do_index_ops == NULL) { rc = slv_idx->do_ops->do_index_try(env, slv_idx, &dt_quota_slv_features); if (rc) { CERROR("%s: failed to setup index operations for "DFID " rc:%d\n", dev->dd_lu_dev.ld_obd->obd_name, PFID(lu_object_fid(&slv_idx->do_lu)), rc); dt_object_put(env, slv_idx); slv_idx = ERR_PTR(rc); } } RETURN(slv_idx); }
/* We always need to remove the presto options before passing mount options to cache FS */ struct super_block * presto_read_super(struct super_block * sb, void * data, int silent) { struct file_system_type *fstype; struct presto_cache *cache = NULL; char *cache_data = NULL; char *cache_data_end; char *cache_type = NULL; char *fileset = NULL; char *channel = NULL; int err; int minor; ENTRY; /* reserve space for the cache's data */ PRESTO_ALLOC(cache_data, PAGE_SIZE); if ( !cache_data ) { CERROR("presto_read_super: Cannot allocate data page.\n"); EXIT; goto out_err; } /* read and validate options */ cache_data_end = presto_options(sb, data, cache_data, &cache_type, &fileset, &channel); /* was there anything for the cache filesystem in the data? */ if (cache_data_end == cache_data) { PRESTO_FREE(cache_data, PAGE_SIZE); cache_data = NULL; } else { CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data, cache_data); } /* set up the cache */ cache = presto_cache_init(); if ( !cache ) { CERROR("presto_read_super: failure allocating cache.\n"); EXIT; goto out_err; } cache->cache_type = cache_type; /* link cache to channel */ minor = presto_set_channel(cache, channel); if (minor < 0) { EXIT; goto out_err; } CDEBUG(D_SUPER, "Presto: type=%s, fset=%s, dev=%d, flags %x\n", cache_type, fileset?fileset:"NULL", minor, cache->cache_flags); MOD_INC_USE_COUNT; /* get the filter for the cache */ fstype = get_fs_type(cache_type); cache->cache_filter = filter_get_filter_fs((const char *)cache_type); if ( !fstype || !cache->cache_filter) { CERROR("Presto: unrecognized fs type or cache type\n"); MOD_DEC_USE_COUNT; EXIT; goto out_err; } /* can we in fact mount the cache */ if ((fstype->fs_flags & FS_REQUIRES_DEV) && !sb->s_bdev) { CERROR("filesystem \"%s\" requires a valid block device\n", cache_type); MOD_DEC_USE_COUNT; EXIT; goto out_err; } sb = fstype->read_super(sb, cache_data, silent); /* this might have been freed above */ if (cache_data) { PRESTO_FREE(cache_data, PAGE_SIZE); cache_data = NULL; } if ( !sb ) { CERROR("InterMezzo: cache mount failure.\n"); MOD_DEC_USE_COUNT; EXIT; goto out_err; } cache->cache_sb = sb; cache->cache_root = dget(sb->s_root); /* we now know the dev of the cache: hash the cache */ presto_cache_add(cache, sb->s_dev); err = izo_prepare_fileset(sb->s_root, fileset); filter_setup_journal_ops(cache->cache_filter, cache->cache_type); /* make sure we have our own super operations: sb still contains the cache operations */ filter_setup_super_ops(cache->cache_filter, sb->s_op, &presto_super_ops); sb->s_op = filter_c2usops(cache->cache_filter); /* get izo directory operations: sb->s_root->d_inode exists now */ filter_setup_dir_ops(cache->cache_filter, sb->s_root->d_inode, &presto_dir_iops, &presto_dir_fops); filter_setup_dentry_ops(cache->cache_filter, sb->s_root->d_op, &presto_dentry_ops); sb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter); sb->s_root->d_inode->i_fop = filter_c2udfops(cache->cache_filter); sb->s_root->d_op = filter_c2udops(cache->cache_filter); EXIT; return sb; out_err: CDEBUG(D_SUPER, "out_err called\n"); if (cache) PRESTO_FREE(cache, sizeof(struct presto_cache)); if (cache_data) PRESTO_FREE(cache_data, PAGE_SIZE); if (fileset) PRESTO_FREE(fileset, strlen(fileset) + 1); if (channel) PRESTO_FREE(channel, strlen(channel) + 1); if (cache_type) PRESTO_FREE(cache_type, strlen(cache_type) + 1); CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n", presto_kmemory, presto_vmemory); return NULL; }
/* Allocate new fid on passed client @seq and save it to @fid. */ int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq, struct lu_fid *fid) { wait_queue_t link; int rc; ENTRY; LASSERT(seq != NULL); LASSERT(fid != NULL); init_waitqueue_entry_current(&link); mutex_lock(&seq->lcs_mutex); if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST)) seq->lcs_fid.f_oid = seq->lcs_width; while (1) { seqno_t seqnr; if (!fid_is_zero(&seq->lcs_fid) && fid_oid(&seq->lcs_fid) < seq->lcs_width) { /* Just bump last allocated fid and return to caller. */ seq->lcs_fid.f_oid += 1; rc = 0; break; } rc = seq_fid_alloc_prep(seq, &link); if (rc) continue; rc = seq_client_alloc_seq(env, seq, &seqnr); if (rc) { CERROR("%s: Can't allocate new sequence, " "rc %d\n", seq->lcs_name, rc); seq_fid_alloc_fini(seq); mutex_unlock(&seq->lcs_mutex); RETURN(rc); } CDEBUG(D_INFO, "%s: Switch to sequence " "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr); seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID; seq->lcs_fid.f_seq = seqnr; seq->lcs_fid.f_ver = 0; /* * Inform caller that sequence switch is performed to allow it * to setup FLD for it. */ rc = 1; seq_fid_alloc_fini(seq); break; } *fid = seq->lcs_fid; mutex_unlock(&seq->lcs_mutex); CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid)); RETURN(rc); }
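/*
 * A standalone model of the allocation loop in seq_client_alloc_fid() above:
 * bump f_oid inside the current sequence until lcs_width entries are used,
 * then switch to a new sequence and restart from the initial OID.  The
 * constants and the new-sequence source are stand-ins for illustration; the
 * real code obtains a fresh sequence from seq_client_alloc_seq().
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_FID_INIT_OID 1ULL	/* stand-in for LUSTRE_FID_INIT_OID */

struct demo_fid { uint64_t f_seq; uint64_t f_oid; };

static int demo_alloc_fid(struct demo_fid *cur, uint64_t width,
			  uint64_t *next_seq)
{
	if (cur->f_oid < width) {
		cur->f_oid += 1;
		return 0;			/* same sequence */
	}
	cur->f_seq = (*next_seq)++;		/* sequence exhausted */
	cur->f_oid = DEMO_FID_INIT_OID;
	return 1;	/* tells the caller to set up the FLD entry */
}

int main(void)
{
	struct demo_fid fid = { 0x200000400ULL, 3 };
	uint64_t next_seq = 0x200000401ULL;
	int i;

	for (i = 0; i < 6; i++) {
		int rc = demo_alloc_fid(&fid, 4, &next_seq);

		printf("rc=%d fid [%#llx:0x%llx]\n", rc,
		       (unsigned long long)fid.f_seq,
		       (unsigned long long)fid.f_oid);
	}
	return 0;
}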
/* * Server's incoming request callback */ void request_in_callback(lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg; struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; struct ptlrpc_service *service = svcpt->scp_service; struct ptlrpc_request *req; LASSERT (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK); LASSERT ((char *)ev->md.start >= rqbd->rqbd_buffer); LASSERT ((char *)ev->md.start + ev->offset + ev->mlength <= rqbd->rqbd_buffer + service->srv_buf_size); CDEBUG((ev->status == 0) ? D_NET : D_ERROR, "event type %d, status %d, service %s\n", ev->type, ev->status, service->srv_name); if (ev->unlinked) { /* If this is the last request message to fit in the * request buffer we can use the request object embedded in * rqbd. Note that if we failed to allocate a request, * we'd have to re-post the rqbd, which we can't do in this * context. */ req = &rqbd->rqbd_req; memset(req, 0, sizeof (*req)); } else { LASSERT (ev->type == LNET_EVENT_PUT); if (ev->status != 0) { /* We moaned above already... */ return; } OBD_ALLOC_GFP(req, sizeof(*req), ALLOC_ATOMIC_TRY); if (req == NULL) { CERROR("Can't allocate incoming request descriptor: " "Dropping %s RPC from %s\n", service->srv_name, libcfs_id2str(ev->initiator)); return; } } /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL, * flags are reset and scalars are zero. We only set the message * size to non-zero if this was a successful receive. */ req->rq_xid = ev->match_bits; req->rq_reqbuf = ev->md.start + ev->offset; if (ev->type == LNET_EVENT_PUT && ev->status == 0) req->rq_reqdata_len = ev->mlength; do_gettimeofday(&req->rq_arrival_time); req->rq_peer = ev->initiator; req->rq_self = ev->target.nid; req->rq_rqbd = rqbd; req->rq_phase = RQ_PHASE_NEW; spin_lock_init(&req->rq_lock); INIT_LIST_HEAD(&req->rq_timed_list); INIT_LIST_HEAD(&req->rq_exp_list); atomic_set(&req->rq_refcount, 1); if (ev->type == LNET_EVENT_PUT) CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n", req, req->rq_xid, ev->mlength); CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer)); spin_lock(&svcpt->scp_lock); ptlrpc_req_add_history(svcpt, req); if (ev->unlinked) { svcpt->scp_nrqbds_posted--; CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n", svcpt->scp_nrqbds_posted); /* Normally, don't complain about 0 buffers posted; LNET won't * drop incoming reqs since we set the portal lazy */ if (test_req_buffer_pressure && ev->type != LNET_EVENT_UNLINK && svcpt->scp_nrqbds_posted == 0) CWARN("All %s request buffers busy\n", service->srv_name); /* req takes over the network's ref on rqbd */ } else { /* req takes a ref on rqbd */ rqbd->rqbd_refcount++; } list_add_tail(&req->rq_list, &svcpt->scp_req_incoming); svcpt->scp_nreqs_incoming++; /* NB everything can disappear under us once the request * has been queued and we unlock, so do the wake now... */ wake_up(&svcpt->scp_waitq); spin_unlock(&svcpt->scp_lock); }
int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int *nr_local, struct niobuf_local *lnb, struct obd_trans_info *oti, struct lustre_capa *capa) { struct tgt_session_info *tsi = tgt_ses_info(env); struct ofd_device *ofd = ofd_exp(exp); struct ofd_thread_info *info; char *jobid; int rc = 0; if (*nr_local > PTLRPC_MAX_BRW_PAGES) { CERROR("%s: bulk has too many pages %d, which exceeds the " "maximum pages per RPC of %d\n", exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES); RETURN(-EPROTO); } if (tgt_ses_req(tsi) == NULL) { /* echo client case */ LASSERT(oti != NULL); lu_env_refill((struct lu_env *)env); info = ofd_info_init(env, exp); ofd_oti2info(info, oti); jobid = oti->oti_jobid; } else { info = tsi2ofd_info(tsi); jobid = tsi->tsi_jobid; } LASSERT(oa != NULL); if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) { struct ofd_seq *oseq; oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi)); if (IS_ERR(oseq)) { CERROR("%s: Cannot find seq for "DOSTID ": rc = %ld\n", ofd_name(ofd), POSTID(&oa->o_oi), PTR_ERR(oseq)); RETURN(-EINVAL); } if (oseq->os_destroys_in_progress == 0) { /* don't fail lookups for orphan recovery, it causes * later LBUGs when objects still exist during * precreate */ ofd_seq_put(env, oseq); RETURN(-ENOENT); } ofd_seq_put(env, oseq); } LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); rc = ostid_to_fid(&info->fti_fid, &oa->o_oi, 0); if (unlikely(rc != 0)) RETURN(rc); if (cmd == OBD_BRW_WRITE) { rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi), capa, CAPA_OPC_OSS_WRITE); if (rc == 0) { la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR); rc = ofd_preprw_write(env, exp, ofd, &info->fti_fid, &info->fti_attr, oa, objcount, obj, rnb, nr_local, lnb, jobid); } } else if (cmd == OBD_BRW_READ) { rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi), capa, CAPA_OPC_OSS_READ); if (rc == 0) { ofd_grant_prepare_read(env, exp, oa); rc = ofd_preprw_read(env, exp, ofd, &info->fti_fid, &info->fti_attr, obj->ioo_bufcnt, rnb, nr_local, lnb, jobid); obdo_from_la(oa, &info->fti_attr, LA_ATIME); } } else { CERROR("%s: wrong cmd %d received!\n", exp->exp_obd->obd_name, cmd); rc = -EPROTO; } RETURN(rc); }
static int ll_get_name(struct dentry *dentry, char *name, struct dentry *child) { struct inode *dir = d_inode(dentry); int rc; struct ll_getname_data lgd = { .lgd_name = name, .lgd_fid = ll_i2info(d_inode(child))->lli_fid, .ctx.actor = ll_nfs_get_name_filldir, }; if (!dir || !S_ISDIR(dir->i_mode)) { rc = -ENOTDIR; goto out; } if (!dir->i_fop) { rc = -EINVAL; goto out; } mutex_lock(&dir->i_mutex); rc = ll_dir_read(dir, &lgd.ctx); mutex_unlock(&dir->i_mutex); if (!rc && !lgd.lgd_found) rc = -ENOENT; out: return rc; } static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; if (fh_type != LUSTRE_NFS_FID) return ERR_PTR(-EPROTO); return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent); } static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; if (fh_type != LUSTRE_NFS_FID) return ERR_PTR(-EPROTO); return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL); } static struct dentry *ll_get_parent(struct dentry *dchild) { struct ptlrpc_request *req = NULL; struct inode *dir = d_inode(dchild); struct ll_sb_info *sbi; struct dentry *result = NULL; struct mdt_body *body; static char dotdot[] = ".."; struct md_op_data *op_data; int rc; int lmmsize; LASSERT(dir && S_ISDIR(dir->i_mode)); sbi = ll_s2sbi(dir->i_sb); CDEBUG(D_INFO, "getting parent for (%lu,"DFID")\n", dir->i_ino, PFID(ll_inode2fid(dir))); rc = ll_get_default_mdsize(sbi, &lmmsize); if (rc != 0) return ERR_PTR(rc); op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot, strlen(dotdot), lmmsize, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return (void *)op_data; rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino); return ERR_PTR(rc); } body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); LASSERT(body->valid & OBD_MD_FLID); CDEBUG(D_INFO, "parent for "DFID" is "DFID"\n", PFID(ll_inode2fid(dir)), PFID(&body->fid1)); result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL); ptlrpc_req_finished(req); return result; } struct export_operations lustre_export_operations = { .get_parent = ll_get_parent, .encode_fh = ll_encode_fh, .get_name = ll_get_name, .fh_to_dentry = ll_fh_to_dentry, .fh_to_parent = ll_fh_to_parent, };
static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, struct ofd_device *ofd, struct lu_fid *fid, struct lu_attr *la, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int *nr_local, struct niobuf_local *lnb, char *jobid) { struct ofd_object *fo; int i, j, k, rc = 0, tot_bytes = 0; ENTRY; LASSERT(env != NULL); LASSERT(objcount == 1); if (unlikely(exp->exp_obd->obd_recovering)) { struct ofd_thread_info *info = ofd_info(env); /* copied from ofd_precreate_object */ /* XXX this should be consolidated to use the same code * instead of a copy, due to the ongoing risk of bugs. */ memset(&info->fti_attr, 0, sizeof(info->fti_attr)); info->fti_attr.la_valid = LA_TYPE | LA_MODE; info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666; info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME; /* Initialize a/c/m time so any client timestamp will always * be newer and update the inode. ctime = 0 is also handled * specially in osd_inode_setattr(). See LU-221, LU-1042 */ info->fti_attr.la_atime = 0; info->fti_attr.la_mtime = 0; info->fti_attr.la_ctime = 0; fo = ofd_object_find_or_create(env, ofd, fid, &info->fti_attr); } else { fo = ofd_object_find(env, ofd, fid); } if (IS_ERR(fo)) GOTO(out, rc = PTR_ERR(fo)); LASSERT(fo != NULL); ofd_read_lock(env, fo); if (!ofd_object_exists(fo)) { CERROR("%s: BRW to missing obj "DOSTID"\n", exp->exp_obd->obd_name, POSTID(&obj->ioo_oid)); ofd_read_unlock(env, fo); ofd_object_put(env, fo); GOTO(out, rc = -ENOENT); } /* Process incoming grant info, set OBD_BRW_GRANTED flag and grant some * space back if possible */ ofd_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt); /* parse remote buffers to local buffers and prepare the latter */ *nr_local = 0; for (i = 0, j = 0; i < obj->ioo_bufcnt; i++) { rc = dt_bufs_get(env, ofd_object_child(fo), rnb + i, lnb + j, 1, ofd_object_capa(env, fo)); if (unlikely(rc < 0)) GOTO(err, rc); LASSERT(rc <= PTLRPC_MAX_BRW_PAGES); /* correct index for local buffers to continue with */ for (k = 0; k < rc; k++) { lnb[j+k].lnb_flags = rnb[i].rnb_flags; if (!(rnb[i].rnb_flags & OBD_BRW_GRANTED)) lnb[j+k].lnb_rc = -ENOSPC; /* remote client can't break through quota */ if (exp_connect_rmtclient(exp)) lnb[j+k].lnb_flags &= ~OBD_BRW_NOQUOTA; } j += rc; *nr_local += rc; LASSERT(j <= PTLRPC_MAX_BRW_PAGES); tot_bytes += rnb[i].rnb_len; } LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES); rc = dt_write_prep(env, ofd_object_child(fo), lnb, *nr_local); if (unlikely(rc != 0)) GOTO(err, rc); ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, tot_bytes); RETURN(0); err: dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local); ofd_read_unlock(env, fo); /* ofd_grant_prepare_write() was called, so we must commit */ ofd_grant_commit(env, exp, rc); out: /* let's still process incoming grant information packed in the oa, * but without enforcing grant since we won't proceed with the write. * Just like a read request actually. */ ofd_grant_prepare_read(env, exp, oa); return rc; }
/* backup plain llog */ int llog_backup(const struct lu_env *env, struct obd_device *obd, struct llog_ctxt *ctxt, struct llog_ctxt *bctxt, char *name, char *backup) { struct llog_handle *llh, *bllh; int rc; /* open original log */ rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS); if (rc < 0) { /* the -ENOENT case is also reported to the caller * but silently so it should handle that if needed. */ if (rc != -ENOENT) CERROR("%s: failed to open log %s: rc = %d\n", obd->obd_name, name, rc); return rc; } rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL); if (rc) goto out_close; /* Make sure there's no old backup log */ rc = llog_erase(env, bctxt, NULL, backup); if (rc < 0 && rc != -ENOENT) goto out_close; /* open backup log */ rc = llog_open_create(env, bctxt, &bllh, NULL, backup); if (rc) { CERROR("%s: failed to open backup logfile %s: rc = %d\n", obd->obd_name, backup, rc); goto out_close; } /* check that backup llog is not the same object as original one */ if (llh->lgh_obj == bllh->lgh_obj) { CERROR("%s: backup llog %s to itself (%s), objects %p/%p\n", obd->obd_name, name, backup, llh->lgh_obj, bllh->lgh_obj); rc = -EEXIST; goto out_backup; } rc = llog_init_handle(env, bllh, LLOG_F_IS_PLAIN, NULL); if (rc) goto out_backup; /* Copy log record by record */ rc = llog_process_or_fork(env, llh, llog_copy_handler, (void *)bllh, NULL, false); if (rc) CERROR("%s: failed to backup log %s: rc = %d\n", obd->obd_name, name, rc); out_backup: llog_close(env, bllh); out_close: llog_close(env, llh); return rc; }
int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, int flags, struct obd_uuid *uuid) { struct llog_log_hdr *llh; int rc; LASSERT(handle->lgh_hdr == NULL); llh = kzalloc(sizeof(*llh), GFP_NOFS); if (llh == NULL) return -ENOMEM; handle->lgh_hdr = llh; /* first assign flags to use llog_client_ops */ llh->llh_flags = flags; rc = llog_read_header(env, handle, uuid); if (rc == 0) { if (unlikely((llh->llh_flags & LLOG_F_IS_PLAIN && flags & LLOG_F_IS_CAT) || (llh->llh_flags & LLOG_F_IS_CAT && flags & LLOG_F_IS_PLAIN))) { CERROR("%s: llog type is %s but initializing %s\n", handle->lgh_ctxt->loc_obd->obd_name, llh->llh_flags & LLOG_F_IS_CAT ? "catalog" : "plain", flags & LLOG_F_IS_CAT ? "catalog" : "plain"); rc = -EINVAL; goto out; } else if (llh->llh_flags & (LLOG_F_IS_PLAIN | LLOG_F_IS_CAT)) { /* * it is possible to open llog without specifying llog * type so it is taken from llh_flags */ flags = llh->llh_flags; } else { /* for some reason the llh_flags has no type set */ CERROR("llog type is not specified!\n"); rc = -EINVAL; goto out; } if (unlikely(uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) { CERROR("%s: llog uuid mismatch: %s/%s\n", handle->lgh_ctxt->loc_obd->obd_name, (char *)uuid->uuid, (char *)llh->llh_tgtuuid.uuid); rc = -EEXIST; goto out; } } if (flags & LLOG_F_IS_CAT) { LASSERT(list_empty(&handle->u.chd.chd_head)); INIT_LIST_HEAD(&handle->u.chd.chd_head); llh->llh_size = sizeof(struct llog_logid_rec); } else if (!(flags & LLOG_F_IS_PLAIN)) { CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n", handle->lgh_ctxt->loc_obd->obd_name, flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN); rc = -EINVAL; } out: if (rc) { kfree(llh); handle->lgh_hdr = NULL; } return rc; }
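/*
 * A standalone restatement of the type check in llog_init_handle() above:
 * a header already flagged PLAIN cannot be opened as CAT and vice versa, a
 * header carrying no type at all is rejected, and otherwise the caller
 * inherits the on-disk type.  The flag values are placeholders, not the
 * real LLOG_F_* bits.
 */
#include <stdio.h>

#define DEMO_F_IS_PLAIN	0x1	/* placeholder */
#define DEMO_F_IS_CAT	0x2	/* placeholder */

static int demo_resolve_llog_type(int hdr_flags, int *open_flags)
{
	if ((hdr_flags & DEMO_F_IS_PLAIN && *open_flags & DEMO_F_IS_CAT) ||
	    (hdr_flags & DEMO_F_IS_CAT && *open_flags & DEMO_F_IS_PLAIN))
		return -22;		/* -EINVAL: type mismatch */
	if (!(hdr_flags & (DEMO_F_IS_PLAIN | DEMO_F_IS_CAT)))
		return -22;		/* header has no type recorded */
	*open_flags = hdr_flags;	/* inherit the on-disk type */
	return 0;
}

int main(void)
{
	int flags = DEMO_F_IS_CAT;

	printf("plain-as-cat: %d\n",
	       demo_resolve_llog_type(DEMO_F_IS_PLAIN, &flags));
	flags = 0;
	printf("untyped open: %d\n",
	       demo_resolve_llog_type(DEMO_F_IS_PLAIN, &flags));
	return 0;
}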
/** * Send request reply from request \a req reply buffer. * \a flags defines reply types * Returns 0 on success or error code */ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags) { struct ptlrpc_reply_state *rs = req->rq_reply_state; struct ptlrpc_connection *conn; int rc; /* We must already have a reply buffer (only ptlrpc_error() may be * called without one). The reply generated by sptlrpc layer (e.g. * error notify, etc.) might have a NULL rq_reqmsg; otherwise we must * have a request buffer which is either the actual (swabbed) incoming * request, or a saved copy if this is a req saved in * target_queue_final_reply(). */ LASSERT (req->rq_no_reply == 0); LASSERT (req->rq_reqbuf != NULL); LASSERT (rs != NULL); LASSERT ((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult); LASSERT (req->rq_repmsg != NULL); LASSERT (req->rq_repmsg == rs->rs_msg); LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback); LASSERT (rs->rs_cb_id.cbid_arg == rs); /* There may be no rq_export during failover */ if (unlikely(req->rq_export && req->rq_export->exp_obd && req->rq_export->exp_obd->obd_fail)) { /* Failed obd's only send ENODEV */ req->rq_type = PTL_RPC_MSG_ERR; req->rq_status = -ENODEV; CDEBUG(D_HA, "sending ENODEV from failed obd %d\n", req->rq_export->exp_obd->obd_minor); } /* In order to keep interoperability with the client (< 2.3) which * doesn't have pb_jobid in ptlrpc_body, we have to shrink the * ptlrpc_body in reply buffer to ptlrpc_body_v2, otherwise the * reply buffer on the client will overflow. * * XXX Remove this whenever we drop interoperability with such clients. */ req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0, sizeof(struct ptlrpc_body_v2), 1); if (req->rq_type != PTL_RPC_MSG_ERR) req->rq_type = PTL_RPC_MSG_REPLY; lustre_msg_set_type(req->rq_repmsg, req->rq_type); lustre_msg_set_status(req->rq_repmsg, ptlrpc_status_hton(req->rq_status)); lustre_msg_set_opc(req->rq_repmsg, req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0); target_pack_pool_reply(req); ptlrpc_at_set_reply(req, flags); if (req->rq_export == NULL || req->rq_export->exp_connection == NULL) conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL); else conn = ptlrpc_connection_addref(req->rq_export->exp_connection); if (unlikely(conn == NULL)) { CERROR("not replying on NULL connection\n"); /* bug 9635 */ return -ENOTCONN; } ptlrpc_rs_addref(rs); /* +1 ref for the network */ rc = sptlrpc_svc_wrap_reply(req); if (unlikely(rc)) goto out; req->rq_sent = cfs_time_current_sec(); rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len, (rs->rs_difficult && !rs->rs_no_ack) ? LNET_ACK_REQ : LNET_NOACK_REQ, &rs->rs_cb_id, conn, ptlrpc_req2svc(req)->srv_rep_portal, req->rq_xid, req->rq_reply_off); out: if (unlikely(rc != 0)) ptlrpc_req_drop_rs(req); ptlrpc_connection_put(conn); return rc; }
static int seq_client_rpc(struct lu_client_seq *seq, struct lu_seq_range *output, __u32 opc, const char *opcname) { struct obd_export *exp = seq->lcs_exp; struct ptlrpc_request *req; struct lu_seq_range *out, *in; __u32 *op; unsigned int debug_mask; int rc; ENTRY; LASSERT(exp != NULL && !IS_ERR(exp)); req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY, LUSTRE_MDS_VERSION, SEQ_QUERY); if (req == NULL) RETURN(-ENOMEM); /* Init operation code */ op = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_OPC); *op = opc; /* Zero out input range, this is not recovery yet. */ in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE); range_init(in); ptlrpc_request_set_replen(req); in->lsr_index = seq->lcs_space.lsr_index; if (seq->lcs_type == LUSTRE_SEQ_METADATA) fld_range_set_mdt(in); else fld_range_set_ost(in); if (opc == SEQ_ALLOC_SUPER) { req->rq_request_portal = SEQ_CONTROLLER_PORTAL; req->rq_reply_portal = MDC_REPLY_PORTAL; /* During allocating super sequence for data object, * the current thread might hold the export of MDT0(MDT0 * precreating objects on this OST), and it will send the * request to MDT0 here, so we can not keep resending the * request here, otherwise if MDT0 is failed(umounted), * it can not release the export of MDT0 */ if (seq->lcs_type == LUSTRE_SEQ_DATA) req->rq_no_delay = req->rq_no_resend = 1; debug_mask = D_CONSOLE; } else { if (seq->lcs_type == LUSTRE_SEQ_METADATA) req->rq_request_portal = SEQ_METADATA_PORTAL; else req->rq_request_portal = SEQ_DATA_PORTAL; debug_mask = D_INFO; } ptlrpc_at_set_req_timeout(req); if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc) GOTO(out_req, rc); out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE); *output = *out; if (!range_is_sane(output)) { CERROR("%s: Invalid range received from server: " DRANGE"\n", seq->lcs_name, PRANGE(output)); GOTO(out_req, rc = -EINVAL); } if (range_is_exhausted(output)) { CERROR("%s: Range received from server is exhausted: " DRANGE"]\n", seq->lcs_name, PRANGE(output)); GOTO(out_req, rc = -EINVAL); } CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence "DRANGE"]\n", seq->lcs_name, opcname, PRANGE(output)); EXIT; out_req: ptlrpc_req_finished(req); return rc; }
/** * Send request \a request. * if \a noreply is set, don't expect any reply back and don't set up * reply buffers. * Returns 0 on success or error code. */ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) { int rc; int rc2; int mpflag = 0; struct ptlrpc_connection *connection; lnet_handle_me_t reply_me_h; lnet_md_t reply_md; struct obd_device *obd = request->rq_import->imp_obd; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC)) RETURN(0); LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST); LASSERT(request->rq_wait_ctx == 0); /* If this is a re-transmit, we're required to have disengaged * cleanly from the previous attempt */ LASSERT(!request->rq_receiving_reply); if (request->rq_import->imp_obd && request->rq_import->imp_obd->obd_fail) { CDEBUG(D_HA, "muting rpc for failed imp obd %s\n", request->rq_import->imp_obd->obd_name); /* this prevents us from waiting in ptlrpc_queue_wait */ request->rq_err = 1; request->rq_status = -ENODEV; RETURN(-ENODEV); } connection = request->rq_import->imp_connection; lustre_msg_set_handle(request->rq_reqmsg, &request->rq_import->imp_remote_handle); lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST); lustre_msg_set_conn_cnt(request->rq_reqmsg, request->rq_import->imp_conn_cnt); lustre_msghdr_set_flags(request->rq_reqmsg, request->rq_import->imp_msghdr_flags); if (request->rq_resend) lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT); if (request->rq_memalloc) mpflag = cfs_memory_pressure_get_and_set(); rc = sptlrpc_cli_wrap_request(request); if (rc) GOTO(out, rc); /* bulk register should be done after wrap_request() */ if (request->rq_bulk != NULL) { rc = ptlrpc_register_bulk (request); if (rc != 0) GOTO(out, rc); } if (!noreply) { LASSERT (request->rq_replen != 0); if (request->rq_repbuf == NULL) { LASSERT(request->rq_repdata == NULL); LASSERT(request->rq_repmsg == NULL); rc = sptlrpc_cli_alloc_repbuf(request, request->rq_replen); if (rc) { /* this prevents us from looping in * ptlrpc_queue_wait */ request->rq_err = 1; request->rq_status = rc; GOTO(cleanup_bulk, rc); } } else { request->rq_repdata = NULL; request->rq_repmsg = NULL; } rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/ connection->c_peer, request->rq_xid, 0, LNET_UNLINK, LNET_INS_AFTER, &reply_me_h); if (rc != 0) { CERROR("LNetMEAttach failed: %d\n", rc); LASSERT (rc == -ENOMEM); GOTO(cleanup_bulk, rc = -ENOMEM); } } spin_lock(&request->rq_lock); /* If the MD attach succeeds, there _will_ be a reply_in callback */ request->rq_receiving_reply = !noreply; /* We are responsible for unlinking the reply buffer */ request->rq_must_unlink = !noreply; /* Clear any flags that may be present from previous sends. */ request->rq_replied = 0; request->rq_err = 0; request->rq_timedout = 0; request->rq_net_err = 0; request->rq_resend = 0; request->rq_restart = 0; request->rq_reply_truncate = 0; spin_unlock(&request->rq_lock); if (!noreply) { reply_md.start = request->rq_repbuf; reply_md.length = request->rq_repbuf_len; /* Allow multiple early replies */ reply_md.threshold = LNET_MD_THRESH_INF; /* Manage remote for early replies */ reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MANAGE_REMOTE | LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */ reply_md.user_ptr = &request->rq_reply_cbid; reply_md.eq_handle = ptlrpc_eq_h; /* We must see the unlink callback to unset rq_must_unlink, so we can't auto-unlink */ rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN, &request->rq_reply_md_h); if (rc != 0) { CERROR("LNetMDAttach failed: %d\n", rc); LASSERT (rc == -ENOMEM); spin_lock(&request->rq_lock); /* ...but the MD attach didn't succeed... */ request->rq_receiving_reply = 0; spin_unlock(&request->rq_lock); GOTO(cleanup_me, rc = -ENOMEM); } CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 ", portal %u\n", request->rq_repbuf_len, request->rq_xid, request->rq_reply_portal); } /* add references on request for request_out_callback */ ptlrpc_request_addref(request); if (obd->obd_svc_stats != NULL) lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR, cfs_atomic_read(&request->rq_import->imp_inflight)); OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5); cfs_gettimeofday(&request->rq_arrival_time); request->rq_sent = cfs_time_current_sec(); /* We give the server rq_timeout secs to process the req, and add the network latency for our local timeout. */ request->rq_deadline = request->rq_sent + request->rq_timeout + ptlrpc_at_get_net_latency(request); ptlrpc_pinger_sending_on_import(request->rq_import); DEBUG_REQ(D_INFO, request, "send flg=%x", lustre_msg_get_flags(request->rq_reqmsg)); rc = ptl_send_buf(&request->rq_req_md_h, request->rq_reqbuf, request->rq_reqdata_len, LNET_NOACK_REQ, &request->rq_req_cbid, connection, request->rq_request_portal, request->rq_xid, 0); if (rc == 0) GOTO(out, rc); ptlrpc_req_finished(request); if (noreply) GOTO(out, rc); cleanup_me: /* MEUnlink is safe; the PUT didn't even get off the ground, and * nobody apart from the PUT's target has the right nid+XID to * access the reply buffer. */ rc2 = LNetMEUnlink(reply_me_h); LASSERT (rc2 == 0); /* UNLINKED callback called synchronously */ LASSERT(!request->rq_receiving_reply); cleanup_bulk: /* We do sync unlink here as there was no real transfer here so * the chance of a long unlink over a sluggish net is smaller here. */ ptlrpc_unregister_bulk(request, 0); out: if (request->rq_memalloc) cfs_memory_pressure_restore(mpflag); return rc; }
/** * Create a memory descriptor and attach it to a ME * * \param meh A handle for a ME to associate the new MD with. * \param umd Provides initial values for the user-visible parts of a MD. * Other than its use for initialization, there is no linkage between this * structure and the MD maintained by the LNet. * \param unlink A flag to indicate whether the MD is automatically unlinked * when it becomes inactive, either because the operation threshold drops to * zero or because the available memory becomes less than \a umd.max_size. * (Note that the check for unlinking a MD only occurs after the completion * of a successful operation on the MD.) The value LNET_UNLINK enables auto * unlinking; the value LNET_RETAIN disables it. * \param handle On successful returns, a handle to the newly created MD is * saved here. This handle can be used later in LNetMDUnlink(). * * \retval 0 On success. * \retval -EINVAL If \a umd is not valid. * \retval -ENOMEM If new MD cannot be allocated. * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by * calling LNetInvalidateHandle() on it. * \retval -EBUSY If the ME pointed to by \a meh is already associated with * a MD. */ int LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) { CFS_LIST_HEAD (matches); CFS_LIST_HEAD (drops); struct lnet_me *me; struct lnet_libmd *md; int cpt; int rc; LASSERT (the_lnet.ln_init); LASSERT (the_lnet.ln_refcount > 0); if (lnet_md_validate(&umd) != 0) return -EINVAL; if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) { CERROR("Invalid option: no MD_OP set\n"); return -EINVAL; } md = lnet_md_alloc(&umd); if (md == NULL) return -ENOMEM; rc = lnet_md_build(md, &umd, unlink); cpt = lnet_cpt_of_cookie(meh.cookie); lnet_res_lock(cpt); if (rc != 0) goto failed; me = lnet_handle2me(&meh); if (me == NULL) rc = -ENOENT; else if (me->me_md != NULL) rc = -EBUSY; else rc = lnet_md_link(md, umd.eq_handle, cpt); if (rc != 0) goto failed; /* attach this MD to portal of ME and check if it matches any * blocked msgs on this portal */ lnet_ptl_attach_md(me, md, &matches, &drops); lnet_md2handle(handle, md); lnet_res_unlock(cpt); lnet_drop_delayed_msg_list(&drops, "Bad match"); lnet_recv_delayed_msg_list(&matches); return 0; failed: lnet_md_free_locked(md); lnet_res_unlock(cpt); return rc; }
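/*
 * A minimal caller sketch for LNetMDAttach(), mirroring the pattern used by
 * ptlrpc_register_bulk() earlier in this section: attach an ME for the
 * expected matchbits, then attach the MD to it, and unlink the bare ME again
 * if the MD attach fails.  The portal, peer, matchbits and md arguments are
 * placeholders supplied by the caller; this is illustrative, not a helper
 * from the LNet API.
 */
static int demo_post_bulk_buffer(unsigned int portal, lnet_process_id_t peer,
				 __u64 matchbits, lnet_md_t md,
				 lnet_handle_md_t *md_h)
{
	lnet_handle_me_t me_h;
	int rc;

	rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
			  LNET_INS_AFTER, &me_h);
	if (rc != 0)
		return rc;

	rc = LNetMDAttach(me_h, md, LNET_UNLINK, md_h);
	if (rc != 0)
		LNetMEUnlink(me_h);	/* roll back the unused ME */

	return rc;
}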
static int lsm_unpackmd_common(struct lov_obd *lov, struct lov_stripe_md *lsm, struct lov_mds_md *lmm, struct lov_ost_data_v1 *objects) { struct lov_oinfo *loi; loff_t min_stripe_maxbytes = 0; loff_t lov_bytes; unsigned int stripe_count; unsigned int i; /* * This supposes lov_mds_md_v1/v3 first fields * are the same */ lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi); lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size); lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern); lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen); lsm->lsm_pool_name[0] = '\0'; stripe_count = lsm_is_released(lsm) ? 0 : lsm->lsm_stripe_count; for (i = 0; i < stripe_count; i++) { loi = lsm->lsm_oinfo[i]; ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi); loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx); loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen); if (lov_oinfo_is_dummy(loi)) continue; if (loi->loi_ost_idx >= lov->desc.ld_tgt_count && !lov2obd(lov)->obd_process_conf) { CERROR("%s: OST index %d more than OST count %d\n", (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx, lov->desc.ld_tgt_count); lov_dump_lmm_v1(D_WARNING, lmm); return -EINVAL; } if (lov->lov_tgts[loi->loi_ost_idx] == NULL) { CERROR("%s: OST index %d missing\n", (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx); lov_dump_lmm_v1(D_WARNING, lmm); continue; } lov_bytes = lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx]); if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes) min_stripe_maxbytes = lov_bytes; } if (min_stripe_maxbytes == 0) min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; stripe_count = lsm->lsm_stripe_count ?: lov->desc.ld_tgt_count; lov_bytes = min_stripe_maxbytes * stripe_count; if (lov_bytes < min_stripe_maxbytes) /* handle overflow */ lsm->lsm_maxbytes = MAX_LFS_FILESIZE; else lsm->lsm_maxbytes = lov_bytes; return 0; }
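/*
 * A standalone illustration of the file-size clamp at the end of
 * lsm_unpackmd_common() above: multiplying the smallest per-stripe limit by
 * the stripe count can wrap, in which case the limit falls back to the
 * maximum file size.  This sketch checks for the wrap before multiplying
 * (the code above detects it afterwards); the cap value is an assumption.
 */
#include <stdio.h>

#define DEMO_MAX_LFS_FILESIZE 0x7fffffffffffffffLL	/* assumed cap */

static long long demo_lsm_maxbytes(long long min_stripe_maxbytes,
				   unsigned int stripe_count)
{
	if (stripe_count != 0 &&
	    min_stripe_maxbytes > DEMO_MAX_LFS_FILESIZE / stripe_count)
		return DEMO_MAX_LFS_FILESIZE;	/* product would overflow */
	return min_stripe_maxbytes * (long long)stripe_count;
}

int main(void)
{
	printf("%lld\n", demo_lsm_maxbytes(1LL << 44, 8));	/* fits */
	printf("%lld\n", demo_lsm_maxbytes(1LL << 62, 8));	/* clamped */
	return 0;
}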
static int llog_process_thread(void *arg) { struct llog_process_info *lpi = arg; struct llog_handle *loghandle = lpi->lpi_loghandle; struct llog_log_hdr *llh = loghandle->lgh_hdr; struct llog_process_cat_data *cd = lpi->lpi_catdata; char *buf; __u64 cur_offset = LLOG_CHUNK_SIZE; __u64 last_offset; int rc = 0, index = 1, last_index; int saved_index = 0; int last_called_index = 0; ENTRY; LASSERT(llh); OBD_ALLOC(buf, LLOG_CHUNK_SIZE); if (!buf) { lpi->lpi_rc = -ENOMEM; RETURN(0); } if (cd != NULL) { last_called_index = cd->lpcd_first_idx; index = cd->lpcd_first_idx + 1; } if (cd != NULL && cd->lpcd_last_idx) last_index = cd->lpcd_last_idx; else last_index = LLOG_BITMAP_BYTES * 8 - 1; while (rc == 0) { struct llog_rec_hdr *rec; /* skip records not set in bitmap */ while (index <= last_index && !ext2_test_bit(index, llh->llh_bitmap)) ++index; LASSERT(index <= last_index + 1); if (index == last_index + 1) break; repeat: CDEBUG(D_OTHER, "index: %d last_index %d\n", index, last_index); /* get the buf with our target record; avoid old garbage */ memset(buf, 0, LLOG_CHUNK_SIZE); last_offset = cur_offset; rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index, index, &cur_offset, buf, LLOG_CHUNK_SIZE); if (rc) GOTO(out, rc); /* NB: when rec->lrh_len is accessed it is already swabbed * since it is used at the "end" of the loop and the rec * swabbing is done at the beginning of the loop. */ for (rec = (struct llog_rec_hdr *)buf; (char *)rec < buf + LLOG_CHUNK_SIZE; rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){ CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n", rec, rec->lrh_type); if (LLOG_REC_HDR_NEEDS_SWABBING(rec)) lustre_swab_llog_rec(rec); CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n", rec->lrh_type, rec->lrh_index); if (rec->lrh_index == 0) { /* probably another rec just got added? */ if (index <= loghandle->lgh_last_idx) GOTO(repeat, rc = 0); GOTO(out, rc = 0); /* no more records */ } if (rec->lrh_len == 0 || rec->lrh_len > LLOG_CHUNK_SIZE) { CWARN("invalid length %d in llog record for " "index %d/%d\n", rec->lrh_len, rec->lrh_index, index); GOTO(out, rc = -EINVAL); } if (rec->lrh_index < index) { CDEBUG(D_OTHER, "skipping lrh_index %d\n", rec->lrh_index); continue; } CDEBUG(D_OTHER, "lrh_index: %d lrh_len: %d (%d remains)\n", rec->lrh_index, rec->lrh_len, (int)(buf + LLOG_CHUNK_SIZE - (char *)rec)); loghandle->lgh_cur_idx = rec->lrh_index; loghandle->lgh_cur_offset = (char *)rec - (char *)buf + last_offset; /* if set, process the callback on this record */ if (ext2_test_bit(index, llh->llh_bitmap)) { rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec, lpi->lpi_cbdata); last_called_index = index; if (rc == LLOG_PROC_BREAK) { GOTO(out, rc); } else if (rc == LLOG_DEL_RECORD) { rc = llog_cancel_rec(lpi->lpi_env, loghandle, rec->lrh_index); } if (rc) GOTO(out, rc); } else { CDEBUG(D_OTHER, "Skipped index %d\n", index); } /* next record, still in buffer? */ ++index; if (index > last_index) GOTO(out, rc = 0); } } out: if (cd != NULL) cd->lpcd_last_idx = last_called_index; if (unlikely(rc == -EIO)) { /* something bad happened to the processing, probably I/O * error or the log got corrupted.. * to be able to finally release the log we discard any * remaining bits in the header */ CERROR("llog found corrupted\n"); while (index <= last_index) { if (ext2_test_bit(index, llh->llh_bitmap) != 0) llog_cancel_rec(lpi->lpi_env, loghandle, index); index++; } rc = 0; } OBD_FREE(buf, LLOG_CHUNK_SIZE); lpi->lpi_rc = rc; return 0; }
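/*
 * A self-contained sketch of the inner loop of llog_process_thread() above:
 * records sit back to back in a fixed-size chunk and are walked by adding
 * each record's lrh_len to the cursor, stopping on a zero or oversized
 * length.  The record layout and chunk size are simplified stand-ins.
 */
#include <stdio.h>
#include <string.h>

#define DEMO_CHUNK_SIZE 64

struct demo_rec_hdr {
	unsigned int lrh_len;
	unsigned int lrh_index;
};

static void demo_walk_chunk(char *buf)
{
	struct demo_rec_hdr *rec;

	for (rec = (struct demo_rec_hdr *)buf;
	     (char *)rec < buf + DEMO_CHUNK_SIZE;
	     rec = (struct demo_rec_hdr *)((char *)rec + rec->lrh_len)) {
		if (rec->lrh_len == 0 || rec->lrh_len > DEMO_CHUNK_SIZE)
			break;		/* empty tail or corrupt record */
		printf("record %u, %u bytes\n", rec->lrh_index, rec->lrh_len);
	}
}

int main(void)
{
	union {
		char bytes[DEMO_CHUNK_SIZE];
		struct demo_rec_hdr align;	/* keep the buffer aligned */
	} chunk;
	struct demo_rec_hdr first = { 24, 7 }, second = { 16, 8 };

	memset(chunk.bytes, 0, sizeof(chunk.bytes));
	memcpy(chunk.bytes, &first, sizeof(first));
	memcpy(chunk.bytes + 24, &second, sizeof(second));
	demo_walk_chunk(chunk.bytes);	/* prints records 7 and 8 */
	return 0;
}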