/**
 * Starts bulk transfer for descriptor \a desc on the server.
 * Returns 0 on success or a negative error code.
 */
int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
{
	struct obd_export        *exp = desc->bd_export;
	struct ptlrpc_connection *conn = exp->exp_connection;
	int                       rc = 0;
	__u64                     xid;
	int                       posted_md;
	int                       total_md;
	lnet_md_t                 md;
	ENTRY;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_PUT_NET))
		RETURN(0);

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_md_count == 0);
	LASSERT(desc->bd_type == BULK_PUT_SOURCE ||
		desc->bd_type == BULK_GET_SINK);

	LASSERT(desc->bd_cbid.cbid_fn == server_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	/* NB total length may be 0 for a read past EOF, so we send 0
	 * length bulks, since the client expects bulk events.
	 *
	 * The client may not need all of the bulk XIDs for the RPC.  The RPC
	 * uses the XID of the highest bulk XID needed, and the server masks
	 * off high bits to get the bulk count for this RPC. LU-1431 */
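	/* Worked example (hypothetical values): with bd_md_max_brw = 8 and
	 * rq_xid = 0x1234567b, the mask below gives xid = 0x12345678, so
	 * total_md = 0x1234567b - 0x12345678 + 1 = 4, i.e. this RPC uses
	 * four bulk MDs with match bits 0x12345678..0x1234567b. */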
	xid = desc->bd_req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
	total_md = desc->bd_req->rq_xid - xid + 1;

	desc->bd_md_count = total_md;
	desc->bd_failure = 0;

	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 2; /* SENT and ACK/REPLY */

	for (posted_md = 0; posted_md < total_md; xid++) {
		md.options = PTLRPC_MD_OPTIONS;

		/* NB it's assumed that source and sink buffer frags are
		 * page-aligned. Otherwise we'd have to send client bulk
		 * sizes over and split server buffer accordingly */
		ptlrpc_fill_bulk_md(&md, desc, posted_md);
		rc = LNetMDBind(md, LNET_UNLINK, &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDBind failed for MD %u: rc = %d\n",
			       exp->exp_obd->obd_name, posted_md, rc);
			LASSERT(rc == -ENOMEM);
			if (posted_md == 0) {
				desc->bd_md_count = 0;
				RETURN(-ENOMEM);
			}
			break;
		}
		/* Network is about to get at the memory */
		if (desc->bd_type == BULK_PUT_SOURCE)
			rc = LNetPut(conn->c_self, desc->bd_mds[posted_md],
				     LNET_ACK_REQ, conn->c_peer,
				     desc->bd_portal, xid, 0, 0);
		else
			rc = LNetGet(conn->c_self, desc->bd_mds[posted_md],
				     conn->c_peer, desc->bd_portal, xid, 0);

		posted_md++;
		if (rc != 0) {
			CERROR("%s: failed bulk transfer with %s:%u x"LPU64": "
			       "rc = %d\n", exp->exp_obd->obd_name,
			       libcfs_id2str(conn->c_peer), desc->bd_portal,
			       xid, rc);
			break;
		}
	}

	if (rc != 0) {
		/* Can't send, so we unlink the MD bound above.  The UNLINK
		 * event this creates will signal completion with failure,
		 * so we return SUCCESS here! */
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);

		mdunlink_iterate_helper(desc->bd_mds, posted_md);
		RETURN(0);
	}

	CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d "
	       "id %s xid "LPX64"-"LPX64"\n", desc->bd_iov_count,
	       desc->bd_nob, desc->bd_portal, libcfs_id2str(conn->c_peer),
	       xid - posted_md, xid - 1);

	RETURN(0);
}
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or a negative error code.
 */
int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	lnet_process_id_t peer;
	int rc = 0;
	int rc2;
	int posted_md;
	int total_md;
	__u64 xid;
	lnet_handle_me_t  me_h;
	lnet_md_t         md;
	ENTRY;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
		RETURN(0);

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_nob > 0);
	LASSERT(desc->bd_md_count == 0);
	LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
	LASSERT(desc->bd_req != NULL);
	LASSERT(desc->bd_type == BULK_PUT_SINK ||
		desc->bd_type == BULK_GET_SOURCE);

	/* clean up the state of the bulk in case it will be reused */
	if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
		desc->bd_nob_transferred = 0;
	else
		LASSERT(desc->bd_nob_transferred == 0);

	desc->bd_failure = 0;

	peer = desc->bd_import->imp_connection->c_peer;

	LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	/* An XID is only used for a single request from the client.
	 * For retried bulk transfers, a new XID will be allocated in
	 * ptlrpc_check_set() if it needs to be resent, so it is not
	 * using the same RDMA match bits after an error.
	 *
	 * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
	 * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
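	/* E.g. (hypothetical values): with bd_md_max_brw = 4 and
	 * rq_xid = 0x102, the mask below gives xid = 0x100; if three MDs
	 * are needed, match bits 0x100..0x102 are attached and rq_xid is
	 * reset to the last one (0x102) once registration succeeds. */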
	xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
	LASSERTF(!(desc->bd_registered &&
		   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
		 xid != desc->bd_last_xid,
		 "registered: %d  rq_xid: "LPU64" bd_last_xid: "LPU64"\n",
		 desc->bd_registered, xid, desc->bd_last_xid);

	total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
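	/* NB the above is a ceiling division, e.g. (hypothetical values)
	 * bd_iov_count = 300 with LNET_MAX_IOV = 256 gives total_md = 2. */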
	desc->bd_registered = 1;
	desc->bd_last_xid = xid;
	desc->bd_md_count = total_md;
	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 1;                       /* PUT or GET */

	for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
		md.options = PTLRPC_MD_OPTIONS |
			     ((desc->bd_type == BULK_GET_SOURCE) ?
			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
		ptlrpc_fill_bulk_md(&md, desc, posted_md);

		rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
		if (rc != 0) {
			CERROR("%s: LNetMEAttach failed x"LPU64"/%d: rc = %d\n",
			       desc->bd_export->exp_obd->obd_name, xid,
			       posted_md, rc);
			break;
		}

		/* About to let the network at it... */
		rc = LNetMDAttach(me_h, md, LNET_UNLINK,
				  &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDAttach failed x"LPU64"/%d: rc = %d\n",
			       desc->bd_export->exp_obd->obd_name, xid,
			       posted_md, rc);
			rc2 = LNetMEUnlink(me_h);
			LASSERT(rc2 == 0);
			break;
		}
	}

	if (rc != 0) {
		LASSERT(rc == -ENOMEM);
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);
		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
		req->rq_status = -ENOMEM;
		RETURN(-ENOMEM);
	}

	/* Set rq_xid to matchbits of the final bulk so that server can
	 * infer the number of bulks that were prepared */
	req->rq_xid = --xid;
	LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
		 "bd_last_xid = x"LPU64", rq_xid = x"LPU64"\n",
		 desc->bd_last_xid, req->rq_xid);

	spin_lock(&desc->bd_lock);
	/* Holler if peer manages to touch buffers before he knows the xid */
	if (desc->bd_md_count != total_md)
		CWARN("%s: Peer %s touched %d buffers while I registered\n",
		      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
		      total_md - desc->bd_md_count);
	spin_unlock(&desc->bd_lock);

	CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, "
	       "xid x"LPX64"-"LPX64", portal %u\n", desc->bd_md_count,
	       desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
	       desc->bd_iov_count, desc->bd_nob,
	       desc->bd_last_xid, req->rq_xid, desc->bd_portal);

	RETURN(0);
}
static ssize_t osp_md_read(const struct lu_env *env, struct dt_object *dt,
			   struct lu_buf *rbuf, loff_t *pos)
{
	struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev);
	struct dt_device *dt_dev	= &osp->opd_dt_dev;
	struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2;
	char *ptr = rbuf->lb_buf;
	struct osp_update_request *update = NULL;
	struct ptlrpc_request *req = NULL;
	struct out_read_reply *orr;
	struct ptlrpc_bulk_desc *desc;
	struct object_update_reply *reply;
	__u32 left_size;
	int nbufs;
	int i;
	int rc;
	ENTRY;

	/* Because we need to send the update buffer right away,
	 * just create a per-request update buffer, instead of
	 * attaching it to the update_remote list of the thandle.  */
	update = osp_update_request_create(dt_dev);
	if (IS_ERR(update))
		GOTO(out, rc = PTR_ERR(update));

	rc = osp_update_rpc_pack(env, read, update, OUT_READ,
				 lu_object_fid(&dt->do_lu),
				 rbuf->lb_len, *pos);
	if (rc != 0) {
		CERROR("%s: cannot insert update: rc = %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name, rc);
		GOTO(out, rc);
	}

	rc = osp_prep_update_req(env, osp->opd_obd->u.cli.cl_import, update,
				 &req);
	if (rc != 0)
		GOTO(out, rc);

	nbufs = (rbuf->lb_len + OUT_BULK_BUFFER_SIZE - 1) /
					OUT_BULK_BUFFER_SIZE;
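	/* E.g. (hypothetical sizes): lb_len = 10000 with
	 * OUT_BULK_BUFFER_SIZE = 4096 gives nbufs = 3; the loop below then
	 * adds fragments of 4096, 4096 and 1808 bytes. */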
	/* allocate bulk descriptor */
	desc = ptlrpc_prep_bulk_imp(req, nbufs, 1,
				    PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KVEC,
				    MDS_BULK_PORTAL, &ptlrpc_bulk_kvec_ops);
	if (desc == NULL)
		GOTO(out, rc = -ENOMEM);

	/* split the buffer into small chunks */
	left_size = rbuf->lb_len;
	for (i = 0; i < nbufs; i++) {
		int read_size;

		read_size = left_size > OUT_BULK_BUFFER_SIZE ?
				OUT_BULK_BUFFER_SIZE : left_size;
		desc->bd_frag_ops->add_iov_frag(desc, ptr, read_size);

		ptr += read_size;
		left_size -= read_size;
	}

	/* This will only be called with read-only updates, which may also
	 * be used to retrieve the update log during recovery, so sending
	 * is allowed while recovery is in progress */
	req->rq_allow_replay = 1;
	req->rq_bulk_read = 1;
	/* send request to master and wait for RPC to complete */
	rc = ptlrpc_queue_wait(req);
	if (rc != 0)
		GOTO(out, rc);

	rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
					  req->rq_bulk->bd_nob_transferred);
	if (rc < 0)
		GOTO(out, rc);

	reply = req_capsule_server_sized_get(&req->rq_pill,
					     &RMF_OUT_UPDATE_REPLY,
					     OUT_UPDATE_REPLY_SIZE);

	if (reply->ourp_magic != UPDATE_REPLY_MAGIC) {
		CERROR("%s: invalid update reply magic %x expected %x:"
		       " rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name,
		       reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO);
		GOTO(out, rc = -EPROTO);
	}

	rc = object_update_result_data_get(reply, lbuf, 0);
	if (rc < 0)
		GOTO(out, rc);

	if (lbuf->lb_len < sizeof(*orr))
		GOTO(out, rc = -EPROTO);

	orr = lbuf->lb_buf;
	orr_le_to_cpu(orr, orr);
	rc = orr->orr_size;
	*pos = orr->orr_offset;
out:
	if (req != NULL)
		ptlrpc_req_finished(req);

	if (update != NULL)
		osp_update_request_destroy(update);

	RETURN(rc);
}
/**
 * Implementation of dt_index_operations::dio_lookup
 *
 * Look up a record by key under a remote index object. It packs the lookup
 * update into an RPC, sends it to the remote OUT service and waits for the
 * lookup result.
 *
 * \param[in] env	execution environment
 * \param[in] dt	index object to lookup
 * \param[out] rec	record in which to return lookup result
 * \param[in] key	key of index which will be looked up
 *
 * \retval		1 if the lookup succeeds.
 * \retval		negative errno if the lookup fails.
 */
static int osp_md_index_lookup(const struct lu_env *env, struct dt_object *dt,
			       struct dt_rec *rec, const struct dt_key *key)
{
	struct lu_buf		*lbuf	= &osp_env_info(env)->osi_lb2;
	struct osp_device	*osp	= lu2osp_dev(dt->do_lu.lo_dev);
	struct dt_device	*dt_dev	= &osp->opd_dt_dev;
	struct osp_update_request   *update;
	struct object_update_reply *reply;
	struct ptlrpc_request	   *req = NULL;
	struct lu_fid		   *fid;
	int			   rc;
	ENTRY;

	/* Because we need to send the update buffer right away,
	 * just create a per-request update buffer, instead of
	 * attaching it to the update_remote list of the thandle.
	 */
	update = osp_update_request_create(dt_dev);
	if (IS_ERR(update))
		RETURN(PTR_ERR(update));

	rc = osp_update_rpc_pack(env, index_lookup, update, OUT_INDEX_LOOKUP,
				 lu_object_fid(&dt->do_lu), rec, key);
	if (rc != 0) {
		CERROR("%s: Insert update error: rc = %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name, rc);
		GOTO(out, rc);
	}

	rc = osp_remote_sync(env, osp, update, &req);
	if (rc < 0)
		GOTO(out, rc);

	reply = req_capsule_server_sized_get(&req->rq_pill,
					     &RMF_OUT_UPDATE_REPLY,
					     OUT_UPDATE_REPLY_SIZE);
	if (reply->ourp_magic != UPDATE_REPLY_MAGIC) {
		CERROR("%s: Wrong version %x expected %x: rc = %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name,
		       reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO);
		GOTO(out, rc = -EPROTO);
	}

	rc = object_update_result_data_get(reply, lbuf, 0);
	if (rc < 0)
		GOTO(out, rc);

	if (lbuf->lb_len != sizeof(*fid)) {
		CERROR("%s: lookup "DFID" %s wrong size %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name,
		       PFID(lu_object_fid(&dt->do_lu)), (char *)key,
		       (int)lbuf->lb_len);
		GOTO(out, rc = -EINVAL);
	}

	fid = lbuf->lb_buf;
	if (ptlrpc_rep_need_swab(req))
		lustre_swab_lu_fid(fid);
	if (!fid_is_sane(fid)) {
		CERROR("%s: lookup "DFID" %s invalid fid "DFID"\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name,
		       PFID(lu_object_fid(&dt->do_lu)), (char *)key, PFID(fid));
		GOTO(out, rc = -EINVAL);
	}

	memcpy(rec, fid, sizeof(*fid));

	GOTO(out, rc = 1);

out:
	if (req != NULL)
		ptlrpc_req_finished(req);

	osp_update_request_destroy(update);

	return rc;
}
/*
 * Look-up a slave index file. If the slave index isn't found:
 * - if local is set to false, we allocate a FID from FID_SEQ_QUOTA sequence and
 *   create the index.
 * - otherwise, we create the index file with a local reserved FID (see
 *   lquota_local_oid)
 *
 * \param env - is the environment passed by the caller
 * \param dev - is the backend dt_device where to look-up/create the slave index
 * \param parent - is the parent directory where to create the slave index if
 *                 it does not exist already
 * \param glb_fid - is the fid of the global index file associated with this
 *                  slave index.
 * \param uuid    - is the uuid of slave which is (re)connecting to the master
 *                  target
 * \param local   - indicate whether to use local reserved FID (LQUOTA_USR_OID
 *                  & LQUOTA_GRP_OID) for the slave index creation or to
 *                  allocate a new fid from sequence FID_SEQ_QUOTA
 *
 * \retval     - pointer to the dt_object of the slave index on success,
 *               appropriate error on failure
 */
struct dt_object *lquota_disk_slv_find_create(const struct lu_env *env,
					      struct dt_device *dev,
					      struct dt_object *parent,
					      struct lu_fid *glb_fid,
					      struct obd_uuid *uuid,
					      bool local)
{
	struct lquota_thread_info	*qti = lquota_info(env);
	struct dt_object		*slv_idx;
	int				 rc;
	ENTRY;

	LASSERT(uuid != NULL);

	CDEBUG(D_QUOTA, "lookup/create slave index file for %s\n",
	       obd_uuid2str(uuid));

	/* generate filename associated with the slave */
	rc = lquota_disk_slv_filename(glb_fid, uuid, qti->qti_buf);
	if (rc)
		RETURN(ERR_PTR(rc));

	/* Slave indexes use the FID_SEQ_QUOTA sequence since they can be read
	 * through the network */
	qti->qti_fid.f_seq = FID_SEQ_QUOTA;
	qti->qti_fid.f_ver = 0;
	if (local) {
		int type;

		rc = lquota_extract_fid(glb_fid, NULL, NULL, &type);
		if (rc)
			RETURN(ERR_PTR(rc));

		/* use predefined fid in the reserved oid list */
		qti->qti_fid.f_oid = qtype2slv_oid(type);

		slv_idx = local_index_find_or_create_with_fid(env, dev,
							      &qti->qti_fid,
							      parent,
							      qti->qti_buf,
							      LQUOTA_MODE,
							&dt_quota_slv_features);
	} else {
		/* allocate fid dynamically if index does not exist already */
		qti->qti_fid.f_oid = LQUOTA_GENERATED_OID;

		/* lookup/create slave index file */
		slv_idx = lquota_disk_find_create(env, dev, parent,
						  &qti->qti_fid,
						  &dt_quota_slv_features,
						  qti->qti_buf);
	}

	if (IS_ERR(slv_idx))
		RETURN(slv_idx);

	/* install index operation vector */
	if (slv_idx->do_index_ops == NULL) {
		rc = slv_idx->do_ops->do_index_try(env, slv_idx,
						   &dt_quota_slv_features);
		if (rc) {
			CERROR("%s: failed to setup index operations for "DFID
			       " rc:%d\n", dev->dd_lu_dev.ld_obd->obd_name,
			       PFID(lu_object_fid(&slv_idx->do_lu)), rc);
			dt_object_put(env, slv_idx);
			slv_idx = ERR_PTR(rc);
		}
	}

	RETURN(slv_idx);
}
/* We always need to remove the presto options before passing 
   mount options to cache FS */
struct super_block * presto_read_super(struct super_block * sb,
                                       void * data, int silent)
{
        struct file_system_type *fstype;
        struct presto_cache *cache = NULL;
        char *cache_data = NULL;
        char *cache_data_end;
        char *cache_type = NULL;
        char *fileset = NULL;
        char *channel = NULL;
        int err; 
        int minor; /* presto_set_channel() may return a negative errno */

        ENTRY;

        /* reserve space for the cache's data */
        PRESTO_ALLOC(cache_data, PAGE_SIZE);
        if ( !cache_data ) {
                CERROR("presto_read_super: Cannot allocate data page.\n");
                EXIT;
                goto out_err;
        }

        /* read and validate options */
        cache_data_end = presto_options(sb, data, cache_data, &cache_type, 
                                        &fileset, &channel);

        /* was there anything for the cache filesystem in the data? */
        if (cache_data_end == cache_data) {
                PRESTO_FREE(cache_data, PAGE_SIZE);
                cache_data = NULL;
        } else {
                CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data,
                       cache_data);
        }

        /* set up the cache */
        cache = presto_cache_init();
        if ( !cache ) {
                CERROR("presto_read_super: failure allocating cache.\n");
                EXIT;
                goto out_err;
        }
        cache->cache_type = cache_type;

        /* link cache to channel */ 
        minor = presto_set_channel(cache, channel);
        if (minor < 0) { 
                EXIT;
                goto out_err;
        }

        CDEBUG(D_SUPER, "Presto: type=%s, fset=%s, dev= %d, flags %x\n",
               cache_type, fileset?fileset:"NULL", minor, cache->cache_flags);

        MOD_INC_USE_COUNT;

        /* get the filter for the cache */
        fstype = get_fs_type(cache_type);
        cache->cache_filter = filter_get_filter_fs((const char *)cache_type); 
        if ( !fstype || !cache->cache_filter) {
                CERROR("Presto: unrecognized fs type or cache type\n");
                MOD_DEC_USE_COUNT;
                EXIT;
                goto out_err;
        }

        /* can we in fact mount the cache */ 
        if ((fstype->fs_flags & FS_REQUIRES_DEV) && !sb->s_bdev) {
                CERROR("filesystem \"%s\" requires a valid block device\n",
                                cache_type);
                MOD_DEC_USE_COUNT;
                EXIT;
                goto out_err;
        }

        sb = fstype->read_super(sb, cache_data, silent);

        /* this might have been freed above */
        if (cache_data) {
                PRESTO_FREE(cache_data, PAGE_SIZE);
                cache_data = NULL;
        }

        if ( !sb ) {
                CERROR("InterMezzo: cache mount failure.\n");
                MOD_DEC_USE_COUNT;
                EXIT;
                goto out_err;
        }

        cache->cache_sb = sb;
        cache->cache_root = dget(sb->s_root);

        /* we now know the dev of the cache: hash the cache */
        presto_cache_add(cache, sb->s_dev);
        err = izo_prepare_fileset(sb->s_root, fileset); 

        filter_setup_journal_ops(cache->cache_filter, cache->cache_type); 

        /* make sure we have our own super operations: sb
           still contains the cache operations */
        filter_setup_super_ops(cache->cache_filter, sb->s_op, 
                               &presto_super_ops);
        sb->s_op = filter_c2usops(cache->cache_filter);

        /* get izo directory operations: sb->s_root->d_inode exists now */
        filter_setup_dir_ops(cache->cache_filter, sb->s_root->d_inode,
                             &presto_dir_iops, &presto_dir_fops);
        filter_setup_dentry_ops(cache->cache_filter, sb->s_root->d_op, 
                                &presto_dentry_ops);
        sb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter);
        sb->s_root->d_inode->i_fop = filter_c2udfops(cache->cache_filter);
        sb->s_root->d_op = filter_c2udops(cache->cache_filter);

        EXIT;
        return sb;

 out_err:
        CDEBUG(D_SUPER, "out_err called\n");
        if (cache)
                PRESTO_FREE(cache, sizeof(struct presto_cache));
        if (cache_data)
                PRESTO_FREE(cache_data, PAGE_SIZE);
        if (fileset)
                PRESTO_FREE(fileset, strlen(fileset) + 1);
        if (channel)
                PRESTO_FREE(channel, strlen(channel) + 1);
        if (cache_type)
                PRESTO_FREE(cache_type, strlen(cache_type) + 1);

        CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n",
               presto_kmemory, presto_vmemory);
        return NULL;
}
/* Allocate new fid on passed client @seq and save it to @fid.
 * Returns 0 on success, 1 if a sequence switch was performed (so the
 * caller must set up FLD for the new sequence), or a negative error code. */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	wait_queue_t link;
	int rc;
	ENTRY;

	LASSERT(seq != NULL);
	LASSERT(fid != NULL);

	init_waitqueue_entry_current(&link);
	mutex_lock(&seq->lcs_mutex);

	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

	while (1) {
		seqno_t seqnr;

		if (!fid_is_zero(&seq->lcs_fid) &&
		    fid_oid(&seq->lcs_fid) < seq->lcs_width) {
			/* Just bump last allocated fid and return to caller. */
			seq->lcs_fid.f_oid += 1;
			rc = 0;
			break;
		}

		rc = seq_fid_alloc_prep(seq, &link);
		if (rc)
			continue;

		rc = seq_client_alloc_seq(env, seq, &seqnr);
		if (rc) {
			CERROR("%s: Can't allocate new sequence, "
			       "rc %d\n", seq->lcs_name, rc);
			seq_fid_alloc_fini(seq);
			mutex_unlock(&seq->lcs_mutex);
			RETURN(rc);
		}

		CDEBUG(D_INFO, "%s: Switch to sequence "
		       "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr);

		seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
		seq->lcs_fid.f_seq = seqnr;
		seq->lcs_fid.f_ver = 0;

		/*
		 * Inform caller that sequence switch is performed to allow it
		 * to setup FLD for it.
		 */
		rc = 1;

		seq_fid_alloc_fini(seq);
		break;
	}

	*fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

	CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid));
	RETURN(rc);
}
/*
 * Server's incoming request callback: called by LNet when a request
 * message arrives in a posted request buffer (LNET_EVENT_PUT) or when
 * the buffer itself is unlinked (LNET_EVENT_UNLINK).
 */
void request_in_callback(lnet_event_t *ev)
{
	struct ptlrpc_cb_id		  *cbid = ev->md.user_ptr;
	struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
	struct ptlrpc_service_part	  *svcpt = rqbd->rqbd_svcpt;
	struct ptlrpc_service	     *service = svcpt->scp_service;
	struct ptlrpc_request	     *req;

	LASSERT (ev->type == LNET_EVENT_PUT ||
		 ev->type == LNET_EVENT_UNLINK);
	LASSERT ((char *)ev->md.start >= rqbd->rqbd_buffer);
	LASSERT ((char *)ev->md.start + ev->offset + ev->mlength <=
		 rqbd->rqbd_buffer + service->srv_buf_size);

	CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
	       "event type %d, status %d, service %s\n",
	       ev->type, ev->status, service->srv_name);

	if (ev->unlinked) {
		/* If this is the last request message to fit in the
		 * request buffer we can use the request object embedded in
		 * rqbd.  Note that if we failed to allocate a request,
		 * we'd have to re-post the rqbd, which we can't do in this
		 * context. */
		req = &rqbd->rqbd_req;
		memset(req, 0, sizeof (*req));
	} else {
		LASSERT (ev->type == LNET_EVENT_PUT);
		if (ev->status != 0) {
			/* We moaned above already... */
			return;
		}
		OBD_ALLOC_GFP(req, sizeof(*req), ALLOC_ATOMIC_TRY);
		if (req == NULL) {
			CERROR("Can't allocate incoming request descriptor: "
			       "Dropping %s RPC from %s\n",
			       service->srv_name,
			       libcfs_id2str(ev->initiator));
			return;
		}
	}

	/* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
	 * flags are reset and scalars are zero.  We only set the message
	 * size to non-zero if this was a successful receive. */
	req->rq_xid = ev->match_bits;
	req->rq_reqbuf = ev->md.start + ev->offset;
	if (ev->type == LNET_EVENT_PUT && ev->status == 0)
		req->rq_reqdata_len = ev->mlength;
	do_gettimeofday(&req->rq_arrival_time);
	req->rq_peer = ev->initiator;
	req->rq_self = ev->target.nid;
	req->rq_rqbd = rqbd;
	req->rq_phase = RQ_PHASE_NEW;
	spin_lock_init(&req->rq_lock);
	INIT_LIST_HEAD(&req->rq_timed_list);
	INIT_LIST_HEAD(&req->rq_exp_list);
	atomic_set(&req->rq_refcount, 1);
	if (ev->type == LNET_EVENT_PUT)
		CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n",
		       req, req->rq_xid, ev->mlength);

	CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));

	spin_lock(&svcpt->scp_lock);

	ptlrpc_req_add_history(svcpt, req);

	if (ev->unlinked) {
		svcpt->scp_nrqbds_posted--;
		CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
		       svcpt->scp_nrqbds_posted);

		/* Normally, don't complain about 0 buffers posted; LNET won't
		 * drop incoming reqs since we set the portal lazy */
		if (test_req_buffer_pressure &&
		    ev->type != LNET_EVENT_UNLINK &&
		    svcpt->scp_nrqbds_posted == 0)
			CWARN("All %s request buffers busy\n",
			      service->srv_name);

		/* req takes over the network's ref on rqbd */
	} else {
		/* req takes a ref on rqbd */
		rqbd->rqbd_refcount++;
	}

	list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
	svcpt->scp_nreqs_incoming++;

	/* NB everything can disappear under us once the request
	 * has been queued and we unlock, so do the wake now... */
	wake_up(&svcpt->scp_waitq);

	spin_unlock(&svcpt->scp_lock);
}
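/*
 * Prepare local buffers for an incoming bulk read or write: verify the
 * capability, then hand off to ofd_preprw_read() or ofd_preprw_write()
 * to map the remote niobufs in @rnb onto local pages in @lnb.
 */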
int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
               struct obdo *oa, int objcount, struct obd_ioobj *obj,
               struct niobuf_remote *rnb, int *nr_local,
               struct niobuf_local *lnb, struct obd_trans_info *oti,
               struct lustre_capa *capa)
{
    struct tgt_session_info	*tsi = tgt_ses_info(env);
    struct ofd_device	*ofd = ofd_exp(exp);
    struct ofd_thread_info	*info;
    char			*jobid;
    int			 rc = 0;

    if (*nr_local > PTLRPC_MAX_BRW_PAGES) {
        CERROR("%s: bulk has too many pages %d, which exceeds the"
               "maximum pages per RPC of %d\n",
               exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES);
        RETURN(-EPROTO);
    }

    if (tgt_ses_req(tsi) == NULL) { /* echo client case */
        LASSERT(oti != NULL);
        lu_env_refill((struct lu_env *)env);
        info = ofd_info_init(env, exp);
        ofd_oti2info(info, oti);
        jobid = oti->oti_jobid;
    } else {
        info = tsi2ofd_info(tsi);
        jobid = tsi->tsi_jobid;
    }

    LASSERT(oa != NULL);

    if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) {
        struct ofd_seq		*oseq;

        oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi));
        if (IS_ERR(oseq)) {
            CERROR("%s: Can not find seq for "DOSTID
                   ": rc = %ld\n", ofd_name(ofd), POSTID(&oa->o_oi),
                   PTR_ERR(oseq));
            RETURN(-EINVAL);
        }

        if (oseq->os_destroys_in_progress == 0) {
            /* don't fail lookups for orphan recovery, it causes
             * later LBUGs when objects still exist during
             * precreate */
            ofd_seq_put(env, oseq);
            RETURN(-ENOENT);
        }
        ofd_seq_put(env, oseq);
    }

    LASSERT(objcount == 1);
    LASSERT(obj->ioo_bufcnt > 0);

    rc = ostid_to_fid(&info->fti_fid, &oa->o_oi, 0);
    if (unlikely(rc != 0))
        RETURN(rc);

    if (cmd == OBD_BRW_WRITE) {
        rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
                           capa, CAPA_OPC_OSS_WRITE);
        if (rc == 0) {
            la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR);
            rc = ofd_preprw_write(env, exp, ofd, &info->fti_fid,
                                  &info->fti_attr, oa, objcount,
                                  obj, rnb, nr_local, lnb, jobid);
        }
    } else if (cmd == OBD_BRW_READ) {
        rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
                           capa, CAPA_OPC_OSS_READ);
        if (rc == 0) {
            ofd_grant_prepare_read(env, exp, oa);
            rc = ofd_preprw_read(env, exp, ofd, &info->fti_fid,
                                 &info->fti_attr, obj->ioo_bufcnt,
                                 rnb, nr_local, lnb, jobid);
            obdo_from_la(oa, &info->fti_attr, LA_ATIME);
        }
    } else {
        CERROR("%s: wrong cmd %d received!\n",
               exp->exp_obd->obd_name, cmd);
        rc = -EPROTO;
    }
    RETURN(rc);
}
static int ll_get_name(struct dentry *dentry, char *name,
		       struct dentry *child)
{
	struct inode *dir = d_inode(dentry);
	int rc;
	struct ll_getname_data lgd = {
		.lgd_name = name,
		.lgd_fid = ll_i2info(d_inode(child))->lli_fid,
		.ctx.actor = ll_nfs_get_name_filldir,
	};

	if (!dir || !S_ISDIR(dir->i_mode)) {
		rc = -ENOTDIR;
		goto out;
	}

	if (!dir->i_fop) {
		rc = -EINVAL;
		goto out;
	}

	mutex_lock(&dir->i_mutex);
	rc = ll_dir_read(dir, &lgd.ctx);
	mutex_unlock(&dir->i_mutex);
	if (!rc && !lgd.lgd_found)
		rc = -ENOENT;
out:
	return rc;
}

static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
				      int fh_len, int fh_type)
{
	struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;

	if (fh_type != LUSTRE_NFS_FID)
		return ERR_PTR(-EPROTO);

	return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent);
}

static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
				      int fh_len, int fh_type)
{
	struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;

	if (fh_type != LUSTRE_NFS_FID)
		return ERR_PTR(-EPROTO);

	return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL);
}

static struct dentry *ll_get_parent(struct dentry *dchild)
{
	struct ptlrpc_request *req = NULL;
	struct inode	  *dir = d_inode(dchild);
	struct ll_sb_info     *sbi;
	struct dentry	 *result = NULL;
	struct mdt_body       *body;
	static char	   dotdot[] = "..";
	struct md_op_data     *op_data;
	int		   rc;
	int		      lmmsize;

	LASSERT(dir && S_ISDIR(dir->i_mode));

	sbi = ll_s2sbi(dir->i_sb);

	CDEBUG(D_INFO, "getting parent for (%lu,"DFID")\n",
			dir->i_ino, PFID(ll_inode2fid(dir)));

	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc != 0)
		return ERR_PTR(rc);

	op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
				     strlen(dotdot), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return (void *)op_data;

	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc) {
		CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
		return ERR_PTR(rc);
	}
	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body->valid & OBD_MD_FLID);

	CDEBUG(D_INFO, "parent for "DFID" is "DFID"\n",
		PFID(ll_inode2fid(dir)), PFID(&body->fid1));

	result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);

	ptlrpc_req_finished(req);
	return result;
}

struct export_operations lustre_export_operations = {
	.get_parent   = ll_get_parent,
	.encode_fh    = ll_encode_fh,
	.get_name     = ll_get_name,
	.fh_to_dentry = ll_fh_to_dentry,
	.fh_to_parent = ll_fh_to_parent,
};
static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                            struct ofd_device *ofd, struct lu_fid *fid,
                            struct lu_attr *la, struct obdo *oa,
                            int objcount, struct obd_ioobj *obj,
                            struct niobuf_remote *rnb, int *nr_local,
                            struct niobuf_local *lnb, char *jobid)
{
    struct ofd_object	*fo;
    int			 i, j, k, rc = 0, tot_bytes = 0;

    ENTRY;
    LASSERT(env != NULL);
    LASSERT(objcount == 1);

    if (unlikely(exp->exp_obd->obd_recovering)) {
        struct ofd_thread_info *info = ofd_info(env);

        /* copied from ofd_precreate_object */
        /* XXX this should be consolidated to use the same code
         *     instead of a copy, due to the ongoing risk of bugs. */
        memset(&info->fti_attr, 0, sizeof(info->fti_attr));
        info->fti_attr.la_valid = LA_TYPE | LA_MODE;
        info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666;
        info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME;
        /* Initialize a/c/m time so any client timestamp will always
         * be newer and update the inode. ctime = 0 is also handled
         * specially in osd_inode_setattr().  See LU-221, LU-1042 */
        info->fti_attr.la_atime = 0;
        info->fti_attr.la_mtime = 0;
        info->fti_attr.la_ctime = 0;

        fo = ofd_object_find_or_create(env, ofd, fid, &info->fti_attr);
    } else {
        fo = ofd_object_find(env, ofd, fid);
    }

    if (IS_ERR(fo))
        GOTO(out, rc = PTR_ERR(fo));
    LASSERT(fo != NULL);

    ofd_read_lock(env, fo);
    if (!ofd_object_exists(fo)) {
        CERROR("%s: BRW to missing obj "DOSTID"\n",
               exp->exp_obd->obd_name, POSTID(&obj->ioo_oid));
        ofd_read_unlock(env, fo);
        ofd_object_put(env, fo);
        GOTO(out, rc = -ENOENT);
    }

    /* Process incoming grant info, set OBD_BRW_GRANTED flag and grant some
     * space back if possible */
    ofd_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);

    /* parse remote buffers to local buffers and prepare the latter */
    *nr_local = 0;
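    /* E.g. (hypothetical sizes): a 1 MiB remote niobuf may expand to 256
     * 4 KiB local pages; dt_bufs_get() returns the number of lnb slots it
     * filled, so j and *nr_local advance by that count. */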
    for (i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
        rc = dt_bufs_get(env, ofd_object_child(fo),
                         rnb + i, lnb + j, 1,
                         ofd_object_capa(env, fo));
        if (unlikely(rc < 0))
            GOTO(err, rc);
        LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
        /* correct index for local buffers to continue with */
        for (k = 0; k < rc; k++) {
            lnb[j+k].lnb_flags = rnb[i].rnb_flags;
            if (!(rnb[i].rnb_flags & OBD_BRW_GRANTED))
                lnb[j+k].lnb_rc = -ENOSPC;

            /* remote client can't break through quota */
            if (exp_connect_rmtclient(exp))
                lnb[j+k].lnb_flags &= ~OBD_BRW_NOQUOTA;
        }
        j += rc;
        *nr_local += rc;
        LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
        tot_bytes += rnb[i].rnb_len;
    }
    LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);

    rc = dt_write_prep(env, ofd_object_child(fo), lnb, *nr_local);
    if (unlikely(rc != 0))
        GOTO(err, rc);

    ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, tot_bytes);
    RETURN(0);
err:
    dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
    ofd_read_unlock(env, fo);
    /* ofd_grant_prepare_write() was called, so we must commit */
    ofd_grant_commit(env, exp, rc);
out:
    /* let's still process incoming grant information packed in the oa,
     * but without enforcing grant since we won't proceed with the write.
     * Just like a read request actually. */
    ofd_grant_prepare_read(env, exp, oa);
    return rc;
}
/* backup plain llog */
int llog_backup(const struct lu_env *env, struct obd_device *obd,
		struct llog_ctxt *ctxt, struct llog_ctxt *bctxt,
		char *name, char *backup)
{
	struct llog_handle	*llh, *bllh;
	int			 rc;

	/* open original log */
	rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
	if (rc < 0) {
		/* the -ENOENT case is also returned to the caller, but
		 * silently, so the caller should handle it if needed.
		 */
		if (rc != -ENOENT)
			CERROR("%s: failed to open log %s: rc = %d\n",
			       obd->obd_name, name, rc);
		return rc;
	}

	rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
	if (rc)
		goto out_close;

	/* Make sure there's no old backup log */
	rc = llog_erase(env, bctxt, NULL, backup);
	if (rc < 0 && rc != -ENOENT)
		goto out_close;

	/* open backup log */
	rc = llog_open_create(env, bctxt, &bllh, NULL, backup);
	if (rc) {
		CERROR("%s: failed to open backup logfile %s: rc = %d\n",
		       obd->obd_name, backup, rc);
		goto out_close;
	}

	/* check that backup llog is not the same object as original one */
	if (llh->lgh_obj == bllh->lgh_obj) {
		CERROR("%s: backup llog %s to itself (%s), objects %p/%p\n",
		       obd->obd_name, name, backup, llh->lgh_obj,
		       bllh->lgh_obj);
		rc = -EEXIST;
		goto out_backup;
	}

	rc = llog_init_handle(env, bllh, LLOG_F_IS_PLAIN, NULL);
	if (rc)
		goto out_backup;

	/* Copy log record by record */
	rc = llog_process_or_fork(env, llh, llog_copy_handler, (void *)bllh,
				  NULL, false);
	if (rc)
		CERROR("%s: failed to backup log %s: rc = %d\n",
		       obd->obd_name, name, rc);
out_backup:
	llog_close(env, bllh);
out_close:
	llog_close(env, llh);
	return rc;
}
int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
		     int flags, struct obd_uuid *uuid)
{
	struct llog_log_hdr	*llh;
	int			 rc;

	LASSERT(handle->lgh_hdr == NULL);

	llh = kzalloc(sizeof(*llh), GFP_NOFS);
	if (llh == NULL)
		return -ENOMEM;
	handle->lgh_hdr = llh;
	/* first assign flags to use llog_client_ops */
	llh->llh_flags = flags;
	rc = llog_read_header(env, handle, uuid);
	if (rc == 0) {
		if (unlikely((llh->llh_flags & LLOG_F_IS_PLAIN &&
			      flags & LLOG_F_IS_CAT) ||
			     (llh->llh_flags & LLOG_F_IS_CAT &&
			      flags & LLOG_F_IS_PLAIN))) {
			CERROR("%s: llog type is %s but initializing %s\n",
			       handle->lgh_ctxt->loc_obd->obd_name,
			       llh->llh_flags & LLOG_F_IS_CAT ?
			       "catalog" : "plain",
			       flags & LLOG_F_IS_CAT ? "catalog" : "plain");
			rc = -EINVAL;
			goto out;
		} else if (llh->llh_flags &
			   (LLOG_F_IS_PLAIN | LLOG_F_IS_CAT)) {
			/*
			 * it is possible to open llog without specifying llog
			 * type so it is taken from llh_flags
			 */
			flags = llh->llh_flags;
		} else {
			/* for some reason the llh_flags has no type set */
			CERROR("llog type is not specified!\n");
			rc = -EINVAL;
			goto out;
		}
		if (unlikely(uuid &&
			     !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) {
			CERROR("%s: llog uuid mismatch: %s/%s\n",
			       handle->lgh_ctxt->loc_obd->obd_name,
			       (char *)uuid->uuid,
			       (char *)llh->llh_tgtuuid.uuid);
			rc = -EEXIST;
			goto out;
		}
	}
	if (flags & LLOG_F_IS_CAT) {
		LASSERT(list_empty(&handle->u.chd.chd_head));
		INIT_LIST_HEAD(&handle->u.chd.chd_head);
		llh->llh_size = sizeof(struct llog_logid_rec);
	} else if (!(flags & LLOG_F_IS_PLAIN)) {
		CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
		       handle->lgh_ctxt->loc_obd->obd_name,
		       flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
		rc = -EINVAL;
	}
out:
	if (rc) {
		kfree(llh);
		handle->lgh_hdr = NULL;
	}
	return rc;
}
/**
 * Send request reply from request \a req reply buffer.
 * \a flags defines reply types
 * Returns 0 on success or a negative error code.
 */
int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
{
        struct ptlrpc_reply_state *rs = req->rq_reply_state;
        struct ptlrpc_connection  *conn;
        int                        rc;

        /* We must already have a reply buffer (only ptlrpc_error() may be
         * called without one). The reply generated by sptlrpc layer (e.g.
         * error notify, etc.) might have NULL req->rq_reqmsg; otherwise we must
         * have a request buffer which is either the actual (swabbed) incoming
         * request, or a saved copy if this is a req saved in
         * target_queue_final_reply().
         */
        LASSERT (req->rq_no_reply == 0);
        LASSERT (req->rq_reqbuf != NULL);
        LASSERT (rs != NULL);
        LASSERT ((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult);
        LASSERT (req->rq_repmsg != NULL);
        LASSERT (req->rq_repmsg == rs->rs_msg);
        LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback);
        LASSERT (rs->rs_cb_id.cbid_arg == rs);

        /* There may be no rq_export during failover */

        if (unlikely(req->rq_export && req->rq_export->exp_obd &&
                     req->rq_export->exp_obd->obd_fail)) {
                /* Failed obd's only send ENODEV */
                req->rq_type = PTL_RPC_MSG_ERR;
                req->rq_status = -ENODEV;
                CDEBUG(D_HA, "sending ENODEV from failed obd %d\n",
                       req->rq_export->exp_obd->obd_minor);
        }

	/* In order to keep interoperability with clients (< 2.3) which
	 * don't have pb_jobid in ptlrpc_body, we have to shrink the
	 * ptlrpc_body in the reply buffer to ptlrpc_body_v2; otherwise the
	 * reply buffer on the client will overflow.
	 *
	 * XXX Remove this whenever we drop interoperability with such clients.
	 */
	req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0,
					   sizeof(struct ptlrpc_body_v2), 1);

        if (req->rq_type != PTL_RPC_MSG_ERR)
                req->rq_type = PTL_RPC_MSG_REPLY;

        lustre_msg_set_type(req->rq_repmsg, req->rq_type);
	lustre_msg_set_status(req->rq_repmsg,
			      ptlrpc_status_hton(req->rq_status));
        lustre_msg_set_opc(req->rq_repmsg,
                req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);

        target_pack_pool_reply(req);

        ptlrpc_at_set_reply(req, flags);

        if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
                conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
        else
                conn = ptlrpc_connection_addref(req->rq_export->exp_connection);

        if (unlikely(conn == NULL)) {
                CERROR("not replying on NULL connection\n"); /* bug 9635 */
                return -ENOTCONN;
        }
        ptlrpc_rs_addref(rs);                   /* +1 ref for the network */

        rc = sptlrpc_svc_wrap_reply(req);
        if (unlikely(rc))
                goto out;

        req->rq_sent = cfs_time_current_sec();

        rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
                           (rs->rs_difficult && !rs->rs_no_ack) ?
                           LNET_ACK_REQ : LNET_NOACK_REQ,
			   &rs->rs_cb_id, conn,
			   ptlrpc_req2svc(req)->srv_rep_portal,
                           req->rq_xid, req->rq_reply_off);
out:
        if (unlikely(rc != 0))
                ptlrpc_req_drop_rs(req);
        ptlrpc_connection_put(conn);
        return rc;
}
Exemple #15
0
static int seq_client_rpc(struct lu_client_seq *seq,
                          struct lu_seq_range *output, __u32 opc,
                          const char *opcname)
{
	struct obd_export     *exp = seq->lcs_exp;
	struct ptlrpc_request *req;
	struct lu_seq_range   *out, *in;
	__u32                 *op;
	unsigned int           debug_mask;
	int                    rc;
	ENTRY;

	LASSERT(exp != NULL && !IS_ERR(exp));
	req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY,
					LUSTRE_MDS_VERSION, SEQ_QUERY);
	if (req == NULL)
		RETURN(-ENOMEM);

	/* Init operation code */
	op = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_OPC);
	*op = opc;

	/* Zero out input range, this is not recovery yet. */
	in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE);
	range_init(in);

	ptlrpc_request_set_replen(req);

	in->lsr_index = seq->lcs_space.lsr_index;
	if (seq->lcs_type == LUSTRE_SEQ_METADATA)
		fld_range_set_mdt(in);
	else
		fld_range_set_ost(in);

	if (opc == SEQ_ALLOC_SUPER) {
		req->rq_request_portal = SEQ_CONTROLLER_PORTAL;
		req->rq_reply_portal = MDC_REPLY_PORTAL;
		/* While allocating a super sequence for a data object,
		 * the current thread might hold the export of MDT0 (MDT0
		 * is precreating objects on this OST), and it will send the
		 * request to MDT0 here, so we cannot keep resending the
		 * request; otherwise, if MDT0 fails (is umounted), it
		 * cannot release the export of MDT0 */
		if (seq->lcs_type == LUSTRE_SEQ_DATA)
			req->rq_no_delay = req->rq_no_resend = 1;
		debug_mask = D_CONSOLE;
	} else {
		if (seq->lcs_type == LUSTRE_SEQ_METADATA)
			req->rq_request_portal = SEQ_METADATA_PORTAL;
		else
			req->rq_request_portal = SEQ_DATA_PORTAL;
		debug_mask = D_INFO;
	}

	ptlrpc_at_set_req_timeout(req);

	if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
		mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);

	rc = ptlrpc_queue_wait(req);

	if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
		mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
	if (rc)
		GOTO(out_req, rc);

	out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE);
	*output = *out;

	if (!range_is_sane(output)) {
		CERROR("%s: Invalid range received from server: "
		       DRANGE"\n", seq->lcs_name, PRANGE(output));
		GOTO(out_req, rc = -EINVAL);
	}

	if (range_is_exhausted(output)) {
		CERROR("%s: Range received from server is exhausted: "
		       DRANGE"]\n", seq->lcs_name, PRANGE(output));
		GOTO(out_req, rc = -EINVAL);
	}

	CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence "DRANGE"\n",
		     seq->lcs_name, opcname, PRANGE(output));

	EXIT;
out_req:
	ptlrpc_req_finished(req);
	return rc;
}
/**
 * Send request \a request.
 * If \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or a negative error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
        int rc;
        int rc2;
        int mpflag = 0;
        struct ptlrpc_connection *connection;
        lnet_handle_me_t  reply_me_h;
        lnet_md_t         reply_md;
        struct obd_device *obd = request->rq_import->imp_obd;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
                RETURN(0);

        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
        LASSERT(request->rq_wait_ctx == 0);

        /* If this is a re-transmit, we're required to have disengaged
         * cleanly from the previous attempt */
        LASSERT(!request->rq_receiving_reply);

        if (request->rq_import->imp_obd &&
            request->rq_import->imp_obd->obd_fail) {
                CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
                       request->rq_import->imp_obd->obd_name);
                /* this prevents us from waiting in ptlrpc_queue_wait */
                request->rq_err = 1;
                request->rq_status = -ENODEV;
                RETURN(-ENODEV);
        }

        connection = request->rq_import->imp_connection;

        lustre_msg_set_handle(request->rq_reqmsg,
                              &request->rq_import->imp_remote_handle);
        lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
        lustre_msg_set_conn_cnt(request->rq_reqmsg,
                                request->rq_import->imp_conn_cnt);
        lustre_msghdr_set_flags(request->rq_reqmsg,
                                request->rq_import->imp_msghdr_flags);

        if (request->rq_resend)
                lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);

        if (request->rq_memalloc)
                mpflag = cfs_memory_pressure_get_and_set();

        rc = sptlrpc_cli_wrap_request(request);
        if (rc)
                GOTO(out, rc);

        /* bulk register should be done after wrap_request() */
        if (request->rq_bulk != NULL) {
                rc = ptlrpc_register_bulk (request);
                if (rc != 0)
                        GOTO(out, rc);
        }

        if (!noreply) {
                LASSERT (request->rq_replen != 0);
                if (request->rq_repbuf == NULL) {
                        LASSERT(request->rq_repdata == NULL);
                        LASSERT(request->rq_repmsg == NULL);
                        rc = sptlrpc_cli_alloc_repbuf(request,
                                                      request->rq_replen);
                        if (rc) {
                                /* this prevents us from looping in
                                 * ptlrpc_queue_wait */
                                request->rq_err = 1;
                                request->rq_status = rc;
                                GOTO(cleanup_bulk, rc);
                        }
                } else {
                        request->rq_repdata = NULL;
                        request->rq_repmsg = NULL;
                }

                rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
                                  connection->c_peer, request->rq_xid, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
                if (rc != 0) {
                        CERROR("LNetMEAttach failed: %d\n", rc);
                        LASSERT (rc == -ENOMEM);
                        GOTO(cleanup_bulk, rc = -ENOMEM);
                }
        }

	spin_lock(&request->rq_lock);
        /* If the MD attach succeeds, there _will_ be a reply_in callback */
        request->rq_receiving_reply = !noreply;
        /* We are responsible for unlinking the reply buffer */
        request->rq_must_unlink = !noreply;
        /* Clear any flags that may be present from previous sends. */
        request->rq_replied = 0;
        request->rq_err = 0;
        request->rq_timedout = 0;
        request->rq_net_err = 0;
        request->rq_resend = 0;
        request->rq_restart = 0;
        request->rq_reply_truncate = 0;
	spin_unlock(&request->rq_lock);

        if (!noreply) {
                reply_md.start     = request->rq_repbuf;
                reply_md.length    = request->rq_repbuf_len;
                /* Allow multiple early replies */
                reply_md.threshold = LNET_MD_THRESH_INF;
                /* Manage remote for early replies */
                reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
                        LNET_MD_MANAGE_REMOTE |
                        LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */
                reply_md.user_ptr  = &request->rq_reply_cbid;
                reply_md.eq_handle = ptlrpc_eq_h;

                /* We must see the unlink callback to unset rq_must_unlink,
                   so we can't auto-unlink */
                rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                  &request->rq_reply_md_h);
                if (rc != 0) {
                        CERROR("LNetMDAttach failed: %d\n", rc);
                        LASSERT (rc == -ENOMEM);
			spin_lock(&request->rq_lock);
			/* ...but the MD attach didn't succeed... */
			request->rq_receiving_reply = 0;
			spin_unlock(&request->rq_lock);
                        GOTO(cleanup_me, rc = -ENOMEM);
                }

                CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
                       ", portal %u\n",
                       request->rq_repbuf_len, request->rq_xid,
                       request->rq_reply_portal);
        }

        /* add references on request for request_out_callback */
        ptlrpc_request_addref(request);
        if (obd->obd_svc_stats != NULL)
                lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
                        cfs_atomic_read(&request->rq_import->imp_inflight));

        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

        cfs_gettimeofday(&request->rq_arrival_time);
        request->rq_sent = cfs_time_current_sec();
        /* We give the server rq_timeout secs to process the req, and
           add the network latency for our local timeout. */
        request->rq_deadline = request->rq_sent + request->rq_timeout +
                ptlrpc_at_get_net_latency(request);

        ptlrpc_pinger_sending_on_import(request->rq_import);

        DEBUG_REQ(D_INFO, request, "send flg=%x",
                  lustre_msg_get_flags(request->rq_reqmsg));
        rc = ptl_send_buf(&request->rq_req_md_h,
                          request->rq_reqbuf, request->rq_reqdata_len,
                          LNET_NOACK_REQ, &request->rq_req_cbid,
                          connection,
                          request->rq_request_portal,
                          request->rq_xid, 0);
        if (rc == 0)
                GOTO(out, rc);

        ptlrpc_req_finished(request);
        if (noreply)
                GOTO(out, rc);

 cleanup_me:
        /* MEUnlink is safe; the PUT didn't even get off the ground, and
         * nobody apart from the PUT's target has the right nid+XID to
         * access the reply buffer. */
        rc2 = LNetMEUnlink(reply_me_h);
        LASSERT (rc2 == 0);
        /* UNLINKED callback called synchronously */
        LASSERT(!request->rq_receiving_reply);

 cleanup_bulk:
        /* We do sync unlink here as there was no real transfer here so
         * the chance to have long unlink to sluggish net is smaller here. */
        ptlrpc_unregister_bulk(request, 0);
 out:
        if (request->rq_memalloc)
                cfs_memory_pressure_restore(mpflag);
        return rc;
}
/**
 * Create a memory descriptor and attach it to a ME
 *
 * \param meh A handle for a ME to associate the new MD with.
 * \param umd Provides initial values for the user-visible parts of a MD.
 * Other than its use for initialization, there is no linkage between this
 * structure and the MD maintained by the LNet.
 * \param unlink A flag to indicate whether the MD is automatically unlinked
 * when it becomes inactive, either because the operation threshold drops to
 * zero or because the available memory becomes less than \a umd.max_size.
 * (Note that the check for unlinking a MD only occurs after the completion
 * of a successful operation on the MD.) The value LNET_UNLINK enables auto
 * unlinking; the value LNET_RETAIN disables it.
 * \param handle On successful returns, a handle to the newly created MD is
 * saved here. This handle can be used later in LNetMDUnlink().
 *
 * \retval 0       On success.
 * \retval -EINVAL If \a umd is not valid.
 * \retval -ENOMEM If new MD cannot be allocated.
 * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
 * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
 * calling LNetInvalidateHandle() on it.
 * \retval -EBUSY  If the ME pointed to by \a meh is already associated with
 * a MD.
 */
int
LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
	     lnet_unlink_t unlink, lnet_handle_md_t *handle)
{
	CFS_LIST_HEAD		(matches);
	CFS_LIST_HEAD		(drops);
	struct lnet_me		*me;
	struct lnet_libmd	*md;
	int			cpt;
	int			rc;

	LASSERT(the_lnet.ln_init);
	LASSERT(the_lnet.ln_refcount > 0);

	if (lnet_md_validate(&umd) != 0)
		return -EINVAL;

	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) {
		CERROR("Invalid option: no MD_OP set\n");
		return -EINVAL;
	}

	md = lnet_md_alloc(&umd);
	if (md == NULL)
		return -ENOMEM;

	rc = lnet_md_build(md, &umd, unlink);
	cpt = lnet_cpt_of_cookie(meh.cookie);

	lnet_res_lock(cpt);
	if (rc != 0)
		goto failed;

	me = lnet_handle2me(&meh);
	if (me == NULL)
		rc = -ENOENT;
	else if (me->me_md != NULL)
		rc = -EBUSY;
	else
		rc = lnet_md_link(md, umd.eq_handle, cpt);

	if (rc != 0)
		goto failed;

	/* attach this MD to portal of ME and check if it matches any
	 * blocked msgs on this portal */
	lnet_ptl_attach_md(me, md, &matches, &drops);

	lnet_md2handle(handle, md);

	lnet_res_unlock(cpt);

	lnet_drop_delayed_msg_list(&drops, "Bad match");
	lnet_recv_delayed_msg_list(&matches);

	return 0;

 failed:
	lnet_md_free_locked(md);

	lnet_res_unlock(cpt);
	return rc;
}
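
/*
 * A minimal usage sketch for the API documented above, mirroring the
 * LNetMEAttach()/LNetMDAttach() pairing seen in ptlrpc_register_bulk();
 * the helper name and its unwind logic are illustrative, not LNet API.
 */
static inline int lnet_attach_md_sketch(unsigned int portal,
					lnet_process_id_t peer,
					__u64 matchbits, lnet_md_t umd,
					lnet_handle_md_t *md_h)
{
	lnet_handle_me_t me_h;
	int rc;

	/* post an ME matching the expected peer and match bits */
	rc = LNetMEAttach(portal, peer, matchbits, 0 /* ignore bits */,
			  LNET_UNLINK, LNET_INS_AFTER, &me_h);
	if (rc != 0)
		return rc;

	/* bind the MD; on failure drop the now-useless ME */
	rc = LNetMDAttach(me_h, umd, LNET_UNLINK, md_h);
	if (rc != 0)
		LNetMEUnlink(me_h);

	return rc;
}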
static int lsm_unpackmd_common(struct lov_obd *lov,
			       struct lov_stripe_md *lsm,
			       struct lov_mds_md *lmm,
			       struct lov_ost_data_v1 *objects)
{
	struct lov_oinfo *loi;
	loff_t min_stripe_maxbytes = 0;
	loff_t lov_bytes;
	unsigned int stripe_count;
	unsigned int i;

	/*
	 * This supposes the lov_mds_md_v1/v3 first fields
	 * are the same
	 */
	lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi);
	lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
	lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
	lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
	lsm->lsm_pool_name[0] = '\0';

	stripe_count = lsm_is_released(lsm) ? 0 : lsm->lsm_stripe_count;

	for (i = 0; i < stripe_count; i++) {
		loi = lsm->lsm_oinfo[i];
		ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi);
		loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx);
		loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen);
		if (lov_oinfo_is_dummy(loi))
			continue;

		if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
		    !lov2obd(lov)->obd_process_conf) {
			CERROR("%s: OST index %d more than OST count %d\n",
			       (char*)lov->desc.ld_uuid.uuid,
			       loi->loi_ost_idx, lov->desc.ld_tgt_count);
			lov_dump_lmm_v1(D_WARNING, lmm);
			return -EINVAL;
		}

		if (lov->lov_tgts[loi->loi_ost_idx] == NULL) {
			CERROR("%s: OST index %d missing\n",
			       (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx);
			lov_dump_lmm_v1(D_WARNING, lmm);
			continue;
		}

		lov_bytes = lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx]);
		if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes)
			min_stripe_maxbytes = lov_bytes;
	}

	if (min_stripe_maxbytes == 0)
		min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;

	stripe_count = lsm->lsm_stripe_count ?: lov->desc.ld_tgt_count;
	lov_bytes = min_stripe_maxbytes * stripe_count;

	if (lov_bytes < min_stripe_maxbytes) /* handle overflow */
		lsm->lsm_maxbytes = MAX_LFS_FILESIZE;
	else
		lsm->lsm_maxbytes = lov_bytes;

	return 0;
}
static int llog_process_thread(void *arg)
{
	struct llog_process_info	*lpi = arg;
	struct llog_handle		*loghandle = lpi->lpi_loghandle;
	struct llog_log_hdr		*llh = loghandle->lgh_hdr;
	struct llog_process_cat_data	*cd  = lpi->lpi_catdata;
	char				*buf;
	/* processing starts after the first chunk, which holds the header */
	__u64				 cur_offset = LLOG_CHUNK_SIZE;
	__u64				 last_offset;
	int				 rc = 0, index = 1, last_index;
	int				 saved_index = 0;
	int				 last_called_index = 0;

	ENTRY;

        LASSERT(llh);

        OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
        if (!buf) {
                lpi->lpi_rc = -ENOMEM;
		RETURN(0);
        }

        if (cd != NULL) {
                last_called_index = cd->lpcd_first_idx;
                index = cd->lpcd_first_idx + 1;
        }
        if (cd != NULL && cd->lpcd_last_idx)
                last_index = cd->lpcd_last_idx;
        else
                last_index = LLOG_BITMAP_BYTES * 8 - 1;

        while (rc == 0) {
                struct llog_rec_hdr *rec;

                /* skip records not set in bitmap */
                while (index <= last_index &&
                       !ext2_test_bit(index, llh->llh_bitmap))
                        ++index;

                LASSERT(index <= last_index + 1);
                if (index == last_index + 1)
                        break;
repeat:
                CDEBUG(D_OTHER, "index: %d last_index %d\n",
                       index, last_index);

                /* get the buf with our target record; avoid old garbage */
                memset(buf, 0, LLOG_CHUNK_SIZE);
                last_offset = cur_offset;
		rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
				     index, &cur_offset, buf, LLOG_CHUNK_SIZE);
                if (rc)
                        GOTO(out, rc);

                /* NB: when rec->lrh_len is accessed it is already swabbed
                 * since it is used at the "end" of the loop and the rec
                 * swabbing is done at the beginning of the loop. */
                for (rec = (struct llog_rec_hdr *)buf;
                     (char *)rec < buf + LLOG_CHUNK_SIZE;
                     rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){

                        CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
                               rec, rec->lrh_type);

                        if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
				lustre_swab_llog_rec(rec);

                        CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
                               rec->lrh_type, rec->lrh_index);

			if (rec->lrh_index == 0) {
				/* probably another rec just got added? */
				if (index <= loghandle->lgh_last_idx)
					GOTO(repeat, rc = 0);
				GOTO(out, rc = 0); /* no more records */
			}
			if (rec->lrh_len == 0 ||
			    rec->lrh_len > LLOG_CHUNK_SIZE) {
                                CWARN("invalid length %d in llog record for "
                                      "index %d/%d\n", rec->lrh_len,
                                      rec->lrh_index, index);
                                GOTO(out, rc = -EINVAL);
                        }

                        if (rec->lrh_index < index) {
                                CDEBUG(D_OTHER, "skipping lrh_index %d\n",
                                       rec->lrh_index);
                                continue;
                        }

                        CDEBUG(D_OTHER,
                               "lrh_index: %d lrh_len: %d (%d remains)\n",
                               rec->lrh_index, rec->lrh_len,
                               (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));

                        loghandle->lgh_cur_idx = rec->lrh_index;
                        loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
                                                    last_offset;

                        /* if set, process the callback on this record */
                        if (ext2_test_bit(index, llh->llh_bitmap)) {
				rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec,
						 lpi->lpi_cbdata);
				last_called_index = index;
				if (rc == LLOG_PROC_BREAK) {
					GOTO(out, rc);
				} else if (rc == LLOG_DEL_RECORD) {
					rc = llog_cancel_rec(lpi->lpi_env,
							     loghandle,
							     rec->lrh_index);
                                }
                                if (rc)
                                        GOTO(out, rc);
                        } else {
                                CDEBUG(D_OTHER, "Skipped index %d\n", index);
                        }

                        /* next record, still in buffer? */
                        ++index;
                        if (index > last_index)
                                GOTO(out, rc = 0);
                }
        }

out:
        if (cd != NULL)
                cd->lpcd_last_idx = last_called_index;

	if (unlikely(rc == -EIO)) {
		/* something bad happened to the processing, probably an I/O
		 * error or the log got corrupted; to be able to finally
		 * release the log we discard any remaining bits in the
		 * header */
		CERROR("llog found corrupted\n");
		while (index <= last_index) {
			if (ext2_test_bit(index, llh->llh_bitmap) != 0)
				llog_cancel_rec(lpi->lpi_env, loghandle, index);
			index++;
		}
		rc = 0;
	}

	OBD_FREE(buf, LLOG_CHUNK_SIZE);
        lpi->lpi_rc = rc;
        return 0;
}