Beispiel #1
0
/**
 * Write fid into last_oid/last_seq file.
 **/
int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
				 struct lu_fid *fid, int sync)
{
	struct osp_thread_info  *oti = osp_env_info(env);
	struct lu_buf	   *lb_oid = &oti->osi_lb;
	struct lu_buf	   *lb_oseq = &oti->osi_lb2;
	loff_t		   oid_off;
	loff_t		   oseq_off;
	struct thandle	  *th;
	int		      rc;
	ENTRY;

	/* Note: through f_oid is only 32bits, it will also write
	 * 64 bits for oid to keep compatiblity with the previous
	 * version. */
	lb_oid->lb_buf = &fid->f_oid;
	lb_oid->lb_len = sizeof(obd_id);
	oid_off = sizeof(obd_id) * osp->opd_index;

	lb_oseq->lb_buf = &fid->f_seq;
	lb_oseq->lb_len = sizeof(obd_id);
	oseq_off = sizeof(obd_id) * osp->opd_index;

	th = dt_trans_create(env, osp->opd_storage);
	if (IS_ERR(th))
		RETURN(PTR_ERR(th));

	th->th_sync |= sync;
	rc = dt_declare_record_write(env, osp->opd_last_used_oid_file,
				     lb_oid->lb_len, oid_off, th);
	if (rc != 0)
		GOTO(out, rc);

	rc = dt_declare_record_write(env, osp->opd_last_used_seq_file,
				     lb_oseq->lb_len, oseq_off, th);
	if (rc != 0)
		GOTO(out, rc);

	rc = dt_trans_start_local(env, osp->opd_storage, th);
	if (rc != 0)
		GOTO(out, rc);

	rc = dt_record_write(env, osp->opd_last_used_oid_file, lb_oid,
			     &oid_off, th);
	if (rc != 0) {
		CERROR("%s: can not write to last seq file: rc = %d\n",
			osp->opd_obd->obd_name, rc);
		GOTO(out, rc);
	}
	rc = dt_record_write(env, osp->opd_last_used_seq_file, lb_oseq,
			     &oseq_off, th);
	if (rc) {
		CERROR("%s: can not write to last seq file: rc = %d\n",
			osp->opd_obd->obd_name, rc);
		GOTO(out, rc);
	}
out:
	dt_trans_stop(env, osp->opd_storage, th);
	RETURN(rc);
}
Beispiel #2
0
int osp_init_pre_fid(struct osp_device *osp)
{
	struct lu_env		env;
	struct osp_thread_info	*osi;
	struct lu_client_seq	*cli_seq;
	struct lu_fid		*last_fid;
	int			rc;
	ENTRY;

	LASSERT(osp->opd_pre != NULL);

	/* Return if last_used fid has been initialized */
	if (!fid_is_zero(&osp->opd_last_used_fid))
		RETURN(0);

	rc = lu_env_init(&env, osp->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags);
	if (rc) {
		CERROR("%s: init env error: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		RETURN(rc);
	}

	osi = osp_env_info(&env);
	last_fid = &osi->osi_fid;
	fid_zero(last_fid);
	/* For a freshed fs, it will allocate a new sequence first */
	if (osp_is_fid_client(osp) && osp->opd_group != 0) {
		cli_seq = osp->opd_obd->u.cli.cl_seq;
		rc = seq_client_get_seq(&env, cli_seq, &last_fid->f_seq);
		if (rc != 0) {
			CERROR("%s: alloc fid error: rc = %d\n",
			       osp->opd_obd->obd_name, rc);
			GOTO(out, rc);
		}
	} else {
		last_fid->f_seq = fid_idif_seq(0, osp->opd_index);
	}
	last_fid->f_oid = 1;
	last_fid->f_ver = 0;

	spin_lock(&osp->opd_pre_lock);
	osp->opd_last_used_fid = *last_fid;
	osp->opd_pre_used_fid = *last_fid;
	osp->opd_pre_last_created_fid = *last_fid;
	spin_unlock(&osp->opd_pre_lock);
	rc = osp_write_last_oid_seq_files(&env, osp, last_fid, 1);
	if (rc != 0) {
		CERROR("%s: write fid error: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		GOTO(out, rc);
	}
out:
	lu_env_fini(&env);
	RETURN(rc);
}
Beispiel #3
0
static void osp_object_assign_fid(const struct lu_env *env,
				 struct osp_device *d, struct osp_object *o)
{
	struct osp_thread_info *osi = osp_env_info(env);

	LASSERT(fid_is_zero(lu_object_fid(&o->opo_obj.do_lu)));
	LASSERT(o->opo_reserved);
	o->opo_reserved = 0;

	osp_precreate_get_fid(env, d, &osi->osi_fid);

	lu_object_assign_fid(env, &o->opo_obj.do_lu, &osi->osi_fid);
}
Beispiel #4
0
static inline int osp_objs_precreated(const struct lu_env *env,
				      struct osp_device *osp)
{
	struct lu_fid *fid1 = &osp->opd_pre_last_created_fid;
	struct lu_fid *fid2 = &osp->opd_pre_used_fid;

	LASSERTF(fid_seq(fid1) == fid_seq(fid2),
		 "Created fid"DFID" Next fid "DFID"\n", PFID(fid1), PFID(fid2));

	if (fid_is_idif(fid1)) {
		struct ost_id *oi1 = &osp_env_info(env)->osi_oi;
		struct ost_id *oi2 = &osp_env_info(env)->osi_oi2;

		LASSERT(fid_is_idif(fid1) && fid_is_idif(fid2));
		fid_to_ostid(fid1, oi1);
		fid_to_ostid(fid2, oi2);
		LASSERT(ostid_id(oi1) >= ostid_id(oi2));

		return ostid_id(oi1) - ostid_id(oi2);
	}

	return fid_oid(fid1) - fid_oid(fid2);
}
Beispiel #5
0
int osp_precreate_rollover_new_seq(struct lu_env *env, struct osp_device *osp)
{
	struct lu_fid	*fid = &osp_env_info(env)->osi_fid;
	struct lu_fid	*last_fid = &osp->opd_last_used_fid;
	int		rc;
	ENTRY;

	rc = seq_client_get_seq(env, osp->opd_obd->u.cli.cl_seq, &fid->f_seq);
	if (rc != 0) {
		CERROR("%s: alloc fid error: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		RETURN(rc);
	}

	fid->f_oid = 1;
	fid->f_ver = 0;
	LASSERTF(fid_seq(fid) != fid_seq(last_fid),
		 "fid "DFID", last_fid "DFID"\n", PFID(fid),
		 PFID(last_fid));

	rc = osp_write_last_oid_seq_files(env, osp, fid, 1);
	if (rc != 0) {
		CERROR("%s: Can not update oid/seq file: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		RETURN(rc);
	}

	LCONSOLE_INFO("%s: update sequence from "LPX64" to "LPX64"\n",
		      osp->opd_obd->obd_name, fid_seq(last_fid),
		      fid_seq(fid));
	/* Update last_xxx to the new seq */
	spin_lock(&osp->opd_pre_lock);
	osp->opd_last_used_fid = *fid;
	osp->opd_gap_start_fid = *fid;
	osp->opd_pre_used_fid = *fid;
	osp->opd_pre_last_created_fid = *fid;
	spin_unlock(&osp->opd_pre_lock);

	RETURN(rc);
}
Beispiel #6
0
/**
 * alloc fids for precreation.
 * rc = 0 Success, @grow is the count of real allocation.
 * rc = 1 Current seq is used up.
 * rc < 0 Other error.
 **/
static int osp_precreate_fids(const struct lu_env *env, struct osp_device *osp,
			      struct lu_fid *fid, int *grow)
{
	struct osp_thread_info	*osi = osp_env_info(env);
	__u64			end;
	int			i = 0;

	if (fid_is_idif(fid)) {
		struct lu_fid	*last_fid;
		struct ost_id	*oi = &osi->osi_oi;

		spin_lock(&osp->opd_pre_lock);
		last_fid = &osp->opd_pre_last_created_fid;
		fid_to_ostid(last_fid, oi);
		end = min(ostid_id(oi) + *grow, IDIF_MAX_OID);
		*grow = end - ostid_id(oi);
		ostid_set_id(oi, ostid_id(oi) + *grow);
		spin_unlock(&osp->opd_pre_lock);

		if (*grow == 0)
			return 1;

		ostid_to_fid(fid, oi, osp->opd_index);
		return 0;
	}

	spin_lock(&osp->opd_pre_lock);
	*fid = osp->opd_pre_last_created_fid;
	end = fid->f_oid;
	end = min((end + *grow), (__u64)LUSTRE_DATA_SEQ_MAX_WIDTH);
	*grow = end - fid->f_oid;
	fid->f_oid += end - fid->f_oid;
	spin_unlock(&osp->opd_pre_lock);

	CDEBUG(D_INFO, "Expect %d, actual %d ["DFID" -- "DFID"]\n",
	       *grow, i, PFID(fid), PFID(&osp->opd_pre_last_created_fid));

	return *grow > 0 ? 0 : 1;
}
Beispiel #7
0
static int osp_object_init(const struct lu_env *env, struct lu_object *o,
			   const struct lu_object_conf *conf)
{
	struct osp_object	*po = lu2osp_obj(o);
	int			rc = 0;
	ENTRY;

	if (is_ost_obj(o)) {
		po->opo_obj.do_ops = &osp_obj_ops;
	} else {
		struct lu_attr		*la = &osp_env_info(env)->osi_attr;

		po->opo_obj.do_ops = &osp_md_obj_ops;
		o->lo_header->loh_attr |= LOHA_REMOTE;
		rc = po->opo_obj.do_ops->do_attr_get(env, lu2dt_obj(o),
						     la, NULL);
		if (rc == 0)
			o->lo_header->loh_attr |=
				LOHA_EXISTS | (la->la_mode & S_IFMT);
		if (rc == -ENOENT)
			rc = 0;
	}
	RETURN(rc);
}
Beispiel #8
0
/**
 * asks OST to clean precreate orphans
 * and gets next id for new objects
 */
static int osp_precreate_cleanup_orphans(struct lu_env *env,
					 struct osp_device *d)
{
	struct osp_thread_info	*osi = osp_env_info(env);
	struct lu_fid		*last_fid = &osi->osi_fid;
	struct ptlrpc_request	*req = NULL;
	struct obd_import	*imp;
	struct ost_body		*body;
	struct l_wait_info	 lwi = { 0 };
	int			 update_status = 0;
	int			 rc;
	int			 diff;

	ENTRY;

	/*
	 * wait for local recovery to finish, so we can cleanup orphans
	 * orphans are all objects since "last used" (assigned), but
	 * there might be objects reserved and in some cases they won't
	 * be used. we can't cleanup them till we're sure they won't be
	 * used. also can't we allow new reservations because they may
	 * end up getting orphans being cleaned up below. so we block
	 * new reservations and wait till all reserved objects either
	 * user or released.
	 */
	spin_lock(&d->opd_pre_lock);
	d->opd_pre_recovering = 1;
	spin_unlock(&d->opd_pre_lock);
	/*
	 * The locking above makes sure the opd_pre_reserved check below will
	 * catch all osp_precreate_reserve() calls who find
	 * "!opd_pre_recovering".
	 */
	l_wait_event(d->opd_pre_waitq,
		     (!d->opd_pre_reserved && d->opd_recovery_completed) ||
		     !osp_precreate_running(d) || d->opd_got_disconnected,
		     &lwi);
	if (!osp_precreate_running(d) || d->opd_got_disconnected)
		GOTO(out, rc = -EAGAIN);

	CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n",
	       d->opd_obd->obd_name, PFID(&d->opd_last_used_fid));

	*last_fid = d->opd_last_used_fid;
	/* The OSP should already get the valid seq now */
	LASSERT(!fid_is_zero(last_fid));
	if (fid_oid(&d->opd_last_used_fid) < 2) {
		/* lastfid looks strange... ask OST */
		rc = osp_get_lastfid_from_ost(env, d);
		if (rc)
			GOTO(out, rc);
	}

	imp = d->opd_obd->u.cli.cl_import;
	LASSERT(imp);

	req = ptlrpc_request_alloc(imp, &RQF_OST_CREATE);
	if (req == NULL)
		GOTO(out, rc = -ENOMEM);

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE);
	if (rc) {
		ptlrpc_request_free(req);
		req = NULL;
		GOTO(out, rc);
	}

	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
	if (body == NULL)
		GOTO(out, rc = -EPROTO);

	body->oa.o_flags = OBD_FL_DELORPHAN;
	body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP;

	fid_to_ostid(&d->opd_last_used_fid, &body->oa.o_oi);

	ptlrpc_request_set_replen(req);

	/* Don't resend the delorphan req */
	req->rq_no_resend = req->rq_no_delay = 1;

	rc = ptlrpc_queue_wait(req);
	if (rc) {
		update_status = 1;
		GOTO(out, rc);
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
	if (body == NULL)
		GOTO(out, rc = -EPROTO);

	/*
	 * OST provides us with id new pool starts from in body->oa.o_id
	 */
	ostid_to_fid(last_fid, &body->oa.o_oi, d->opd_index);

	spin_lock(&d->opd_pre_lock);
	diff = lu_fid_diff(&d->opd_last_used_fid, last_fid);
	if (diff > 0) {
		d->opd_pre_grow_count = OST_MIN_PRECREATE + diff;
		d->opd_pre_last_created_fid = d->opd_last_used_fid;
	} else {
		d->opd_pre_grow_count = OST_MIN_PRECREATE;
		d->opd_pre_last_created_fid = *last_fid;
	}
	/*
	 * This empties the pre-creation pool and effectively blocks any new
	 * reservations.
	 */
	LASSERT(fid_oid(&d->opd_pre_last_created_fid) <=
		LUSTRE_DATA_SEQ_MAX_WIDTH);
	d->opd_pre_used_fid = d->opd_pre_last_created_fid;
	d->opd_pre_grow_slow = 0;
	spin_unlock(&d->opd_pre_lock);

	CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID
	       "last_used is "DFID"\n", d->opd_obd->obd_name, PFID(last_fid),
	       PFID(&d->opd_pre_last_created_fid), PFID(&d->opd_last_used_fid));
out:
	if (req)
		ptlrpc_req_finished(req);

	spin_lock(&d->opd_pre_lock);
	d->opd_pre_recovering = 0;
	spin_unlock(&d->opd_pre_lock);

	/*
	 * If rc is zero, the pre-creation window should have been emptied.
	 * Since waking up the herd would be useless without pre-created
	 * objects, we defer the signal to osp_precreate_send() in that case.
	 */
	if (rc != 0) {
		if (update_status) {
			CERROR("%s: cannot cleanup orphans: rc = %d\n",
			       d->opd_obd->obd_name, rc);
			/* we can't proceed from here, OST seem to
			 * be in a bad shape, better to wait for
			 * a new instance of the server and repeat
			 * from the beginning. notify possible waiters
			 * this OSP isn't quite functional yet */
			osp_pre_update_status(d, rc);
		} else {
			wake_up(&d->opd_pre_user_waitq);
		}
	}

	RETURN(rc);
}
Beispiel #9
0
static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
{
	struct osp_thread_info	*oti = osp_env_info(env);
	struct ptlrpc_request	*req;
	struct obd_import	*imp;
	struct ost_body		*body;
	int			 rc, grow, diff;
	struct lu_fid		*fid = &oti->osi_fid;
	ENTRY;

	/* don't precreate new objects till OST healthy and has free space */
	if (unlikely(d->opd_pre_status)) {
		CDEBUG(D_INFO, "%s: don't send new precreate: rc = %d\n",
		       d->opd_obd->obd_name, d->opd_pre_status);
		RETURN(0);
	}

	/*
	 * if not connection/initialization is compeleted, ignore
	 */
	imp = d->opd_obd->u.cli.cl_import;
	LASSERT(imp);

	req = ptlrpc_request_alloc(imp, &RQF_OST_CREATE);
	if (req == NULL)
		RETURN(-ENOMEM);
	req->rq_request_portal = OST_CREATE_PORTAL;
	/* we should not resend create request - anyway we will have delorphan
	 * and kill these objects */
	req->rq_no_delay = req->rq_no_resend = 1;

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE);
	if (rc) {
		ptlrpc_request_free(req);
		RETURN(rc);
	}

	spin_lock(&d->opd_pre_lock);
	if (d->opd_pre_grow_count > d->opd_pre_max_grow_count / 2)
		d->opd_pre_grow_count = d->opd_pre_max_grow_count / 2;
	grow = d->opd_pre_grow_count;
	spin_unlock(&d->opd_pre_lock);

	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
	LASSERT(body);

	*fid = d->opd_pre_last_created_fid;
	rc = osp_precreate_fids(env, d, fid, &grow);
	if (rc == 1) {
		/* Current seq has been used up*/
		if (!osp_is_fid_client(d)) {
			osp_pre_update_status(d, -ENOSPC);
			rc = -ENOSPC;
		}
		wake_up(&d->opd_pre_waitq);
		GOTO(out_req, rc);
	}

	if (!osp_is_fid_client(d)) {
		/* Non-FID client will always send seq 0 because of
		 * compatiblity */
		LASSERTF(fid_is_idif(fid), "Invalid fid "DFID"\n", PFID(fid));
		fid->f_seq = 0;
	}

	fid_to_ostid(fid, &body->oa.o_oi);
	body->oa.o_valid = OBD_MD_FLGROUP;

	ptlrpc_request_set_replen(req);

	rc = ptlrpc_queue_wait(req);
	if (rc) {
		CERROR("%s: can't precreate: rc = %d\n", d->opd_obd->obd_name,
		       rc);
		GOTO(out_req, rc);
	}
	LASSERT(req->rq_transno == 0);

	body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
	if (body == NULL)
		GOTO(out_req, rc = -EPROTO);

	ostid_to_fid(fid, &body->oa.o_oi, d->opd_index);
	LASSERTF(lu_fid_diff(fid, &d->opd_pre_used_fid) > 0,
		 "reply fid "DFID" pre used fid "DFID"\n", PFID(fid),
		 PFID(&d->opd_pre_used_fid));

	diff = lu_fid_diff(fid, &d->opd_pre_last_created_fid);

	spin_lock(&d->opd_pre_lock);
	if (diff < grow) {
		/* the OST has not managed to create all the
		 * objects we asked for */
		d->opd_pre_grow_count = max(diff, OST_MIN_PRECREATE);
		d->opd_pre_grow_slow = 1;
	} else {
		/* the OST is able to keep up with the work,
		 * we could consider increasing grow_count
		 * next time if needed */
		d->opd_pre_grow_slow = 0;
	}

	d->opd_pre_last_created_fid = *fid;
	spin_unlock(&d->opd_pre_lock);

	CDEBUG(D_HA, "%s: current precreated pool: "DFID"-"DFID"\n",
	       d->opd_obd->obd_name, PFID(&d->opd_pre_used_fid),
	       PFID(&d->opd_pre_last_created_fid));
out_req:
	/* now we can wakeup all users awaiting for objects */
	osp_pre_update_status(d, rc);
	wake_up(&d->opd_pre_user_waitq);

	ptlrpc_req_finished(req);
	RETURN(rc);
}
/**
 * Implementation of dt_index_operations::dio_lookup
 *
 * Look up record by key under a remote index object. It packs lookup update
 * into RPC, sends to the remote OUT and waits for the lookup result.
 *
 * \param[in] env	execution environment
 * \param[in] dt	index object to lookup
 * \param[out] rec	record in which to return lookup result
 * \param[in] key	key of index which will be looked up
 *
 * \retval		1 if the lookup succeeds.
 * \retval              negative errno if the lookup fails.
 */
static int osp_md_index_lookup(const struct lu_env *env, struct dt_object *dt,
			       struct dt_rec *rec, const struct dt_key *key)
{
	struct lu_buf		*lbuf	= &osp_env_info(env)->osi_lb2;
	struct osp_device	*osp	= lu2osp_dev(dt->do_lu.lo_dev);
	struct dt_device	*dt_dev	= &osp->opd_dt_dev;
	struct osp_update_request   *update;
	struct object_update_reply *reply;
	struct ptlrpc_request	   *req = NULL;
	struct lu_fid		   *fid;
	int			   rc;
	ENTRY;

	/* Because it needs send the update buffer right away,
	 * just create an update buffer, instead of attaching the
	 * update_remote list of the thandle.
	 */
	update = osp_update_request_create(dt_dev);
	if (IS_ERR(update))
		RETURN(PTR_ERR(update));

	rc = osp_update_rpc_pack(env, index_lookup, update, OUT_INDEX_LOOKUP,
				 lu_object_fid(&dt->do_lu), rec, key);
	if (rc != 0) {
		CERROR("%s: Insert update error: rc = %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name, rc);
		GOTO(out, rc);
	}

	rc = osp_remote_sync(env, osp, update, &req);
	if (rc < 0)
		GOTO(out, rc);

	reply = req_capsule_server_sized_get(&req->rq_pill,
					     &RMF_OUT_UPDATE_REPLY,
					     OUT_UPDATE_REPLY_SIZE);
	if (reply->ourp_magic != UPDATE_REPLY_MAGIC) {
		CERROR("%s: Wrong version %x expected %x: rc = %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name,
		       reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO);
		GOTO(out, rc = -EPROTO);
	}

	rc = object_update_result_data_get(reply, lbuf, 0);
	if (rc < 0)
		GOTO(out, rc);

	if (lbuf->lb_len != sizeof(*fid)) {
		CERROR("%s: lookup "DFID" %s wrong size %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name,
		       PFID(lu_object_fid(&dt->do_lu)), (char *)key,
		       (int)lbuf->lb_len);
		GOTO(out, rc = -EINVAL);
	}

	fid = lbuf->lb_buf;
	if (ptlrpc_rep_need_swab(req))
		lustre_swab_lu_fid(fid);
	if (!fid_is_sane(fid)) {
		CERROR("%s: lookup "DFID" %s invalid fid "DFID"\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name,
		       PFID(lu_object_fid(&dt->do_lu)), (char *)key, PFID(fid));
		GOTO(out, rc = -EINVAL);
	}

	memcpy(rec, fid, sizeof(*fid));

	GOTO(out, rc = 1);

out:
	if (req != NULL)
		ptlrpc_req_finished(req);

	osp_update_request_destroy(update);

	return rc;
}
static ssize_t osp_md_read(const struct lu_env *env, struct dt_object *dt,
			   struct lu_buf *rbuf, loff_t *pos)
{
	struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev);
	struct dt_device *dt_dev	= &osp->opd_dt_dev;
	struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2;
	char *ptr = rbuf->lb_buf;
	struct osp_update_request *update;
	struct ptlrpc_request *req = NULL;
	struct out_read_reply *orr;
	struct ptlrpc_bulk_desc *desc;
	struct object_update_reply *reply;
	__u32 left_size;
	int nbufs;
	int i;
	int rc;
	ENTRY;

	/* Because it needs send the update buffer right away,
	 * just create an update buffer, instead of attaching the
	 * update_remote list of the thandle.  */
	update = osp_update_request_create(dt_dev);
	if (IS_ERR(update))
		RETURN(PTR_ERR(update));

	rc = osp_update_rpc_pack(env, read, update, OUT_READ,
				 lu_object_fid(&dt->do_lu),
				 rbuf->lb_len, *pos);
	if (rc != 0) {
		CERROR("%s: cannot insert update: rc = %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name, rc);
		GOTO(out_update, rc);
	}

	CDEBUG(D_INFO, "%s "DFID" read offset %llu size %zu\n",
	       dt_dev->dd_lu_dev.ld_obd->obd_name,
	       PFID(lu_object_fid(&dt->do_lu)), *pos, rbuf->lb_len);
	rc = osp_prep_update_req(env, osp->opd_obd->u.cli.cl_import, update,
				 &req);
	if (rc != 0)
		GOTO(out_update, rc);

	nbufs = (rbuf->lb_len + OUT_BULK_BUFFER_SIZE - 1) /
					OUT_BULK_BUFFER_SIZE;
	/* allocate bulk descriptor */
	desc = ptlrpc_prep_bulk_imp(req, nbufs, 1,
				    PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KVEC,
				    MDS_BULK_PORTAL, &ptlrpc_bulk_kvec_ops);
	if (desc == NULL)
		GOTO(out, rc = -ENOMEM);

	/* split the buffer into small chunk size */
	left_size = rbuf->lb_len;
	for (i = 0; i < nbufs; i++) {
		int read_size;

		read_size = left_size > OUT_BULK_BUFFER_SIZE ?
				OUT_BULK_BUFFER_SIZE : left_size;
		desc->bd_frag_ops->add_iov_frag(desc, ptr, read_size);

		ptr += read_size;
	}

	/* This will only be called with read-only update, and these updates
	 * might be used to retrieve update log during recovery process, so
	 * it will be allowed to send during recovery process */
	req->rq_allow_replay = 1;
	req->rq_bulk_read = 1;
	/* send request to master and wait for RPC to complete */
	rc = ptlrpc_queue_wait(req);
	if (rc != 0)
		GOTO(out, rc);

	rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
					  req->rq_bulk->bd_nob_transferred);
	if (rc < 0)
		GOTO(out, rc);

	reply = req_capsule_server_sized_get(&req->rq_pill,
					     &RMF_OUT_UPDATE_REPLY,
					     OUT_UPDATE_REPLY_SIZE);

	if (reply->ourp_magic != UPDATE_REPLY_MAGIC) {
		CERROR("%s: invalid update reply magic %x expected %x:"
		       " rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name,
		       reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO);
		GOTO(out, rc = -EPROTO);
	}

	rc = object_update_result_data_get(reply, lbuf, 0);
	if (rc < 0)
		GOTO(out, rc);

	if (lbuf->lb_len < sizeof(*orr))
		GOTO(out, rc = -EPROTO);

	orr = lbuf->lb_buf;
	orr_le_to_cpu(orr, orr);
	rc = orr->orr_size;
	*pos = orr->orr_offset;
out:
	ptlrpc_req_finished(req);

out_update:
	osp_update_request_destroy(update);

	RETURN(rc);
}
Beispiel #12
0
static int osp_object_create(const struct lu_env *env, struct dt_object *dt,
			     struct lu_attr *attr,
			     struct dt_allocation_hint *hint,
			     struct dt_object_format *dof, struct thandle *th)
{
	struct osp_thread_info	*osi = osp_env_info(env);
	struct osp_device	*d = lu2osp_dev(dt->do_lu.lo_dev);
	struct osp_object	*o = dt2osp_obj(dt);
	int			rc = 0;
	struct lu_fid		*fid = &osi->osi_fid;
	ENTRY;

	if (o->opo_reserved) {
		/* regular case, fid is assigned holding trunsaction open */
		 osp_object_assign_fid(env, d, o);
	}

	memcpy(fid, lu_object_fid(&dt->do_lu), sizeof(*fid));

	LASSERTF(fid_is_sane(fid), "fid for osp_obj %p is insane"DFID"!\n",
		 osp_obj, PFID(fid));

	if (!o->opo_reserved) {
		/* special case, id was assigned outside of transaction
		 * see comments in osp_declare_attr_set */
		spin_lock(&d->opd_pre_lock);
		osp_update_last_fid(d, fid);
		spin_unlock(&d->opd_pre_lock);
	}

	CDEBUG(D_INODE, "fid for osp_obj %p is "DFID"!\n", osp_obj, PFID(fid));

	/* If the precreate ends, it means it will be ready to rollover to
	 * the new sequence soon, all the creation should be synchronized,
	 * otherwise during replay, the replay fid will be inconsistent with
	 * last_used/create fid */
	if (osp_precreate_end_seq(env, d) && osp_is_fid_client(d))
		th->th_sync = 1;

	/*
	 * it's OK if the import is inactive by this moment - id was created
	 * by OST earlier, we just need to maintain it consistently on the disk
	 * once import is reconnected, OSP will claim this and other objects
	 * used and OST either keep them, if they exist or recreate
	 */

	/* we might have lost precreated objects */
	if (unlikely(d->opd_gap_count) > 0) {
		spin_lock(&d->opd_pre_lock);
		if (d->opd_gap_count > 0) {
			int count = d->opd_gap_count;

			ostid_set_id(&osi->osi_oi,
				     fid_oid(&d->opd_gap_start_fid));
			d->opd_gap_count = 0;
			spin_unlock(&d->opd_pre_lock);

			CDEBUG(D_HA, "Writting gap "DFID"+%d in llog\n",
			       PFID(&d->opd_gap_start_fid), count);
			/* real gap handling is disabled intil ORI-692 will be
			 * fixed, now we only report gaps */
		} else {
			spin_unlock(&d->opd_pre_lock);
		}
	}

	/* new object, the very first ->attr_set()
	 * initializing attributes needs no logging */
	o->opo_new = 1;

	/* Only need update last_used oid file, seq file will only be update
	 * during seq rollover */
	osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off,
			   &d->opd_last_used_fid.f_oid, d->opd_index);

	rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb,
			     &osi->osi_off, th);

	CDEBUG(D_HA, "%s: Wrote last used FID: "DFID", index %d: %d\n",
	       d->opd_obd->obd_name, PFID(fid), d->opd_index, rc);

	RETURN(rc);
}
Beispiel #13
0
static int osp_declare_object_create(const struct lu_env *env,
				     struct dt_object *dt,
				     struct lu_attr *attr,
				     struct dt_allocation_hint *hint,
				     struct dt_object_format *dof,
				     struct thandle *th)
{
	struct osp_thread_info	*osi = osp_env_info(env);
	struct osp_device	*d = lu2osp_dev(dt->do_lu.lo_dev);
	struct osp_object	*o = dt2osp_obj(dt);
	const struct lu_fid	*fid;
	int			 rc = 0;

	ENTRY;

	/* should happen to non-0 OSP only so that at least one object
	 * has been already declared in the scenario and LOD should
	 * cleanup that */
	if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_CREATE_FAIL) && d->opd_index == 1)
		RETURN(-ENOSPC);

	LASSERT(d->opd_last_used_oid_file);
	fid = lu_object_fid(&dt->do_lu);

	/*
	 * There can be gaps in precreated ids and record to unlink llog
	 * XXX: we do not handle gaps yet, implemented before solution
	 *	was found to be racy, so we disabled that. there is no
	 *	point in making useless but expensive llog declaration.
	 */
	/* rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th); */

	if (unlikely(!fid_is_zero(fid))) {
		/* replay case: caller knows fid */
		osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
		rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
					     sizeof(osi->osi_id), osi->osi_off,
					     th);
		RETURN(rc);
	}

	/*
	 * in declaration we need to reserve object so that we don't block
	 * awaiting precreation RPC to complete
	 */
	rc = osp_precreate_reserve(env, d);
	/*
	 * we also need to declare update to local "last used id" file for
	 * recovery if object isn't used for a reason, we need to release
	 * reservation, this can be made in osd_object_release()
	 */
	if (rc == 0) {
		/* mark id is reserved: in create we don't want to talk
		 * to OST */
		LASSERT(o->opo_reserved == 0);
		o->opo_reserved = 1;

		/* common for all OSPs file hystorically */
		osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
		rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
					     sizeof(osi->osi_id), osi->osi_off,
					     th);
	} else {
		/* not needed in the cache anymore */
		set_bit(LU_OBJECT_HEARD_BANSHEE,
			    &dt->do_lu.lo_header->loh_flags);
	}
	RETURN(rc);
}