Exemplo n.º 1
0
int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
			  obd_id id, struct ofd_seq *oseq, int nr, int sync)
{
	struct ofd_thread_info	*info = ofd_info(env);
	struct ofd_object	*fo = NULL;
	struct dt_object	*next;
	struct thandle		*th;
	struct ofd_object	**batch;
	struct lu_fid		*fid = &info->fti_fid;
	obd_id			 tmp;
	int			 rc;
	int			 i;
	int			 objects = 0;
	int			 nr_saved = nr;

	ENTRY;

	/* Don't create objects beyond the valid range for this SEQ */
	if (unlikely(fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
		     (id + nr) >= IDIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the IDIF_MAX_OID (1<<48)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	} else if (unlikely(!fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
			    (id + nr) >= OBIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the OBIF_MAX_OID (1<<32)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	}

	OBD_ALLOC(batch, nr_saved * sizeof(struct ofd_object *));
	if (batch == NULL)
		RETURN(-ENOMEM);

	info->fti_attr.la_valid = LA_TYPE | LA_MODE;
	/*
	 * We mark object SUID+SGID to flag it for accepting UID+GID from
	 * client on first write.  Currently the permission bits on the OST are
	 * never used, so this is OK.
	 */
	info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666;
	info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);

	/* Initialize a/c/m time so any client timestamp will always
	 * be newer and update the inode. ctime = 0 is also handled
	 * specially in osd_inode_setattr(). See LU-221, LU-1042 */
	info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME;
	info->fti_attr.la_atime = 0;
	info->fti_attr.la_mtime = 0;
	info->fti_attr.la_ctime = 0;

	LASSERT(id != 0);

	/* prepare objects */
	*fid = *lu_object_fid(&oseq->os_lastid_obj->do_lu);
	for (i = 0; i < nr; i++) {
		rc = fid_set_id(fid, id + i);
		if (rc != 0) {
			if (i == 0)
				GOTO(out, rc);

			nr = i;
			break;
		}

		fo = ofd_object_find(env, ofd, fid);
		if (IS_ERR(fo)) {
			if (i == 0)
				GOTO(out, rc = PTR_ERR(fo));

			nr = i;
			break;
		}

		ofd_write_lock(env, fo);
		batch[i] = fo;
	}
	info->fti_buf.lb_buf = &tmp;
	info->fti_buf.lb_len = sizeof(tmp);
	info->fti_off = 0;

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	th->th_sync |= sync;

	rc = dt_declare_record_write(env, oseq->os_lastid_obj, &info->fti_buf,
				     info->fti_off, th);
	if (rc)
		GOTO(trans_stop, rc);

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		if (unlikely(ofd_object_exists(fo))) {
			/* object may exist being re-created by write replay */
			CDEBUG(D_INODE, "object "LPX64"/"LPX64" exists: "
			       DFID"\n", ostid_seq(&oseq->os_oi), id,
			       PFID(lu_object_fid(&fo->ofo_obj.do_lu)));
			continue;
		}

		next = ofd_object_child(fo);
		LASSERT(next != NULL);

		rc = dt_declare_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
		if (rc) {
			nr = i;
			break;
		}
	}

	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc)
		GOTO(trans_stop, rc);

	CDEBUG(D_OTHER, "%s: create new object "DFID" nr %d\n",
	       ofd_name(ofd), PFID(fid), nr);

	LASSERT(nr > 0);

	 /* When the LFSCK scanning the whole device to verify the LAST_ID file
	  * consistency, it will load the last_id into RAM firstly, and compare
	  * the last_id with each OST-object's ID. If the later one is larger,
	  * then it will regard the LAST_ID file crashed. But during the LFSCK
	  * scanning, the OFD may continue to create new OST-objects. Those new
	  * created OST-objects will have larger IDs than the LFSCK known ones.
	  * So from the LFSCK view, it needs to re-load the last_id from disk
	  * file, and if the latest last_id is still smaller than the object's
	  * ID, then the LAST_ID file is real crashed.
	  *
	  * To make above mechanism to work, before OFD pre-create OST-objects,
	  * it needs to update the LAST_ID file firstly, otherwise, the LFSCK
	  * may cannot get latest last_id although new OST-object created. */
	if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_SKIP_LASTID)) {
		tmp = cpu_to_le64(id + nr - 1);
		dt_write_lock(env, oseq->os_lastid_obj, 0);
		rc = dt_record_write(env, oseq->os_lastid_obj,
				     &info->fti_buf, &info->fti_off, th);
		dt_write_unlock(env, oseq->os_lastid_obj);
		if (rc != 0)
			GOTO(trans_stop, rc);
	}

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		/* Only the new created objects need to be recorded. */
		if (ofd->ofd_osd->dd_record_fid_accessed) {
			lfsck_pack_rfa(&ofd_info(env)->fti_lr,
				       lu_object_fid(&fo->ofo_obj.do_lu));
			lfsck_in_notify(env, ofd->ofd_osd,
					&ofd_info(env)->fti_lr);
		}

		if (likely(!ofd_object_exists(fo) &&
			   !OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING))) {
			next = ofd_object_child(fo);
			LASSERT(next != NULL);

			rc = dt_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
			if (rc)
				break;
			LASSERT(ofd_object_exists(fo));
		}
		ofd_seq_last_oid_set(oseq, id + i);
	}

	objects = i;
	/* NOT all the wanted objects have been created,
	 * set the LAST_ID as the real created. */
	if (unlikely(objects < nr)) {
		int rc1;

		info->fti_off = 0;
		tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
		dt_write_lock(env, oseq->os_lastid_obj, 0);
		rc1 = dt_record_write(env, oseq->os_lastid_obj,
				      &info->fti_buf, &info->fti_off, th);
		dt_write_unlock(env, oseq->os_lastid_obj);
		if (rc1 != 0)
			CERROR("%s: fail to reset the LAST_ID for seq ("LPX64
			       ") from "LPU64" to "LPU64"\n", ofd_name(ofd),
			       ostid_seq(&oseq->os_oi), id + nr - 1,
			       ofd_seq_last_oid(oseq));
	}

trans_stop:
	ofd_trans_stop(env, ofd, th, rc);
out:
	for (i = 0; i < nr_saved; i++) {
		fo = batch[i];
		if (fo) {
			ofd_write_unlock(env, fo);
			ofd_object_put(env, fo);
		}
	}
	OBD_FREE(batch, nr_saved * sizeof(struct ofd_object *));

	CDEBUG((objects == 0 && rc == 0) ? D_ERROR : D_OTHER,
	       "created %d/%d objects: %d\n", objects, nr_saved, rc);

	LASSERT(ergo(objects == 0, rc < 0));
	RETURN(objects > 0 ? objects : rc);
}
Exemplo n.º 2
0
/**
 * Object updates between Targets. Because all the updates has been
 * dis-assemblied into object updates at sender side, so OUT will
 * call OSD API directly to execute these updates.
 *
 * In DNE phase I all of the updates in the request need to be executed
 * in one transaction, and the transaction has to be synchronously.
 *
 * Please refer to lustre/include/lustre/lustre_idl.h for req/reply
 * format.
 */
int out_handle(struct tgt_session_info *tsi)
{
	const struct lu_env		*env = tsi->tsi_env;
	struct tgt_thread_info		*tti = tgt_th_info(env);
	struct thandle_exec_args	*ta = &tti->tti_tea;
	struct req_capsule		*pill = tsi->tsi_pill;
	struct dt_device		*dt = tsi->tsi_tgt->lut_bottom;
	struct out_update_header	*ouh;
	struct out_update_buffer	*oub = NULL;
	struct object_update		*update;
	struct object_update_reply	*reply;
	struct ptlrpc_bulk_desc		*desc = NULL;
	struct l_wait_info		lwi;
	void				**update_bufs;
	int				current_batchid = -1;
	__u32				update_buf_count;
	unsigned int			i;
	unsigned int			reply_index = 0;
	int				rc = 0;
	int				rc1 = 0;
	int				ouh_size, reply_size;
	int				updates;
	ENTRY;

	req_capsule_set(pill, &RQF_OUT_UPDATE);
	ouh_size = req_capsule_get_size(pill, &RMF_OUT_UPDATE_HEADER,
					RCL_CLIENT);
	if (ouh_size <= 0)
		RETURN(err_serious(-EPROTO));

	ouh = req_capsule_client_get(pill, &RMF_OUT_UPDATE_HEADER);
	if (ouh == NULL)
		RETURN(err_serious(-EPROTO));

	if (ouh->ouh_magic != OUT_UPDATE_HEADER_MAGIC) {
		CERROR("%s: invalid update buffer magic %x expect %x: "
		       "rc = %d\n", tgt_name(tsi->tsi_tgt), ouh->ouh_magic,
		       UPDATE_REQUEST_MAGIC, -EPROTO);
		RETURN(err_serious(-EPROTO));
	}

	update_buf_count = ouh->ouh_count;
	if (update_buf_count == 0)
		RETURN(err_serious(-EPROTO));

	OBD_ALLOC(update_bufs, sizeof(*update_bufs) * update_buf_count);
	if (update_bufs == NULL)
		RETURN(err_serious(-ENOMEM));

	if (ouh->ouh_inline_length > 0) {
		update_bufs[0] = ouh->ouh_inline_data;
	} else {
		struct out_update_buffer *tmp;

		oub = req_capsule_client_get(pill, &RMF_OUT_UPDATE_BUF);
		if (oub == NULL)
			GOTO(out_free, rc = err_serious(-EPROTO));

		desc = ptlrpc_prep_bulk_exp(pill->rc_req, update_buf_count,
					    PTLRPC_BULK_OPS_COUNT,
					    PTLRPC_BULK_GET_SINK |
					    PTLRPC_BULK_BUF_KVEC,
					    MDS_BULK_PORTAL,
					    &ptlrpc_bulk_kvec_ops);
		if (desc == NULL)
			GOTO(out_free, rc = err_serious(-ENOMEM));

		tmp = oub;
		for (i = 0; i < update_buf_count; i++, tmp++) {
			if (tmp->oub_size >= OUT_MAXREQSIZE)
				GOTO(out_free, rc = err_serious(-EPROTO));

			OBD_ALLOC(update_bufs[i], tmp->oub_size);
			if (update_bufs[i] == NULL)
				GOTO(out_free, rc = err_serious(-ENOMEM));

			desc->bd_frag_ops->add_iov_frag(desc, update_bufs[i],
							tmp->oub_size);
		}

		pill->rc_req->rq_bulk_write = 1;
		rc = sptlrpc_svc_prep_bulk(pill->rc_req, desc);
		if (rc != 0)
			GOTO(out_free, rc = err_serious(rc));

		rc = target_bulk_io(pill->rc_req->rq_export, desc, &lwi);
		if (rc < 0)
			GOTO(out_free, rc = err_serious(rc));
	}
	/* validate the request and calculate the total update count and
	 * set it to reply */
	reply_size = 0;
	updates = 0;
	for (i = 0; i < update_buf_count; i++) {
		struct object_update_request	*our;
		int				 j;

		our = update_bufs[i];
		if (ptlrpc_req_need_swab(pill->rc_req))
			lustre_swab_object_update_request(our);

		if (our->ourq_magic != UPDATE_REQUEST_MAGIC) {
			CERROR("%s: invalid update buffer magic %x"
			       " expect %x: rc = %d\n",
			       tgt_name(tsi->tsi_tgt), our->ourq_magic,
			       UPDATE_REQUEST_MAGIC, -EPROTO);
			GOTO(out_free, rc = err_serious(-EPROTO));
		}
		updates += our->ourq_count;

		/* need to calculate reply size */
		for (j = 0; j < our->ourq_count; j++) {
			update = object_update_request_get(our, j, NULL);
			if (update == NULL)
				GOTO(out, rc = err_serious(-EPROTO));
			if (ptlrpc_req_need_swab(pill->rc_req))
				lustre_swab_object_update(update);

			if (!fid_is_sane(&update->ou_fid)) {
				CERROR("%s: invalid FID "DFID": rc = %d\n",
				       tgt_name(tsi->tsi_tgt),
				       PFID(&update->ou_fid), -EPROTO);
				GOTO(out, rc = err_serious(-EPROTO));
			}

			/* XXX: what ou_result_size can be considered safe? */

			reply_size += sizeof(reply->ourp_lens[0]);
			reply_size += sizeof(struct object_update_result);
			reply_size += update->ou_result_size;
		}
 	}
	reply_size += sizeof(*reply);

	if (unlikely(reply_size > ouh->ouh_reply_size)) {
		CERROR("%s: too small reply buf %u for %u, need %u at least\n",
		       tgt_name(tsi->tsi_tgt), ouh->ouh_reply_size,
		       updates, reply_size);
		GOTO(out_free, rc = err_serious(-EPROTO));
	}

	req_capsule_set_size(pill, &RMF_OUT_UPDATE_REPLY, RCL_SERVER,
			     ouh->ouh_reply_size);
	rc = req_capsule_server_pack(pill);
	if (rc != 0) {
		CERROR("%s: Can't pack response: rc = %d\n",
		       tgt_name(tsi->tsi_tgt), rc);
		GOTO(out_free, rc = err_serious(-EPROTO));
	}

	/* Prepare the update reply buffer */
	reply = req_capsule_server_get(pill, &RMF_OUT_UPDATE_REPLY);
	if (reply == NULL)
		GOTO(out_free, rc = -EPROTO);
	reply->ourp_magic = UPDATE_REPLY_MAGIC;
	reply->ourp_count = updates;
	tti->tti_u.update.tti_update_reply = reply;
	tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
 
	/* Walk through updates in the request to execute them */
	for (i = 0; i < update_buf_count; i++) {
		struct tgt_handler	*h;
		struct dt_object	*dt_obj;
		int			update_count;
		struct object_update_request *our;
		int			j;

		our = update_bufs[i];
		update_count = our->ourq_count;
		for (j = 0; j < update_count; j++) {
			update = object_update_request_get(our, j, NULL);

			dt_obj = dt_locate(env, dt, &update->ou_fid);
			if (IS_ERR(dt_obj))
				GOTO(out, rc = PTR_ERR(dt_obj));

			if (dt->dd_record_fid_accessed) {
				lfsck_pack_rfa(&tti->tti_lr,
					       lu_object_fid(&dt_obj->do_lu),
					       LE_FID_ACCESSED,
					       LFSCK_TYPE_LAYOUT);
				tgt_lfsck_in_notify(env, dt, &tti->tti_lr,
						    NULL);
			}

			tti->tti_u.update.tti_dt_object = dt_obj;
			tti->tti_u.update.tti_update = update;
			tti->tti_u.update.tti_update_reply_index = reply_index;

			h = out_handler_find(update->ou_type);
			if (unlikely(h == NULL)) {
				CERROR("%s: unsupported opc: 0x%x\n",
				       tgt_name(tsi->tsi_tgt), update->ou_type);
				GOTO(next, rc = -ENOTSUPP);
			}

			/* Check resend case only for modifying RPC */
			if (h->th_flags & MUTABOR) {
				struct ptlrpc_request *req = tgt_ses_req(tsi);

				if (out_check_resent(env, dt, dt_obj, req,
						     out_reconstruct, reply,
						     reply_index))
					GOTO(next, rc = 0);
			}

			/* start transaction for modification RPC only */
			if (h->th_flags & MUTABOR && current_batchid == -1) {
				current_batchid = update->ou_batchid;
				rc = out_tx_start(env, dt, ta, tsi->tsi_exp);
				if (rc != 0)
					GOTO(next, rc);

				if (update->ou_flags & UPDATE_FL_SYNC)
					ta->ta_handle->th_sync = 1;
			}

			/* Stop the current update transaction, if the update
			 * has different batchid, or read-only update */
			if (((current_batchid != update->ou_batchid) ||
			     !(h->th_flags & MUTABOR)) &&
			     ta->ta_handle != NULL) {
				rc = out_tx_end(env, ta, rc);
				current_batchid = -1;
				if (rc != 0)
					GOTO(next, rc);

				/* start a new transaction if needed */
				if (h->th_flags & MUTABOR) {
					rc = out_tx_start(env, dt, ta,
							  tsi->tsi_exp);
					if (rc != 0)
						GOTO(next, rc);
					if (update->ou_flags & UPDATE_FL_SYNC)
						ta->ta_handle->th_sync = 1;
					current_batchid = update->ou_batchid;
				}
			}

			rc = h->th_act(tsi);
next:
			reply_index++;
			lu_object_put(env, &dt_obj->do_lu);
			if (rc < 0)
				GOTO(out, rc);
		}
	}
out:
	if (current_batchid != -1) {
		rc1 = out_tx_end(env, ta, rc);
		if (rc == 0)
			rc = rc1;
	}

out_free:
	if (update_bufs != NULL) {
		if (oub != NULL) {
			for (i = 0; i < update_buf_count; i++, oub++) {
				if (update_bufs[i] != NULL)
					OBD_FREE(update_bufs[i], oub->oub_size);
			}
		}

		OBD_FREE(update_bufs, sizeof(*update_bufs) * update_buf_count);
	}

	if (desc != NULL)
		ptlrpc_free_bulk(desc);

	RETURN(rc);
}
Exemplo n.º 3
0
/**
 * Object updates between Targets. Because all the updates has been
 * dis-assemblied into object updates at sender side, so OUT will
 * call OSD API directly to execute these updates.
 *
 * In DNE phase I all of the updates in the request need to be executed
 * in one transaction, and the transaction has to be synchronously.
 *
 * Please refer to lustre/include/lustre/lustre_idl.h for req/reply
 * format.
 */
int out_handle(struct tgt_session_info *tsi)
{
	const struct lu_env		*env = tsi->tsi_env;
	struct tgt_thread_info		*tti = tgt_th_info(env);
	struct thandle_exec_args	*ta = &tti->tti_tea;
	struct req_capsule		*pill = tsi->tsi_pill;
	struct dt_device		*dt = tsi->tsi_tgt->lut_bottom;
	struct object_update_request	*ureq;
	struct object_update		*update;
	struct object_update_reply	*reply;
	int				 bufsize;
	int				 count;
	int				 old_batchid = -1;
	int				 i;
	int				 rc = 0;
	int				 rc1 = 0;

	ENTRY;

	req_capsule_set(pill, &RQF_OUT_UPDATE);
	ureq = req_capsule_client_get(pill, &RMF_OUT_UPDATE);
	if (ureq == NULL) {
		CERROR("%s: No buf!: rc = %d\n", tgt_name(tsi->tsi_tgt),
		       -EPROTO);
		RETURN(err_serious(-EPROTO));
	}

	bufsize = req_capsule_get_size(pill, &RMF_OUT_UPDATE, RCL_CLIENT);
	if (bufsize != object_update_request_size(ureq)) {
		CERROR("%s: invalid bufsize %d: rc = %d\n",
		       tgt_name(tsi->tsi_tgt), bufsize, -EPROTO);
		RETURN(err_serious(-EPROTO));
	}

	if (ureq->ourq_magic != UPDATE_REQUEST_MAGIC) {
		CERROR("%s: invalid update buffer magic %x expect %x: "
		       "rc = %d\n", tgt_name(tsi->tsi_tgt), ureq->ourq_magic,
		       UPDATE_REQUEST_MAGIC, -EPROTO);
		RETURN(err_serious(-EPROTO));
	}

	count = ureq->ourq_count;
	if (count <= 0) {
		CERROR("%s: empty update: rc = %d\n", tgt_name(tsi->tsi_tgt),
		       -EPROTO);
		RETURN(err_serious(-EPROTO));
	}

	req_capsule_set_size(pill, &RMF_OUT_UPDATE_REPLY, RCL_SERVER,
			     OUT_UPDATE_REPLY_SIZE);
	rc = req_capsule_server_pack(pill);
	if (rc != 0) {
		CERROR("%s: Can't pack response: rc = %d\n",
		       tgt_name(tsi->tsi_tgt), rc);
		RETURN(rc);
	}

	/* Prepare the update reply buffer */
	reply = req_capsule_server_get(pill, &RMF_OUT_UPDATE_REPLY);
	if (reply == NULL)
		RETURN(err_serious(-EPROTO));
	object_update_reply_init(reply, count);
	tti->tti_u.update.tti_update_reply = reply;

	rc = out_tx_start(env, dt, ta, tsi->tsi_exp);
	if (rc != 0)
		RETURN(rc);

	tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));

	/* Walk through updates in the request to execute them synchronously */
	for (i = 0; i < count; i++) {
		struct tgt_handler	*h;
		struct dt_object	*dt_obj;

		update = object_update_request_get(ureq, i, NULL);
		if (update == NULL)
			GOTO(out, rc = -EPROTO);

		if (ptlrpc_req_need_swab(pill->rc_req))
			lustre_swab_object_update(update);

		if (old_batchid == -1) {
			old_batchid = update->ou_batchid;
		} else if (old_batchid != update->ou_batchid) {
			/* Stop the current update transaction,
			 * create a new one */
			rc = out_tx_end(env, ta);
			if (rc < 0)
				RETURN(rc);

			rc = out_tx_start(env, dt, ta, tsi->tsi_exp);
			if (rc != 0)
				RETURN(rc);
			old_batchid = update->ou_batchid;
		}

		if (!fid_is_sane(&update->ou_fid)) {
			CERROR("%s: invalid FID "DFID": rc = %d\n",
			       tgt_name(tsi->tsi_tgt), PFID(&update->ou_fid),
			       -EPROTO);
			GOTO(out, rc = err_serious(-EPROTO));
		}

		dt_obj = dt_locate(env, dt, &update->ou_fid);
		if (IS_ERR(dt_obj))
			GOTO(out, rc = PTR_ERR(dt_obj));

		if (dt->dd_record_fid_accessed) {
			lfsck_pack_rfa(&tti->tti_lr,
				       lu_object_fid(&dt_obj->do_lu));
			tgt_lfsck_in_notify(env, dt, &tti->tti_lr);
		}

		tti->tti_u.update.tti_dt_object = dt_obj;
		tti->tti_u.update.tti_update = update;
		tti->tti_u.update.tti_update_reply_index = i;

		h = out_handler_find(update->ou_type);
		if (likely(h != NULL)) {
			/* For real modification RPC, check if the update
			 * has been executed */
			if (h->th_flags & MUTABOR) {
				struct ptlrpc_request *req = tgt_ses_req(tsi);

				if (out_check_resent(env, dt, dt_obj, req,
						     out_reconstruct, reply, i))
					GOTO(next, rc);
			}

			rc = h->th_act(tsi);
		} else {
			CERROR("%s: The unsupported opc: 0x%x\n",
			       tgt_name(tsi->tsi_tgt), update->ou_type);
			lu_object_put(env, &dt_obj->do_lu);
			GOTO(out, rc = -ENOTSUPP);
		}
next:
		lu_object_put(env, &dt_obj->do_lu);
		if (rc < 0)
			GOTO(out, rc);
	}
out:
	rc1 = out_tx_end(env, ta);
	if (rc == 0)
		rc = rc1;
	RETURN(rc);
}