Beispiel #1
0
static int llog_osd_pad(const struct lu_env *env, struct dt_object *o,
			loff_t *off, int len, int index, struct thandle *th)
{
	struct llog_thread_info	*lgi = llog_info(env);
	int			 rc;

	LASSERT(th);
	LASSERT(off);
	LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);

	lgi->lgi_tail.lrt_len = lgi->lgi_lrh.lrh_len = len;
	lgi->lgi_tail.lrt_index = lgi->lgi_lrh.lrh_index = index;
	lgi->lgi_lrh.lrh_type = LLOG_PAD_MAGIC;

	lgi->lgi_buf.lb_buf = &lgi->lgi_lrh;
	lgi->lgi_buf.lb_len = sizeof(lgi->lgi_lrh);
	dt_write_lock(env, o, 0);
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc) {
		CERROR("%s: error writing padding record: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, rc);
		GOTO(out, rc);
	}

	lgi->lgi_buf.lb_buf = &lgi->lgi_tail;
	lgi->lgi_buf.lb_len = sizeof(lgi->lgi_tail);
	*off += len - sizeof(lgi->lgi_lrh) - sizeof(lgi->lgi_tail);
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc)
		CERROR("%s: error writing padding record: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, rc);
out:
	dt_write_unlock(env, o);
	return rc;
}
Beispiel #2
0
/**
 * Write the special file which contains the list of llog catalogs IDs
 *
 * This function writes the CATALOG file which contains the array of llog
 * catalogs IDs. It is used mostly to store OSP llogs indexed by OST/MDT
 * number.
 *
 * \param[in]  env	execution environment
 * \param[in]  d	corresponding storage device
 * \param[in]  idx	position to start from, usually OST/MDT index
 * \param[in]  count	how many catalog IDs to write
 * \param[out] idarray	the buffer with the data to write.
 * \param[in]  fid	LLOG_CATALOGS_OID for CATALOG object
 *
 * \retval		0 on successful write of catalog IDs
 * \retval		negative value on error
 */
int llog_osd_put_cat_list(const struct lu_env *env, struct dt_device *d,
			  int idx, int count, struct llog_catid *idarray,
			  const struct lu_fid *fid)
{
	struct llog_thread_info	*lgi = llog_info(env);
	struct dt_object	*o = NULL;
	struct thandle		*th;
	int			 rc, size;

	if (count == 0)
		RETURN(0);

	LASSERT(d);

	size = sizeof(*idarray) * count;
	lgi->lgi_off = idx * sizeof(*idarray);
	lgi->lgi_fid = *fid;

	o = dt_locate(env, d, &lgi->lgi_fid);
	if (IS_ERR(o))
		RETURN(PTR_ERR(o));

	if (!dt_object_exists(o))
		GOTO(out, rc = -ENOENT);

	rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA);
	if (rc)
		GOTO(out, rc);

	if (!S_ISREG(lgi->lgi_attr.la_mode)) {
		CERROR("%s: CATALOGS is not a regular file!: mode = %o\n",
		       o->do_lu.lo_dev->ld_obd->obd_name,
		       lgi->lgi_attr.la_mode);
		GOTO(out, rc = -ENOENT);
	}

	th = dt_trans_create(env, d);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	lgi->lgi_buf.lb_len = size;
	lgi->lgi_buf.lb_buf = idarray;
	rc = dt_declare_record_write(env, o, &lgi->lgi_buf, lgi->lgi_off, th);
	if (rc)
		GOTO(out, rc);

	rc = dt_trans_start_local(env, d, th);
	if (rc)
		GOTO(out_trans, rc);

	rc = dt_record_write(env, o, &lgi->lgi_buf, &lgi->lgi_off, th);
	if (rc)
		CDEBUG(D_INODE, "can't write CATALOGS at index %d: rc = %d\n",
		       idx, rc);
out_trans:
	dt_trans_stop(env, d, th);
out:
	lu_object_put(env, &o->do_lu);
	RETURN(rc);
}
Beispiel #3
0
int lfsck_bookmark_store(const struct lu_env *env, struct lfsck_instance *lfsck)
{
	struct thandle    *handle;
	struct dt_object  *obj    = lfsck->li_bookmark_obj;
	loff_t		   pos    = 0;
	int		   len    = sizeof(struct lfsck_bookmark);
	int		   rc;
	ENTRY;

	lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk,
				 &lfsck->li_bookmark_ram);
	handle = dt_trans_create(env, lfsck->li_bottom);
	if (IS_ERR(handle)) {
		rc = PTR_ERR(handle);
		CERROR("%s: fail to create trans for storing lfsck_bookmark: "
		       "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
		RETURN(rc);
	}

	rc = dt_declare_record_write(env, obj,
				     lfsck_buf_get(env,
				     &lfsck->li_bookmark_disk, len),
				     0, handle);
	if (rc != 0) {
		CERROR("%s: fail to declare trans for storing lfsck_bookmark: "
		       "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
		GOTO(out, rc);
	}

	rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
	if (rc != 0) {
		CERROR("%s: fail to start trans for storing lfsck_bookmark: "
		       "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
		GOTO(out, rc);
	}

	rc = dt_record_write(env, obj,
			     lfsck_buf_get(env, &lfsck->li_bookmark_disk, len),
			     &pos, handle);
	if (rc != 0)
		CERROR("%s: fail to store lfsck_bookmark: expected = %d, "
		       "rc = %d\n", lfsck_lfsck2name(lfsck), len, rc);

	GOTO(out, rc);

out:
	dt_trans_stop(env, lfsck->li_bottom, handle);
	return rc;
}
Beispiel #4
0
static int out_tx_write_exec(const struct lu_env *env, struct thandle *th,
			     struct tx_arg *arg)
{
	struct dt_object *dt_obj = arg->object;
	int rc;

	dt_write_lock(env, dt_obj, MOR_TGT_CHILD);
	rc = dt_record_write(env, dt_obj, &arg->u.write.buf,
			     &arg->u.write.pos, th);
	dt_write_unlock(env, dt_obj);

	if (rc == 0)
		rc = arg->u.write.buf.lb_len;

	object_update_result_insert(arg->reply, NULL, 0, arg->index, rc);

	return rc > 0 ? 0 : rc;
}
Beispiel #5
0
static int write_capa_keys(const struct lu_env *env,
                           struct mdt_device *mdt,
                           struct lustre_capa_key *keys)
{
        struct mdt_thread_info *mti;
        struct lustre_capa_key *tmp;
        struct thandle *th;
        loff_t off = 0;
        int i, rc;

        mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
	th = dt_trans_create(env, mdt->mdt_bottom);
	if (IS_ERR(th))
		RETURN(PTR_ERR(th));

	rc = dt_declare_record_write(env, mdt->mdt_ck_obj,
				     mdt_buf_const(env, NULL,
				     sizeof(*tmp) * 3), 0, th);
	if (rc)
		goto stop;

	rc = dt_trans_start_local(env, mdt->mdt_bottom, th);
        if (rc)
                goto stop;

        tmp = &mti->mti_capa_key;

        for (i = 0; i < 2; i++) {
                lck_cpu_to_le(tmp, &keys[i]);

                rc = dt_record_write(env, mdt->mdt_ck_obj,
                                     mdt_buf_const(env, tmp, sizeof(*tmp)),
                                     &off, th);
                if (rc)
                        break;
        }

stop:
	dt_trans_stop(env, mdt->mdt_bottom, th);

        CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
        return rc;
}
Beispiel #6
0
int lfsck_bookmark_store(const struct lu_env *env, struct lfsck_instance *lfsck)
{
	struct thandle    *handle;
	struct dt_object  *obj    = lfsck->li_bookmark_obj;
	struct dt_device  *dev    = lfsck_obj2dev(obj);
	loff_t		   pos    = 0;
	int		   len    = sizeof(struct lfsck_bookmark);
	int		   rc;
	ENTRY;

	lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk,
				 &lfsck->li_bookmark_ram);
	handle = dt_trans_create(env, dev);
	if (IS_ERR(handle))
		GOTO(log, rc = PTR_ERR(handle));

	rc = dt_declare_record_write(env, obj,
				     lfsck_buf_get(env,
				     &lfsck->li_bookmark_disk, len),
				     0, handle);
	if (rc != 0)
		GOTO(out, rc);

	rc = dt_trans_start_local(env, dev, handle);
	if (rc != 0)
		GOTO(out, rc);

	rc = dt_record_write(env, obj,
			     lfsck_buf_get(env, &lfsck->li_bookmark_disk, len),
			     &pos, handle);

	GOTO(out, rc);

out:
	dt_trans_stop(env, dev, handle);

log:
	if (rc != 0)
		CDEBUG(D_LFSCK, "%s: fail to store lfsck_bookmark: rc = %d\n",
		       lfsck_lfsck2name(lfsck), rc);
	return rc;
}
Beispiel #7
0
/**
 * Write a padding record to the llog
 *
 * This function writes a padding record to the end of llog. That may
 * be needed if llog contains records of variable size, e.g. config logs
 * or changelogs.
 * The padding record just aligns llog to the LLOG_CHUNK_SIZE boundary if
 * the current record doesn't fit in the remaining space.
 *
 * It allocates full length to avoid two separate writes for header and tail.
 * Such 2-steps scheme needs extra protection and complex error handling.
 *
 * \param[in]     env	execution environment
 * \param[in]     o	dt_object to create
 * \param[in,out] off	pointer to the padding start offset
 * \param[in]     len	padding length
 * \param[in]     index	index of the padding record in a llog
 * \param[in]     th	current transaction handle
 *
 * \retval		0 on successful padding write
 * \retval		negative error if write failed
 */
static int llog_osd_pad(const struct lu_env *env, struct dt_object *o,
			loff_t *off, int len, int index, struct thandle *th)
{
	struct llog_thread_info	*lgi = llog_info(env);
	struct llog_rec_hdr	*rec;
	struct llog_rec_tail	*tail;
	int			 rc;

	ENTRY;

	LASSERT(th);
	LASSERT(off);
	LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);

	OBD_ALLOC(rec, len);
	if (rec == NULL)
		RETURN(-ENOMEM);

	rec->lrh_len = len;
	rec->lrh_index = index;
	rec->lrh_type = LLOG_PAD_MAGIC;

	tail = rec_tail(rec);
	tail->lrt_len = len;
	tail->lrt_index = index;

	lgi->lgi_buf.lb_buf = rec;
	lgi->lgi_buf.lb_len = len;
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc)
		CERROR("%s: error writing padding record: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, rc);

	OBD_FREE(rec, len);
	RETURN(rc);
}
Beispiel #8
0
/**
 * Implementation of the llog_operations::lop_write
 *
 * This function writes the new record in the llog or modify the existed one.
 *
 * \param[in]  env		execution environment
 * \param[in]  loghandle	llog handle of the current llog
 * \param[in]  rec		llog record header. This is a real header of
 *				the full llog record to write. This is
 *				the beginning of buffer to write, the length
 *				of buffer is stored in \a rec::lrh_len
 * \param[out] reccookie	pointer to the cookie to return back if needed.
 *				It is used for further cancel of this llog
 *				record.
 * \param[in]  idx		index of the llog record. If \a idx == -1 then
 *				this is append case, otherwise \a idx is
 *				the index of record to modify
 * \param[in]  th		current transaction handle
 *
 * \retval			0 on successful write && \a reccookie == NULL
 *				1 on successful write && \a reccookie != NULL
 * \retval			negative error if write failed
 */
static int llog_osd_write_rec(const struct lu_env *env,
			      struct llog_handle *loghandle,
			      struct llog_rec_hdr *rec,
			      struct llog_cookie *reccookie,
			      int idx, struct thandle *th)
{
	struct llog_thread_info	*lgi = llog_info(env);
	struct llog_log_hdr	*llh;
	int			 reclen = rec->lrh_len;
	int			 index, rc;
	struct llog_rec_tail	*lrt;
	struct dt_object	*o;
	size_t			 left;
	bool			 header_is_updated = false;

	ENTRY;

	LASSERT(env);
	llh = loghandle->lgh_hdr;
	LASSERT(llh);
	o = loghandle->lgh_obj;
	LASSERT(o);
	LASSERT(th);

	CDEBUG(D_OTHER, "new record %x to "DFID"\n",
	       rec->lrh_type, PFID(lu_object_fid(&o->do_lu)));

	/* record length should not bigger than LLOG_CHUNK_SIZE */
	if (reclen > LLOG_CHUNK_SIZE)
		RETURN(-E2BIG);

	rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL);
	if (rc)
		RETURN(rc);

	/**
	 * The modification case.
	 * If idx set then the record with that index must be modified.
	 * There are three cases possible:
	 * 1) the common case is the llog header update (idx == 0)
	 * 2) the llog record modification during llog process.
	 *    This is indicated by the \a loghandle::lgh_cur_idx > 0.
	 *    In that case the \a loghandle::lgh_cur_offset
	 * 3) otherwise this is assumed that llog consist of records of
	 *    fixed size, i.e. catalog. The llog header must has llh_size
	 *    field equal to record size. The record offset is calculated
	 *    just by /a idx value
	 *
	 * During modification we don't need extra header update because
	 * the bitmap and record count are not changed. The record header
	 * and tail remains the same too.
	 */
	if (idx != LLOG_NEXT_IDX) {
		/* llog can be empty only when first record is being written */
		LASSERT(ergo(idx > 0, lgi->lgi_attr.la_size > 0));

		if (!ext2_test_bit(idx, llh->llh_bitmap)) {
			CERROR("%s: modify unset record %u\n",
			       o->do_lu.lo_dev->ld_obd->obd_name, idx);
			RETURN(-ENOENT);
		}

		if (idx != rec->lrh_index) {
			CERROR("%s: modify index mismatch %d %u\n",
			       o->do_lu.lo_dev->ld_obd->obd_name, idx,
			       rec->lrh_index);
			RETURN(-EFAULT);
		}

		if (idx == LLOG_HEADER_IDX) {
			/* llog header update */
			LASSERT(reclen == sizeof(struct llog_log_hdr));
			LASSERT(rec == &llh->llh_hdr);

			lgi->lgi_off = 0;
			lgi->lgi_buf.lb_len = reclen;
			lgi->lgi_buf.lb_buf = rec;
			rc = dt_record_write(env, o, &lgi->lgi_buf,
					     &lgi->lgi_off, th);
			RETURN(rc);
		} else if (loghandle->lgh_cur_idx > 0) {
			/**
			 * The lgh_cur_offset can be used only if index is
			 * the same.
			 */
			if (idx != loghandle->lgh_cur_idx) {
				CERROR("%s: modify index mismatch %d %d\n",
				       o->do_lu.lo_dev->ld_obd->obd_name, idx,
				       loghandle->lgh_cur_idx);
				RETURN(-EFAULT);
			}

			lgi->lgi_off = loghandle->lgh_cur_offset;
			CDEBUG(D_OTHER, "modify record "DOSTID": idx:%d, "
			       "len:%u offset %llu\n",
			       POSTID(&loghandle->lgh_id.lgl_oi), idx,
			       rec->lrh_len, (long long)lgi->lgi_off);
		} else if (llh->llh_size > 0) {
			if (llh->llh_size != rec->lrh_len) {
				CERROR("%s: wrong record size, llh_size is %u"
				       " but record size is %u\n",
				       o->do_lu.lo_dev->ld_obd->obd_name,
				       llh->llh_size, rec->lrh_len);
				RETURN(-EINVAL);
			}
			lgi->lgi_off = sizeof(*llh) + (idx - 1) * reclen;
		} else {
			/* This can be result of lgh_cur_idx is not set during
			 * llog processing or llh_size is not set to proper
			 * record size for fixed records llog. Therefore it is
			 * impossible to get record offset. */
			CERROR("%s: can't get record offset, idx:%d, "
			       "len:%u.\n", o->do_lu.lo_dev->ld_obd->obd_name,
			       idx, rec->lrh_len);
			RETURN(-EFAULT);
		}

		/* update only data, header and tail remain the same */
		lgi->lgi_off += sizeof(struct llog_rec_hdr);
		lgi->lgi_buf.lb_len = REC_DATA_LEN(rec);
		lgi->lgi_buf.lb_buf = REC_DATA(rec);
		rc = dt_record_write(env, o, &lgi->lgi_buf, &lgi->lgi_off, th);
		if (rc == 0 && reccookie) {
			reccookie->lgc_lgl = loghandle->lgh_id;
			reccookie->lgc_index = idx;
			rc = 1;
		}
		RETURN(rc);
	}

	/**
	 * The append case.
	 * The most common case of using llog. The new index is assigned to
	 * the new record, new bit is set in llog bitmap and llog count is
	 * incremented.
	 *
	 * Make sure that records don't cross a chunk boundary, so we can
	 * process them page-at-a-time if needed.  If it will cross a chunk
	 * boundary, write in a fake (but referenced) entry to pad the chunk.
	 */
	LASSERT(lgi->lgi_attr.la_valid & LA_SIZE);
	lgi->lgi_off = lgi->lgi_attr.la_size;
	left = LLOG_CHUNK_SIZE - (lgi->lgi_off & (LLOG_CHUNK_SIZE - 1));
	/* NOTE: padding is a record, but no bit is set */
	if (left != 0 && left != reclen &&
	    left < (reclen + LLOG_MIN_REC_SIZE)) {
		index = loghandle->lgh_last_idx + 1;
		rc = llog_osd_pad(env, o, &lgi->lgi_off, left, index, th);
		if (rc)
			RETURN(rc);
		loghandle->lgh_last_idx++; /* for pad rec */
	}
	/* if it's the last idx in log file, then return -ENOSPC */
	if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1)
		RETURN(-ENOSPC);

	/* increment the last_idx along with llh_tail index, they should
	 * be equal for a llog lifetime */
	loghandle->lgh_last_idx++;
	index = loghandle->lgh_last_idx;
	llh->llh_tail.lrt_index = index;
	/**
	 * NB: the caller should make sure only 1 process access
	 * the lgh_last_idx, e.g. append should be exclusive.
	 * Otherwise it might hit the assert.
	 */
	LASSERT(index < LLOG_BITMAP_SIZE(llh));
	rec->lrh_index = index;
	lrt = rec_tail(rec);
	lrt->lrt_len = rec->lrh_len;
	lrt->lrt_index = rec->lrh_index;

	/* the lgh_hdr_lock protects llog header data from concurrent
	 * update/cancel, the llh_count and llh_bitmap are protected */
	spin_lock(&loghandle->lgh_hdr_lock);
	if (ext2_set_bit(index, llh->llh_bitmap)) {
		CERROR("%s: index %u already set in log bitmap\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, index);
		spin_unlock(&loghandle->lgh_hdr_lock);
		LBUG(); /* should never happen */
	}
	llh->llh_count++;
	spin_unlock(&loghandle->lgh_hdr_lock);

	lgi->lgi_off = 0;
	lgi->lgi_buf.lb_len = llh->llh_hdr.lrh_len;
	lgi->lgi_buf.lb_buf = &llh->llh_hdr;
	rc = dt_record_write(env, o, &lgi->lgi_buf, &lgi->lgi_off, th);
	if (rc)
		GOTO(out, rc);

	header_is_updated = true;
	rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL);
	if (rc)
		GOTO(out, rc);

	LASSERT(lgi->lgi_attr.la_valid & LA_SIZE);
	lgi->lgi_off = lgi->lgi_attr.la_size;
	lgi->lgi_buf.lb_len = reclen;
	lgi->lgi_buf.lb_buf = rec;
	rc = dt_record_write(env, o, &lgi->lgi_buf, &lgi->lgi_off, th);
	if (rc < 0)
		GOTO(out, rc);

	CDEBUG(D_OTHER, "added record "DOSTID": idx: %u, %u\n",
	       POSTID(&loghandle->lgh_id.lgl_oi), index, rec->lrh_len);
	if (reccookie != NULL) {
		reccookie->lgc_lgl = loghandle->lgh_id;
		reccookie->lgc_index = index;
		if ((rec->lrh_type == MDS_UNLINK_REC) ||
		    (rec->lrh_type == MDS_SETATTR64_REC))
			reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
		else if (rec->lrh_type == OST_SZ_REC)
			reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
		else
			reccookie->lgc_subsys = -1;
		rc = 1;
	}
	RETURN(rc);
out:
	/* cleanup llog for error case */
	spin_lock(&loghandle->lgh_hdr_lock);
	ext2_clear_bit(index, llh->llh_bitmap);
	llh->llh_count--;
	spin_unlock(&loghandle->lgh_hdr_lock);

	/* restore llog last_idx */
	loghandle->lgh_last_idx--;
	llh->llh_tail.lrt_index = loghandle->lgh_last_idx;

	/* restore the header on disk if it was written */
	if (header_is_updated) {
		lgi->lgi_off = 0;
		lgi->lgi_buf.lb_len = llh->llh_hdr.lrh_len;
		lgi->lgi_buf.lb_buf = &llh->llh_hdr;
		dt_record_write(env, o, &lgi->lgi_buf, &lgi->lgi_off, th);
	}

	RETURN(rc);
}
Beispiel #9
0
static int llog_osd_write_blob(const struct lu_env *env, struct dt_object *o,
			       struct llog_rec_hdr *rec, void *buf,
			       loff_t *off, struct thandle *th)
{
	struct llog_thread_info	*lgi = llog_info(env);
	int			 buflen = rec->lrh_len;
	int			 rc;

	ENTRY;

	LASSERT(env);
	LASSERT(o);

	if (buflen == 0)
		CWARN("0-length record\n");

	CDEBUG(D_OTHER, "write blob with type %x, buf %p/%u at off %llu\n",
	       rec->lrh_type, buf, buflen, *off);

	if (!buf) {
		lgi->lgi_buf.lb_len = buflen;
		lgi->lgi_buf.lb_buf = rec;
		rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
		if (rc)
			CERROR("%s: error writing log record: rc = %d\n",
			       o->do_lu.lo_dev->ld_obd->obd_name, rc);
		RETURN(rc);
	}

	/* the buf case */
	/* protect the following 3 writes from concurrent read */
	dt_write_lock(env, o, 0);
	rec->lrh_len = sizeof(*rec) + buflen + sizeof(lgi->lgi_tail);
	lgi->lgi_buf.lb_len = sizeof(*rec);
	lgi->lgi_buf.lb_buf = rec;
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc) {
		CERROR("%s: error writing log hdr: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, rc);
		GOTO(out, rc);
	}

	lgi->lgi_buf.lb_len = buflen;
	lgi->lgi_buf.lb_buf = buf;
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc) {
		CERROR("%s: error writing log buffer: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name,  rc);
		GOTO(out, rc);
	}

	lgi->lgi_tail.lrt_len = rec->lrh_len;
	lgi->lgi_tail.lrt_index = rec->lrh_index;
	lgi->lgi_buf.lb_len = sizeof(lgi->lgi_tail);
	lgi->lgi_buf.lb_buf = &lgi->lgi_tail;
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc)
		CERROR("%s: error writing log tail: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, rc);
out:
	dt_write_unlock(env, o);
	RETURN(rc);
}
Beispiel #10
0
int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
			  obd_id id, struct ofd_seq *oseq, int nr, int sync)
{
	struct ofd_thread_info	*info = ofd_info(env);
	struct ofd_object	*fo = NULL;
	struct dt_object	*next;
	struct thandle		*th;
	struct ofd_object	**batch;
	struct lu_fid		*fid = &info->fti_fid;
	obd_id			 tmp;
	int			 rc;
	int			 i;
	int			 objects = 0;
	int			 nr_saved = nr;

	ENTRY;

	/* Don't create objects beyond the valid range for this SEQ */
	if (unlikely(fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
		     (id + nr) >= IDIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the IDIF_MAX_OID (1<<48)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	} else if (unlikely(!fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
			    (id + nr) >= OBIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the OBIF_MAX_OID (1<<32)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	}

	OBD_ALLOC(batch, nr_saved * sizeof(struct ofd_object *));
	if (batch == NULL)
		RETURN(-ENOMEM);

	info->fti_attr.la_valid = LA_TYPE | LA_MODE;
	/*
	 * We mark object SUID+SGID to flag it for accepting UID+GID from
	 * client on first write.  Currently the permission bits on the OST are
	 * never used, so this is OK.
	 */
	info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666;
	info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);

	/* Initialize a/c/m time so any client timestamp will always
	 * be newer and update the inode. ctime = 0 is also handled
	 * specially in osd_inode_setattr(). See LU-221, LU-1042 */
	info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME;
	info->fti_attr.la_atime = 0;
	info->fti_attr.la_mtime = 0;
	info->fti_attr.la_ctime = 0;

	LASSERT(id != 0);

	/* prepare objects */
	*fid = *lu_object_fid(&oseq->os_lastid_obj->do_lu);
	for (i = 0; i < nr; i++) {
		rc = fid_set_id(fid, id + i);
		if (rc != 0) {
			if (i == 0)
				GOTO(out, rc);

			nr = i;
			break;
		}

		fo = ofd_object_find(env, ofd, fid);
		if (IS_ERR(fo)) {
			if (i == 0)
				GOTO(out, rc = PTR_ERR(fo));

			nr = i;
			break;
		}

		ofd_write_lock(env, fo);
		batch[i] = fo;
	}
	info->fti_buf.lb_buf = &tmp;
	info->fti_buf.lb_len = sizeof(tmp);
	info->fti_off = 0;

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	th->th_sync |= sync;

	rc = dt_declare_record_write(env, oseq->os_lastid_obj, &info->fti_buf,
				     info->fti_off, th);
	if (rc)
		GOTO(trans_stop, rc);

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		if (unlikely(ofd_object_exists(fo))) {
			/* object may exist being re-created by write replay */
			CDEBUG(D_INODE, "object "LPX64"/"LPX64" exists: "
			       DFID"\n", ostid_seq(&oseq->os_oi), id,
			       PFID(lu_object_fid(&fo->ofo_obj.do_lu)));
			continue;
		}

		next = ofd_object_child(fo);
		LASSERT(next != NULL);

		rc = dt_declare_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
		if (rc) {
			nr = i;
			break;
		}
	}

	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc)
		GOTO(trans_stop, rc);

	CDEBUG(D_OTHER, "%s: create new object "DFID" nr %d\n",
	       ofd_name(ofd), PFID(fid), nr);

	LASSERT(nr > 0);

	 /* When the LFSCK scanning the whole device to verify the LAST_ID file
	  * consistency, it will load the last_id into RAM firstly, and compare
	  * the last_id with each OST-object's ID. If the later one is larger,
	  * then it will regard the LAST_ID file crashed. But during the LFSCK
	  * scanning, the OFD may continue to create new OST-objects. Those new
	  * created OST-objects will have larger IDs than the LFSCK known ones.
	  * So from the LFSCK view, it needs to re-load the last_id from disk
	  * file, and if the latest last_id is still smaller than the object's
	  * ID, then the LAST_ID file is real crashed.
	  *
	  * To make above mechanism to work, before OFD pre-create OST-objects,
	  * it needs to update the LAST_ID file firstly, otherwise, the LFSCK
	  * may cannot get latest last_id although new OST-object created. */
	if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_SKIP_LASTID)) {
		tmp = cpu_to_le64(id + nr - 1);
		dt_write_lock(env, oseq->os_lastid_obj, 0);
		rc = dt_record_write(env, oseq->os_lastid_obj,
				     &info->fti_buf, &info->fti_off, th);
		dt_write_unlock(env, oseq->os_lastid_obj);
		if (rc != 0)
			GOTO(trans_stop, rc);
	}

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		/* Only the new created objects need to be recorded. */
		if (ofd->ofd_osd->dd_record_fid_accessed) {
			lfsck_pack_rfa(&ofd_info(env)->fti_lr,
				       lu_object_fid(&fo->ofo_obj.do_lu));
			lfsck_in_notify(env, ofd->ofd_osd,
					&ofd_info(env)->fti_lr);
		}

		if (likely(!ofd_object_exists(fo) &&
			   !OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING))) {
			next = ofd_object_child(fo);
			LASSERT(next != NULL);

			rc = dt_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
			if (rc)
				break;
			LASSERT(ofd_object_exists(fo));
		}
		ofd_seq_last_oid_set(oseq, id + i);
	}

	objects = i;
	/* NOT all the wanted objects have been created,
	 * set the LAST_ID as the real created. */
	if (unlikely(objects < nr)) {
		int rc1;

		info->fti_off = 0;
		tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
		dt_write_lock(env, oseq->os_lastid_obj, 0);
		rc1 = dt_record_write(env, oseq->os_lastid_obj,
				      &info->fti_buf, &info->fti_off, th);
		dt_write_unlock(env, oseq->os_lastid_obj);
		if (rc1 != 0)
			CERROR("%s: fail to reset the LAST_ID for seq ("LPX64
			       ") from "LPU64" to "LPU64"\n", ofd_name(ofd),
			       ostid_seq(&oseq->os_oi), id + nr - 1,
			       ofd_seq_last_oid(oseq));
	}

trans_stop:
	ofd_trans_stop(env, ofd, th, rc);
out:
	for (i = 0; i < nr_saved; i++) {
		fo = batch[i];
		if (fo) {
			ofd_write_unlock(env, fo);
			ofd_object_put(env, fo);
		}
	}
	OBD_FREE(batch, nr_saved * sizeof(struct ofd_object *));

	CDEBUG((objects == 0 && rc == 0) ? D_ERROR : D_OTHER,
	       "created %d/%d objects: %d\n", objects, nr_saved, rc);

	LASSERT(ergo(objects == 0, rc < 0));
	RETURN(objects > 0 ? objects : rc);
}
Beispiel #11
0
int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
			  obd_id id, struct ofd_seq *oseq, int nr, int sync)
{
	struct ofd_thread_info	*info = ofd_info(env);
	struct ofd_object	*fo = NULL;
	struct dt_object	*next;
	struct thandle		*th;
	struct ofd_object	**batch;
	obd_id			 tmp;
	int			 rc;
	int			 i;
	int			 objects = 0;
	int			 nr_saved = nr;

	ENTRY;

	/* Don't create objects beyond the valid range for this SEQ */
	if (unlikely(fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
		     (id + nr) >= IDIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the IDIF_MAX_OID (1<<48)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	} else if (unlikely(!fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
			    (id + nr) >= OBIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the OBIF_MAX_OID (1<<32)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	}

	OBD_ALLOC(batch, nr_saved * sizeof(struct ofd_object *));
	if (batch == NULL)
		RETURN(-ENOMEM);

	info->fti_attr.la_valid = LA_TYPE | LA_MODE;
	/*
	 * We mark object SUID+SGID to flag it for accepting UID+GID from
	 * client on first write.  Currently the permission bits on the OST are
	 * never used, so this is OK.
	 */
	info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666;
	info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);

	/* Initialize a/c/m time so any client timestamp will always
	 * be newer and update the inode. ctime = 0 is also handled
	 * specially in osd_inode_setattr(). See LU-221, LU-1042 */
	info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME;
	info->fti_attr.la_atime = 0;
	info->fti_attr.la_mtime = 0;
	info->fti_attr.la_ctime = 0;

	/* prepare objects */
	ostid_set_seq(&info->fti_ostid, ostid_seq(&oseq->os_oi));
	for (i = 0; i < nr; i++) {
		ostid_set_id(&info->fti_ostid, id + i);
		rc = ostid_to_fid(&info->fti_fid, &info->fti_ostid, 0);
		if (rc) {
			if (i == 0)
				GOTO(out, rc);

			nr = i;
			break;
		}

		fo = ofd_object_find(env, ofd, &info->fti_fid);
		if (IS_ERR(fo)) {
			if (i == 0)
				GOTO(out, rc = PTR_ERR(fo));

			nr = i;
			break;
		}

		ofd_write_lock(env, fo);
		batch[i] = fo;
	}
	info->fti_buf.lb_buf = &tmp;
	info->fti_buf.lb_len = sizeof(tmp);
	info->fti_off = 0;

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	th->th_sync |= sync;

	rc = dt_declare_record_write(env, oseq->os_lastid_obj, sizeof(tmp),
				     info->fti_off, th);
	if (rc)
		GOTO(trans_stop, rc);

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		if (unlikely(ofd_object_exists(fo))) {
			/* object may exist being re-created by write replay */
			CDEBUG(D_INODE, "object "LPX64"/"LPX64" exists: "
			       DFID"\n", ostid_seq(&oseq->os_oi), id,
			       PFID(&info->fti_fid));
			continue;
		}

		next = ofd_object_child(fo);
		LASSERT(next != NULL);

		rc = dt_declare_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
		if (rc) {
			nr = i;
			break;
		}
	}

	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc)
		GOTO(trans_stop, rc);

	CDEBUG(D_OTHER, "%s: create new object "DFID" nr %d\n",
	       ofd_name(ofd), PFID(&info->fti_fid), nr);

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		if (likely(!ofd_object_exists(fo))) {
			next = ofd_object_child(fo);
			LASSERT(next != NULL);

			rc = dt_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
			if (rc)
				break;
			LASSERT(ofd_object_exists(fo));
		}
		ofd_seq_last_oid_set(oseq, id + i);
	}

	objects = i;
	if (objects > 0) {
		tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
		rc = dt_record_write(env, oseq->os_lastid_obj,
				     &info->fti_buf, &info->fti_off, th);
	}
trans_stop:
	ofd_trans_stop(env, ofd, th, rc);
out:
	for (i = 0; i < nr_saved; i++) {
		fo = batch[i];
		if (fo) {
			ofd_write_unlock(env, fo);
			ofd_object_put(env, fo);
		}
	}
	OBD_FREE(batch, nr_saved * sizeof(struct ofd_object *));

	CDEBUG((objects == 0 && rc == 0) ? D_ERROR : D_OTHER,
	       "created %d/%d objects: %d\n", objects, nr_saved, rc);

	LASSERT(ergo(objects == 0, rc < 0));
	RETURN(objects > 0 ? objects : rc);
}
Beispiel #12
0
static int osp_object_create(const struct lu_env *env, struct dt_object *dt,
			     struct lu_attr *attr,
			     struct dt_allocation_hint *hint,
			     struct dt_object_format *dof, struct thandle *th)
{
	struct osp_thread_info	*osi = osp_env_info(env);
	struct osp_device	*d = lu2osp_dev(dt->do_lu.lo_dev);
	struct osp_object	*o = dt2osp_obj(dt);
	int			rc = 0;
	struct lu_fid		*fid = &osi->osi_fid;
	ENTRY;

	if (o->opo_reserved) {
		/* regular case, fid is assigned holding trunsaction open */
		 osp_object_assign_fid(env, d, o);
	}

	memcpy(fid, lu_object_fid(&dt->do_lu), sizeof(*fid));

	LASSERTF(fid_is_sane(fid), "fid for osp_obj %p is insane"DFID"!\n",
		 osp_obj, PFID(fid));

	if (!o->opo_reserved) {
		/* special case, id was assigned outside of transaction
		 * see comments in osp_declare_attr_set */
		spin_lock(&d->opd_pre_lock);
		osp_update_last_fid(d, fid);
		spin_unlock(&d->opd_pre_lock);
	}

	CDEBUG(D_INODE, "fid for osp_obj %p is "DFID"!\n", osp_obj, PFID(fid));

	/* If the precreate ends, it means it will be ready to rollover to
	 * the new sequence soon, all the creation should be synchronized,
	 * otherwise during replay, the replay fid will be inconsistent with
	 * last_used/create fid */
	if (osp_precreate_end_seq(env, d) && osp_is_fid_client(d))
		th->th_sync = 1;

	/*
	 * it's OK if the import is inactive by this moment - id was created
	 * by OST earlier, we just need to maintain it consistently on the disk
	 * once import is reconnected, OSP will claim this and other objects
	 * used and OST either keep them, if they exist or recreate
	 */

	/* we might have lost precreated objects */
	if (unlikely(d->opd_gap_count) > 0) {
		spin_lock(&d->opd_pre_lock);
		if (d->opd_gap_count > 0) {
			int count = d->opd_gap_count;

			ostid_set_id(&osi->osi_oi,
				     fid_oid(&d->opd_gap_start_fid));
			d->opd_gap_count = 0;
			spin_unlock(&d->opd_pre_lock);

			CDEBUG(D_HA, "Writting gap "DFID"+%d in llog\n",
			       PFID(&d->opd_gap_start_fid), count);
			/* real gap handling is disabled intil ORI-692 will be
			 * fixed, now we only report gaps */
		} else {
			spin_unlock(&d->opd_pre_lock);
		}
	}

	/* new object, the very first ->attr_set()
	 * initializing attributes needs no logging */
	o->opo_new = 1;

	/* Only need update last_used oid file, seq file will only be update
	 * during seq rollover */
	osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off,
			   &d->opd_last_used_fid.f_oid, d->opd_index);

	rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb,
			     &osi->osi_off, th);

	CDEBUG(D_HA, "%s: Wrote last used FID: "DFID", index %d: %d\n",
	       d->opd_obd->obd_name, PFID(fid), d->opd_index, rc);

	RETURN(rc);
}
Beispiel #13
0
static int llog_osd_write_blob(const struct lu_env *env, struct dt_object *o,
			       struct llog_rec_hdr *rec, void *buf,
			       loff_t *off, struct thandle *th)
{
	struct llog_thread_info	*lgi = llog_info(env);
	int			 buflen = rec->lrh_len;
	int			 rc;

	LASSERT(env);
	LASSERT(o);

	if (buflen == 0)
		CWARN("0-length record\n");

	CDEBUG(D_OTHER, "write blob with type %x, buf %p/%u at off %llu\n",
	       rec->lrh_type, buf, buflen, *off);

	lgi->lgi_attr.la_valid = LA_SIZE;
	lgi->lgi_attr.la_size = *off;

	if (!buf) {
		lgi->lgi_buf.lb_len = buflen;
		lgi->lgi_buf.lb_buf = rec;
		rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
		if (rc)
			CERROR("%s: error writing log record: rc = %d\n",
			       o->do_lu.lo_dev->ld_obd->obd_name, rc);
		GOTO(out, rc);
	}

	/* the buf case */
	/* protect the following 3 writes from concurrent read */
	dt_write_lock(env, o, 0);
	rec->lrh_len = sizeof(*rec) + buflen + sizeof(lgi->lgi_tail);
	lgi->lgi_buf.lb_len = sizeof(*rec);
	lgi->lgi_buf.lb_buf = rec;
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc) {
		CERROR("%s: error writing log hdr: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, rc);
		GOTO(out_unlock, rc);
	}

	lgi->lgi_buf.lb_len = buflen;
	lgi->lgi_buf.lb_buf = buf;
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc) {
		CERROR("%s: error writing log buffer: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name,  rc);
		GOTO(out_unlock, rc);
	}

	lgi->lgi_tail.lrt_len = rec->lrh_len;
	lgi->lgi_tail.lrt_index = rec->lrh_index;
	lgi->lgi_buf.lb_len = sizeof(lgi->lgi_tail);
	lgi->lgi_buf.lb_buf = &lgi->lgi_tail;
	rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
	if (rc)
		CERROR("%s: error writing log tail: rc = %d\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, rc);

out_unlock:
	dt_write_unlock(env, o);

out:
	/* cleanup the content written above */
	if (rc) {
		dt_punch(env, o, lgi->lgi_attr.la_size, OBD_OBJECT_EOF, th,
			 BYPASS_CAPA);
		dt_attr_set(env, o, &lgi->lgi_attr, th, BYPASS_CAPA);
	}

	return rc;
}