Example #1
File: rw26.c Project: rread/lustre
static inline int ll_get_user_pages(int rw, unsigned long user_addr,
                                    size_t size, struct page ***pages,
                                    int *max_pages)
{
    int result = -ENOMEM;

    /* set an arbitrary limit to prevent arithmetic overflow */
    if (size > MAX_DIRECTIO_SIZE) {
        *pages = NULL;
        return -EFBIG;
    }

    *max_pages = (user_addr + size + PAGE_SIZE - 1) >>
                 PAGE_SHIFT;
    *max_pages -= user_addr >> PAGE_SHIFT;

    OBD_ALLOC_LARGE(*pages, *max_pages * sizeof(**pages));
    if (*pages) {
        down_read(&current->mm->mmap_sem);
        result = get_user_pages(current, current->mm, user_addr,
                                *max_pages, (rw == READ), 0, *pages,
                                NULL);
        up_read(&current->mm->mmap_sem);
        if (unlikely(result <= 0))
            OBD_FREE_LARGE(*pages, *max_pages * sizeof(**pages));
    }

    return result;
}
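A minimal caller sketch (hypothetical, not taken from the Lustre tree) showing how this helper is meant to pair with ll_free_user_pages() from Example #15: the page array allocated here must be handed back, together with the page count, to the matching free path.

/* Hypothetical usage sketch: pin the user buffer, submit I/O, release pages.
 * demo_submit_io() is a placeholder, not a real Lustre function. */
static ssize_t demo_direct_rw(int rw, unsigned long user_addr, size_t size)
{
    struct page **pages;
    int max_pages;
    int pinned;

    pinned = ll_get_user_pages(rw, user_addr, size, &pages, &max_pages);
    if (pinned <= 0)
        return pinned ? pinned : -EFAULT;

    /* ... demo_submit_io(rw, pages, pinned) ... */

    /* only mark pages dirty when data was copied into them (READ) */
    ll_free_user_pages(pages, max_pages, rw == READ);
    return size;
}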
Example #2
struct lov_stripe_md *lsm_alloc_plain(__u16 stripe_count, int *size)
{
	struct lov_stripe_md *lsm;
	struct lov_oinfo     *loi;
	int		   i, oinfo_ptrs_size;

	LASSERT(stripe_count <= LOV_MAX_STRIPE_COUNT);

	oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count;
	*size = sizeof(struct lov_stripe_md) + oinfo_ptrs_size;

	OBD_ALLOC_LARGE(lsm, *size);
	if (!lsm)
		return NULL;

	for (i = 0; i < stripe_count; i++) {
		OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, __GFP_IO);
		if (loi == NULL)
			goto err;
		lsm->lsm_oinfo[i] = loi;
	}
	lsm->lsm_stripe_count = stripe_count;
	return lsm;

err:
	while (--i >= 0)
		OBD_SLAB_FREE(lsm->lsm_oinfo[i], lov_oinfo_slab, sizeof(*loi));
	OBD_FREE_LARGE(lsm, *size);
	return NULL;
}
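A hypothetical caller sketch (not from the Lustre tree) showing the intended pairing: lsm_alloc_plain() allocates the descriptor plus the per-stripe lov_oinfo slabs, and lsm_free_plain() from Example #7 is the matching release path.

/* Hypothetical sketch of the allocate/initialize/free life cycle. */
static struct lov_stripe_md *demo_lsm_setup(__u16 stripes)
{
	struct lov_stripe_md *lsm;
	int lsm_size;

	lsm = lsm_alloc_plain(stripes, &lsm_size);
	if (lsm == NULL)
		return NULL;

	/* ... fill in lsm fields and each lsm->lsm_oinfo[i] here ... */

	return lsm;	/* released later with lsm_free_plain(lsm) */
}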
Example #3
static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count)
{
	int rc;

	if (lsm_op_find(le32_to_cpu(*(__u32 *)lmm)) == NULL) {
		char *buffer;
		int sz;

		CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
		       le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
		sz = lmm_bytes * 2 + 1;
		OBD_ALLOC_LARGE(buffer, sz);
		if (buffer != NULL) {
			int i;

			for (i = 0; i < lmm_bytes; i++)
				sprintf(buffer+2*i, "%.2X", ((unsigned char *)lmm)[i]);
			buffer[sz - 1] = '\0';
			CERROR("%s\n", buffer);
			OBD_FREE_LARGE(buffer, sz);
		}
		return -EINVAL;
	}
	rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
				     lmm_bytes, stripe_count);
	return rc;
}
Example #4
static inline void enc_pools_free(void)
{
    LASSERT(page_pools.epp_max_pools);
    LASSERT(page_pools.epp_pools);

    OBD_FREE_LARGE(page_pools.epp_pools,
                   page_pools.epp_max_pools *
                   sizeof(*page_pools.epp_pools));
}
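The allocation side of this pool array is not shown on this page; a rough sketch of what it presumably looks like (the real counterpart lives in ptlrpc/sec_bulk.c and may differ in detail):

/* Sketch of the matching allocation: the same epp_max_pools * sizeof(...)
 * size is used here and in enc_pools_free() above. */
static inline int demo_enc_pools_alloc(void)
{
    LASSERT(page_pools.epp_max_pools);
    OBD_ALLOC_LARGE(page_pools.epp_pools,
                    page_pools.epp_max_pools *
                    sizeof(*page_pools.epp_pools));
    return page_pools.epp_pools == NULL ? -ENOMEM : 0;
}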
Example #5
static
void null_free_rs(struct ptlrpc_reply_state *rs)
{
	LASSERT_ATOMIC_GT(&rs->rs_svc_ctx->sc_refcount, 1);
	atomic_dec(&rs->rs_svc_ctx->sc_refcount);

	if (!rs->rs_prealloc)
		OBD_FREE_LARGE(rs, rs->rs_size);
}
Example #6
static
void null_free_repbuf(struct ptlrpc_sec *sec,
		      struct ptlrpc_request *req)
{
	LASSERT(req->rq_repbuf);

	OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
	req->rq_repbuf = NULL;
	req->rq_repbuf_len = 0;
}
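The matching allocation path is not included here. As a rough sketch (hypothetical name; the real null_alloc_repbuf() in ptlrpc/sec_null.c may differ), the essential pattern is that the allocated length is recorded in rq_repbuf_len so the free above can hand the exact size back to OBD_FREE_LARGE():

static int demo_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
{
	/* remember the rounded-up size for the later OBD_FREE_LARGE() */
	msgsize = size_roundup_power2(msgsize);

	OBD_ALLOC_LARGE(req->rq_repbuf, msgsize);
	if (req->rq_repbuf == NULL)
		return -ENOMEM;

	req->rq_repbuf_len = msgsize;
	return 0;
}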
Example #7
void lsm_free_plain(struct lov_stripe_md *lsm)
{
	__u16 stripe_count = lsm->lsm_stripe_count;
	int i;

	for (i = 0; i < stripe_count; i++)
		OBD_SLAB_FREE(lsm->lsm_oinfo[i], lov_oinfo_slab,
			      sizeof(struct lov_oinfo));
	OBD_FREE_LARGE(lsm, sizeof(struct lov_stripe_md) +
		       stripe_count * sizeof(struct lov_oinfo *));
}
Example #8
static
int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
			struct ptlrpc_request *req,
			int segment, int newsize)
{
	struct lustre_msg      *newbuf;
	struct lustre_msg      *oldbuf = req->rq_reqmsg;
	int		     oldsize, newmsg_size, alloc_size;

	LASSERT(req->rq_reqbuf);
	LASSERT(req->rq_reqbuf == req->rq_reqmsg);
	LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
	LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf));

	/* compute new message size */
	oldsize = req->rq_reqbuf->lm_buflens[segment];
	req->rq_reqbuf->lm_buflens[segment] = newsize;
	newmsg_size = lustre_packed_msg_size(oldbuf);
	req->rq_reqbuf->lm_buflens[segment] = oldsize;

	/* request from pool should always have enough buffer */
	LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size);

	if (req->rq_reqbuf_len < newmsg_size) {
		alloc_size = size_roundup_power2(newmsg_size);

		OBD_ALLOC_LARGE(newbuf, alloc_size);
		if (newbuf == NULL)
			return -ENOMEM;

		/* Must lock this, so that an otherwise unprotected change of
		 * rq_reqmsg does not race with parallel processing of
		 * imp_replay_list traversing threads.  See LU-3333.
		 * This is a band-aid at best; we really need to deal with this
		 * in the request enlarging code (before unpacking) that is
		 * already there. */
		if (req->rq_import)
			spin_lock(&req->rq_import->imp_lock);
		memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);

		OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
		req->rq_reqbuf = req->rq_reqmsg = newbuf;
		req->rq_reqbuf_len = alloc_size;

		if (req->rq_import)
			spin_unlock(&req->rq_import->imp_lock);
	}

	_sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
	req->rq_reqlen = newmsg_size;

	return 0;
}
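size_roundup_power2() rounds the new message size up to the next power of two so repeated enlargements can reuse the buffer. It is presumably implemented with the usual bit-smearing trick along these lines (sketch, not copied from the tree):

static inline int size_roundup_power2(int size)
{
	/* smear the highest set bit downwards, then add one:
	 * e.g. 0b0101_0000 -> 0b0111_1111 -> 0b1000_0000 */
	size--;
	size |= size >> 1;
	size |= size >> 2;
	size |= size >> 4;
	size |= size >> 8;
	size |= size >> 16;
	size++;
	return size;
}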
Example #9
void rawobj_free(rawobj_t *obj)
{
        LASSERT(obj);

        if (obj->len) {
                LASSERT(obj->data);
                OBD_FREE_LARGE(obj->data, obj->len);
                obj->len = 0;
                obj->data = NULL;
        } else
                LASSERT(!obj->data);
}
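rawobj_free() assumes obj->data was obtained with OBD_ALLOC_LARGE() of exactly obj->len bytes; a hypothetical allocation counterpart keeping the same invariant (len == 0 implies data == NULL) could look like:

static int demo_rawobj_alloc(rawobj_t *obj, const void *src, __u32 len)
{
        if (len == 0) {
                obj->data = NULL;
                obj->len = 0;
                return 0;
        }

        OBD_ALLOC_LARGE(obj->data, len);
        if (obj->data == NULL)
                return -ENOMEM;

        if (src != NULL)
                memcpy(obj->data, src, len);
        obj->len = len;
        return 0;
}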
Example #10
static void lov_fini_raid0(const struct lu_env *env, struct lov_object *lov,
			   union lov_layout_state *state)
{
	struct lov_layout_raid0 *r0 = &state->raid0;

	if (r0->lo_sub != NULL) {
		OBD_FREE_LARGE(r0->lo_sub, r0->lo_nr * sizeof(r0->lo_sub[0]));
		r0->lo_sub = NULL;
	}

	dump_lsm(D_INODE, lov->lo_lsm);
	lov_free_memmd(&lov->lo_lsm);
}
Example #11
/*
 * Free llog handle and header data if exists. Used in llog_close() only
 */
static void llog_free_handle(struct llog_handle *loghandle)
{
	LASSERT(loghandle != NULL);

	/* failed llog_init_handle */
	if (loghandle->lgh_hdr == NULL)
		goto out;

	if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
		LASSERT(list_empty(&loghandle->u.phd.phd_entry));
	else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
		LASSERT(list_empty(&loghandle->u.chd.chd_head));
	OBD_FREE_LARGE(loghandle->lgh_hdr, loghandle->lgh_hdr_size);
out:
	OBD_FREE_PTR(loghandle);
}
Example #12
static
void null_free_reqbuf(struct ptlrpc_sec *sec,
		      struct ptlrpc_request *req)
{
	if (!req->rq_pool) {
		LASSERTF(req->rq_reqmsg == req->rq_reqbuf,
			 "req %p: reqmsg %p is not reqbuf %p in null sec\n",
			 req, req->rq_reqmsg, req->rq_reqbuf);
		LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen,
			 "req %p: reqlen %d should smaller than buflen %d\n",
			 req, req->rq_reqlen, req->rq_reqbuf_len);

		OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
		req->rq_reqbuf = NULL;
		req->rq_reqbuf_len = 0;
	}
}
Example #13
int llog_catalog_list(const struct lu_env *env, struct dt_device *d,
		      int count, struct obd_ioctl_data *data,
		      const struct lu_fid *fid)
{
	int			 size, i;
	struct llog_catid	*idarray;
	struct llog_logid	*id;
	char			*out;
	int			 l, remains, rc = 0;

	ENTRY;

	if (count == 0) { /* get total number of logs */
		rc = llog_osd_get_cat_list(env, d, 0, 0, NULL, fid);
		if (rc < 0)
			RETURN(rc);
		count = rc;
	}

	size = sizeof(*idarray) * count;

	OBD_ALLOC_LARGE(idarray, size);
	if (!idarray)
		RETURN(-ENOMEM);

	rc = llog_osd_get_cat_list(env, d, 0, count, idarray, fid);
	if (rc)
		GOTO(out, rc);

	out = data->ioc_bulk;
	remains = data->ioc_inllen1;
	for (i = 0; i < count; i++) {
		id = &idarray[i].lci_logid;
		l = snprintf(out, remains,
			     "catalog log: #"DOSTID"#%08x\n",
			     POSTID(&id->lgl_oi),
			     id->lgl_ogen);
		out += l;
		remains -= l;
		if (remains <= 0)
			break;
	}
out:
	OBD_FREE_LARGE(idarray, size);
	RETURN(rc);
}
Example #14
static int mdt_rmtlsetfacl(struct mdt_thread_info *info,
                           struct md_object *next,
                           const char *xattr_name,
                           ext_acl_xattr_header *header,
                           posix_acl_xattr_header **out)
{
        struct ptlrpc_request  *req = mdt_info_req(info);
        struct mdt_export_data *med = mdt_req2med(req);
        struct md_ucred        *uc = mdt_ucred(info);
        struct lu_buf          *buf = &info->mti_buf;
        int                     rc;
        ENTRY;

        rc = lustre_ext_acl_xattr_id2server(uc, med->med_idmap, header);
        if (rc)
                RETURN(rc);

        rc = mo_xattr_get(info->mti_env, next, &LU_BUF_NULL, xattr_name);
        if (rc == -ENODATA)
                rc = 0;
        else if (rc < 0)
                RETURN(rc);

        buf->lb_len = rc;
        if (buf->lb_len > 0) {
                OBD_ALLOC_LARGE(buf->lb_buf, buf->lb_len);
                if (unlikely(buf->lb_buf == NULL))
                        RETURN(-ENOMEM);

                rc = mo_xattr_get(info->mti_env, next, buf, xattr_name);
                if (rc < 0) {
                        CERROR("getxattr failed: %d\n", rc);
                        GOTO(_out, rc);
                }
        } else
                buf->lb_buf = NULL;

        rc = lustre_acl_xattr_merge2posix((posix_acl_xattr_header *)(buf->lb_buf),
                                          buf->lb_len, header, out);
        EXIT;

_out:
        if (rc <= 0 && buf->lb_buf != NULL)
                OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
        return rc;
}
Example #15
File: rw26.c Project: rread/lustre
/*  ll_free_user_pages - tear down page struct array
 *  @pages: array of page struct pointers underlying target buffer */
static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
{
    int i;

    for (i = 0; i < npages; i++) {
        if (pages[i] == NULL)
            break;
        if (do_dirty)
            set_page_dirty_lock(pages[i]);
        put_page(pages[i]);
    }

#if defined(HAVE_DIRECTIO_ITER) || defined(HAVE_IOV_ITER_RW)
    kvfree(pages);
#else
    OBD_FREE_LARGE(pages, npages * sizeof(*pages));
#endif
}
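Note the split free path: when HAVE_DIRECTIO_ITER or HAVE_IOV_ITER_RW is defined, the page array presumably comes from the generic iov_iter based direct-I/O helpers and is released with kvfree(), while the legacy branch releases the array that ll_get_user_pages() in Example #1 allocated with OBD_ALLOC_LARGE(), passing back the same npages * sizeof(*pages) size.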
Example #16
int llog_catalog_list(struct obd_device *obd, int count,
                      struct obd_ioctl_data *data)
{
        int size, i;
        struct llog_catid *idarray;
        struct llog_logid *id;
        char name[32] = CATLIST;
        char *out;
        int l, remains, rc = 0;

        ENTRY;
        size = sizeof(*idarray) * count;

        OBD_ALLOC_LARGE(idarray, size);
        if (!idarray)
                RETURN(-ENOMEM);

        cfs_mutex_down(&obd->obd_olg.olg_cat_processing);
        rc = llog_get_cat_list(obd, name, 0, count, idarray);
        if (rc)
                GOTO(out, rc);

        out = data->ioc_bulk;
        remains = data->ioc_inllen1;
        for (i = 0; i < count; i++) {
                id = &idarray[i].lci_logid;
                l = snprintf(out, remains,
                             "catalog log: #"LPX64"#"LPX64"#%08x\n",
                             id->lgl_oid, id->lgl_oseq, id->lgl_ogen);
                out += l;
                remains -= l;
                if (remains <= 0) {
                        CWARN("not enough memory for catlog list\n");
                        break;
                }
        }
out:
        /* release semaphore */
        cfs_mutex_up(&obd->obd_olg.olg_cat_processing);

        OBD_FREE_LARGE(idarray, size);
        RETURN(rc);
}
Example #17
static
int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
                        struct ptlrpc_request *req,
                        int segment, int newsize)
{
        struct lustre_msg      *newbuf;
        struct lustre_msg      *oldbuf = req->rq_reqmsg;
        int                     oldsize, newmsg_size, alloc_size;

        LASSERT(req->rq_reqbuf);
        LASSERT(req->rq_reqbuf == req->rq_reqmsg);
        LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
        LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf));

        /* compute new message size */
        oldsize = req->rq_reqbuf->lm_buflens[segment];
        req->rq_reqbuf->lm_buflens[segment] = newsize;
        newmsg_size = lustre_packed_msg_size(oldbuf);
        req->rq_reqbuf->lm_buflens[segment] = oldsize;

        /* request from pool should always have enough buffer */
        LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size);

        if (req->rq_reqbuf_len < newmsg_size) {
                alloc_size = size_roundup_power2(newmsg_size);

                OBD_ALLOC_LARGE(newbuf, alloc_size);
                if (newbuf == NULL)
                        return -ENOMEM;

                memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);

                OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
                req->rq_reqbuf = req->rq_reqmsg = newbuf;
                req->rq_reqbuf_len = alloc_size;
        }

        _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
        req->rq_reqlen = newmsg_size;

        return 0;
}
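This is an older variant of the function shown in Example #8; the newer version additionally takes imp_lock around the rq_reqbuf/rq_reqmsg swap so it does not race with threads traversing imp_replay_list (see the LU-3333 comment there).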
Example #18
static int mdt_reint_setattr(struct mdt_thread_info *info,
                             struct mdt_lock_handle *lhc)
{
        struct md_attr          *ma = &info->mti_attr;
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct mdt_export_data  *med = &req->rq_export->exp_mdt_data;
        struct mdt_file_data    *mfd;
        struct mdt_object       *mo;
        struct md_object        *next;
        struct mdt_body         *repbody;
        int                      som_au, rc;
        ENTRY;

        DEBUG_REQ(D_INODE, req, "setattr "DFID" %x", PFID(rr->rr_fid1),
                  (unsigned int)ma->ma_attr.la_valid);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
        mo = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1,
                             MDT_OBJ_MUST_EXIST);
        if (IS_ERR(mo))
                GOTO(out, rc = PTR_ERR(mo));

        /* start a log journal handle if needed */
        if (!(mdt_conn_flags(info) & OBD_CONNECT_SOM)) {
                if ((ma->ma_attr.la_valid & LA_SIZE) ||
                    (rr->rr_flags & MRF_OPEN_TRUNC)) {
                        /* Check write access for the O_TRUNC case */
                        if (mdt_write_read(mo) < 0)
                                GOTO(out_put, rc = -ETXTBSY);
                }
        } else if (info->mti_ioepoch &&
                   (info->mti_ioepoch->flags & MF_EPOCH_OPEN)) {
                /* Truncate case. IOEpoch is opened. */
                rc = mdt_write_get(mo);
                if (rc)
                        GOTO(out_put, rc);

                mfd = mdt_mfd_new();
                if (mfd == NULL) {
                        mdt_write_put(mo);
                        GOTO(out_put, rc = -ENOMEM);
                }

                mdt_ioepoch_open(info, mo, 0);
                repbody->ioepoch = mo->mot_ioepoch;

                mdt_object_get(info->mti_env, mo);
                mdt_mfd_set_mode(mfd, MDS_FMODE_TRUNC);
                mfd->mfd_object = mo;
                mfd->mfd_xid = req->rq_xid;

                cfs_spin_lock(&med->med_open_lock);
                cfs_list_add(&mfd->mfd_list, &med->med_open_head);
                cfs_spin_unlock(&med->med_open_lock);
                repbody->handle.cookie = mfd->mfd_handle.h_cookie;
        }

        som_au = info->mti_ioepoch && info->mti_ioepoch->flags & MF_SOM_CHANGE;
        if (som_au) {
                /* SOM Attribute update case. Find the proper mfd and update
                 * SOM attributes on the proper object. */
                LASSERT(mdt_conn_flags(info) & OBD_CONNECT_SOM);
                LASSERT(info->mti_ioepoch);

                cfs_spin_lock(&med->med_open_lock);
                mfd = mdt_handle2mfd(info, &info->mti_ioepoch->handle);
                if (mfd == NULL) {
                        cfs_spin_unlock(&med->med_open_lock);
                        CDEBUG(D_INODE, "no handle for file close: "
                               "fid = "DFID": cookie = "LPX64"\n",
                               PFID(info->mti_rr.rr_fid1),
                               info->mti_ioepoch->handle.cookie);
                        GOTO(out_put, rc = -ESTALE);
                }
                LASSERT(mfd->mfd_mode == MDS_FMODE_SOM);
                LASSERT(!(info->mti_ioepoch->flags & MF_EPOCH_CLOSE));

                class_handle_unhash(&mfd->mfd_handle);
                cfs_list_del_init(&mfd->mfd_list);
                cfs_spin_unlock(&med->med_open_lock);

                /* Close the found mfd, update attributes. */
                ma->ma_lmm_size = info->mti_mdt->mdt_max_mdsize;
                OBD_ALLOC_LARGE(ma->ma_lmm, info->mti_mdt->mdt_max_mdsize);
                if (ma->ma_lmm == NULL)
                        GOTO(out_put, rc = -ENOMEM);

                mdt_mfd_close(info, mfd);

                OBD_FREE_LARGE(ma->ma_lmm, info->mti_mdt->mdt_max_mdsize);
        } else {
                rc = mdt_attr_set(info, mo, ma, rr->rr_flags);
                if (rc)
                        GOTO(out_put, rc);
        }

        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
        next = mdt_object_child(mo);
        rc = mo_attr_get(info->mti_env, next, ma);
        if (rc != 0)
                GOTO(out_put, rc);

        mdt_pack_attr2body(info, repbody, &ma->ma_attr, mdt_object_fid(mo));

        if (info->mti_mdt->mdt_opts.mo_oss_capa &&
            info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA &&
            S_ISREG(lu_object_attr(&mo->mot_obj.mo_lu)) &&
            (ma->ma_attr.la_valid & LA_SIZE) && !som_au) {
                struct lustre_capa *capa;

                capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
                LASSERT(capa);
                capa->lc_opc = CAPA_OPC_OSS_DEFAULT | CAPA_OPC_OSS_TRUNC;
                rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0);
                if (rc)
                        GOTO(out_put, rc);
                repbody->valid |= OBD_MD_FLOSSCAPA;
        }

        EXIT;
out_put:
        mdt_object_put(info->mti_env, mo);
out:
        if (rc == 0)
                mdt_counter_incr(req->rq_export, LPROC_MDT_SETATTR);

        mdt_client_compatibility(info);
        mdt_shrink_reply(info);
        return rc;
}
Example #19
/* Retrieve object striping information.
 *
 * @lump is a pointer to an in-core struct with lmm_ost_count indicating
 * the maximum number of OST indices which will fit in the user buffer.
 * lmm_magic must be LOV_USER_MAGIC.
 *
 * If @size > 0, User specified limited buffer size, usually the buffer is from
 * ll_lov_setstripe(), and the buffer can only hold basic layout template info.
 */
int lov_getstripe(const struct lu_env *env, struct lov_object *obj,
		  struct lov_stripe_md *lsm, struct lov_user_md __user *lump,
		  size_t size)
{
	/* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
	struct lov_mds_md *lmmk, *lmm;
	struct lov_user_md_v1 lum;
	size_t	lmmk_size;
	ssize_t	lmm_size, lum_size = 0;
	static bool printed;
	int	rc = 0;
	ENTRY;

	if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3 &&
	    lsm->lsm_magic != LOV_MAGIC_COMP_V1) {
		CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
		       lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
		GOTO(out, rc = -EIO);
	}

	if (!printed) {
		LCONSOLE_WARN("%s: using old ioctl(LL_IOC_LOV_GETSTRIPE) on "
			      DFID", use llapi_layout_get_by_path()\n",
			      current->comm,
			      PFID(&obj->lo_cl.co_lu.lo_header->loh_fid));
		printed = true;
	}

	lmmk_size = lov_comp_md_size(lsm);

	OBD_ALLOC_LARGE(lmmk, lmmk_size);
	if (lmmk == NULL)
		GOTO(out, rc = -ENOMEM);

	lmm_size = lov_lsm_pack(lsm, lmmk, lmmk_size);
	if (lmm_size < 0)
		GOTO(out_free, rc = lmm_size);

	if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) {
		if (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1) ||
		    lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_mds_md(lmmk);
			lustre_swab_lov_user_md_objects(
				(struct lov_user_ost_data *)lmmk->lmm_objects,
				lmmk->lmm_stripe_count);
		} else if (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_COMP_V1)) {
			lustre_swab_lov_comp_md_v1(
					(struct lov_comp_md_v1 *)lmmk);
		}
	}

	/* Legacy application passes a limited buffer; we need to figure out
	 * the user buffer size from the passed-in lmm_stripe_count. */
	if (copy_from_user(&lum, lump, sizeof(struct lov_user_md_v1)))
		GOTO(out_free, rc = -EFAULT);

	if (lum.lmm_magic == LOV_USER_MAGIC_V1 ||
	    lum.lmm_magic == LOV_USER_MAGIC_V3)
		lum_size = lov_user_md_size(lum.lmm_stripe_count,
					    lum.lmm_magic);

	if (lum_size != 0) {
		struct lov_mds_md *comp_md = lmmk;

		/* Legacy app (ADIO for instance) treats the layout as V1/V3
		 * blindly, we'd return a reasonable V1/V3 for them. */
		if (lmmk->lmm_magic == LOV_MAGIC_COMP_V1) {
			struct lov_comp_md_v1 *comp_v1;
			struct cl_object *cl_obj;
			struct cl_attr attr;
			int i;

			attr.cat_size = 0;
			cl_obj = cl_object_top(&obj->lo_cl);
			cl_object_attr_get(env, cl_obj, &attr);

			/* return the last instantiated component if file size
			 * is non-zero, otherwise, return the last component.*/
			comp_v1 = (struct lov_comp_md_v1 *)lmmk;
			i = attr.cat_size == 0 ? comp_v1->lcm_entry_count : 0;
			for (; i < comp_v1->lcm_entry_count; i++) {
				if (!(comp_v1->lcm_entries[i].lcme_flags &
						LCME_FL_INIT))
					break;
			}
			if (i > 0)
				i--;
			comp_md = (struct lov_mds_md *)((char *)comp_v1 +
					comp_v1->lcm_entries[i].lcme_offset);
		}

		lmm = comp_md;
		lmm_size = lum_size;
	} else {
		lmm = lmmk;
		lmm_size = lmmk_size;
	}
	/**
	 * User specified limited buffer size, usually the buffer is
	 * from ll_lov_setstripe(), and the buffer can only hold basic
	 * layout template info.
	 */
	if (size == 0 || size > lmm_size)
		size = lmm_size;
	if (copy_to_user(lump, lmm, size))
		GOTO(out_free, rc = -EFAULT);

out_free:
	OBD_FREE_LARGE(lmmk, lmmk_size);
out:
	RETURN(rc);
}
Example #20
static int llog_catinfo_deletions(struct obd_device *obd, char *buf,
                                  int buf_len)
{
        struct mds_obd *mds = &obd->u.mds;
        struct llog_handle *handle;
        struct lvfs_run_ctxt saved;
        int size, i, count;
        struct llog_catid *idarray;
        char name[32] = CATLIST;
        struct cb_data data;
        struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
        int rc;
        ENTRY;

        if (ctxt == NULL || mds == NULL)
                GOTO(release_ctxt, rc = -ENODEV);

        count = mds->mds_lov_desc.ld_tgt_count;
        size = sizeof(*idarray) * count;

        OBD_ALLOC_LARGE(idarray, size);
        if (!idarray)
                GOTO(release_ctxt, rc = -ENOMEM);

        cfs_mutex_lock(&obd->obd_olg.olg_cat_processing);
        rc = llog_get_cat_list(obd, name, 0, count, idarray);
        if (rc)
                GOTO(out_free, rc);

        push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);

        data.ctxt = ctxt;
        data.out = buf;
        data.remains = buf_len;
        for (i = 0; i < count; i++) {
                int l, index, uncanceled = 0;

                rc = llog_create(ctxt, &handle, &idarray[i].lci_logid, NULL);
                if (rc)
                        GOTO(out_pop, rc);
                rc = llog_init_handle(handle, 0, NULL);
                if (rc) {
                        llog_close(handle);
                        GOTO(out_pop, rc = -ENOENT);
                }
                for (index = 1; index < (LLOG_BITMAP_BYTES * 8); index++) {
                        if (ext2_test_bit(index, handle->lgh_hdr->llh_bitmap))
                                uncanceled++;
                }
                l = snprintf(data.out, data.remains,
                             "\n[Catlog ID]: #"LPX64"#"LPX64"#%08x  "
                             "[Log Count]: %d\n",
                             idarray[i].lci_logid.lgl_oid,
                             idarray[i].lci_logid.lgl_oseq,
                             idarray[i].lci_logid.lgl_ogen, uncanceled);

                data.out += l;
                data.remains -= l;
                data.init = 1;

                llog_process(handle, llog_catinfo_cb, &data, NULL);
                llog_close(handle);

                if (data.remains <= 0)
                        break;
        }
        EXIT;
out_pop:
        pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
out_free:
        cfs_mutex_unlock(&obd->obd_olg.olg_cat_processing);
        OBD_FREE_LARGE(idarray, size);
release_ctxt:
        llog_ctxt_put(ctxt);
        return rc;
}
Example #21
/* Pack LOV object metadata for disk storage.  It is packed in LE byte
 * order and is opaque to the networking layer.
 *
 * XXX In the future, this will be enhanced to get the EA size from the
 *     underlying OSC device(s) to get their EA sizes so we can stack
 *     LOVs properly.  For now lov_mds_md_size() just assumes one obd_id
 *     per stripe.
 */
int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
	       struct lov_stripe_md *lsm)
{
	struct obd_device *obd = class_exp2obd(exp);
	struct lov_obd *lov = &obd->u.lov;
	struct lov_mds_md_v1 *lmmv1;
	struct lov_mds_md_v3 *lmmv3;
	__u16 stripe_count;
	struct lov_ost_data_v1 *lmm_objects;
	int lmm_size, lmm_magic;
	int i;
	int cplen = 0;

	if (lsm) {
		lmm_magic = lsm->lsm_magic;
	} else {
		if (lmmp && *lmmp)
			lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
		else
			/* lsm == NULL and lmmp == NULL */
			lmm_magic = LOV_MAGIC;
	}

	if ((lmm_magic != LOV_MAGIC_V1) &&
	    (lmm_magic != LOV_MAGIC_V3)) {
		CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
			lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
		return -EINVAL;

	}

	if (lsm) {
		/* If we are just sizing the EA, limit the stripe count
		 * to the actual number of OSTs in this filesystem. */
		if (!lmmp) {
			stripe_count = lov_get_stripecnt(lov, lmm_magic,
							lsm->lsm_stripe_count);
			lsm->lsm_stripe_count = stripe_count;
		} else if (!lsm_is_released(lsm)) {
			stripe_count = lsm->lsm_stripe_count;
		} else {
			stripe_count = 0;
		}
	} else {
		/* No need to allocate more than maximum supported stripes.
		 * Anyway, this is pretty inaccurate since ld_tgt_count now
		 * represents max index and we should rely on the actual number
		 * of OSTs instead */
		stripe_count = lov_mds_md_max_stripe_count(
			lov->lov_ocd.ocd_max_easize, lmm_magic);

		if (stripe_count > lov->desc.ld_tgt_count)
			stripe_count = lov->desc.ld_tgt_count;
	}

	/* XXX LOV STACKING call into osc for sizes */
	lmm_size = lov_mds_md_size(stripe_count, lmm_magic);

	if (!lmmp)
		return lmm_size;

	if (*lmmp && !lsm) {
		stripe_count = le16_to_cpu((*lmmp)->lmm_stripe_count);
		lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
		OBD_FREE_LARGE(*lmmp, lmm_size);
		*lmmp = NULL;
		return 0;
	}

	if (!*lmmp) {
		OBD_ALLOC_LARGE(*lmmp, lmm_size);
		if (!*lmmp)
			return -ENOMEM;
	}

	CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d \n",
	       lmm_magic, lmm_size);

	lmmv1 = *lmmp;
	lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
	if (lmm_magic == LOV_MAGIC_V3)
		lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
	else
		lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);

	if (!lsm)
		return lmm_size;

	/* lmmv1 and lmmv3 point to the same struct and have the
	 * same first fields
	 */
	lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi);
	lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
	lmmv1->lmm_stripe_count = cpu_to_le16(stripe_count);
	lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
	lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen);
	if (lsm->lsm_magic == LOV_MAGIC_V3) {
		cplen = strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
				sizeof(lmmv3->lmm_pool_name));
		if (cplen >= sizeof(lmmv3->lmm_pool_name))
			return -E2BIG;
		lmm_objects = lmmv3->lmm_objects;
	} else {
		lmm_objects = lmmv1->lmm_objects;
	}

	for (i = 0; i < stripe_count; i++) {
		struct lov_oinfo *loi = lsm->lsm_oinfo[i];
		/* XXX LOV STACKING call down to osc_packmd() to do packing */
		LASSERTF(ostid_id(&loi->loi_oi) != 0, "lmm_oi "DOSTID
			 " stripe %u/%u idx %u\n", POSTID(&lmmv1->lmm_oi),
			 i, stripe_count, loi->loi_ost_idx);
		ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi);
		lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
		lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
	}

	return lmm_size;
}
Example #22
int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
		     int flags, struct obd_uuid *uuid)
{
	struct llog_log_hdr	*llh;
	enum llog_flag		 fmt = flags & LLOG_F_EXT_MASK;
	int			 rc;
	int			chunk_size = handle->lgh_ctxt->loc_chunk_size;
	ENTRY;

	LASSERT(handle->lgh_hdr == NULL);

	LASSERT(chunk_size >= LLOG_MIN_CHUNK_SIZE);
	OBD_ALLOC_LARGE(llh, chunk_size);
	if (llh == NULL)
		RETURN(-ENOMEM);

	handle->lgh_hdr = llh;
	handle->lgh_hdr_size = chunk_size;
	/* first assign flags to use llog_client_ops */
	llh->llh_flags = flags;
	rc = llog_read_header(env, handle, uuid);
	if (rc == 0) {
		if (unlikely((llh->llh_flags & LLOG_F_IS_PLAIN &&
			      flags & LLOG_F_IS_CAT) ||
			     (llh->llh_flags & LLOG_F_IS_CAT &&
			      flags & LLOG_F_IS_PLAIN))) {
			CERROR("%s: llog type is %s but initializing %s\n",
			       handle->lgh_ctxt->loc_obd->obd_name,
			       llh->llh_flags & LLOG_F_IS_CAT ?
			       "catalog" : "plain",
			       flags & LLOG_F_IS_CAT ? "catalog" : "plain");
			GOTO(out, rc = -EINVAL);
		} else if (llh->llh_flags &
			   (LLOG_F_IS_PLAIN | LLOG_F_IS_CAT)) {
			/*
			 * it is possible to open llog without specifying llog
			 * type so it is taken from llh_flags
			 */
			flags = llh->llh_flags;
		} else {
			/* for some reason the llh_flags has no type set */
			CERROR("llog type is not specified!\n");
			GOTO(out, rc = -EINVAL);
		}
		if (unlikely(uuid &&
			     !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) {
			CERROR("%s: llog uuid mismatch: %s/%s\n",
			       handle->lgh_ctxt->loc_obd->obd_name,
			       (char *)uuid->uuid,
			       (char *)llh->llh_tgtuuid.uuid);
			GOTO(out, rc = -EEXIST);
		}
	}
	if (flags & LLOG_F_IS_CAT) {
		LASSERT(list_empty(&handle->u.chd.chd_head));
		INIT_LIST_HEAD(&handle->u.chd.chd_head);
		llh->llh_size = sizeof(struct llog_logid_rec);
		llh->llh_flags |= LLOG_F_IS_FIXSIZE;
	} else if (!(flags & LLOG_F_IS_PLAIN)) {
		CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
		       handle->lgh_ctxt->loc_obd->obd_name,
		       flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
		rc = -EINVAL;
	}
	llh->llh_flags |= fmt;
out:
	if (rc) {
		OBD_FREE_LARGE(llh, chunk_size);
		handle->lgh_hdr = NULL;
	}
	RETURN(rc);
}
Example #23
static int llog_process_thread(void *arg)
{
	struct llog_process_info	*lpi = arg;
	struct llog_handle		*loghandle = lpi->lpi_loghandle;
	struct llog_log_hdr		*llh = loghandle->lgh_hdr;
	struct llog_process_cat_data	*cd  = lpi->lpi_catdata;
	char				*buf;
	size_t				 chunk_size;
	__u64				 cur_offset, tmp_offset;
	int				 rc = 0, index = 1, last_index;
	int				 saved_index = 0;
	int				 last_called_index = 0;

	ENTRY;

	if (llh == NULL)
		RETURN(-EINVAL);

	cur_offset = chunk_size = llh->llh_hdr.lrh_len;
	/* expect chunk_size to be power of two */
	LASSERT(is_power_of_2(chunk_size));

	OBD_ALLOC_LARGE(buf, chunk_size);
	if (buf == NULL) {
		lpi->lpi_rc = -ENOMEM;
		RETURN(0);
	}

	if (cd != NULL) {
		last_called_index = cd->lpcd_first_idx;
		index = cd->lpcd_first_idx + 1;
	}
	if (cd != NULL && cd->lpcd_last_idx)
		last_index = cd->lpcd_last_idx;
	else
		last_index = LLOG_HDR_BITMAP_SIZE(llh) - 1;

	while (rc == 0) {
		struct llog_rec_hdr *rec;
		off_t chunk_offset;
		unsigned int buf_offset = 0;
		bool partial_chunk;

		/* skip records not set in bitmap */
		while (index <= last_index &&
		       !ext2_test_bit(index, LLOG_HDR_BITMAP(llh)))
			++index;

		/* There are no indices prior to the last_index */
		if (index > last_index)
			break;

		CDEBUG(D_OTHER, "index: %d last_index %d\n", index,
		       last_index);

repeat:
		/* get the buf with our target record; avoid old garbage */
		memset(buf, 0, chunk_size);
		rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
				     index, &cur_offset, buf, chunk_size);
		if (rc != 0)
			GOTO(out, rc);

		/* NB: after llog_next_block() call the cur_offset is the
		 * offset of the next block after read one.
		 * The absolute offset of the current chunk is calculated
		 * from cur_offset value and stored in chunk_offset variable.
		 */
		tmp_offset = cur_offset;
		if (do_div(tmp_offset, chunk_size) != 0) {
			partial_chunk = true;
			chunk_offset = cur_offset & ~(chunk_size - 1);
		} else {
			partial_chunk = false;
			chunk_offset = cur_offset - chunk_size;
		}

		/* NB: when rec->lrh_len is accessed it is already swabbed
		 * since it is used at the "end" of the loop and the rec
		 * swabbing is done at the beginning of the loop. */
		for (rec = (struct llog_rec_hdr *)(buf + buf_offset);
		     (char *)rec < buf + chunk_size;
		     rec = llog_rec_hdr_next(rec)) {

			CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
			       rec, rec->lrh_type);

			if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
				lustre_swab_llog_rec(rec);

			CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
			       rec->lrh_type, rec->lrh_index);

			/* for partial chunk the end of it is zeroed, check
			 * for index 0 to distinguish it. */
			if (partial_chunk && rec->lrh_index == 0) {
				/* concurrent llog_add() might add new records
				 * while llog_processing, check this is not
				 * the case and re-read the current chunk
				 * otherwise. */
				if (index > loghandle->lgh_last_idx)
					GOTO(out, rc = 0);
				CDEBUG(D_OTHER, "Re-read last llog buffer for "
				       "new records, index %u, last %u\n",
				       index, loghandle->lgh_last_idx);
				/* save offset inside buffer for the re-read */
				buf_offset = (char *)rec - (char *)buf;
				cur_offset = chunk_offset;
				goto repeat;
			}

			if (rec->lrh_len == 0 || rec->lrh_len > chunk_size) {
				CWARN("invalid length %d in llog record for "
				      "index %d/%d\n", rec->lrh_len,
				      rec->lrh_index, index);
				GOTO(out, rc = -EINVAL);
			}

			if (rec->lrh_index < index) {
				CDEBUG(D_OTHER, "skipping lrh_index %d\n",
				       rec->lrh_index);
				continue;
			}

			if (rec->lrh_index != index) {
				CERROR("%s: Invalid record: index %u but "
				       "expected %u\n",
				       loghandle->lgh_ctxt->loc_obd->obd_name,
				       rec->lrh_index, index);
				GOTO(out, rc = -ERANGE);
			}

			CDEBUG(D_OTHER,
			       "lrh_index: %d lrh_len: %d (%d remains)\n",
			       rec->lrh_index, rec->lrh_len,
			       (int)(buf + chunk_size - (char *)rec));

			loghandle->lgh_cur_idx = rec->lrh_index;
			loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
						    chunk_offset;

			/* if set, process the callback on this record */
			if (ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) {
				rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec,
						 lpi->lpi_cbdata);
				last_called_index = index;
				if (rc == LLOG_PROC_BREAK) {
					GOTO(out, rc);
				} else if (rc == LLOG_DEL_RECORD) {
					rc = llog_cancel_rec(lpi->lpi_env,
							     loghandle,
							     rec->lrh_index);
				}
				if (rc)
					GOTO(out, rc);
			}
			/* exit if the last index is reached */
			if (index >= last_index)
				GOTO(out, rc = 0);
			++index;
		}
	}

out:
	if (cd != NULL)
		cd->lpcd_last_idx = last_called_index;

	if (unlikely(rc == -EIO && loghandle->lgh_obj != NULL)) {
		if (dt_object_remote(loghandle->lgh_obj)) {
			/* If it is a remote object, then -EIO might mean
			 * disconnection or eviction, let's return -EAGAIN,
			 * so for update recovery log processing, it will
			 * retry until the umount or abort recovery, see
			 * lod_sub_recovery_thread() */
			CERROR("%s retry remote llog process\n",
			       loghandle->lgh_ctxt->loc_obd->obd_name);
			rc = -EAGAIN;
		} else {
			/* Something bad happened to the processing of a local
			 * llog file, probably an I/O error or the log got
			 * corrupted.  To be able to finally release the log,
			 * discard any remaining bits in the header */
			CERROR("Local llog found corrupted\n");
			while (index <= last_index) {
				if (ext2_test_bit(index,
						  LLOG_HDR_BITMAP(llh)) != 0)
					llog_cancel_rec(lpi->lpi_env, loghandle,
							index);
				index++;
			}
			rc = 0;
		}
	}

	OBD_FREE_LARGE(buf, chunk_size);
	lpi->lpi_rc = rc;
	return 0;
}
Example #24
int llog_reverse_process(const struct lu_env *env,
			 struct llog_handle *loghandle, llog_cb_t cb,
			 void *data, void *catdata)
{
	struct llog_log_hdr *llh = loghandle->lgh_hdr;
	struct llog_process_cat_data *cd = catdata;
	void *buf;
	int rc = 0, first_index = 1, index, idx;
	__u32	chunk_size = llh->llh_hdr.lrh_len;
	ENTRY;

	OBD_ALLOC_LARGE(buf, chunk_size);
	if (buf == NULL)
		RETURN(-ENOMEM);

	if (cd != NULL)
		first_index = cd->lpcd_first_idx + 1;
	if (cd != NULL && cd->lpcd_last_idx)
		index = cd->lpcd_last_idx;
	else
		index = LLOG_HDR_BITMAP_SIZE(llh) - 1;

	while (rc == 0) {
		struct llog_rec_hdr *rec;
		struct llog_rec_tail *tail;

		/* skip records not set in bitmap */
		while (index >= first_index &&
		       !ext2_test_bit(index, LLOG_HDR_BITMAP(llh)))
			--index;

		LASSERT(index >= first_index - 1);
		if (index == first_index - 1)
			break;

		/* get the buf with our target record; avoid old garbage */
		memset(buf, 0, chunk_size);
		rc = llog_prev_block(env, loghandle, index, buf, chunk_size);
		if (rc)
			GOTO(out, rc);

		rec = buf;
		idx = rec->lrh_index;
		CDEBUG(D_RPCTRACE, "index %u : idx %u\n", index, idx);
		while (idx < index) {
			rec = (void *)rec + rec->lrh_len;
			if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
				lustre_swab_llog_rec(rec);
			idx++;
		}
		LASSERT(idx == index);
		tail = (void *)rec + rec->lrh_len - sizeof(*tail);

		/* process records in buffer, starting where we found one */
		while ((void *)tail > buf) {
			if (tail->lrt_index == 0)
				GOTO(out, rc = 0); /* no more records */

			/* if set, process the callback on this record */
			if (ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) {
				rec = (void *)tail - tail->lrt_len +
				      sizeof(*tail);

				rc = cb(env, loghandle, rec, data);
				if (rc == LLOG_PROC_BREAK) {
					GOTO(out, rc);
				} else if (rc == LLOG_DEL_RECORD) {
					rc = llog_cancel_rec(env, loghandle,
							     tail->lrt_index);
				}
				if (rc)
					GOTO(out, rc);
			}

			/* previous record, still in buffer? */
			--index;
			if (index < first_index)
				GOTO(out, rc = 0);
			tail = (void *)tail - tail->lrt_len;
		}
	}

out:
	if (buf != NULL)
		OBD_FREE_LARGE(buf, chunk_size);
	RETURN(rc);
}
Example #25
/**
 * Break down the FIEMAP request and send appropriate calls to individual OSTs.
 * This also handles the restarting of FIEMAP calls in case mapping overflows
 * the available number of extents in single call.
 *
 * \param env [in]		lustre environment
 * \param obj [in]		file object
 * \param fmkey [in]		fiemap request header and other info
 * \param fiemap [out]		fiemap buffer holding retrieved map extents
 * \param buflen [in/out]	max buffer length of @fiemap, when iterate
 *				each OST, it is used to limit max map needed
 * \retval 0	success
 * \retval < 0	error
 */
static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
			     struct ll_fiemap_info_key *fmkey,
			     struct fiemap *fiemap, size_t *buflen)
{
	struct lov_stripe_md	*lsm;
	struct cl_object	*subobj = NULL;
	struct lov_obd		*lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
	struct fiemap		*fm_local = NULL;
	struct fiemap_extent	*lcl_fm_ext;
	loff_t			fm_start;
	loff_t			fm_end;
	loff_t			fm_length;
	loff_t			fm_end_offset;
	int			count_local;
	int			ost_index = 0;
	int			start_stripe;
	int			current_extent = 0;
	int			rc = 0;
	int			last_stripe;
	int			cur_stripe = 0;
	int			cur_stripe_wrap = 0;
	int			stripe_count;
	unsigned int		buffer_size = FIEMAP_BUFFER_SIZE;
	/* Whether we have collected enough extents */
	bool			enough = false;
	/* EOF for object */
	bool			ost_eof = false;
	/* done with required mapping for this OST? */
	bool			ost_done = false;
	ENTRY;

	lsm = lov_lsm_addref(cl2lov(obj));
	if (lsm == NULL)
		RETURN(-ENODATA);

	/**
	 * If the stripe_count > 1 and the application does not understand
	 * DEVICE_ORDER flag, it cannot interpret the extents correctly.
	 */
	if (lsm->lsm_stripe_count > 1 && !(fiemap->fm_flags &
					   FIEMAP_FLAG_DEVICE_ORDER))
		GOTO(out_lsm, rc = -ENOTSUPP);

	if (lsm_is_released(lsm)) {
		if (fiemap->fm_start < fmkey->lfik_oa.o_size) {
			/**
			 * released file, return a minimal FIEMAP if
			 * request fits in file-size.
			 */
			fiemap->fm_mapped_extents = 1;
			fiemap->fm_extents[0].fe_logical = fiemap->fm_start;
			if (fiemap->fm_start + fiemap->fm_length <
			    fmkey->lfik_oa.o_size)
				fiemap->fm_extents[0].fe_length =
					fiemap->fm_length;
			else
				fiemap->fm_extents[0].fe_length =
					fmkey->lfik_oa.o_size -
					fiemap->fm_start;
			fiemap->fm_extents[0].fe_flags |=
				FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
		}
		GOTO(out_lsm, rc = 0);
	}

	if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size)
		buffer_size = fiemap_count_to_size(fiemap->fm_extent_count);

	OBD_ALLOC_LARGE(fm_local, buffer_size);
	if (fm_local == NULL)
		GOTO(out_lsm, rc = -ENOMEM);
	lcl_fm_ext = &fm_local->fm_extents[0];
	count_local = fiemap_size_to_count(buffer_size);

	fm_start = fiemap->fm_start;
	fm_length = fiemap->fm_length;
	/* Calculate start stripe, last stripe and length of mapping */
	start_stripe = lov_stripe_number(lsm, fm_start);
	fm_end = (fm_length == ~0ULL) ? fmkey->lfik_oa.o_size :
					fm_start + fm_length - 1;
	/* If fm_length != ~0ULL but fm_start + fm_length - 1 exceeds file size */
	if (fm_end > fmkey->lfik_oa.o_size)
		fm_end = fmkey->lfik_oa.o_size;

	last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
					      start_stripe, &stripe_count);
	fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, fm_end,
						  &start_stripe);
	if (fm_end_offset == -EINVAL)
		GOTO(out_fm_local, rc = -EINVAL);

	/**
	 * Requested extent count exceeds the fiemap buffer size, shrink our
	 * ambition.
	 */
	if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen)
		fiemap->fm_extent_count = fiemap_size_to_count(*buflen);
	if (fiemap->fm_extent_count == 0)
		count_local = 0;

	/* Check each stripe */
	for (cur_stripe = start_stripe; stripe_count > 0;
	     --stripe_count,
	     cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
		loff_t req_fm_len; /* Stores length of required mapping */
		loff_t len_mapped_single_call;
		loff_t lun_start;
		loff_t lun_end;
		loff_t obd_object_end;
		unsigned int ext_count;

		cur_stripe_wrap = cur_stripe;

		/* Find out range of mapping on this stripe */
		if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
					   &lun_start, &obd_object_end)) == 0)
			continue;

		if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe]))
			GOTO(out_fm_local, rc = -EIO);

		/* If this is a continuation FIEMAP call and we are on
		 * starting stripe then lun_start needs to be set to
		 * fm_end_offset */
		if (fm_end_offset != 0 && cur_stripe == start_stripe)
			lun_start = fm_end_offset;

		if (fm_length != ~0ULL) {
			/* Handle fm_start + fm_length overflow */
			if (fm_start + fm_length < fm_start)
				fm_length = ~0ULL - fm_start;
			lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
						     cur_stripe);
		} else {
			lun_end = ~0ULL;
		}

		if (lun_start == lun_end)
			continue;

		req_fm_len = obd_object_end - lun_start;
		fm_local->fm_length = 0;
		len_mapped_single_call = 0;

		/* find lobsub object */
		subobj = lov_find_subobj(env, cl2lov(obj), lsm,
					     cur_stripe);
		if (IS_ERR(subobj))
			GOTO(out_fm_local, rc = PTR_ERR(subobj));
		/* If the output buffer is very large and the objects have many
		 * extents we may need to loop on a single OST repeatedly */
		ost_eof = false;
		ost_done = false;
		do {
			if (fiemap->fm_extent_count > 0) {
				/* Don't get too many extents. */
				if (current_extent + count_local >
				    fiemap->fm_extent_count)
					count_local = fiemap->fm_extent_count -
						      current_extent;
			}

			lun_start += len_mapped_single_call;
			fm_local->fm_length = req_fm_len -
					      len_mapped_single_call;
			req_fm_len = fm_local->fm_length;
			fm_local->fm_extent_count = enough ? 1 : count_local;
			fm_local->fm_mapped_extents = 0;
			fm_local->fm_flags = fiemap->fm_flags;

			ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;

			if (ost_index < 0 ||
			    ost_index >= lov->desc.ld_tgt_count)
				GOTO(obj_put, rc = -EINVAL);
			/* If OST is inactive, return extent with UNKNOWN
			 * flag. */
			if (!lov->lov_tgts[ost_index]->ltd_active) {
				fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
				fm_local->fm_mapped_extents = 1;

				lcl_fm_ext[0].fe_logical = lun_start;
				lcl_fm_ext[0].fe_length = obd_object_end -
							  lun_start;
				lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;

				goto inactive_tgt;
			}

			fm_local->fm_start = lun_start;
			fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
			memcpy(&fmkey->lfik_fiemap, fm_local,
			       sizeof(*fm_local));
			*buflen = fiemap_count_to_size(
						fm_local->fm_extent_count);

			rc = cl_object_fiemap(env, subobj, fmkey, fm_local,
					      buflen);
			if (rc != 0)
				GOTO(obj_put, rc);
inactive_tgt:
			ext_count = fm_local->fm_mapped_extents;
			if (ext_count == 0) {
				ost_done = true;
				/* If the last stripe has a hole at the end,
				 * we need to return */
				if (cur_stripe_wrap == last_stripe) {
					fiemap->fm_mapped_extents = 0;
					goto finish;
				}
				break;
			} else if (enough) {
				/*
				 * We've collected enough extents and there are
				 * more extents after it.
				 */
				goto finish;
			}

			/* If we just need num of extents, go to next device */
			if (fiemap->fm_extent_count == 0) {
				current_extent += ext_count;
				break;
			}

			/* prepare to copy retrieved map extents */
			len_mapped_single_call =
				lcl_fm_ext[ext_count - 1].fe_logical -
				lun_start + lcl_fm_ext[ext_count - 1].fe_length;

			/* Have we finished mapping on this device? */
			if (req_fm_len <= len_mapped_single_call)
				ost_done = true;

			/* Clear the EXTENT_LAST flag which can be present on
			 * the last extent */
			if (lcl_fm_ext[ext_count - 1].fe_flags &
			    FIEMAP_EXTENT_LAST)
				lcl_fm_ext[ext_count - 1].fe_flags &=
							~FIEMAP_EXTENT_LAST;
			if (lov_stripe_size(lsm,
					lcl_fm_ext[ext_count - 1].fe_logical +
					lcl_fm_ext[ext_count - 1].fe_length,
					cur_stripe) >= fmkey->lfik_oa.o_size)
				ost_eof = true;

			fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
						     ost_index, ext_count,
						     current_extent);
			current_extent += ext_count;

			/* Ran out of available extents? */
			if (current_extent >= fiemap->fm_extent_count)
				enough = true;
		} while (!ost_done && !ost_eof);

		cl_object_put(env, subobj);
		subobj = NULL;

		if (cur_stripe_wrap == last_stripe)
			goto finish;
	} /* for each stripe */
finish:
	/* Indicate that we are returning device offsets unless file just has
	 * single stripe */
	if (lsm->lsm_stripe_count > 1)
		fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;

	if (fiemap->fm_extent_count == 0)
		goto skip_last_device_calc;

	/* Check if we have reached the last stripe and whether mapping for that
	 * stripe is done. */
	if ((cur_stripe_wrap == last_stripe) && (ost_done || ost_eof))
		fiemap->fm_extents[current_extent - 1].fe_flags |=
							     FIEMAP_EXTENT_LAST;
skip_last_device_calc:
	fiemap->fm_mapped_extents = current_extent;
obj_put:
	if (subobj != NULL)
		cl_object_put(env, subobj);
out_fm_local:
	OBD_FREE_LARGE(fm_local, buffer_size);

out_lsm:
	lov_lsm_put(lsm);

	return rc;
}