Example #1
static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev,
			  struct lu_fid *fid, uint64_t *oid)
{
	struct l_wait_info lwi = { 0 };
	struct lustre_scrub *scrub = &dev->od_scrub;
	struct ptlrpc_thread *thread = &scrub->os_thread;
	struct osd_otable_it *it = dev->od_otable_it;
	struct lustre_mdt_attrs *lma = NULL;
	nvlist_t *nvbuf = NULL;
	int size = 0;
	int rc = 0;
	ENTRY;

	if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) {
		lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL);
		if (likely(lwi.lwi_timeout > 0)) {
			l_wait_event(thread->t_ctl_waitq,
				!list_empty(&scrub->os_inconsistent_items) ||
				!thread_is_running(thread),
				&lwi);
			if (unlikely(!thread_is_running(thread)))
				RETURN(SCRUB_NEXT_EXIT);
		}
	}

	if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) {
		spin_lock(&scrub->os_lock);
		thread_set_flags(thread, SVC_STOPPING);
		spin_unlock(&scrub->os_lock);
		RETURN(SCRUB_NEXT_CRASH);
	}

	if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_FATAL))
		RETURN(SCRUB_NEXT_FATAL);

again:
	if (nvbuf) {
		nvlist_free(nvbuf);
		nvbuf = NULL;
		lma = NULL;
	}

	if (!list_empty(&scrub->os_inconsistent_items)) {
		spin_lock(&scrub->os_lock);
		if (likely(!list_empty(&scrub->os_inconsistent_items))) {
			struct osd_inconsistent_item *oii;

			oii = list_entry(scrub->os_inconsistent_items.next,
				struct osd_inconsistent_item, oii_list);
			*fid = oii->oii_cache.oic_fid;
			*oid = oii->oii_cache.oic_dnode;
			scrub->os_in_prior = 1;
			spin_unlock(&scrub->os_lock);

			GOTO(out, rc = 0);
		}
		spin_unlock(&scrub->os_lock);
	}
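
Every snippet in this list keys off OBD_FAIL_CHECK(). As a rough sketch of the idiom, assuming a simplified model rather than the real libcfs implementation: a fail location is set globally by the test harness, and each call site fires only when execution reaches the matching id (cfs_fail_val, as used above, carries an optional argument such as a delay).

/* Simplified model of the fault-injection hook (assumption: the real
 * cfs_fail_loc handling also has one-shot, timeout and value modifiers). */
static unsigned long fail_loc;	/* set by the test harness */

static inline int fail_check(unsigned long id)
{
	return (fail_loc & 0x0fffffffUL) == id;	/* low bits hold the site id */
}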
Example #2
int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
               struct ptlrpc_request **request)
{
        CFS_LIST_HEAD(cancels);
        struct obd_device *obd = class_exp2obd(exp);
        struct ptlrpc_request *req = *request;
        int count = 0, rc;
        ENTRY;

        LASSERT(req == NULL);

	if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
	    (fid_is_sane(&op_data->op_fid1)) &&
	    !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
		count = mdc_resource_get_unused(exp, &op_data->op_fid1,
						&cancels, LCK_EX,
						MDS_INODELOCK_UPDATE);
	if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
	    (fid_is_sane(&op_data->op_fid3)) &&
	    !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
		count += mdc_resource_get_unused(exp, &op_data->op_fid3,
						 &cancels, LCK_EX,
						 MDS_INODELOCK_FULL);
        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_UNLINK);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);

	rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
	if (rc) {
		ptlrpc_request_free(req);
		RETURN(rc);
	}

        mdc_unlink_pack(req, op_data);

        req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
                             obd->u.cli.cl_max_mds_easize);
        req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
                             obd->u.cli.cl_max_mds_cookiesize);
        ptlrpc_request_set_replen(req);

        *request = req;

        rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
        if (rc == -ERESTARTSYS)
                rc = 0;
        RETURN(rc);
}
Example #3
File: osc_object.c Project: 19Dan01/linux
int osc_object_is_contended(struct osc_object *obj)
{
	struct osc_device *dev  = lu2osc_dev(obj->oo_cl.co_lu.lo_dev);
	int osc_contention_time = dev->od_contention_time;
	unsigned long cur_time     = cfs_time_current();
	unsigned long retry_time;

	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION))
		return 1;

	if (!obj->oo_contended)
		return 0;

	/*
	 * I like copy-paste. the code is copied from
	 * ll_file_is_contended.
	 */
	retry_time = cfs_time_add(obj->oo_contention_time,
				  cfs_time_seconds(osc_contention_time));
	if (cfs_time_after(cur_time, retry_time)) {
		osc_object_clear_contended(obj);
		return 0;
	}
	return 1;
}
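
The window test above is the usual lazy-expiry pattern: the object stays contended for od_contention_time seconds after the last contention event, and the flag is cleared only when a later lookup notices the window has passed. A hedged sketch of the same check with plain jiffies helpers (helper name hypothetical):

#include <linux/jiffies.h>

/* True once window_secs have elapsed since event_time (in jiffies). */
static inline bool window_expired(unsigned long event_time, int window_secs)
{
	return time_after(jiffies, event_time + window_secs * HZ);
}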
Example #4
File: vvp_req.c Project: acton393/linux
/**
 * Implementation of struct cl_req_operations::cro_attr_set() for VVP
 * layer. VVP is responsible for
 *
 *    - o_[mac]time
 *
 *    - o_mode
 *
 *    - o_parent_seq
 *
 *    - o_[ug]id
 *
 *    - o_parent_oid
 *
 *    - o_parent_ver
 *
 *    - o_ioepoch,
 *
 */
static void vvp_req_attr_set(const struct lu_env *env,
			     const struct cl_req_slice *slice,
			     const struct cl_object *obj,
			     struct cl_req_attr *attr, u64 flags)
{
	struct inode *inode;
	struct obdo  *oa;
	u32	      valid_flags;

	oa = attr->cra_oa;
	inode = vvp_object_inode(obj);
	valid_flags = OBD_MD_FLTYPE;

	if (slice->crs_req->crq_type == CRT_WRITE) {
		if (flags & OBD_MD_FLEPOCH) {
			oa->o_valid |= OBD_MD_FLEPOCH;
			oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch;
			valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
				       OBD_MD_FLUID | OBD_MD_FLGID;
		}
	}
	obdo_from_inode(oa, inode, valid_flags & flags);
	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
		oa->o_parent_oid++;
	memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
	       LUSTRE_JOBID_SIZE);
}
Example #5
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	wait_queue_head_t *wq;
	int rc;

	LASSERT(!in_interrupt());     /* might sleep */

	/* Let's setup deadline for reply unlink. */
	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
	    async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
		req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		return 1;				/* never registered */

	LASSERT(desc->bd_req == req);  /* bd_req NULL until registered */

	/* the unlink ensures the callback happens ASAP and is the last
	 * one.  If it fails, it must be because completion just happened,
	 * but we must still wait_event() in this case to give liblustre
	 * a chance to run client_bulk_callback()
	 */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		return 1;				/* never registered */

	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
	ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);

	/* Do not wait for unlink to finish. */
	if (async)
		return 0;

	if (req->rq_set)
		wq = &req->rq_set->set_waitq;
	else
		wq = &req->rq_reply_waitq;

	for (;;) {
		/* Network access will complete in finite time but the HUGE
		 * timeout lets us CWARN for visibility of sluggish LNDs
		 */
		int cnt = 0;
		while (cnt < LONG_UNLINK &&
		       (rc = wait_event_idle_timeout(*wq,
						     !ptlrpc_client_bulk_active(req),
						     HZ)) == 0)
			cnt += 1;
		if (rc > 0) {
			ptlrpc_rqphase_move(req, req->rq_next_phase);
			return 1;
		}

		DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
			  desc);
	}
	return 0;
}
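
Note the for (;;) loop above never gives up: it waits in one-second ticks, and once LONG_UNLINK seconds pass without completion it warns and keeps waiting, since network access completes in finite time and the huge timeout exists only to surface sluggish LNDs. A stripped-down sketch of the bounded-wait-then-warn idiom (hypothetical helper, condition simplified to an atomic flag):

#include <linux/wait.h>
#include <linux/atomic.h>

/* Wait up to max_secs in 1s ticks; 1 if done fired, 0 if still pending. */
static int wait_one_round(wait_queue_head_t *wq, atomic_t *done, int max_secs)
{
	int cnt = 0, rc = 0;

	while (cnt < max_secs &&
	       (rc = wait_event_idle_timeout(*wq, atomic_read(done), HZ)) == 0)
		cnt++;
	return rc > 0;
}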
Example #6
int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
		  const struct lu_buf *buf, const char *name, int fl,
		  struct thandle *handle)
{
	struct osd_object  *obj = osd_dt_obj(dt);
	struct osd_thandle *oh;
	int rc = 0;
	ENTRY;

	LASSERT(handle != NULL);
	LASSERT(osd_invariant(obj));

	if (!osd_obj2dev(obj)->od_posix_acl &&
	    (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0 ||
	     strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0))
		RETURN(-EOPNOTSUPP);

	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_OVERFLOW) &&
	    strcmp(name, XATTR_NAME_LINK) == 0)
		RETURN(-ENOSPC);

	oh = container_of0(handle, struct osd_thandle, ot_super);

	down_write(&obj->oo_guard);
	CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
		name, (int)buf->lb_len);
	rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
	up_write(&obj->oo_guard);

	RETURN(rc);
}
Example #7
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	struct l_wait_info       lwi;
	int                      rc;
	ENTRY;

	LASSERT(!in_interrupt());     /* might sleep */

	/* Let's setup deadline for reply unlink. */
	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
	    async && req->rq_bulk_deadline == 0)
		req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

	LASSERT(desc->bd_req == req);  /* bd_req NULL until registered */

	/* the unlink ensures the callback happens ASAP and is the last
	 * one.  If it fails, it must be because completion just happened,
	 * but we must still l_wait_event() in this case to give liblustre
	 * a chance to run client_bulk_callback() */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

        /* Move to "Unregistering" phase as bulk was not unlinked yet. */
        ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);

        /* Do not wait for unlink to finish. */
        if (async)
                RETURN(0);

        for (;;) {
#ifdef __KERNEL__
		/* The wq argument is ignored by user-space wait_event macros */
		wait_queue_head_t *wq = (req->rq_set != NULL) ?
					&req->rq_set->set_waitq :
					&req->rq_reply_waitq;
#endif
                /* Network access will complete in finite time but the HUGE
                 * timeout lets us CWARN for visibility of sluggish NALs */
                lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
                                           cfs_time_seconds(1), NULL, NULL);
                rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
                if (rc == 0) {
                        ptlrpc_rqphase_move(req, req->rq_next_phase);
                        RETURN(1);
                }

                LASSERT(rc == -ETIMEDOUT);
                DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
                          desc);
        }
        RETURN(0);
}
Example #8
File: lov_log.c Project: Lezval/lustre
/* Add log records for each OSC that this object is striped over, and return
 * cookies for each one.  We _would_ have a nice abstraction here, except that
 * we need to keep cookies in stripe order, even if some are NULL, so that
 * the right cookies are passed back to the right OSTs at the client side.
 * Unset cookies should be all-zero (which will never occur naturally). */
static int lov_llog_origin_add(const struct lu_env *env,
			       struct llog_ctxt *ctxt,
			       struct llog_rec_hdr *rec,
			       struct lov_stripe_md *lsm,
			       struct llog_cookie *logcookies, int numcookies)
{
        struct obd_device *obd = ctxt->loc_obd;
        struct lov_obd *lov = &obd->u.lov;
        int i, rc = 0, cookies = 0;
        ENTRY;

        LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count,
                 "logcookies %p, numcookies %d lsm->lsm_stripe_count %d \n",
                 logcookies, numcookies, lsm->lsm_stripe_count);

        for (i = 0; i < lsm->lsm_stripe_count; i++) {
                struct lov_oinfo *loi = lsm->lsm_oinfo[i];
                struct obd_device *child =
                        lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd;
                struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx);

                /* fill mds unlink/setattr log record */
                switch (rec->lrh_type) {
                case MDS_UNLINK_REC: {
                        struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec;
                        lur->lur_oid = ostid_id(&loi->loi_oi);
                        lur->lur_oseq = (__u32)ostid_seq(&loi->loi_oi);
                        break;
                }
                case MDS_SETATTR64_REC: {
                        struct llog_setattr64_rec *lsr = (struct llog_setattr64_rec *)rec;
                        lsr->lsr_oi = loi->loi_oi;
                        break;
                }
                default:
                        break;
                }

		/* inject error in llog_obd_add() below */
		if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FAIL_LOV_LOG_ADD)) {
			llog_ctxt_put(cctxt);
			cctxt = NULL;
		}
		rc = llog_obd_add(env, cctxt, rec, NULL, logcookies + cookies,
				  numcookies - cookies);
                llog_ctxt_put(cctxt);
                if (rc < 0) {
                        CERROR("Can't add llog (rc = %d) for stripe %d\n",
                               rc, cookies);
                        memset(logcookies + cookies, 0,
                               sizeof(struct llog_cookie));
                        rc = 1; /* skip this cookie */
                }
		/* Note that rc is always 1 if llog_obd_add was successful */
                cookies += rc;
        }
        RETURN(cookies);
}
Example #9
File: mdt_xattr.c Project: LLNL/lustre
/* return EADATA length to the caller. negative value means error */
static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
{
        struct req_capsule     *pill = info->mti_pill;
        struct ptlrpc_request  *req = mdt_info_req(info);
        char                   *xattr_name;
        __u64                   valid = info->mti_body->valid;
        static const char       user_string[] = "user.";
        int                     size, rc;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK))
                RETURN(-ENOMEM);

        /* Determine how many bytes we need */
        if (valid & OBD_MD_FLXATTR) {
                xattr_name = req_capsule_client_get(pill, &RMF_NAME);
                if (!xattr_name)
                        RETURN(-EFAULT);

                if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) &&
                    !strncmp(xattr_name, user_string, sizeof(user_string) - 1))
                        RETURN(-EOPNOTSUPP);

                size = mo_xattr_get(info->mti_env,
                                    mdt_object_child(info->mti_object),
                                    &LU_BUF_NULL, xattr_name);
        } else if (valid & OBD_MD_FLXATTRLS) {
                size = mo_xattr_list(info->mti_env,
                                     mdt_object_child(info->mti_object),
                                     &LU_BUF_NULL);
        } else {
                CDEBUG(D_INFO, "Valid bits: "LPX64"\n", info->mti_body->valid);
                RETURN(-EINVAL);
        }

        if (size == -ENODATA) {
                size = 0;
        } else if (size < 0) {
                CERROR("Error geting EA size: %d\n", size);
                RETURN(size);
        }

        if (info->mti_body->eadatasize != 0 &&
            info->mti_body->eadatasize < size)
                RETURN(-ERANGE);

        req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER,
                             info->mti_body->eadatasize == 0 ? 0 : size);
        rc = req_capsule_server_pack(pill);
        if (rc) {
                LASSERT(rc < 0);
                RETURN(rc);
        }

        RETURN(size);
}
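
Passing &LU_BUF_NULL to mo_xattr_get()/mo_xattr_list() is a size probe: it returns the EA length without copying data, so the reply buffer can be sized before packing. The userspace xattr API uses the same two-pass pattern; a hedged analogue with getxattr(2):

#include <sys/xattr.h>
#include <stdlib.h>

static ssize_t read_xattr(const char *path, const char *name, char **out)
{
	ssize_t sz = getxattr(path, name, NULL, 0);	/* pass 1: size probe */

	if (sz < 0)
		return sz;
	*out = malloc(sz);
	if (*out == NULL)
		return -1;
	return getxattr(path, name, *out, sz);		/* pass 2: fetch */
}

A real caller would retry on -ERANGE, since the value can grow between the two calls.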
Example #10
/**
 * Allocate new fid on passed client @seq and save it to @fid.
 *
 * \param[in] env	pointer to the thread context
 * \param[in,out] seq	pointer to the client sequence manager
 * \param[out] fid	to hold the new allocated fid
 *
 * \retval		1 for notify the caller that sequence switch
 *			is performed to allow it to setup FLD for it.
 * \retval		0 for new FID allocated in current sequence.
 * \retval		Negative error number on failure.
 */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	wait_queue_t link;
	int rc;
	ENTRY;

	LASSERT(seq != NULL);
	LASSERT(fid != NULL);

	init_waitqueue_entry(&link, current);
	mutex_lock(&seq->lcs_mutex);

	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

	while (1) {
		u64 seqnr;

		if (unlikely(!fid_is_zero(&seq->lcs_fid) &&
			     fid_oid(&seq->lcs_fid) < seq->lcs_width)) {
			/* Just bump last allocated fid and return to caller. */
			seq->lcs_fid.f_oid++;
			rc = 0;
			break;
		}

		/* Release seq::lcs_mutex via seq_fid_alloc_prep() to avoid
		 * deadlock during seq_client_alloc_seq(). */
		rc = seq_fid_alloc_prep(seq, &link);
		if (rc)
			continue;

		rc = seq_client_alloc_seq(env, seq, &seqnr);
		/* Re-take seq::lcs_mutex via seq_fid_alloc_fini(). */
		seq_fid_alloc_fini(seq, rc ? 0 : seqnr, false);
		if (rc) {
			CERROR("%s: Can't allocate new sequence: rc = %d\n",
			       seq->lcs_name, rc);
			mutex_unlock(&seq->lcs_mutex);

			RETURN(rc);
		}

		rc = 1;
		break;
	}

	*fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

	CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name,  PFID(fid));

	RETURN(rc);
}
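
Per the retval contract in the doc comment, rc == 1 is not an error: it signals a sequence switch so the caller can set up FLD for the new sequence. A hypothetical caller sketch (register_new_sequence() is illustrative, not a real API):

static int alloc_fid_example(const struct lu_env *env,
			     struct lu_client_seq *seq, struct lu_fid *fid)
{
	int rc = seq_client_alloc_fid(env, seq, fid);

	if (rc < 0)
		return rc;			/* allocation failed */
	if (rc == 1)
		register_new_sequence(fid_seq(fid));	/* hypothetical FLD setup */
	return 0;				/* fid is ready to use */
}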
Example #11
/**
 * Pack a link_ea_entry.
 * All elements are stored as chars to avoid alignment issues.
 * Numbers are always big-endian
 * \retval record length
 */
int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname,
		      const struct lu_fid *pfid)
{
	struct lu_fid   tmpfid;
	int             reclen;

	tmpfid = *pfid;
	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_MUL_REF))
		tmpfid.f_oid--;
	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH))
		tmpfid.f_ver = ~0;
	fid_cpu_to_be(&tmpfid, &tmpfid);
	memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid));
	memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen);
	reclen = sizeof(struct link_ea_entry) + lname->ln_namelen;

	lee->lee_reclen[0] = (reclen >> 8) & 0xff;
	lee->lee_reclen[1] = reclen & 0xff;
	return reclen;
}
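
Because lee_reclen is two explicit big-endian bytes (again to avoid alignment assumptions), unpacking mirrors the last two lines above. A sketch of the inverse (illustrative helper, not the real linkea_entry_unpack()):

static inline int lee_get_reclen(const unsigned char reclen[2])
{
	return (reclen[0] << 8) | reclen[1];	/* big-endian 16-bit length */
}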
Example #12
File: niobuf.c Project: AK111111/linux
/**
 * Register request buffer descriptor for request receiving.
 */
int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
{
	struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service;
	static lnet_process_id_t match_id = {LNET_NID_ANY, LNET_PID_ANY};
	int rc;
	lnet_md_t md;
	lnet_handle_me_t me_h;

	CDEBUG(D_NET, "LNetMEAttach: portal %d\n",
	       service->srv_req_portal);

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD))
		return -ENOMEM;

	/* NB: CPT affinity service should use new LNet flag LNET_INS_LOCAL,
	 * which means buffer can only be attached on local CPT, and LND
	 * threads can find it by grabbing a local lock
	 */
	rc = LNetMEAttach(service->srv_req_portal,
			  match_id, 0, ~0, LNET_UNLINK,
			  rqbd->rqbd_svcpt->scp_cpt >= 0 ?
			  LNET_INS_LOCAL : LNET_INS_AFTER, &me_h);
	if (rc != 0) {
		CERROR("LNetMEAttach failed: %d\n", rc);
		return -ENOMEM;
	}

	LASSERT(rqbd->rqbd_refcount == 0);
	rqbd->rqbd_refcount = 1;

	md.start = rqbd->rqbd_buffer;
	md.length = service->srv_buf_size;
	md.max_size = service->srv_max_req_size;
	md.threshold = LNET_MD_THRESH_INF;
	md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE;
	md.user_ptr = &rqbd->rqbd_cbid;
	md.eq_handle = ptlrpc_eq_h;

	rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h);
	if (rc == 0)
		return 0;

	CERROR("LNetMDAttach failed: %d;\n", rc);
	LASSERT(rc == -ENOMEM);
	rc = LNetMEUnlink(me_h);
	LASSERT(rc == 0);
	rqbd->rqbd_refcount = 0;

	return -ENOMEM;
}
Example #13
/**
 *  delete an orphan \a obj from orphan index.
 *  \param obj file or directory.
 *  \param th  transaction for index deletion and object destruction.
 *
 *  \pre obj->mod_count == 0 && ORPHAN_OBJ is set for obj.
 *
 *  \retval 0  success
 *  \retval  -ve index operation error.
 */
int mdd_orphan_delete(const struct lu_env *env, struct mdd_object *obj,
		      struct thandle *th)
{
	struct mdd_device *mdd = mdo2mdd(&obj->mod_obj);
	struct dt_object *dor = mdd->mdd_orphans;
	struct dt_key *key;
	int rc = 0;

	ENTRY;

	LASSERT(mdd_write_locked(env, obj) != 0);
	LASSERT(obj->mod_flags & ORPHAN_OBJ);
	LASSERT(obj->mod_count == 0);

	LASSERT(dor);

	key = mdd_orphan_key_fill(env, mdo2fid(obj));
	dt_write_lock(env, mdd->mdd_orphans, MOR_TGT_ORPHAN);

	if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ORPHAN_DELETE))
		goto ref_del;

	rc = dt_delete(env, mdd->mdd_orphans, key, th);
	if (rc == -ENOENT) {
		key = mdd_orphan_key_fill_20(env, mdo2fid(obj));
		rc = dt_delete(env, mdd->mdd_orphans, key, th);
	}

ref_del:
	if (!rc) {
		/* lov objects will be destroyed by caller */
		mdo_ref_del(env, obj, th);
		if (S_ISDIR(mdd_object_type(obj))) {
			mdo_ref_del(env, obj, th);
			dt_ref_del(env, mdd->mdd_orphans, th);
		}
		obj->mod_flags &= ~ORPHAN_OBJ;
	} else {
		CERROR("%s: could not delete orphan object "DFID": rc = %d\n",
		       mdd2obd_dev(mdd)->obd_name, PFID(mdo2fid(obj)), rc);
	}

	dt_write_unlock(env, mdd->mdd_orphans);
	RETURN(rc);
}
Example #14
/*
 * This function implements new seq allocation algorithm using async
 * updates to seq file on disk. ref bug 18857 for details.
 * The following variables keep track of this process:
 *
 * lss_space; - available lss_space
 * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use
 * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but possibly
 *                    not yet committed
 *
 * when lss_lowater_set reaches the end it is replaced with the hiwater one,
 * and a write operation is initiated to allocate a new hiwater range.
 * If the last seq write operation is still not committed, the current
 * operation is flagged as a sync write op.
 */
static int range_alloc_set(const struct lu_env *env,
			   struct lu_seq_range *out,
			   struct lu_server_seq *seq)
{
	struct lu_seq_range *space = &seq->lss_space;
	struct lu_seq_range *loset = &seq->lss_lowater_set;
	struct lu_seq_range *hiset = &seq->lss_hiwater_set;
	int rc = 0;

	if (lu_seq_range_is_zero(loset))
		__seq_set_init(env, seq);

	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */
		loset->lsr_start = loset->lsr_end;

	if (lu_seq_range_is_exhausted(loset)) {
		/* reached high water mark. */
		struct lu_device *dev = seq->lss_site->ss_lu->ls_top_dev;
		int obd_num_clients = dev->ld_obd->obd_num_exports;
		__u64 set_sz;

		/* calculate new seq width based on number of clients */
		set_sz = max(seq->lss_set_width,
			     obd_num_clients * seq->lss_width);
		set_sz = min(lu_seq_range_space(space), set_sz);

		/* Switch to hiwater range now */
		*loset = *hiset;
		/* allocate new hiwater range */
		range_alloc(hiset, space, set_sz);

		/* update ondisk seq with new *space */
		rc = seq_store_update(env, seq, NULL, seq->lss_need_sync);
	}

	LASSERTF(!lu_seq_range_is_exhausted(loset) ||
		 lu_seq_range_is_sane(loset),
		 DRANGE"\n", PRANGE(loset));

	if (rc == 0)
		range_alloc(out, loset, seq->lss_width);

	RETURN(rc);
}
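
The hand-off when the lowater set runs dry can be pictured in isolation: the hiwater set becomes the new lowater set and a fresh hiwater set is carved from the remaining space. A self-contained sketch, assuming ranges are modeled as half-open [start, end) pairs:

#include <stdint.h>

struct seq_range { uint64_t start, end; };

/* Promote the pre-allocated hiwater set and reserve the next one. */
static void watermark_switch(struct seq_range *lo, struct seq_range *hi,
			     struct seq_range *space, uint64_t set_sz)
{
	*lo = *hi;			/* hiwater becomes the active set */
	hi->start = space->start;
	hi->end = space->start + set_sz;
	space->start += set_sz;		/* space shrinks from the front */
}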
Example #15
/**
 * FLR: verify the layout version of object.
 *
 * \param[in] env	execution environment
 * \param[in] fo	OFD object
 * \param[in] oa	OBDO structure with layout version
 *
 * \retval		0 on successful verification
 * \retval		-EINPROGRESS layout version is in transfer
 * \retval		-ESTALE the layout version on client is stale
 */
int ofd_verify_layout_version(const struct lu_env *env,
			      struct ofd_object *fo, const struct obdo *oa)
{
	__u32 layout_version;
	int rc;
	ENTRY;

	if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_OST_SKIP_LV_CHECK)))
		GOTO(out, rc = 0);

	rc = ofd_object_ff_load(env, fo);
	if (rc < 0) {
		if (rc == -ENODATA)
			rc = -EINPROGRESS;
		GOTO(out, rc);
	}

	layout_version = fo->ofo_ff.ff_layout_version;
	if (oa->o_layout_version >= layout_version &&
	    oa->o_layout_version <= layout_version + fo->ofo_ff.ff_range)
		GOTO(out, rc = 0);

	/* normal traffic, decide whether to return ESTALE or EINPROGRESS */
	layout_version &= ~LU_LAYOUT_RESYNC;

	/* this update is not legitimate */
	if ((oa->o_layout_version & ~LU_LAYOUT_RESYNC) <= layout_version)
		GOTO(out, rc = -ESTALE);

	/* layout version may not be transmitted yet */
	if ((oa->o_layout_version & ~LU_LAYOUT_RESYNC) > layout_version)
		GOTO(out, rc = -EINPROGRESS);

	EXIT;

out:
	CDEBUG(D_INODE, DFID " verify layout version: %u vs. %u/%u, rc: %d\n",
	       PFID(lu_object_fid(&fo->ofo_obj.do_lu)),
	       oa->o_layout_version, fo->ofo_ff.ff_layout_version,
	       fo->ofo_ff.ff_range, rc);
	return rc;

}
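
The acceptance test in the middle of the function is a closed-interval check: the client's version is current when it lies in [ff_layout_version, ff_layout_version + ff_range]. A minimal sketch of just that test, ignoring the LU_LAYOUT_RESYNC bit handling:

#include <stdint.h>
#include <stdbool.h>

static bool layout_version_current(uint32_t client_lv, uint32_t server_lv,
				   uint32_t range)
{
	return client_lv >= server_lv && client_lv <= server_lv + range;
}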
Example #16
File: mdt_reint.c Project: hpc/lustre
static int mdt_reint_create(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct ptlrpc_request   *req = mdt_info_req(info);
        int                     rc;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE))
                RETURN(err_serious(-ESTALE));

        if (info->mti_dlm_req)
                ldlm_request_cancel(mdt_info_req(info), info->mti_dlm_req, 0);

        switch (info->mti_attr.ma_attr.la_mode & S_IFMT) {
        case S_IFDIR:{
                /* Cross-ref case. */
                /* TODO: we can add LPROC_MDT_CROSS for cross-ref stats */
                if (info->mti_cross_ref) {
                        rc = mdt_md_mkobj(info);
                } else {
                        LASSERT(info->mti_rr.rr_namelen > 0);
                        mdt_counter_incr(req->rq_export, LPROC_MDT_MKDIR);
                        rc = mdt_md_create(info);
                }
                break;
        }
        case S_IFREG:
        case S_IFLNK:
        case S_IFCHR:
        case S_IFBLK:
        case S_IFIFO:
        case S_IFSOCK:{
                /* Special file should stay on the same node as parent. */
                LASSERT(info->mti_rr.rr_namelen > 0);
                mdt_counter_incr(req->rq_export, LPROC_MDT_MKNOD);
                rc = mdt_md_create(info);
                break;
        }
        default:
                rc = err_serious(-EOPNOTSUPP);
        }
        RETURN(rc);
}
Example #17
static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj,
			     struct cl_req_attr *attr)
{
	struct inode *inode;
	struct obdo  *oa;
	u64 valid_flags = OBD_MD_FLTYPE;

	oa = attr->cra_oa;
	inode = vvp_object_inode(obj);

	if (attr->cra_type == CRT_WRITE)
		valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
			       OBD_MD_FLUID | OBD_MD_FLGID;
	obdo_from_inode(oa, inode, valid_flags & attr->cra_flags);
	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
		oa->o_parent_oid++;
	memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE);
}
Example #18
static int mdt_reint_create(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct ptlrpc_request   *req = mdt_info_req(info);
        int                     rc;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE))
                RETURN(err_serious(-ESTALE));

        if (info->mti_dlm_req)
                ldlm_request_cancel(mdt_info_req(info), info->mti_dlm_req, 0);

	LASSERT(info->mti_rr.rr_namelen > 0);
        switch (info->mti_attr.ma_attr.la_mode & S_IFMT) {
	case S_IFDIR:
		mdt_counter_incr(req, LPROC_MDT_MKDIR);
		break;
        case S_IFREG:
        case S_IFLNK:
        case S_IFCHR:
        case S_IFBLK:
        case S_IFIFO:
	case S_IFSOCK:
		/* Special file should stay on the same node as parent. */
		mdt_counter_incr(req, LPROC_MDT_MKNOD);
		break;
	default:
		CERROR("%s: Unsupported mode %o\n",
		       mdt2obd_dev(info->mti_mdt)->obd_name,
		       info->mti_attr.ma_attr.la_mode);
		RETURN(err_serious(-EOPNOTSUPP));
	}

	rc = mdt_md_create(info);
	RETURN(rc);
}
Example #19
/**
 * Match client and OST server connection feature flags.
 *
 * Compute the compatibility flags for a connection request based on
 * features mutually supported by client and server.
 *
 * The obd_export::exp_connect_data.ocd_connect_flags field in \a exp
 * must not be updated here, otherwise a partially initialized value may
 * be exposed. After the connection request is successfully processed,
 * the top-level tgt_connect() request handler atomically updates the export
 * connect flags from the obd_connect_data::ocd_connect_flags field of the
 * reply. \see tgt_connect().
 *
 * \param[in] env		execution environment
 * \param[in] exp		the obd_export associated with this
 *				client/target pair
 * \param[in] data		stores data for this connect request
 * \param[in] new_connection	is this connection new or not
 *
 * \retval		0 if success
 * \retval		-EPROTO client and server feature requirements are
 *			incompatible
 * \retval		-EBADF  OST index in connect request doesn't match
 *			real OST index
 */
static int ofd_parse_connect_data(const struct lu_env *env,
				  struct obd_export *exp,
				  struct obd_connect_data *data,
				  bool new_connection)
{
	struct ofd_device		 *ofd = ofd_exp(exp);
	struct filter_export_data	 *fed = &exp->exp_filter_data;

	if (!data)
		RETURN(0);

	CDEBUG(D_RPCTRACE, "%s: cli %s/%p ocd_connect_flags: "LPX64
	       " ocd_version: %x ocd_grant: %d ocd_index: %u"
	       " ocd_group %u\n",
	       exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp,
	       data->ocd_connect_flags, data->ocd_version,
	       data->ocd_grant, data->ocd_index, data->ocd_group);

	if (fed->fed_group != 0 && fed->fed_group != data->ocd_group) {
		CWARN("!!! This export (nid %s) used object group %d "
		      "earlier; now it's trying to use group %d!  This could "
		      "be a bug in the MDS. Please report to "
		      "https://jira.hpdd.intel.com/\n",
		      obd_export_nid2str(exp), fed->fed_group,
		      data->ocd_group);
		RETURN(-EPROTO);
	}
	fed->fed_group = data->ocd_group;

	data->ocd_connect_flags &= OST_CONNECT_SUPPORTED;
	data->ocd_version = LUSTRE_VERSION_CODE;

	/* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
	if (data->ocd_connect_flags & OBD_CONNECT_MDS)
		CDEBUG(D_HA, "%s: Received MDS connection for group %u\n",
		       exp->exp_obd->obd_name, data->ocd_group);
	else if (data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN)
		RETURN(-EPROTO);

	if (ofd_grant_param_supp(exp)) {
		exp->exp_filter_data.fed_pagesize = data->ocd_blocksize;
		/* ocd_{blocksize,inodespace} are log2 values */
		data->ocd_blocksize  = ofd->ofd_blockbits;
		data->ocd_inodespace = ofd->ofd_dt_conf.ddp_inodespace;
		/* ocd_grant_extent is in 1K blocks */
		data->ocd_grant_extent = ofd->ofd_dt_conf.ddp_grant_frag >> 10;
	}

	if (data->ocd_connect_flags & OBD_CONNECT_GRANT)
		data->ocd_grant = ofd_grant_connect(env, exp, data->ocd_grant,
						    new_connection);

	if (data->ocd_connect_flags & OBD_CONNECT_INDEX) {
		struct lr_server_data *lsd = &ofd->ofd_lut.lut_lsd;
		int		       index = lsd->lsd_osd_index;

		if (index != data->ocd_index) {
			LCONSOLE_ERROR_MSG(0x136, "Connection from %s to index"
					   " %u doesn't match actual OST index"
					   " %u in last_rcvd file, bad "
					   "configuration?\n",
					   obd_export_nid2str(exp), index,
					   data->ocd_index);
			RETURN(-EBADF);
		}
		if (!(lsd->lsd_feature_compat & OBD_COMPAT_OST)) {
			/* this will only happen on the first connect */
			lsd->lsd_feature_compat |= OBD_COMPAT_OST;
			/* sync is not needed here as lut_client_add will
			 * set exp_need_sync flag */
			tgt_server_data_update(env, &ofd->ofd_lut, 0);
		}
	}
	if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_SIZE)) {
		data->ocd_brw_size = 65536;
	} else if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) {
		data->ocd_brw_size = min(data->ocd_brw_size,
					 (__u32)DT_MAX_BRW_SIZE);
		if (data->ocd_brw_size == 0) {
			CERROR("%s: cli %s/%p ocd_connect_flags: "LPX64
			       " ocd_version: %x ocd_grant: %d ocd_index: %u "
			       "ocd_brw_size is unexpectedly zero, "
			       "network data corruption?"
			       "Refusing connection of this client\n",
			       exp->exp_obd->obd_name,
			       exp->exp_client_uuid.uuid,
			       exp, data->ocd_connect_flags, data->ocd_version,
			       data->ocd_grant, data->ocd_index);
			RETURN(-EPROTO);
		}
	}

	if (data->ocd_connect_flags & OBD_CONNECT_CKSUM) {
		__u32 cksum_types = data->ocd_cksum_types;

		/* The client set in ocd_cksum_types the checksum types it
		 * supports. We have to mask off the algorithms that we don't
		 * support */
		data->ocd_cksum_types &= cksum_types_supported_server();

		if (unlikely(data->ocd_cksum_types == 0)) {
			CERROR("%s: Connect with checksum support but no "
			       "ocd_cksum_types is set\n",
			       exp->exp_obd->obd_name);
			RETURN(-EPROTO);
		}

		CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return "
		       "%x\n", exp->exp_obd->obd_name, obd_export_nid2str(exp),
		       cksum_types, data->ocd_cksum_types);
	} else {
		/* This client does not support OBD_CONNECT_CKSUM
		 * fall back to CRC32 */
		CDEBUG(D_RPCTRACE, "%s: cli %s does not support "
		       "OBD_CONNECT_CKSUM, CRC32 will be used\n",
		       exp->exp_obd->obd_name, obd_export_nid2str(exp));
	}

	if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
		data->ocd_maxbytes = ofd->ofd_dt_conf.ddp_maxbytes;

	if (OCD_HAS_FLAG(data, PINGLESS)) {
		if (ptlrpc_pinger_suppress_pings()) {
			spin_lock(&exp->exp_obd->obd_dev_lock);
			list_del_init(&exp->exp_obd_chain_timed);
			spin_unlock(&exp->exp_obd->obd_dev_lock);
		} else {
			data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
		}
	}

	RETURN(0);
}
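
The checksum branch is a textbook feature negotiation: the server intersects the client's advertised types with its own supported set and treats an empty intersection as a protocol error. The same logic in isolation (hedged sketch; the helper is illustrative, cksum_types_supported_server() above is the real call):

#include <stdint.h>

/* Returns the agreed mask; 0 means no common type, which the caller
 * maps to -EPROTO as in the code above. */
static uint32_t negotiate_cksum(uint32_t client_types, uint32_t server_types)
{
	return client_types & server_types;
}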
Example #20
File: mdt_xattr.c Project: LLNL/lustre
int mdt_reint_setxattr(struct mdt_thread_info *info,
                       struct mdt_lock_handle *unused)
{
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_ucred         *uc  = mdt_ucred(info);
        struct mdt_lock_handle  *lh;
        const struct lu_env     *env  = info->mti_env;
        struct lu_buf           *buf  = &info->mti_buf;
        struct mdt_reint_record *rr   = &info->mti_rr;
        struct md_attr          *ma = &info->mti_attr;
        struct lu_attr          *attr = &info->mti_attr.ma_attr;
        struct mdt_object       *obj;
        struct md_object        *child;
        __u64                    valid = attr->la_valid;
        const char              *xattr_name = rr->rr_name;
        int                      xattr_len = rr->rr_eadatalen;
        __u64                    lockpart;
        int                      rc;
        posix_acl_xattr_header  *new_xattr = NULL;
        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
        __u32                    perm;
        ENTRY;

        CDEBUG(D_INODE, "setxattr for "DFID"\n", PFID(rr->rr_fid1));

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
                RETURN(err_serious(-ENOMEM));

        CDEBUG(D_INODE, "%s xattr %s\n",
               valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name);

        rc = mdt_init_ucred_reint(info);
        if (rc != 0)
                RETURN(rc);

        if (valid & OBD_MD_FLRMTRSETFACL) {
                if (unlikely(!remote))
                        GOTO(out, rc = err_serious(-EINVAL));

                perm = mdt_identity_get_perm(uc->mu_identity, remote,
                                             req->rq_peer.nid);
                if (!(perm & CFS_RMTACL_PERM))
                        GOTO(out, rc = err_serious(-EPERM));
        }

        if (strncmp(xattr_name, XATTR_USER_PREFIX,
                    sizeof(XATTR_USER_PREFIX) - 1) == 0) {
                if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR))
                        GOTO(out, rc = -EOPNOTSUPP);
                if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
                        GOTO(out, rc = -EACCES);
                if (strcmp(xattr_name, XATTR_NAME_LMA) == 0)
                        GOTO(out, rc = 0);
                if (strcmp(xattr_name, XATTR_NAME_LINK) == 0)
                        GOTO(out, rc = 0);
        } else if ((valid & OBD_MD_FLXATTR) &&
                   (strncmp(xattr_name, XATTR_NAME_ACL_ACCESS,
                            sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0 ||
                    strncmp(xattr_name, XATTR_NAME_ACL_DEFAULT,
                            sizeof(XATTR_NAME_ACL_DEFAULT) - 1) == 0)) {
                /* currently Lustre limits the ACL access size */
                if (xattr_len > LUSTRE_POSIX_ACL_MAX_SIZE)
                        GOTO(out, rc = -ERANGE);
        }

        lockpart = MDS_INODELOCK_UPDATE;
        /* Revoke all clients' lookup lock, since the access
         * permissions for this inode is changed when ACL_ACCESS is
         * set. This isn't needed for ACL_DEFAULT, since that does
         * not change the access permissions of this inode, nor any
         * other existing inodes. It is setting the ACLs inherited
         * by new directories/files at create time. */
        if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
                lockpart |= MDS_INODELOCK_LOOKUP;

        lh = &info->mti_lh[MDT_LH_PARENT];
        /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
         * to cancel them. */
        mdt_lock_reg_init(lh, LCK_EX);
        obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
        if (IS_ERR(obj))
                GOTO(out, rc =  PTR_ERR(obj));

        info->mti_mos = obj;
        rc = mdt_version_get_check_save(info, obj, 0);
        if (rc)
                GOTO(out_unlock, rc);

        if (unlikely(!(valid & OBD_MD_FLCTIME))) {
                /* This isn't strictly an error, but all current clients
                 * should set OBD_MD_FLCTIME when setting attributes. */
                CWARN("%s: client miss to set OBD_MD_FLCTIME when "
                      "setxattr %s: [object "DFID"] [valid "LPU64"]\n",
                      info->mti_exp->exp_obd->obd_name, xattr_name,
                      PFID(rr->rr_fid1), valid);
                attr->la_ctime = cfs_time_current_sec();
        }
        attr->la_valid = LA_CTIME;
        child = mdt_object_child(obj);
        if (valid & OBD_MD_FLXATTR) {
                char *xattr = (void *)rr->rr_eadata;

                if (xattr_len > 0) {
                        int flags = 0;

                        if (valid & OBD_MD_FLRMTLSETFACL) {
                                if (unlikely(!remote))
                                        GOTO(out_unlock, rc = -EINVAL);

                                xattr_len = mdt_rmtlsetfacl(info, child,
                                                xattr_name,
                                                (ext_acl_xattr_header *)xattr,
                                                &new_xattr);
                                if (xattr_len < 0)
                                        GOTO(out_unlock, rc = xattr_len);

                                xattr = (char *)new_xattr;
                        }

                        if (attr->la_flags & XATTR_REPLACE)
                                flags |= LU_XATTR_REPLACE;

                        if (attr->la_flags & XATTR_CREATE)
                                flags |= LU_XATTR_CREATE;

                        mdt_fail_write(env, info->mti_mdt->mdt_bottom,
                                       OBD_FAIL_MDS_SETXATTR_WRITE);

                        buf->lb_buf = xattr;
                        buf->lb_len = xattr_len;
                        rc = mo_xattr_set(env, child, buf, xattr_name, flags);
                        /* update ctime after xattr changed */
                        if (rc == 0) {
                                ma->ma_attr_flags |= MDS_PERM_BYPASS;
                                mo_attr_set(env, child, ma);
                        }
                }
        } else if (valid & OBD_MD_FLXATTRRM) {
                rc = mo_xattr_del(env, child, xattr_name);
                /* update ctime after xattr changed */
                if (rc == 0) {
                        ma->ma_attr_flags |= MDS_PERM_BYPASS;
                        mo_attr_set(env, child, ma);
                }
        } else {
                CDEBUG(D_INFO, "valid bits: "LPX64"\n", valid);
                rc = -EINVAL;
        }
        if (rc == 0)
                mdt_counter_incr(req->rq_export, LPROC_MDT_SETXATTR);

        EXIT;
out_unlock:
        mdt_object_unlock_put(info, obj, lh, rc);
        if (unlikely(new_xattr != NULL))
                lustre_posix_acl_xattr_free(new_xattr, xattr_len);
out:
        mdt_exit_ucred(info);
        return rc;
}
Example #21
/* return EADATA length to the caller. negative value means error */
static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
{
        struct req_capsule     *pill = info->mti_pill;
        struct ptlrpc_request  *req = mdt_info_req(info);
        char                   *xattr_name;
        __u64                   valid;
        static const char       user_string[] = "user.";
        int                     size, rc;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK))
                RETURN(-ENOMEM);

	valid = info->mti_body->valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);

        /* Determine how many bytes we need */
        if (valid == OBD_MD_FLXATTR) {
                xattr_name = req_capsule_client_get(pill, &RMF_NAME);
                if (!xattr_name)
                        RETURN(-EFAULT);

		if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_XATTR) &&
		    !strncmp(xattr_name, user_string, sizeof(user_string) - 1))
			RETURN(-EOPNOTSUPP);

                size = mo_xattr_get(info->mti_env,
                                    mdt_object_child(info->mti_object),
                                    &LU_BUF_NULL, xattr_name);
        } else if (valid == OBD_MD_FLXATTRLS) {
                size = mo_xattr_list(info->mti_env,
                                     mdt_object_child(info->mti_object),
                                     &LU_BUF_NULL);
	} else if (valid == OBD_MD_FLXATTRALL) {
		/* N.B. eadatasize = 0 is not valid for FLXATTRALL */
		/* We could calculate accurate sizes, but this would
		 * introduce a lot of overhead, let's do it later... */
		size = info->mti_body->eadatasize;
		req_capsule_set_size(pill, &RMF_EAVALS, RCL_SERVER, size);
		req_capsule_set_size(pill, &RMF_EAVALS_LENS, RCL_SERVER, size);
        } else {
                CDEBUG(D_INFO, "Valid bits: "LPX64"\n", info->mti_body->valid);
                RETURN(-EINVAL);
        }

        if (size == -ENODATA) {
                size = 0;
        } else if (size < 0) {
                CERROR("Error geting EA size: %d\n", size);
                RETURN(size);
        }

        req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER,
                             info->mti_body->eadatasize == 0 ? 0 : size);
        rc = req_capsule_server_pack(pill);
        if (rc) {
                LASSERT(rc < 0);
                RETURN(rc);
        }

        RETURN(size);
}
Example #22
/**
 * Implementation of ldlm_valblock_ops::lvbo_init for OFD.
 *
 * This function allocates and initializes new LVB data for the given
 * LDLM resource if it is not allocated yet. New LVB is filled with attributes
 * of the object associated with that resource. Function does nothing if LVB
 * for the given LDLM resource is allocated already.
 *
 * Called with res->lr_lvb_sem held.
 *
 * \param[in] res	LDLM resource
 *
 * \retval		0 on successful setup
 * \retval		negative value on error
 */
static int ofd_lvbo_init(struct ldlm_resource *res)
{
	struct ost_lvb		*lvb;
	struct ofd_device	*ofd;
	struct ofd_object	*fo;
	struct ofd_thread_info	*info;
	struct lu_env		 env;
	int			 rc = 0;

	ENTRY;

	LASSERT(res);
	LASSERT(mutex_is_locked(&res->lr_lvb_mutex));

	if (res->lr_lvb_data != NULL)
		RETURN(0);

	ofd = ldlm_res_to_ns(res)->ns_lvbp;
	LASSERT(ofd != NULL);

	if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_OST_LVB))
		RETURN(-ENOMEM);

	rc = lu_env_init(&env, LCT_DT_THREAD);
	if (rc)
		RETURN(rc);

	OBD_ALLOC_PTR(lvb);
	if (lvb == NULL)
		GOTO(out_env, rc = -ENOMEM);

	res->lr_lvb_data = lvb;
	res->lr_lvb_len = sizeof(*lvb);

	info = ofd_info_init(&env, NULL);
	ost_fid_from_resid(&info->fti_fid, &res->lr_name,
			   ofd->ofd_lut.lut_lsd.lsd_osd_index);
	fo = ofd_object_find(&env, ofd, &info->fti_fid);
	if (IS_ERR(fo))
		GOTO(out_lvb, rc = PTR_ERR(fo));

	rc = ofd_attr_get(&env, fo, &info->fti_attr);
	if (rc)
		GOTO(out_obj, rc);

	lvb->lvb_size = info->fti_attr.la_size;
	lvb->lvb_blocks = info->fti_attr.la_blocks;
	lvb->lvb_mtime = info->fti_attr.la_mtime;
	lvb->lvb_atime = info->fti_attr.la_atime;
	lvb->lvb_ctime = info->fti_attr.la_ctime;

	CDEBUG(D_DLMTRACE, "res: "DFID" initial lvb size: "LPU64", "
	       "mtime: "LPX64", blocks: "LPX64"\n",
	       PFID(&info->fti_fid), lvb->lvb_size,
	       lvb->lvb_mtime, lvb->lvb_blocks);

	EXIT;
out_obj:
	ofd_object_put(&env, fo);
out_lvb:
	if (rc != 0)
		OST_LVB_SET_ERR(lvb->lvb_blocks, rc);
out_env:
	lu_env_fini(&env);
	/* Don't free lvb data on lookup error */
	return rc;
}
Example #23
/* Allocate new fid on passed client @seq and save it to @fid. */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	wait_queue_t link;
	int rc;
	ENTRY;

	LASSERT(seq != NULL);
	LASSERT(fid != NULL);

	init_waitqueue_entry_current(&link);
	mutex_lock(&seq->lcs_mutex);

	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

        while (1) {
		u64 seqnr;

                if (!fid_is_zero(&seq->lcs_fid) &&
                    fid_oid(&seq->lcs_fid) < seq->lcs_width) {
                        /* Just bump last allocated fid and return to caller. */
                        seq->lcs_fid.f_oid += 1;
                        rc = 0;
                        break;
                }

                rc = seq_fid_alloc_prep(seq, &link);
                if (rc)
                        continue;

                rc = seq_client_alloc_seq(env, seq, &seqnr);
                if (rc) {
                        CERROR("%s: Can't allocate new sequence, "
                               "rc %d\n", seq->lcs_name, rc);
                        seq_fid_alloc_fini(seq);
			mutex_unlock(&seq->lcs_mutex);
                        RETURN(rc);
                }

                CDEBUG(D_INFO, "%s: Switch to sequence "
                       "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr);

                seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
                seq->lcs_fid.f_seq = seqnr;
                seq->lcs_fid.f_ver = 0;

                /*
                 * Inform caller that sequence switch is performed to allow it
                 * to setup FLD for it.
                 */
                rc = 1;

                seq_fid_alloc_fini(seq);
                break;
        }

        *fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

        CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name,  PFID(fid));
        RETURN(rc);
}
Example #24
/**
 * Callback handler for receiving incoming completion ASTs.
 *
 * This only can happen on client side.
 */
static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
				    struct ldlm_namespace *ns,
				    struct ldlm_request *dlm_req,
				    struct ldlm_lock *lock)
{
	int lvb_len;
	LIST_HEAD(ast_list);
	int rc = 0;

	LDLM_DEBUG(lock, "client completion callback handler START");

	if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
		int to = cfs_time_seconds(1);
		while (to > 0) {
			schedule_timeout_and_set_state(
				TASK_INTERRUPTIBLE, to);
			if (lock->l_granted_mode == lock->l_req_mode ||
			    lock->l_flags & LDLM_FL_DESTROYED)
				break;
		}
	}

	lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT);
	if (lvb_len < 0) {
		LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", lvb_len);
		GOTO(out, rc = lvb_len);
	} else if (lvb_len > 0) {
		if (lock->l_lvb_len > 0) {
			/* for extent lock, lvb contains ost_lvb{}. */
			LASSERT(lock->l_lvb_data != NULL);

			if (unlikely(lock->l_lvb_len < lvb_len)) {
				LDLM_ERROR(lock, "Replied LVB is larger than "
					   "expectation, expected = %d, "
					   "replied = %d",
					   lock->l_lvb_len, lvb_len);
				GOTO(out, rc = -EINVAL);
			}
		} else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has
						     * variable length */
			void *lvb_data;

			OBD_ALLOC(lvb_data, lvb_len);
			if (lvb_data == NULL) {
				LDLM_ERROR(lock, "No memory: %d.\n", lvb_len);
				GOTO(out, rc = -ENOMEM);
			}

			lock_res_and_lock(lock);
			LASSERT(lock->l_lvb_data == NULL);
			lock->l_lvb_data = lvb_data;
			lock->l_lvb_len = lvb_len;
			unlock_res_and_lock(lock);
		}
	}

	lock_res_and_lock(lock);
	if ((lock->l_flags & LDLM_FL_DESTROYED) ||
	    lock->l_granted_mode == lock->l_req_mode) {
		/* bug 11300: the lock has already been granted */
		unlock_res_and_lock(lock);
		LDLM_DEBUG(lock, "Double grant race happened");
		GOTO(out, rc = 0);
	}

	/* If we receive the completion AST before the actual enqueue returned,
	 * then we might need to switch lock modes, resources, or extents. */
	if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
		lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
		LDLM_DEBUG(lock, "completion AST, new lock mode");
	}

	if (lock->l_resource->lr_type != LDLM_PLAIN) {
		ldlm_convert_policy_to_local(req->rq_export,
					  dlm_req->lock_desc.l_resource.lr_type,
					  &dlm_req->lock_desc.l_policy_data,
					  &lock->l_policy_data);
		LDLM_DEBUG(lock, "completion AST, new policy data");
	}

	ldlm_resource_unlink_lock(lock);
	if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
		   &lock->l_resource->lr_name,
		   sizeof(lock->l_resource->lr_name)) != 0) {
		unlock_res_and_lock(lock);
		rc = ldlm_lock_change_resource(ns, lock,
				&dlm_req->lock_desc.l_resource.lr_name);
		if (rc < 0) {
			LDLM_ERROR(lock, "Failed to allocate resource");
			GOTO(out, rc);
		}
		LDLM_DEBUG(lock, "completion AST, new resource");
		CERROR("change resource!\n");
		lock_res_and_lock(lock);
	}

	if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
		/* BL_AST locks are not needed in LRU.
		 * Let ldlm_cancel_lru() be fast. */
		ldlm_lock_remove_from_lru(lock);
		lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
		LDLM_DEBUG(lock, "completion AST includes blocking AST");
	}

	if (lock->l_lvb_len > 0) {
		rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT,
				   lock->l_lvb_data, lvb_len);
		if (rc < 0) {
			unlock_res_and_lock(lock);
			GOTO(out, rc);
		}
	}

	ldlm_grant_lock(lock, &ast_list);
	unlock_res_and_lock(lock);

	LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");

	/* Let Enqueue to call osc_lock_upcall() and initialize
	 * l_ast_data */
	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2);

	ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST);

	LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)",
			  lock);
	GOTO(out, rc);

out:
	if (rc < 0) {
		lock_res_and_lock(lock);
		lock->l_flags |= LDLM_FL_FAILED;
		unlock_res_and_lock(lock);
		wake_up(&lock->l_waitq);
	}
	LDLM_LOCK_RELEASE(lock);
}
Example #25
File: niobuf.c Project: a2hojsjsjs/linux
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or error code.
 */
static int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	lnet_process_id_t peer;
	int rc = 0;
	int rc2;
	int posted_md;
	int total_md;
	__u64 xid;
	lnet_handle_me_t me_h;
	lnet_md_t md;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
		return 0;

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_nob > 0);
	LASSERT(desc->bd_md_count == 0);
	LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
	LASSERT(desc->bd_req != NULL);
	LASSERT(desc->bd_type == BULK_PUT_SINK ||
		desc->bd_type == BULK_GET_SOURCE);

	/* cleanup the state of the bulk for it will be reused */
	if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
		desc->bd_nob_transferred = 0;
	else
		LASSERT(desc->bd_nob_transferred == 0);

	desc->bd_failure = 0;

	peer = desc->bd_import->imp_connection->c_peer;

	LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	/* An XID is only used for a single request from the client.
	 * For retried bulk transfers, a new XID will be allocated in
	 * in ptlrpc_check_set() if it needs to be resent, so it is not
	 * using the same RDMA match bits after an error.
	 *
	 * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
	 * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
	xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
	LASSERTF(!(desc->bd_registered &&
		   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
		 xid != desc->bd_last_xid,
		 "registered: %d  rq_xid: %llu bd_last_xid: %llu\n",
		 desc->bd_registered, xid, desc->bd_last_xid);

	total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
	desc->bd_registered = 1;
	desc->bd_last_xid = xid;
	desc->bd_md_count = total_md;
	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 1;		       /* PUT or GET */

	for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
		md.options = PTLRPC_MD_OPTIONS |
			     ((desc->bd_type == BULK_GET_SOURCE) ?
			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
		ptlrpc_fill_bulk_md(&md, desc, posted_md);

		rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
		if (rc != 0) {
			CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, xid,
			       posted_md, rc);
			break;
		}

		/* About to let the network at it... */
		rc = LNetMDAttach(me_h, md, LNET_UNLINK,
				  &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, xid,
			       posted_md, rc);
			rc2 = LNetMEUnlink(me_h);
			LASSERT(rc2 == 0);
			break;
		}
	}

	if (rc != 0) {
		LASSERT(rc == -ENOMEM);
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);
		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
		req->rq_status = -ENOMEM;
		return -ENOMEM;
	}

	/* Set rq_xid to matchbits of the final bulk so that server can
	 * infer the number of bulks that were prepared */
	req->rq_xid = --xid;
	LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
		 "bd_last_xid = x%llu, rq_xid = x%llu\n",
		 desc->bd_last_xid, req->rq_xid);

	spin_lock(&desc->bd_lock);
	/* Holler if peer manages to touch buffers before he knows the xid */
	if (desc->bd_md_count != total_md)
		CWARN("%s: Peer %s touched %d buffers while I registered\n",
		      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
		      total_md - desc->bd_md_count);
	spin_unlock(&desc->bd_lock);

	CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n",
	       desc->bd_md_count,
	       desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
	       desc->bd_iov_count, desc->bd_nob,
	       desc->bd_last_xid, req->rq_xid, desc->bd_portal);

	return 0;
}
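
The LU-1431 comment explains the xid arithmetic: bd_md_max_brw is a power of two, so masking off its low bits rounds rq_xid down to the match bits of the first bulk, and the per-MD xids increment from there. The alignment step in isolation:

#include <stdint.h>

/* First bulk XID: rq_xid rounded down to a md_max_brw boundary
 * (valid because md_max_brw is a power of two). */
static inline uint64_t first_bulk_xid(uint64_t rq_xid, uint64_t md_max_brw)
{
	return rq_xid & ~(md_max_brw - 1);
}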
Example #26
File: llog_obd.c Project: 7799/linux
int llog_setup(const struct lu_env *env, struct obd_device *obd,
	       struct obd_llog_group *olg, int index,
	       struct obd_device *disk_obd, struct llog_operations *op)
{
	struct llog_ctxt *ctxt;
	int rc = 0;

	if (index < 0 || index >= LLOG_MAX_CTXTS)
		return -EINVAL;

	LASSERT(olg != NULL);

	ctxt = llog_new_ctxt(obd);
	if (!ctxt)
		return -ENOMEM;

	ctxt->loc_obd = obd;
	ctxt->loc_olg = olg;
	ctxt->loc_idx = index;
	ctxt->loc_logops = op;
	mutex_init(&ctxt->loc_mutex);
	ctxt->loc_exp = class_export_get(disk_obd->obd_self_export);
	ctxt->loc_flags = LLOG_CTXT_FLAG_UNINITIALIZED;

	rc = llog_group_set_ctxt(olg, ctxt, index);
	if (rc) {
		llog_ctxt_destroy(ctxt);
		if (rc == -EEXIST) {
			ctxt = llog_group_get_ctxt(olg, index);
			if (ctxt) {
				/*
				 * mds_lov_update_desc() might call into here
				 * multiple times, so if the llog is already
				 * set up then don't do it again.
				 */
				CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n",
				       obd->obd_name, index);
				LASSERT(ctxt->loc_olg == olg);
				LASSERT(ctxt->loc_obd == obd);
				LASSERT(ctxt->loc_exp == disk_obd->obd_self_export);
				LASSERT(ctxt->loc_logops == op);
				llog_ctxt_put(ctxt);
			}
			rc = 0;
		}
		return rc;
	}

	if (op->lop_setup) {
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LLOG_SETUP))
			rc = -EOPNOTSUPP;
		else
			rc = op->lop_setup(env, obd, olg, index, disk_obd);
	}

	if (rc) {
		CERROR("%s: ctxt %d lop_setup=%p failed: rc = %d\n",
		       obd->obd_name, index, op->lop_setup, rc);
		llog_group_clear_ctxt(olg, index);
		llog_ctxt_destroy(ctxt);
	} else {
		CDEBUG(D_CONFIG, "obd %s ctxt %d is initialized\n",
		       obd->obd_name, index);
		ctxt->loc_flags &= ~LLOG_CTXT_FLAG_UNINITIALIZED;
	}

	return rc;
}
Example #27
/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
static int ldlm_callback_handler(struct ptlrpc_request *req)
{
	struct ldlm_namespace *ns;
	struct ldlm_request *dlm_req;
	struct ldlm_lock *lock;
	int rc;

	/* Requests arrive in sender's byte order.  The ptlrpc service
	 * handler has already checked and, if necessary, byte-swapped the
	 * incoming request message body, but I am responsible for the
	 * message buffers. */

	/* do nothing for sec context finalize */
	if (lustre_msg_get_opc(req->rq_reqmsg) == SEC_CTX_FINI)
		return 0;

	req_capsule_init(&req->rq_pill, req, RCL_SERVER);

	if (req->rq_export == NULL) {
		rc = ldlm_callback_reply(req, -ENOTCONN);
		ldlm_callback_errmsg(req, "Operate on unconnected server",
				     rc, NULL);
		return 0;
	}

	LASSERT(req->rq_export != NULL);
	LASSERT(req->rq_export->exp_obd != NULL);

	switch (lustre_msg_get_opc(req->rq_reqmsg)) {
	case LDLM_BL_CALLBACK:
		if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
			return 0;
		break;
	case LDLM_CP_CALLBACK:
		if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET))
			return 0;
		break;
	case LDLM_GL_CALLBACK:
		if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GL_CALLBACK_NET))
			return 0;
		break;
	case LDLM_SET_INFO:
		rc = ldlm_handle_setinfo(req);
		ldlm_callback_reply(req, rc);
		return 0;
	case OBD_LOG_CANCEL: /* remove this eventually - for 1.4.0 compat */
		CERROR("shouldn't be handling OBD_LOG_CANCEL on DLM thread\n");
		req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
			return 0;
		rc = llog_origin_handle_cancel(req);
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
			return 0;
		ldlm_callback_reply(req, rc);
		return 0;
	case LLOG_ORIGIN_HANDLE_CREATE:
		req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
			return 0;
		rc = llog_origin_handle_open(req);
		ldlm_callback_reply(req, rc);
		return 0;
	case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
		req_capsule_set(&req->rq_pill,
				&RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
			return 0;
		rc = llog_origin_handle_next_block(req);
		ldlm_callback_reply(req, rc);
		return 0;
	case LLOG_ORIGIN_HANDLE_READ_HEADER:
		req_capsule_set(&req->rq_pill,
				&RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
			return 0;
		rc = llog_origin_handle_read_header(req);
		ldlm_callback_reply(req, rc);
		return 0;
	case LLOG_ORIGIN_HANDLE_CLOSE:
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
			return 0;
		rc = llog_origin_handle_close(req);
		ldlm_callback_reply(req, rc);
		return 0;
	case OBD_QC_CALLBACK:
		req_capsule_set(&req->rq_pill, &RQF_QC_CALLBACK);
		if (OBD_FAIL_CHECK(OBD_FAIL_OBD_QC_CALLBACK_NET))
			return 0;
		rc = ldlm_handle_qc_callback(req);
		ldlm_callback_reply(req, rc);
		return 0;
	default:
		CERROR("unknown opcode %u\n",
		       lustre_msg_get_opc(req->rq_reqmsg));
		ldlm_callback_reply(req, -EPROTO);
		return 0;
	}

	ns = req->rq_export->exp_obd->obd_namespace;
	LASSERT(ns != NULL);

	req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK);

	dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
	if (dlm_req == NULL) {
		rc = ldlm_callback_reply(req, -EPROTO);
		ldlm_callback_errmsg(req, "Operate without parameter", rc,
				     NULL);
		return 0;
	}

	/* Force a known safe race, send a cancel to the server for a lock
	 * which the server has already started a blocking callback on. */
	if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) &&
	    lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
		rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0);
		if (rc < 0)
			CERROR("ldlm_cli_cancel: %d\n", rc);
	}

	lock = ldlm_handle2lock_long(&dlm_req->lock_handle[0], 0);
	if (!lock) {
		CDEBUG(D_DLMTRACE, "callback on lock "LPX64" - lock "
		       "disappeared\n", dlm_req->lock_handle[0].cookie);
		rc = ldlm_callback_reply(req, -EINVAL);
		ldlm_callback_errmsg(req, "Operate with invalid parameter", rc,
				     &dlm_req->lock_handle[0]);
		return 0;
	}

	if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
	    lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
		OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);

	/* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
	lock_res_and_lock(lock);
	lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
					      LDLM_AST_FLAGS);
	if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
		/* If somebody cancels lock and cache is already dropped,
		 * or lock is failed before cp_ast received on client,
		 * we can tell the server we have no lock. Otherwise, we
		 * should send cancel after dropping the cache. */
		if (((lock->l_flags & LDLM_FL_CANCELING) &&
		    (lock->l_flags & LDLM_FL_BL_DONE)) ||
		    (lock->l_flags & LDLM_FL_FAILED)) {
			LDLM_DEBUG(lock, "callback on lock "
				   LPX64" - lock disappeared\n",
				   dlm_req->lock_handle[0].cookie);
			unlock_res_and_lock(lock);
			LDLM_LOCK_RELEASE(lock);
			rc = ldlm_callback_reply(req, -EINVAL);
			ldlm_callback_errmsg(req, "Operate on stale lock", rc,
					     &dlm_req->lock_handle[0]);
			return 0;
		}
		/* BL_AST locks are not needed in LRU.
		 * Let ldlm_cancel_lru() be fast. */
		ldlm_lock_remove_from_lru(lock);
		lock->l_flags |= LDLM_FL_BL_AST;
	}
	unlock_res_and_lock(lock);

	/* We want the ost thread to get this reply so that it can respond
	 * to ost requests (write cache writeback) that might be triggered
	 * in the callback.
	 *
	 * But we'd also like to be able to indicate in the reply that we're
	 * cancelling right now, because it's unused, or have an intent result
	 * in the reply, so we might have to push the responsibility for sending
	 * the reply down into the AST handlers, alas. */

	switch (lustre_msg_get_opc(req->rq_reqmsg)) {
	case LDLM_BL_CALLBACK:
		CDEBUG(D_INODE, "blocking ast\n");
		req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
		if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
			rc = ldlm_callback_reply(req, 0);
			if (req->rq_no_reply || rc)
				ldlm_callback_errmsg(req, "Normal process", rc,
						     &dlm_req->lock_handle[0]);
		}
		if (ldlm_bl_to_thread_lock(ns, &dlm_req->lock_desc, lock))
			ldlm_handle_bl_callback(ns, &dlm_req->lock_desc, lock);
		break;
	case LDLM_CP_CALLBACK:
		CDEBUG(D_INODE, "completion ast\n");
		req_capsule_extend(&req->rq_pill, &RQF_LDLM_CP_CALLBACK);
		ldlm_callback_reply(req, 0);
		ldlm_handle_cp_callback(req, ns, dlm_req, lock);
		break;
	case LDLM_GL_CALLBACK:
		CDEBUG(D_INODE, "glimpse ast\n");
		req_capsule_extend(&req->rq_pill, &RQF_LDLM_GL_CALLBACK);
		ldlm_handle_gl_callback(req, ns, dlm_req, lock);
		break;
	default:
		LBUG();			 /* checked above */
	}

	return 0;
}
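
The TODO at the top of this handler points toward table-driven dispatch, as the MDT does, instead of the long switch. A standalone sketch of that shape follows; the opcode values and handler names are illustrative only.

#include <stdio.h>
#include <errno.h>

enum { BL_CALLBACK = 104, CP_CALLBACK = 105, GL_CALLBACK = 106 };

struct req { int opc; };

typedef int (*handler_t)(struct req *);

static int handle_bl(struct req *r) { printf("blocking ast\n");   return 0; }
static int handle_cp(struct req *r) { printf("completion ast\n"); return 0; }
static int handle_gl(struct req *r) { printf("glimpse ast\n");    return 0; }

/* one table entry per opcode replaces one switch arm */
static const struct { int opc; handler_t fn; } dispatch[] = {
	{ BL_CALLBACK, handle_bl },
	{ CP_CALLBACK, handle_cp },
	{ GL_CALLBACK, handle_gl },
};

static int handle(struct req *r)
{
	size_t i;

	for (i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++)
		if (dispatch[i].opc == r->opc)
			return dispatch[i].fn(r);
	return -EPROTO;		/* unknown opcode, as in the default: arm */
}

int main(void)
{
	struct req r = { .opc = CP_CALLBACK };
	return handle(&r);
}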
Example #28
/**
 * Prepare bulk IO requests for processing.
 *
 * This function does initial checks of IO and calls corresponding
 * functions for read/write processing.
 *
 * \param[in] env	execution environment
 * \param[in] cmd	IO type (read/write)
 * \param[in] exp	OBD export of client
 * \param[in] oa	OBDO structure from request
 * \param[in] objcount	always 1
 * \param[in] obj	object data
 * \param[in] rnb	remote buffers
 * \param[in] nr_local	number of local buffers
 * \param[in] lnb	local buffers
 *
 * \retval		0 on successful prepare
 * \retval		negative value on error
 */
int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
	       struct obdo *oa, int objcount, struct obd_ioobj *obj,
	       struct niobuf_remote *rnb, int *nr_local,
	       struct niobuf_local *lnb)
{
	struct tgt_session_info	*tsi = tgt_ses_info(env);
	struct ofd_device	*ofd = ofd_exp(exp);
	struct ofd_thread_info	*info;
	char			*jobid;
	const struct lu_fid	*fid = &oa->o_oi.oi_fid;
	int			 rc = 0;

	if (*nr_local > PTLRPC_MAX_BRW_PAGES) {
		CERROR("%s: bulk has too many pages %d, which exceeds the"
		       "maximum pages per RPC of %d\n",
		       exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES);
		RETURN(-EPROTO);
	}

	if (tgt_ses_req(tsi) == NULL) { /* echo client case */
		info = ofd_info_init(env, exp);
		jobid = NULL;
	} else {
		info = tsi2ofd_info(tsi);
		jobid = tsi->tsi_jobid;
	}

	LASSERT(oa != NULL);

	if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) {
		struct ofd_seq		*oseq;

		oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi));
		if (IS_ERR(oseq)) {
			CERROR("%s: Can not find seq for "DOSTID
			       ": rc = %ld\n", ofd_name(ofd), POSTID(&oa->o_oi),
			       PTR_ERR(oseq));
			RETURN(-EINVAL);
		}

		if (oseq->os_destroys_in_progress == 0) {
			/* don't fail lookups for orphan recovery, it causes
			 * later LBUGs when objects still exist during
			 * precreate */
			ofd_seq_put(env, oseq);
			RETURN(-ENOENT);
		}
		ofd_seq_put(env, oseq);
	}

	LASSERT(objcount == 1);
	LASSERT(obj->ioo_bufcnt > 0);

	if (cmd == OBD_BRW_WRITE) {
		la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR);
		rc = ofd_preprw_write(env, exp, ofd, fid, &info->fti_attr, oa,
				      objcount, obj, rnb, nr_local, lnb, jobid);
	} else if (cmd == OBD_BRW_READ) {
		ofd_grant_prepare_read(env, exp, oa);
		rc = ofd_preprw_read(env, exp, ofd, fid, &info->fti_attr, oa,
				     obj->ioo_bufcnt, rnb, nr_local, lnb,
				     jobid);
		obdo_from_la(oa, &info->fti_attr, LA_ATIME);
	} else {
		CERROR("%s: wrong cmd %d received!\n",
		       exp->exp_obd->obd_name, cmd);
		rc = -EPROTO;
	}
	RETURN(rc);
}
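
ofd_preprw() is only the prepare half of bulk IO; a matching commit call (ofd_commitrw()) completes the request after the bulk transfer. The userspace sketch below models the pairing in miniature; the names and control flow are illustrative, not the real server path.

#include <stdio.h>
#include <errno.h>

enum { BRW_READ, BRW_WRITE };

/* prepare validates the command and maps local buffers */
static int preprw(int cmd, int *nr_local)
{
	if (cmd != BRW_READ && cmd != BRW_WRITE)
		return -EPROTO;		/* mirrors the "wrong cmd" branch */
	*nr_local = 4;			/* pretend we mapped 4 local buffers */
	return 0;
}

/* commit completes the IO once the bulk transfer is done */
static int commitrw(int cmd, int nr_local)
{
	printf("%s committed %d buffers\n",
	       cmd == BRW_WRITE ? "write" : "read", nr_local);
	return 0;
}

int main(void)
{
	int nr_local = 0;
	int rc = preprw(BRW_WRITE, &nr_local);

	if (rc == 0)
		rc = commitrw(BRW_WRITE, nr_local);
	return rc ? 1 : 0;
}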
Example #29
File: niobuf.c  Project: a2hojsjsjs/linux
/**
 * Send request \a request.
 * If \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or a negative error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
	int rc;
	int rc2;
	int mpflag = 0;
	struct ptlrpc_connection *connection;
	lnet_handle_me_t reply_me_h;
	lnet_md_t reply_md;
	struct obd_device *obd = request->rq_import->imp_obd;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
		return 0;

	LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
	LASSERT(request->rq_wait_ctx == 0);

	/* If this is a re-transmit, we're required to have disengaged
	 * cleanly from the previous attempt */
	LASSERT(!request->rq_receiving_reply);
	LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
		(request->rq_import->imp_state == LUSTRE_IMP_FULL)));

	if (unlikely(obd != NULL && obd->obd_fail)) {
		CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
			obd->obd_name);
		/* this prevents us from waiting in ptlrpc_queue_wait */
		spin_lock(&request->rq_lock);
		request->rq_err = 1;
		spin_unlock(&request->rq_lock);
		request->rq_status = -ENODEV;
		return -ENODEV;
	}

	connection = request->rq_import->imp_connection;

	lustre_msg_set_handle(request->rq_reqmsg,
			      &request->rq_import->imp_remote_handle);
	lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
	lustre_msg_set_conn_cnt(request->rq_reqmsg,
				request->rq_import->imp_conn_cnt);
	lustre_msghdr_set_flags(request->rq_reqmsg,
				request->rq_import->imp_msghdr_flags);

	if (request->rq_resend)
		lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);

	if (request->rq_memalloc)
		mpflag = cfs_memory_pressure_get_and_set();

	rc = sptlrpc_cli_wrap_request(request);
	if (rc)
		goto out;

	/* bulk register should be done after wrap_request() */
	if (request->rq_bulk != NULL) {
		rc = ptlrpc_register_bulk(request);
		if (rc != 0)
			goto out;
	}

	if (!noreply) {
		LASSERT(request->rq_replen != 0);
		if (request->rq_repbuf == NULL) {
			LASSERT(request->rq_repdata == NULL);
			LASSERT(request->rq_repmsg == NULL);
			rc = sptlrpc_cli_alloc_repbuf(request,
						      request->rq_replen);
			if (rc) {
				/* this prevents us from looping in
				 * ptlrpc_queue_wait */
				spin_lock(&request->rq_lock);
				request->rq_err = 1;
				spin_unlock(&request->rq_lock);
				request->rq_status = rc;
				goto cleanup_bulk;
			}
		} else {
			request->rq_repdata = NULL;
			request->rq_repmsg = NULL;
		}

		rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
				  connection->c_peer, request->rq_xid, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
		if (rc != 0) {
			CERROR("LNetMEAttach failed: %d\n", rc);
			LASSERT(rc == -ENOMEM);
			rc = -ENOMEM;
			goto cleanup_bulk;
		}
	}

	spin_lock(&request->rq_lock);
	/* If the MD attach succeeds, there _will_ be a reply_in callback */
	request->rq_receiving_reply = !noreply;
	request->rq_req_unlink = 1;
	/* We are responsible for unlinking the reply buffer */
	request->rq_reply_unlink = !noreply;
	/* Clear any flags that may be present from previous sends. */
	request->rq_replied = 0;
	request->rq_err = 0;
	request->rq_timedout = 0;
	request->rq_net_err = 0;
	request->rq_resend = 0;
	request->rq_restart = 0;
	request->rq_reply_truncate = 0;
	spin_unlock(&request->rq_lock);

	if (!noreply) {
		reply_md.start = request->rq_repbuf;
		reply_md.length = request->rq_repbuf_len;
		/* Allow multiple early replies */
		reply_md.threshold = LNET_MD_THRESH_INF;
		/* Manage remote for early replies */
		reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
			LNET_MD_MANAGE_REMOTE |
			LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */
		reply_md.user_ptr = &request->rq_reply_cbid;
		reply_md.eq_handle = ptlrpc_eq_h;

		/* We must see the unlink callback to unset rq_reply_unlink,
		   so we can't auto-unlink */
		rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
				  &request->rq_reply_md_h);
		if (rc != 0) {
			CERROR("LNetMDAttach failed: %d\n", rc);
			LASSERT(rc == -ENOMEM);
			spin_lock(&request->rq_lock);
			/* ...but the MD attach didn't succeed... */
			request->rq_receiving_reply = 0;
			spin_unlock(&request->rq_lock);
			rc = -ENOMEM;
			goto cleanup_me;
		}

		CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %llu, portal %u\n",
		       request->rq_repbuf_len, request->rq_xid,
		       request->rq_reply_portal);
	}

	/* add references on request for request_out_callback */
	ptlrpc_request_addref(request);
	if (obd != NULL && obd->obd_svc_stats != NULL)
		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
			atomic_read(&request->rq_import->imp_inflight));

	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

	ktime_get_real_ts64(&request->rq_arrival_time);
	request->rq_sent = ktime_get_real_seconds();
	/* We give the server rq_timeout secs to process the req, and
	   add the network latency for our local timeout. */
	request->rq_deadline = request->rq_sent + request->rq_timeout +
		ptlrpc_at_get_net_latency(request);

	ptlrpc_pinger_sending_on_import(request->rq_import);

	DEBUG_REQ(D_INFO, request, "send flg=%x",
		  lustre_msg_get_flags(request->rq_reqmsg));
	rc = ptl_send_buf(&request->rq_req_md_h,
			  request->rq_reqbuf, request->rq_reqdata_len,
			  LNET_NOACK_REQ, &request->rq_req_cbid,
			  connection,
			  request->rq_request_portal,
			  request->rq_xid, 0);
	if (rc == 0)
		goto out;

	ptlrpc_req_finished(request);
	if (noreply)
		goto out;

 cleanup_me:
	/* MEUnlink is safe; the PUT didn't even get off the ground, and
	 * nobody apart from the PUT's target has the right nid+XID to
	 * access the reply buffer. */
	rc2 = LNetMEUnlink(reply_me_h);
	LASSERT(rc2 == 0);
	/* UNLINKED callback called synchronously */
	LASSERT(!request->rq_receiving_reply);

 cleanup_bulk:
	/* We do a sync unlink here as there was no real transfer, so the
	 * chance of a long unlink over a sluggish net is smaller. */
	ptlrpc_unregister_bulk(request, 0);
 out:
	if (request->rq_memalloc)
		cfs_memory_pressure_restore(mpflag);
	return rc;
}
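
The cleanup_me / cleanup_bulk / out tail above is the classic staged goto unwind: each setup step that succeeds gains a cleanup label, and a later failure jumps to the label that undoes everything registered so far, in reverse order. A self-contained model of the idiom, with made-up helper names:

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

static bool fail_md;	/* flip to exercise the error path */

static int attach_me(void)	{ printf("ME attached\n"); return 0; }
static void unlink_me(void)	{ printf("ME unlinked\n"); }
static int attach_md(void)	{ return fail_md ? -ENOMEM : 0; }
static int register_bulk(void)	{ printf("bulk registered\n"); return 0; }
static void unregister_bulk(void) { printf("bulk unregistered\n"); }

static int send_rpc(void)
{
	int rc;

	rc = register_bulk();
	if (rc)
		goto out;
	rc = attach_me();
	if (rc)
		goto cleanup_bulk;
	rc = attach_md();
	if (rc)
		goto cleanup_me;	/* unwinds ME, then bulk */
	printf("sent\n");
	return 0;

cleanup_me:
	unlink_me();
cleanup_bulk:
	unregister_bulk();
out:
	return rc;
}

int main(void)
{
	fail_md = true;
	return send_rpc() ? 1 : 0;
}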
Example #30
/**
 * Set attributes of object during write bulk IO processing.
 *
 * Change object attributes and write parent FID into extended
 * attributes when needed.
 *
 * \param[in] env	execution environment
 * \param[in] ofd	OFD device
 * \param[in] ofd_obj	OFD object
 * \param[in] la	object attributes
 * \param[in] ff	parent FID
 *
 * \retval		0 on successful attributes update
 * \retval		negative value on error
 */
static int
ofd_write_attr_set(const struct lu_env *env, struct ofd_device *ofd,
		   struct ofd_object *ofd_obj, struct lu_attr *la,
		   struct filter_fid *ff)
{
	struct ofd_thread_info	*info = ofd_info(env);
	__u64			 valid = la->la_valid;
	int			 rc;
	struct thandle		*th;
	struct dt_object	*dt_obj;
	int			 ff_needed = 0;

	ENTRY;

	LASSERT(la);

	dt_obj = ofd_object_child(ofd_obj);
	LASSERT(dt_obj != NULL);

	la->la_valid &= LA_UID | LA_GID;

	rc = ofd_attr_handle_ugid(env, ofd_obj, la, 0 /* !is_setattr */);
	if (rc != 0)
		GOTO(out, rc);

	if (ff != NULL) {
		rc = ofd_object_ff_load(env, ofd_obj);
		if (rc == -ENODATA)
			ff_needed = 1;
		else if (rc < 0)
			GOTO(out, rc);
	}

	if (!la->la_valid && !ff_needed)
		/* no attributes to set */
		GOTO(out, rc = 0);

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	if (la->la_valid) {
		rc = dt_declare_attr_set(env, dt_obj, la, th);
		if (rc)
			GOTO(out_tx, rc);
	}

	if (ff_needed) {
		if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_UNMATCHED_PAIR1))
			ff->ff_parent.f_oid = cpu_to_le32(1UL << 31);
		if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_UNMATCHED_PAIR2))
			ff->ff_parent.f_oid =
			cpu_to_le32(le32_to_cpu(ff->ff_parent.f_oid) - 1);

		info->fti_buf.lb_buf = ff;
		info->fti_buf.lb_len = sizeof(*ff);
		rc = dt_declare_xattr_set(env, dt_obj, &info->fti_buf,
					  XATTR_NAME_FID, 0, th);
		if (rc)
			GOTO(out_tx, rc);
	}

	/* We don't need a transno for this operation which will be re-executed
	 * anyway when the OST_WRITE (with a transno assigned) is replayed */
	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc)
		GOTO(out_tx, rc);

	/* set uid/gid */
	if (la->la_valid) {
		rc = dt_attr_set(env, dt_obj, la, th);
		if (rc)
			GOTO(out_tx, rc);
	}

	/* set filter fid EA */
	if (ff_needed) {
		if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NOPFID))
			GOTO(out_tx, rc);

		rc = dt_xattr_set(env, dt_obj, &info->fti_buf, XATTR_NAME_FID,
				  0, th);
		if (rc == 0) {
			ofd_obj->ofo_pfid.f_seq = le64_to_cpu(ff->ff_parent.f_seq);
			ofd_obj->ofo_pfid.f_oid = le32_to_cpu(ff->ff_parent.f_oid);
			/* Currently, the filter_fid::ff_parent::f_ver is not
			 * the real parent MDT-object's FID::f_ver, instead it
			 * is the OST-object index in its parent MDT-object's
			 * layout EA. */
			ofd_obj->ofo_pfid.f_stripe_idx =
					le32_to_cpu(ff->ff_parent.f_stripe_idx);
		}
	}

	GOTO(out_tx, rc);

out_tx:
	dt_trans_stop(env, ofd->ofd_osd, th);
out:
	la->la_valid = valid;
	return rc;
}
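
ofd_write_attr_set() follows the OSD two-phase transaction discipline: every operation executed inside the transaction (dt_attr_set(), dt_xattr_set()) must first be declared before the transaction starts, so the backend can reserve credits. The standalone sketch below models that rule; the types and checks are illustrative, not the dt API.

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

struct thandle { int credits; bool started; };

static int declare_attr_set(struct thandle *th)
{
	if (th->started)
		return -EPROTO;		/* declarations must precede start */
	th->credits += 1;		/* reserve credit for one attr_set */
	return 0;
}

static int trans_start(struct thandle *th)
{
	th->started = true;
	return 0;
}

static int attr_set(struct thandle *th)
{
	if (!th->started || th->credits < 1)
		return -EPROTO;		/* must have been declared */
	th->credits -= 1;
	printf("attributes written\n");
	return 0;
}

static void trans_stop(struct thandle *th)
{
	th->started = false;
}

int main(void)
{
	struct thandle th = { 0 };
	int rc = declare_attr_set(&th);

	if (!rc)
		rc = trans_start(&th);
	if (!rc)
		rc = attr_set(&th);
	trans_stop(&th);
	return rc ? 1 : 0;
}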