Example #1
File: xattr.c Project: 3null/linux
static
int ll_getxattr_common(struct inode *inode, const char *name,
		       void *buffer, size_t size, __u64 valid)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ptlrpc_request *req = NULL;
	struct mdt_body *body;
	int xattr_type, rc;
	void *xdata;
	struct obd_capa *oc;
	struct rmtacl_ctl_entry *rce = NULL;
	struct ll_inode_info *lli = ll_i2info(inode);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
	       inode->i_ino, inode->i_generation, inode);

	/* listxattr has slightly different behavior from ext3:
	 * without 'user_xattr' ext3 will list all xattr names but
	 * filter out "^user..*"; we list them all for simplicity.
	 */
	if (!name) {
		xattr_type = XATTR_OTHER_T;
		goto do_getxattr;
	}

	xattr_type = get_xattr_type(name);
	rc = xattr_type_filter(sbi, xattr_type);
	if (rc)
		return rc;

	/* b15587: ignore security.capability xattr for now */
	if ((xattr_type == XATTR_SECURITY_T &&
	    strcmp(name, "security.capability") == 0))
		return -ENODATA;

	/* LU-549:  Disable security.selinux when selinux is disabled */
	if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
	    strcmp(name, "security.selinux") == 0)
		return -EOPNOTSUPP;

#ifdef CONFIG_FS_POSIX_ACL
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
	    (xattr_type == XATTR_ACL_ACCESS_T ||
	    xattr_type == XATTR_ACL_DEFAULT_T)) {
		rce = rct_search(&sbi->ll_rct, current_pid());
		if (rce == NULL ||
		    (rce->rce_ops != RMT_LSETFACL &&
		    rce->rce_ops != RMT_LGETFACL &&
		    rce->rce_ops != RMT_RSETFACL &&
		    rce->rce_ops != RMT_RGETFACL))
			return -EOPNOTSUPP;
	}

	/* The POSIX ACL is under the protection of the LOOKUP lock. When we
	 * get here, we have just done path resolution to the target inode,
	 * so there is a good chance that the cached ACL is up to date.
	 */
	if (xattr_type == XATTR_ACL_ACCESS_T &&
	    !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {

		struct posix_acl *acl;

		spin_lock(&lli->lli_lock);
		acl = posix_acl_dup(lli->lli_posix_acl);
		spin_unlock(&lli->lli_lock);

		if (!acl)
			return -ENODATA;

		rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
		posix_acl_release(acl);
		return rc;
	}
	if (xattr_type == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
		return -ENODATA;
#endif

do_getxattr:
	if (sbi->ll_xattr_cache_enabled && xattr_type != XATTR_ACL_ACCESS_T) {
		rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
		if (rc == -EAGAIN)
			goto getxattr_nocache;
		if (rc < 0)
			goto out_xattr;

		/* Add "system.posix_acl_access" to the list */
		if (lli->lli_posix_acl != NULL && valid & OBD_MD_FLXATTRLS) {
			if (size == 0) {
				rc += sizeof(XATTR_NAME_ACL_ACCESS);
			} else if (size - rc >= sizeof(XATTR_NAME_ACL_ACCESS)) {
				memcpy(buffer + rc, XATTR_NAME_ACL_ACCESS,
				       sizeof(XATTR_NAME_ACL_ACCESS));
				rc += sizeof(XATTR_NAME_ACL_ACCESS);
			} else {
				rc = -ERANGE;
				goto out_xattr;
			}
		}
	} else {
getxattr_nocache:
		oc = ll_mdscapa_get(inode);
		rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
				valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
				name, NULL, 0, size, 0, &req);
		capa_put(oc);

		if (rc < 0)
			goto out_xattr;

		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		LASSERT(body);

		/* only detect the xattr size */
		if (size == 0) {
			rc = body->eadatasize;
			goto out;
		}

		if (size < body->eadatasize) {
			CERROR("server bug: replied size %u > %u\n",
				body->eadatasize, (int)size);
			rc = -ERANGE;
			goto out;
		}

		if (body->eadatasize == 0) {
			rc = -ENODATA;
			goto out;
		}

		/* do not need swab xattr data */
		xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
							body->eadatasize);
		if (!xdata) {
			rc = -EFAULT;
			goto out;
		}

		memcpy(buffer, xdata, body->eadatasize);
		rc = body->eadatasize;
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (rce && rce->rce_ops == RMT_LSETFACL) {
		ext_acl_xattr_header *acl;

		acl = lustre_posix_acl_xattr_2ext(
					(posix_acl_xattr_header *)buffer, rc);
		if (IS_ERR(acl)) {
			rc = PTR_ERR(acl);
			goto out;
		}

		rc = ee_add(&sbi->ll_et, current_pid(), ll_inode2fid(inode),
			    xattr_type, acl);
		if (unlikely(rc < 0)) {
			lustre_ext_acl_xattr_free(acl);
			goto out;
		}
	}
#endif

out_xattr:
	if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
		LCONSOLE_INFO(
			"%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
			ll_get_fsname(inode->i_sb, NULL, 0), rc);
		sbi->ll_flags &= ~LL_SBI_USER_XATTR;
	}
out:
	ptlrpc_req_finished(req);
	return rc;
}
Example #2
static int llog_process_thread(void *arg)
{
	struct llog_process_info	*lpi = arg;
	struct llog_handle		*loghandle = lpi->lpi_loghandle;
	struct llog_log_hdr		*llh = loghandle->lgh_hdr;
	struct llog_process_cat_data	*cd  = lpi->lpi_catdata;
	char				*buf;
	__u64				 cur_offset = LLOG_CHUNK_SIZE;
	__u64				 last_offset;
	int				 rc = 0, index = 1, last_index;
	int				 saved_index = 0;
	int				 last_called_index = 0;

	LASSERT(llh);

	OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
	if (!buf) {
		lpi->lpi_rc = -ENOMEM;
		return 0;
	}

	if (cd != NULL) {
		last_called_index = cd->lpcd_first_idx;
		index = cd->lpcd_first_idx + 1;
	}
	if (cd != NULL && cd->lpcd_last_idx)
		last_index = cd->lpcd_last_idx;
	else
		last_index = LLOG_BITMAP_BYTES * 8 - 1;

	while (rc == 0) {
		struct llog_rec_hdr *rec;

		/* skip records not set in bitmap */
		while (index <= last_index &&
		       !ext2_test_bit(index, llh->llh_bitmap))
			++index;

		LASSERT(index <= last_index + 1);
		if (index == last_index + 1)
			break;
repeat:
		CDEBUG(D_OTHER, "index: %d last_index %d\n",
		       index, last_index);

		/* get the buf with our target record; avoid old garbage */
		memset(buf, 0, LLOG_CHUNK_SIZE);
		last_offset = cur_offset;
		rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
				     index, &cur_offset, buf, LLOG_CHUNK_SIZE);
		if (rc)
			GOTO(out, rc);

		/* NB: when rec->lrh_len is accessed it is already swabbed
		 * since it is used at the "end" of the loop and the rec
		 * swabbing is done at the beginning of the loop. */
		for (rec = (struct llog_rec_hdr *)buf;
		     (char *)rec < buf + LLOG_CHUNK_SIZE;
		     rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){

			CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
			       rec, rec->lrh_type);

			if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
				lustre_swab_llog_rec(rec);

			CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
			       rec->lrh_type, rec->lrh_index);

			if (rec->lrh_index == 0) {
				/* probably another rec just got added? */
				if (index <= loghandle->lgh_last_idx)
					GOTO(repeat, rc = 0);
				GOTO(out, rc = 0); /* no more records */
			}
			if (rec->lrh_len == 0 ||
			    rec->lrh_len > LLOG_CHUNK_SIZE) {
				CWARN("invalid length %d in llog record for "
				      "index %d/%d\n", rec->lrh_len,
				      rec->lrh_index, index);
				GOTO(out, rc = -EINVAL);
			}

			if (rec->lrh_index < index) {
				CDEBUG(D_OTHER, "skipping lrh_index %d\n",
				       rec->lrh_index);
				continue;
			}

			CDEBUG(D_OTHER,
			       "lrh_index: %d lrh_len: %d (%d remains)\n",
			       rec->lrh_index, rec->lrh_len,
			       (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));

			loghandle->lgh_cur_idx = rec->lrh_index;
			loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
						    last_offset;

			/* if set, process the callback on this record */
			if (ext2_test_bit(index, llh->llh_bitmap)) {
				rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec,
						 lpi->lpi_cbdata);
				last_called_index = index;
				if (rc == LLOG_PROC_BREAK) {
					GOTO(out, rc);
				} else if (rc == LLOG_DEL_RECORD) {
					llog_cancel_rec(lpi->lpi_env,
							loghandle,
							rec->lrh_index);
					rc = 0;
				}
				if (rc)
					GOTO(out, rc);
			} else {
				CDEBUG(D_OTHER, "Skipped index %d\n", index);
			}

			/* next record, still in buffer? */
			++index;
			if (index > last_index)
				GOTO(out, rc = 0);
		}
	}

out:
	if (cd != NULL)
		cd->lpcd_last_idx = last_called_index;

	OBD_FREE(buf, LLOG_CHUNK_SIZE);
	lpi->lpi_rc = rc;
	return 0;
}
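
The per-record work above is delegated to lpi->lpi_cb, whose return value steers the loop: LLOG_PROC_BREAK stops processing and LLOG_DEL_RECORD makes llog_process_thread() cancel the record. A minimal sketch of such a callback, assuming the usual llog_cb_t signature; the function name and the counting logic are illustrative only.

static int demo_llog_count_cb(const struct lu_env *env,
			      struct llog_handle *llh,
			      struct llog_rec_hdr *rec, void *data)
{
	int *counter = data;

	/* return 0 to continue the scan; LLOG_PROC_BREAK would stop it,
	 * LLOG_DEL_RECORD would ask llog_process_thread() to cancel rec */
	(*counter)++;
	CDEBUG(D_OTHER, "record idx %d type %#x\n",
	       rec->lrh_index, rec->lrh_type);
	return 0;
}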
Example #3
/*
 * Server's incoming request callback
 */
void request_in_callback(lnet_event_t *ev)
{
	struct ptlrpc_cb_id		  *cbid = ev->md.user_ptr;
	struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
	struct ptlrpc_service_part	  *svcpt = rqbd->rqbd_svcpt;
	struct ptlrpc_service	     *service = svcpt->scp_service;
	struct ptlrpc_request	     *req;

	LASSERT(ev->type == LNET_EVENT_PUT ||
		ev->type == LNET_EVENT_UNLINK);
	LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer);
	LASSERT((char *)ev->md.start + ev->offset + ev->mlength <=
		rqbd->rqbd_buffer + service->srv_buf_size);

	CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
	       "event type %d, status %d, service %s\n",
	       ev->type, ev->status, service->srv_name);

	if (ev->unlinked) {
		/* If this is the last request message to fit in the
		 * request buffer we can use the request object embedded in
		 * rqbd.  Note that if we failed to allocate a request,
		 * we'd have to re-post the rqbd, which we can't do in this
		 * context. */
		req = &rqbd->rqbd_req;
		memset(req, 0, sizeof(*req));
	} else {
		LASSERT(ev->type == LNET_EVENT_PUT);
		if (ev->status != 0) {
			/* We moaned above already... */
			return;
		}
		req = ptlrpc_request_cache_alloc(GFP_ATOMIC);
		if (req == NULL) {
			CERROR("Can't allocate incoming request descriptor: "
			       "Dropping %s RPC from %s\n",
			       service->srv_name,
			       libcfs_id2str(ev->initiator));
			return;
		}
	}

	/* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
	 * flags are reset and scalars are zero.  We only set the message
	 * size to non-zero if this was a successful receive. */
	req->rq_xid = ev->match_bits;
	req->rq_reqbuf = ev->md.start + ev->offset;
	if (ev->type == LNET_EVENT_PUT && ev->status == 0)
		req->rq_reqdata_len = ev->mlength;
	do_gettimeofday(&req->rq_arrival_time);
	req->rq_peer = ev->initiator;
	req->rq_self = ev->target.nid;
	req->rq_rqbd = rqbd;
	req->rq_phase = RQ_PHASE_NEW;
	spin_lock_init(&req->rq_lock);
	INIT_LIST_HEAD(&req->rq_timed_list);
	INIT_LIST_HEAD(&req->rq_exp_list);
	atomic_set(&req->rq_refcount, 1);
	if (ev->type == LNET_EVENT_PUT)
		CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n",
		       req, req->rq_xid, ev->mlength);

	CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));

	spin_lock(&svcpt->scp_lock);

	ptlrpc_req_add_history(svcpt, req);

	if (ev->unlinked) {
		svcpt->scp_nrqbds_posted--;
		CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
		       svcpt->scp_nrqbds_posted);

		/* Normally, don't complain about 0 buffers posted; LNET won't
		 * drop incoming reqs since we set the portal lazy */
		if (test_req_buffer_pressure &&
		    ev->type != LNET_EVENT_UNLINK &&
		    svcpt->scp_nrqbds_posted == 0)
			CWARN("All %s request buffers busy\n",
			      service->srv_name);

		/* req takes over the network's ref on rqbd */
	} else {
		/* req takes a ref on rqbd */
		rqbd->rqbd_refcount++;
	}

	list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
	svcpt->scp_nreqs_incoming++;

	/* NB everything can disappear under us once the request
	 * has been queued and we unlock, so do the wake now... */
	wake_up(&svcpt->scp_waitq);

	spin_unlock(&svcpt->scp_lock);
}
Example #4
static int out_attr_get(struct tgt_session_info *tsi)
{
	const struct lu_env	*env = tsi->tsi_env;
	struct tgt_thread_info	*tti = tgt_th_info(env);
	struct obdo		*obdo = &tti->tti_u.update.tti_obdo;
	struct lu_attr		*la = &tti->tti_attr;
	struct dt_object        *obj = tti->tti_u.update.tti_dt_object;
	int			idx = tti->tti_u.update.tti_update_reply_index;
	int			rc;

	ENTRY;

	if (!lu_object_exists(&obj->do_lu)) {
		/* Usually this is called when the master MDT tries to
		 * init a remote object (see osp_object_init), so if the
		 * object does not exist on the slave, we need to set the
		 * BANSHEE flag so the object can be removed from the
		 * cache immediately */
		set_bit(LU_OBJECT_HEARD_BANSHEE,
			&obj->do_lu.lo_header->loh_flags);
		RETURN(-ENOENT);
	}

	dt_read_lock(env, obj, MOR_TGT_CHILD);
	rc = dt_attr_get(env, obj, la, NULL);
	if (rc)
		GOTO(out_unlock, rc);
	/*
	 * If it is a directory, we will also check whether the
	 * directory is empty.
	 * la_flags = 0 : Empty.
	 *          = 1 : Not empty.
	 */
	la->la_flags = 0;
	if (S_ISDIR(la->la_mode)) {
		struct dt_it		*it;
		const struct dt_it_ops	*iops;

		if (!dt_try_as_dir(env, obj))
			GOTO(out_unlock, rc = -ENOTDIR);

		iops = &obj->do_index_ops->dio_it;
		it = iops->init(env, obj, LUDA_64BITHASH, BYPASS_CAPA);
		if (!IS_ERR(it)) {
			int  result;
			result = iops->get(env, it, (const void *)"");
			if (result > 0) {
				int i;
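
				/* the first entries are expected to be "."
				 * and ".."; if next() still succeeds after
				 * stepping past them, a real entry exists */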
				for (result = 0, i = 0; result == 0 && i < 3;
				     ++i)
					result = iops->next(env, it);
				if (result == 0)
					la->la_flags = 1;
			} else if (result == 0)
				/*
				 * Huh? Index contains no zero key?
				 */
				rc = -EIO;

			iops->put(env, it);
			iops->fini(env, it);
		}
	}

	obdo->o_valid = 0;
	obdo_from_la(obdo, la, la->la_valid);
	obdo_cpu_to_le(obdo, obdo);
	lustre_set_wire_obdo(NULL, obdo, obdo);

out_unlock:
	dt_read_unlock(env, obj);

	CDEBUG(D_INFO, "%s: insert attr get reply %p index %d: rc = %d\n",
	       tgt_name(tsi->tsi_tgt), tti->tti_u.update.tti_update_reply,
	       0, rc);

	object_update_result_insert(tti->tti_u.update.tti_update_reply, obdo,
				    sizeof(*obdo), idx, rc);

	RETURN(rc);
}
Example #5
/*
 * Look-up/create a global index file.
 *
 * \param env - is the environment passed by the caller
 * \param dev - is the dt_device where to lookup/create the global index file
 * \param parent - is the parent directory where to create the global index if
 *                 not found
 * \param fid - is the fid of the global index to be looked up/created
 * \param local - indicates whether the index should be created with a locally
 *                generated fid or with \a fid
 *
 * \retval     - pointer to the dt_object of the global index on success,
 *               appropriate error on failure
 */
struct dt_object *lquota_disk_glb_find_create(const struct lu_env *env,
					      struct dt_device *dev,
					      struct dt_object *parent,
					      struct lu_fid *fid, bool local)
{
	struct lquota_thread_info	*qti = lquota_info(env);
	struct dt_object		*glb_idx;
	const struct dt_index_features	*idx_feat;
	ENTRY;

	CDEBUG(D_QUOTA, "look-up/create %sglobal idx file ("DFID")\n",
	       local ? "local " : "", PFID(fid));

#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 53, 0)
	/* we use different index feature for each quota type and target type
	 * for the time being. This is done for on-disk conversion from the old
	 * quota format. Once this is no longer required, we should just be
	 * using dt_quota_glb_features for all global index file */
	idx_feat = glb_idx_feature(fid);
#else
	idx_feat = &dt_quota_glb_features;
#endif

	/* the filename is composed of the most significant bits of the FID,
	 * that is to say the oid, which encodes the pool id, pool type and
	 * quota type */
	sprintf(qti->qti_buf, "0x%x", fid->f_oid);

	if (local) {
		/* We use the sequence reserved for local named objects */
		lu_local_name_obj_fid(&qti->qti_fid, 1);
		glb_idx = lquota_disk_find_create(env, dev, parent,
						  &qti->qti_fid, idx_feat,
						  qti->qti_buf);
	} else {
		/* look-up/create global index on disk */
		glb_idx = local_index_find_or_create_with_fid(env, dev, fid,
							      parent,
							      qti->qti_buf,
							      LQUOTA_MODE,
							      idx_feat);
	}

	if (IS_ERR(glb_idx)) {
		CERROR("%s: failed to look-up/create idx file "DFID" rc:%ld "
		       "local:%d\n", dev->dd_lu_dev.ld_obd->obd_name,
		       PFID(fid), PTR_ERR(glb_idx), local);
		RETURN(glb_idx);
	}

	/* install index operation vector */
	if (glb_idx->do_index_ops == NULL) {
		int rc;

		rc = glb_idx->do_ops->do_index_try(env, glb_idx, idx_feat);
		if (rc) {
			CERROR("%s: failed to setup index operations for "DFID
			       " rc:%d\n", dev->dd_lu_dev.ld_obd->obd_name,
			       PFID(lu_object_fid(&glb_idx->do_lu)), rc);
			lu_object_put(env, &glb_idx->do_lu);
			glb_idx = ERR_PTR(rc);
		}
	}

	RETURN(glb_idx);
}
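
For context, a hedged sketch of how a caller might use the helper above: check IS_ERR() on the result and drop the reference with lu_object_put() when done, just as the error path inside the function itself does. The function name and the surrounding logic are illustrative.

static int demo_use_glb_index(const struct lu_env *env, struct dt_device *dev,
			      struct dt_object *parent, struct lu_fid *fid)
{
	struct dt_object *glb;

	glb = lquota_disk_glb_find_create(env, dev, parent, fid, false);
	if (IS_ERR(glb))
		return PTR_ERR(glb);

	/* ... access the global index through glb->do_index_ops ... */

	lu_object_put(env, &glb->do_lu);	/* drop the lookup reference */
	return 0;
}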
Example #6
static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr,
			      const char *buffer, size_t count)
{
	struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
						 ns_kobj);
	unsigned long tmp;
	int lru_resize;
	int err;

	if (strncmp(buffer, "clear", 5) == 0) {
		CDEBUG(D_DLMTRACE,
		       "dropping all unused locks from namespace %s\n",
		       ldlm_ns_name(ns));
		if (ns_connect_lru_resize(ns)) {
			int canceled, unused  = ns->ns_nr_unused;

			/* Try to cancel all @ns_nr_unused locks. */
			canceled = ldlm_cancel_lru(ns, unused, 0,
						   LDLM_CANCEL_PASSED);
			if (canceled < unused) {
				CDEBUG(D_DLMTRACE,
				       "not all requested locks are canceled, requested: %d, canceled: %d\n",
				       unused,
				       canceled);
				return -EINVAL;
			}
		} else {
			tmp = ns->ns_max_unused;
			ns->ns_max_unused = 0;
			ldlm_cancel_lru(ns, 0, 0, LDLM_CANCEL_PASSED);
			ns->ns_max_unused = tmp;
		}
		return count;
	}

	err = kstrtoul(buffer, 10, &tmp);
	if (err != 0) {
		CERROR("lru_size: invalid value written\n");
		return -EINVAL;
	}
	lru_resize = (tmp == 0);

	if (ns_connect_lru_resize(ns)) {
		if (!lru_resize)
			ns->ns_max_unused = (unsigned int)tmp;

		if (tmp > ns->ns_nr_unused)
			tmp = ns->ns_nr_unused;
		tmp = ns->ns_nr_unused - tmp;

		CDEBUG(D_DLMTRACE,
		       "changing namespace %s unused locks from %u to %u\n",
		       ldlm_ns_name(ns), ns->ns_nr_unused,
		       (unsigned int)tmp);
		ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_CANCEL_PASSED);

		if (!lru_resize) {
			CDEBUG(D_DLMTRACE,
			       "disable lru_resize for namespace %s\n",
			       ldlm_ns_name(ns));
			ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE;
		}
	} else {
		CDEBUG(D_DLMTRACE,
		       "changing namespace %s max_unused from %u to %u\n",
		       ldlm_ns_name(ns), ns->ns_max_unused,
		       (unsigned int)tmp);
		ns->ns_max_unused = (unsigned int)tmp;
		ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_CANCEL_PASSED);

		/* Make sure that LRU resize was originally supported before
		 * turning it on here.
		 */
		if (lru_resize &&
		    (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) {
			CDEBUG(D_DLMTRACE,
			       "enable lru_resize for namespace %s\n",
			       ldlm_ns_name(ns));
			ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE;
		}
	}

	return count;
}
Example #7
File: obd_mount.c Project: 3null/linux
/** Set up an mgc obd to process startup logs
 *
 * \param sb [in] super block of the mgc obd
 *
 * \retval 0 success, otherwise error code
 */
int lustre_start_mgc(struct super_block *sb)
{
	struct obd_connect_data *data = NULL;
	struct lustre_sb_info *lsi = s2lsi(sb);
	struct obd_device *obd;
	struct obd_export *exp;
	struct obd_uuid *uuid;
	class_uuid_t uuidc;
	lnet_nid_t nid;
	char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
	char *ptr;
	int rc = 0, i = 0, j, len;

	LASSERT(lsi->lsi_lmd);

	/* Find the first non-lo MGS nid for our MGC name */
	if (IS_SERVER(lsi)) {
		/* mount -o mgsnode=nid */
		ptr = lsi->lsi_lmd->lmd_mgs;
		if (lsi->lsi_lmd->lmd_mgs &&
		    (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
			i++;
		} else if (IS_MGS(lsi)) {
			lnet_process_id_t id;
			while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
				if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
					continue;
				nid = id.nid;
				i++;
				break;
			}
		}
	} else { /* client */
		/* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
		ptr = lsi->lsi_lmd->lmd_dev;
		if (class_parse_nid(ptr, &nid, &ptr) == 0)
			i++;
	}
	if (i == 0) {
		CERROR("No valid MGS nids found.\n");
		return -EINVAL;
	}

	mutex_lock(&mgc_start_lock);

	len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
	OBD_ALLOC(mgcname, len);
	OBD_ALLOC(niduuid, len + 2);
	if (!mgcname || !niduuid) {
		rc = -ENOMEM;
		goto out_free;
	}
	sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));

	mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";

	OBD_ALLOC_PTR(data);
	if (data == NULL) {
		rc = -ENOMEM;
		goto out_free;
	}

	obd = class_name2obd(mgcname);
	if (obd && !obd->obd_stopping) {
		int recov_bk;

		rc = obd_set_info_async(NULL, obd->obd_self_export,
					strlen(KEY_MGSSEC), KEY_MGSSEC,
					strlen(mgssec), mgssec, NULL);
		if (rc)
			goto out_free;

		/* Re-using an existing MGC */
		atomic_inc(&obd->u.cli.cl_mgc_refcount);

		/* IR compatibility check, only for clients */
		if (lmd_is_client(lsi->lsi_lmd)) {
			int has_ir;
			int vallen = sizeof(*data);
			__u32 *flags = &lsi->lsi_lmd->lmd_flags;

			rc = obd_get_info(NULL, obd->obd_self_export,
					  strlen(KEY_CONN_DATA), KEY_CONN_DATA,
					  &vallen, data, NULL);
			LASSERT(rc == 0);
			has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
			if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
				/* LMD_FLG_NOIR is for test purpose only */
				LCONSOLE_WARN(
				    "Trying to mount a client with IR setting "
				    "not compatible with current mgc. "
				    "Force to use current mgc setting that is "
				    "IR %s.\n",
				    has_ir ? "enabled" : "disabled");
				if (has_ir)
					*flags &= ~LMD_FLG_NOIR;
				else
					*flags |= LMD_FLG_NOIR;
			}
		}

		recov_bk = 0;
		/* If we are restarting the MGS, don't try to keep the MGC's
		   old connection, or registration will fail. */
		if (IS_MGS(lsi)) {
			CDEBUG(D_MOUNT, "New MGS with live MGC\n");
			recov_bk = 1;
		}

		/* Try all connections, but only once (again).
		   We don't want to block another target from starting
		   (using its local copy of the log), but we do want to connect
		   if at all possible. */
		recov_bk++;
		CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
		       recov_bk);
		rc = obd_set_info_async(NULL, obd->obd_self_export,
					sizeof(KEY_INIT_RECOV_BACKUP),
					KEY_INIT_RECOV_BACKUP,
					sizeof(recov_bk), &recov_bk, NULL);
		rc = 0;
		goto out;
	}

	CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);

	/* Add the primary nids for the MGS */
	i = 0;
	sprintf(niduuid, "%s_%x", mgcname, i);
	if (IS_SERVER(lsi)) {
		ptr = lsi->lsi_lmd->lmd_mgs;
		if (IS_MGS(lsi)) {
			/* Use local nids (including LO) */
			lnet_process_id_t id;
			while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
				rc = do_lcfg(mgcname, id.nid,
					     LCFG_ADD_UUID, niduuid,
					     NULL, NULL, NULL);
			}
		} else {
			/* Use mgsnode= nids */
			/* mount -o mgsnode=nid */
			if (lsi->lsi_lmd->lmd_mgs) {
				ptr = lsi->lsi_lmd->lmd_mgs;
			} else if (class_find_param(ptr, PARAM_MGSNODE,
						    &ptr) != 0) {
				CERROR("No MGS nids given.\n");
				rc = -EINVAL;
				goto out_free;
			}
			while (class_parse_nid(ptr, &nid, &ptr) == 0) {
				rc = do_lcfg(mgcname, nid,
					     LCFG_ADD_UUID, niduuid,
					     NULL, NULL, NULL);
				i++;
			}
		}
	} else { /* client */
		/* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
		ptr = lsi->lsi_lmd->lmd_dev;
		while (class_parse_nid(ptr, &nid, &ptr) == 0) {
			rc = do_lcfg(mgcname, nid,
				     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
			i++;
			/* Stop at the first failover nid */
			if (*ptr == ':')
				break;
		}
	}
	if (i == 0) {
		CERROR("No valid MGS nids found.\n");
		rc = -EINVAL;
		goto out_free;
	}
	lsi->lsi_lmd->lmd_mgs_failnodes = 1;

	/* Random uuid for MGC allows easier reconnects */
	OBD_ALLOC_PTR(uuid);
	ll_generate_random_uuid(uuidc);
	class_uuid_unparse(uuidc, uuid);

	/* Start the MGC */
	rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
				 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
				 niduuid, NULL, NULL);
	OBD_FREE_PTR(uuid);
	if (rc)
		goto out_free;

	/* Add any failover MGS nids */
	i = 1;
	while (ptr && ((*ptr == ':' ||
	       class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
		/* New failover node */
		sprintf(niduuid, "%s_%x", mgcname, i);
		j = 0;
		while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
			j++;
			rc = do_lcfg(mgcname, nid,
				     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
			if (*ptr == ':')
				break;
		}
		if (j > 0) {
			rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
				     niduuid, NULL, NULL, NULL);
			i++;
		} else {
			/* at ":/fsname" */
			break;
		}
	}
	lsi->lsi_lmd->lmd_mgs_failnodes = i;

	obd = class_name2obd(mgcname);
	if (!obd) {
		CERROR("Can't find mgcobd %s\n", mgcname);
		rc = -ENOTCONN;
		goto out_free;
	}

	rc = obd_set_info_async(NULL, obd->obd_self_export,
				strlen(KEY_MGSSEC), KEY_MGSSEC,
				strlen(mgssec), mgssec, NULL);
	if (rc)
		goto out_free;

	/* Keep a refcount of servers/clients who started with "mount",
	   so we know when we can get rid of the mgc. */
	atomic_set(&obd->u.cli.cl_mgc_refcount, 1);

	/* We connect to the MGS at setup, and don't disconnect until cleanup */
	data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
				  OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
				  OBD_CONNECT_LVB_TYPE;

#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
	data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
#else
#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
#endif

	if (lmd_is_client(lsi->lsi_lmd) &&
	    lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
		data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
	data->ocd_version = LUSTRE_VERSION_CODE;
	rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
	if (rc) {
		CERROR("connect failed %d\n", rc);
		goto out;
	}

	obd->u.cli.cl_mgc_mgsexp = exp;

out:
	/* Keep the mgc info in the sb. Note that many lsi's can point
	   to the same mgc.*/
	lsi->lsi_mgc = obd;
out_free:
	mutex_unlock(&mgc_start_lock);

	if (data)
		OBD_FREE_PTR(data);
	if (mgcname)
		OBD_FREE(mgcname, len);
	if (niduuid)
		OBD_FREE(niduuid, len + 2);
	return rc;
}
Example #8
static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
			struct cl_object *stripe, struct lov_layout_raid0 *r0,
			int idx)
{
	struct cl_object_header *hdr;
	struct cl_object_header *subhdr;
	struct cl_object_header *parent;
	struct lov_oinfo	*oinfo;
	int result;

	if (OBD_FAIL_CHECK(OBD_FAIL_LOV_INIT)) {
		/* For sanity:test_206.
		 * Do not leave the object in cache to avoid accessing
		 * freed memory. This is because osc_object is referring to
		 * lov_oinfo of lsm_stripe_data which will be freed due to
		 * this failure.
		 */
		cl_object_kill(env, stripe);
		cl_object_put(env, stripe);
		return -EIO;
	}

	hdr    = cl_object_header(lov2cl(lov));
	subhdr = cl_object_header(stripe);

	oinfo = lov->lo_lsm->lsm_oinfo[idx];
	CDEBUG(D_INODE, DFID"@%p[%d] -> "DFID"@%p: ostid: "DOSTID
	       " idx: %d gen: %d\n",
	       PFID(&subhdr->coh_lu.loh_fid), subhdr, idx,
	       PFID(&hdr->coh_lu.loh_fid), hdr, POSTID(&oinfo->loi_oi),
	       oinfo->loi_ost_idx, oinfo->loi_ost_gen);

	/* reuse ->coh_attr_guard to protect coh_parent change */
	spin_lock(&subhdr->coh_attr_guard);
	parent = subhdr->coh_parent;
	if (!parent) {
		subhdr->coh_parent = hdr;
		spin_unlock(&subhdr->coh_attr_guard);
		subhdr->coh_nesting = hdr->coh_nesting + 1;
		lu_object_ref_add(&stripe->co_lu, "lov-parent", lov);
		r0->lo_sub[idx] = cl2lovsub(stripe);
		r0->lo_sub[idx]->lso_super = lov;
		r0->lo_sub[idx]->lso_index = idx;
		result = 0;
	} else {
		struct lu_object  *old_obj;
		struct lov_object *old_lov;
		unsigned int mask = D_INODE;

		spin_unlock(&subhdr->coh_attr_guard);
		old_obj = lu_object_locate(&parent->coh_lu, &lov_device_type);
		LASSERT(old_obj);
		old_lov = cl2lov(lu2cl(old_obj));
		if (old_lov->lo_layout_invalid) {
			/* the object's layout has already changed but isn't
			 * refreshed
			 */
			lu_object_unhash(env, &stripe->co_lu);
			result = -EAGAIN;
		} else {
			mask = D_ERROR;
			result = -EIO;
		}

		LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
				"stripe %d is already owned.", idx);
		LU_OBJECT_DEBUG(mask, env, old_obj, "owned.");
		LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
		cl_object_put(env, stripe);
	}
	return result;
}
Example #9
static int lov_layout_change(const struct lu_env *unused,
			     struct lov_object *lov,
			     const struct cl_object_conf *conf)
{
	int result;
	enum lov_layout_type llt = LLT_EMPTY;
	union lov_layout_state *state = &lov->u;
	const struct lov_layout_operations *old_ops;
	const struct lov_layout_operations *new_ops;

	void *cookie;
	struct lu_env *env;
	int refcheck;

	LASSERT(0 <= lov->lo_type && lov->lo_type < ARRAY_SIZE(lov_dispatch));

	if (conf->u.coc_md)
		llt = lov_type(conf->u.coc_md->lsm);
	LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch));

	cookie = cl_env_reenter();
	env = cl_env_get(&refcheck);
	if (IS_ERR(env)) {
		cl_env_reexit(cookie);
		return PTR_ERR(env);
	}

	CDEBUG(D_INODE, DFID" from %s to %s\n",
	       PFID(lu_object_fid(lov2lu(lov))),
	       llt2str(lov->lo_type), llt2str(llt));

	old_ops = &lov_dispatch[lov->lo_type];
	new_ops = &lov_dispatch[llt];

	result = cl_object_prune(env, &lov->lo_cl);
	if (result != 0)
		goto out;

	result = old_ops->llo_delete(env, lov, &lov->u);
	if (result == 0) {
		old_ops->llo_fini(env, lov, &lov->u);

		LASSERT(atomic_read(&lov->lo_active_ios) == 0);

		lov->lo_type = LLT_EMPTY;
		result = new_ops->llo_init(env,
					lu2lov_dev(lov->lo_cl.co_lu.lo_dev),
					lov, conf, state);
		if (result == 0) {
			new_ops->llo_install(env, lov, state);
			lov->lo_type = llt;
		} else {
			new_ops->llo_delete(env, lov, state);
			new_ops->llo_fini(env, lov, state);
			/* this file becomes an EMPTY file. */
		}
	}

out:
	cl_env_put(env, &refcheck);
	cl_env_reexit(cookie);
	return result;
}
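
The transition above is driven entirely by the lov_dispatch[] method table indexed by layout type. A shape sketch of what its entries must provide, inferred only from the calls made here (llo_delete/llo_fini/llo_init/llo_install); the real struct lov_layout_operations in upstream Lustre has more methods, so this is not the actual definition.

struct lov_layout_operations_sketch {
	int  (*llo_init)(const struct lu_env *env, struct lov_device *dev,
			 struct lov_object *lov,
			 const struct cl_object_conf *conf,
			 union lov_layout_state *state);
	int  (*llo_delete)(const struct lu_env *env, struct lov_object *lov,
			   union lov_layout_state *state);
	void (*llo_fini)(const struct lu_env *env, struct lov_object *lov,
			 union lov_layout_state *state);
	void (*llo_install)(const struct lu_env *env, struct lov_object *lov,
			    union lov_layout_state *state);
};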
Example #10
/* Sharing code of page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
                            bool *retry)
{
	struct lu_env           *env;
	struct cl_io            *io;
	struct vvp_io           *vio;
	int                      result;
	__u16			 refcheck;
	sigset_t		 set;
	struct inode             *inode;
	struct ll_inode_info     *lli;
	ENTRY;

	LASSERT(vmpage != NULL);
	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		RETURN(PTR_ERR(env));

	io = ll_fault_io_init(env, vma, vmpage->index, NULL);
	if (IS_ERR(io))
		GOTO(out, result = PTR_ERR(io));

	result = io->ci_result;
	if (result < 0)
		GOTO(out_io, result);

	io->u.ci_fault.ft_mkwrite = 1;
	io->u.ci_fault.ft_writable = 1;

	vio = vvp_env_io(env);
	vio->u.fault.ft_vma    = vma;
	vio->u.fault.ft_vmpage = vmpage;

	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

	inode = vvp_object_inode(io->ci_obj);
	lli = ll_i2info(inode);

	result = cl_io_loop(env, io);

	cfs_restore_sigs(set);

	if (result == 0) {
		lock_page(vmpage);
		if (vmpage->mapping == NULL) {
			unlock_page(vmpage);

			/* page was truncated and lock was cancelled, return
			 * ENODATA so that VM_FAULT_NOPAGE will be returned
			 * to handle_mm_fault(). */
			if (result == 0)
				result = -ENODATA;
		} else if (!PageDirty(vmpage)) {
			/* race, the page has been cleaned by ptlrpcd after
			 * it was unlocked, it has to be added into dirty
			 * cache again otherwise this soon-to-dirty page won't
			 * consume any grants, even worse if this page is being
			 * transferred because it will break RPC checksum.
			 */
			unlock_page(vmpage);

			CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has "
			       "been written out, retry.\n",
			       vmpage, vmpage->index);

			*retry = true;
			result = -EAGAIN;
		}

		if (result == 0)
			ll_file_set_flag(lli, LLIF_DATA_MODIFIED);
	}
	EXIT;

out_io:
	cl_io_fini(env, io);
out:
	cl_env_put(env, &refcheck);
	CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
	LASSERT(ergo(result == 0, PageLocked(vmpage)));

	return result;
}
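
The *retry out-parameter only makes sense with a retry loop in the caller. A minimal sketch of such a caller, assuming the mapping of the result to VM_FAULT_* codes is handled elsewhere; the function name is illustrative.

static int demo_page_mkwrite(struct vm_area_struct *vma, struct page *vmpage)
{
	bool retry;
	int result;

	do {
		retry = false;
		result = ll_page_mkwrite0(vma, vmpage, &retry);
	} while (retry);	/* -EAGAIN from the dirty-page race above */

	return result;
}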
Example #11
/**
 * Lustre implementation of a vm_operations_struct::fault() method, called by
 * the VM to serve a page fault (both in kernel and user space).
 *
 * \param vma - is the virtual memory area struct related to the page fault
 * \param vmf - structure which describes the type and the address where
 *              the fault hit
 *
 * \return allocated and filled _locked_ page for the address
 * \retval VM_FAULT_ERROR on general error
 * \retval NOPAGE_OOM if there is no memory to allocate a new page
 */
static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct lu_env           *env;
	struct cl_io            *io;
	struct vvp_io           *vio = NULL;
	struct page             *vmpage;
	unsigned long            ra_flags;
	int                      result = 0;
	int                      fault_ret = 0;
	__u16			 refcheck;
	ENTRY;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		RETURN(PTR_ERR(env));

	if (ll_sbi_has_fast_read(ll_i2sbi(file_inode(vma->vm_file)))) {
		/* do fast fault */
		ll_cl_add(vma->vm_file, env, NULL, LCC_MMAP);
		fault_ret = filemap_fault(vma, vmf);
		ll_cl_remove(vma->vm_file, env);

		/* - If there is no error, then the page was found in cache and
		 *   uptodate;
		 * - If VM_FAULT_RETRY is set, the page existed but failed to
		 *   lock. It will return to kernel and retry;
		 * - Otherwise, it should try normal fault under DLM lock. */
		if ((fault_ret & VM_FAULT_RETRY) ||
		    !(fault_ret & VM_FAULT_ERROR))
			GOTO(out, result = 0);

		fault_ret = 0;
	}

	io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
	if (IS_ERR(io))
		GOTO(out, result = PTR_ERR(io));

	result = io->ci_result;
	if (result == 0) {
		vio = vvp_env_io(env);
		vio->u.fault.ft_vma       = vma;
		vio->u.fault.ft_vmpage    = NULL;
		vio->u.fault.ft_vmf = vmf;
		vio->u.fault.ft_flags = 0;
		vio->u.fault.ft_flags_valid = 0;

		/* May call ll_readpage() */
		ll_cl_add(vma->vm_file, env, io, LCC_MMAP);

		result = cl_io_loop(env, io);

		ll_cl_remove(vma->vm_file, env);

		/* ft_flags are only valid if we reached
		 * the call to filemap_fault */
		if (vio->u.fault.ft_flags_valid)
			fault_ret = vio->u.fault.ft_flags;

		vmpage = vio->u.fault.ft_vmpage;
		if (result != 0 && vmpage != NULL) {
			put_page(vmpage);
			vmf->page = NULL;
		}
	}
	cl_io_fini(env, io);

	vma->vm_flags |= ra_flags;

out:
	cl_env_put(env, &refcheck);
	if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
		fault_ret |= to_fault_error(result);

	CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
	RETURN(fault_ret);
}
Example #12
/**
 * Get a reference on the specified pool.
 *
 * To ensure the pool descriptor is not freed before the caller is finished
 * with it.  Any process that is accessing \a pool directly needs to hold
 * reference on it, including /proc since a userspace thread may be holding
 * the /proc file open and busy in the kernel.
 *
 * \param[in] pool	pool descriptor on which to gain reference
 */
static void pool_getref(struct pool_desc *pool)
{
	CDEBUG(D_INFO, "pool %p\n", pool);
	atomic_inc(&pool->pool_refcount);
}
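
The comment implies a matching put that frees the descriptor on the last reference. A hedged sketch of what that release path could look like; the real release path also has to detach the pool from its hashes and lists, which is omitted here, and the helper name is illustrative.

static void demo_pool_putref(struct pool_desc *pool)
{
	CDEBUG(D_INFO, "pool %p\n", pool);
	if (atomic_dec_and_test(&pool->pool_refcount)) {
		/* last reference dropped: now it is safe to free the
		 * descriptor that pool_getref() was protecting */
		OBD_FREE_PTR(pool);
	}
}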
Example #13
File: xattr.c Project: 3null/linux
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
	struct inode *inode = dentry->d_inode;
	int rc = 0, rc2 = 0;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *request = NULL;
	int lmmsize;

	LASSERT(inode);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
	       inode->i_ino, inode->i_generation, inode);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);

	rc = ll_getxattr_common(inode, NULL, buffer, size, OBD_MD_FLXATTRLS);
	if (rc < 0)
		goto out;

	if (buffer != NULL) {
		struct ll_sb_info *sbi = ll_i2sbi(inode);
		char *xattr_name = buffer;
		int xlen, rem = rc;

		while (rem > 0) {
			xlen = strnlen(xattr_name, rem - 1) + 1;
			rem -= xlen;
			if (xattr_type_filter(sbi,
					get_xattr_type(xattr_name)) == 0) {
				/* skip OK xattr type
				 * leave it in buffer
				 */
				xattr_name += xlen;
				continue;
			}
			/* move up remaining xattrs in buffer
			 * removing the xattr that is not OK
			 */
			memmove(xattr_name, xattr_name + xlen, rem);
			rc -= xlen;
		}
	}
	if (S_ISREG(inode->i_mode)) {
		if (!ll_i2info(inode)->lli_has_smd)
			rc2 = -1;
	} else if (S_ISDIR(inode->i_mode)) {
		rc2 = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
	}

	if (rc2 < 0) {
		rc2 = 0;
		goto out;
	} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		const int prefix_len = sizeof(XATTR_LUSTRE_PREFIX) - 1;
		const size_t name_len   = sizeof("lov") - 1;
		const size_t total_len  = prefix_len + name_len + 1;

		if (((rc + total_len) > size) && (buffer != NULL)) {
			ptlrpc_req_finished(request);
			return -ERANGE;
		}

		if (buffer != NULL) {
			buffer += rc;
			memcpy(buffer, XATTR_LUSTRE_PREFIX, prefix_len);
			memcpy(buffer + prefix_len, "lov", name_len);
			buffer[prefix_len + name_len] = '\0';
		}
		rc2 = total_len;
	}
out:
	ptlrpc_req_finished(request);
	rc = rc + rc2;

	return rc;
}
Example #14
File: xattr.c Project: 3null/linux
ssize_t ll_getxattr(struct dentry *dentry, const char *name,
		    void *buffer, size_t size)
{
	struct inode *inode = dentry->d_inode;

	LASSERT(inode);
	LASSERT(name);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
	       inode->i_ino, inode->i_generation, inode, name);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);

	if ((strncmp(name, XATTR_TRUSTED_PREFIX,
		     sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
	     strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
	    (strncmp(name, XATTR_LUSTRE_PREFIX,
		     sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
	     strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
		struct lov_stripe_md *lsm;
		struct lov_user_md *lump;
		struct lov_mds_md *lmm = NULL;
		struct ptlrpc_request *request = NULL;
		int rc = 0, lmmsize = 0;

		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
			return -ENODATA;

		if (size == 0 && S_ISDIR(inode->i_mode)) {
			/* XXX directory EA is fixed for now, optimize to
			 * save the RPC transfer */
			rc = sizeof(struct lov_user_md);
			goto out;
		}

		lsm = ccc_inode_lsm_get(inode);
		if (lsm == NULL) {
			if (S_ISDIR(inode->i_mode)) {
				rc = ll_dir_getstripe(inode, &lmm,
						      &lmmsize, &request);
			} else {
				rc = -ENODATA;
			}
		} else {
			/* LSM is already present after the lookup/getattr
			 * call; we need to grab the layout lock once it is
			 * implemented */
			rc = obd_packmd(ll_i2dtexp(inode), &lmm, lsm);
			lmmsize = rc;
		}
		ccc_inode_lsm_put(inode, lsm);

		if (rc < 0)
			goto out;

		if (size == 0) {
			/* we used to forward to ll_get_max_mdsize() to get
			 * the maximum buffer size, but some apps (such as
			 * rsync 3.0.x) care much about the exact xattr
			 * value size */
			rc = lmmsize;
			goto out;
		}

		if (size < lmmsize) {
			CERROR("server bug: replied size %d > %d for %s (%s)\n",
			       lmmsize, (int)size, dentry->d_name.name, name);
			rc = -ERANGE;
			goto out;
		}

		lump = (struct lov_user_md *)buffer;
		memcpy(lump, lmm, lmmsize);
		/* do not return layout gen for getxattr otherwise it would
		 * confuse tar --xattr by recognizing layout gen as stripe
		 * offset when the file is restored. See LU-2809. */
		lump->lmm_layout_gen = 0;

		rc = lmmsize;
out:
		if (request)
			ptlrpc_req_finished(request);
		else if (lmm)
			obd_free_diskmd(ll_i2dtexp(inode), &lmm);
		return rc;
	}

	return ll_getxattr_common(inode, name, buffer, size, OBD_MD_FLXATTR);
}
Example #15
/**
 * Implementation of ldlm_valblock_ops::lvbo_init for OFD.
 *
 * This function allocates and initializes new LVB data for the given
 * LDLM resource if it is not allocated yet. New LVB is filled with attributes
 * of the object associated with that resource. Function does nothing if LVB
 * for the given LDLM resource is allocated already.
 *
 * Called with res->lr_lvb_mutex held.
 *
 * \param[in] lock	LDLM lock on resource
 *
 * \retval		0 on successful setup
 * \retval		negative value on error
 */
static int ofd_lvbo_init(const struct lu_env *env, struct ldlm_resource *res)
{
	struct ost_lvb		*lvb;
	struct ofd_device	*ofd;
	struct ofd_object	*fo;
	struct ofd_thread_info	*info;
	struct lu_env _env;
	int rc = 0;
	ENTRY;

	LASSERT(res);
	LASSERT(mutex_is_locked(&res->lr_lvb_mutex));

	if (res->lr_lvb_data != NULL)
		RETURN(0);

	ofd = ldlm_res_to_ns(res)->ns_lvbp;
	LASSERT(ofd != NULL);

	if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_OST_LVB))
		RETURN(-ENOMEM);

	if (!env) {
		rc = lu_env_init(&_env, LCT_DT_THREAD);
		if (rc)
			RETURN(rc);
		env = &_env;
	}

	OBD_ALLOC_PTR(lvb);
	if (lvb == NULL)
		GOTO(out, rc = -ENOMEM);

	info = ofd_info(env);
	res->lr_lvb_data = lvb;
	res->lr_lvb_len = sizeof(*lvb);

	ost_fid_from_resid(&info->fti_fid, &res->lr_name,
			   ofd->ofd_lut.lut_lsd.lsd_osd_index);
	fo = ofd_object_find(env, ofd, &info->fti_fid);
	if (IS_ERR(fo))
		GOTO(out_lvb, rc = PTR_ERR(fo));

	rc = ofd_attr_get(env, fo, &info->fti_attr);
	if (rc) {
		struct ofd_seq		*oseq;
		__u64			 seq;

		/* Object could be recreated during the first
		 * CLEANUP_ORPHAN request. */
		if (rc == -ENOENT) {
			seq = fid_seq(&info->fti_fid);
			oseq = ofd_seq_load(env, ofd, fid_seq_is_idif(seq) ?
					    FID_SEQ_OST_MDT0 : seq);
			if (!IS_ERR_OR_NULL(oseq)) {
				if (!oseq->os_last_id_synced)
					rc = -EAGAIN;
				ofd_seq_put(env, oseq);
			}
		}
		GOTO(out_obj, rc);
	}

	lvb->lvb_size = info->fti_attr.la_size;
	lvb->lvb_blocks = info->fti_attr.la_blocks;
	lvb->lvb_mtime = info->fti_attr.la_mtime;
	lvb->lvb_atime = info->fti_attr.la_atime;
	lvb->lvb_ctime = info->fti_attr.la_ctime;

	CDEBUG(D_DLMTRACE, "res: "DFID" initial lvb size: %llu, "
	       "mtime: %#llx, blocks: %#llx\n",
	       PFID(&info->fti_fid), lvb->lvb_size,
	       lvb->lvb_mtime, lvb->lvb_blocks);

	info->fti_attr.la_valid = 0;

	EXIT;
out_obj:
	ofd_object_put(env, fo);
out_lvb:
	if (rc != 0)
		OST_LVB_SET_ERR(lvb->lvb_blocks, rc);
out:
	/* Don't free lvb data on lookup error */
	if (env && env == &_env)
		lu_env_fini(&_env);
	return rc;
}
Example #16
int
lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
{
	int rc;
	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
	unsigned long then;
	struct timeval tv;

	LASSERT(nob > 0);
	LASSERT(jiffies_left > 0);

	for (;;) {
		struct kvec  iov = {
			.iov_base = buffer,
			.iov_len  = nob
		};
		struct msghdr msg = {
			.msg_flags = 0
		};

		/* Set receive timeout to remaining time */
		jiffies_to_timeval(jiffies_left, &tv);
		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
				       (char *)&tv, sizeof(tv));
		if (rc) {
			CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
			       (long)tv.tv_sec, (int)tv.tv_usec, rc);
			return rc;
		}

		then = jiffies;
		rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
		jiffies_left -= jiffies - then;

		if (rc < 0)
			return rc;

		if (!rc)
			return -ECONNRESET;

		buffer = ((char *)buffer) + rc;
		nob -= rc;

		if (!nob)
			return 0;

		if (jiffies_left <= 0)
			return -ETIMEDOUT;
	}
}
EXPORT_SYMBOL(lnet_sock_read);
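
lnet_sock_read() is all-or-nothing: it returns 0 only after exactly nob bytes arrived, otherwise a negative errno (-ETIMEDOUT, -ECONNRESET, ...); partial reads never leak out to the caller. A usage sketch, assuming sock is already connected; the helper name and the 5-second timeout are illustrative.

static int demo_read_magic(struct socket *sock, __u32 *magic)
{
	/* fills all of *magic or fails as a whole */
	int rc = lnet_sock_read(sock, magic, sizeof(*magic), 5);

	if (rc)
		CERROR("failed to read handshake magic: %d\n", rc);
	return rc;
}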

static int
lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
		 int local_port)
{
	struct sockaddr_in locaddr;
	struct socket *sock;
	int rc;
	int option;

	/* All errors are fatal except bind failure if the port is in use */
	*fatal = 1;

	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
	*sockp = sock;
	if (rc) {
		CERROR("Can't create socket: %d\n", rc);
		return rc;
	}

	option = 1;
	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
			       (char *)&option, sizeof(option));
	if (rc) {
		CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
		goto failed;
	}

	if (local_ip || local_port) {
		memset(&locaddr, 0, sizeof(locaddr));
		locaddr.sin_family = AF_INET;
		locaddr.sin_port = htons(local_port);
		locaddr.sin_addr.s_addr = !local_ip ?
					  INADDR_ANY : htonl(local_ip);

		rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
				 sizeof(locaddr));
		if (rc == -EADDRINUSE) {
			CDEBUG(D_NET, "Port %d already in use\n", local_port);
			*fatal = 0;
			goto failed;
		}
		if (rc) {
			CERROR("Error trying to bind to port %d: %d\n",
			       local_port, rc);
			goto failed;
		}
	}
	return 0;

failed:
	sock_release(sock);
	return rc;
}

int
lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
{
	int option;
	int rc;

	if (txbufsize) {
		option = txbufsize;
		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
				       (char *)&option, sizeof(option));
		if (rc) {
			CERROR("Can't set send buffer %d: %d\n",
			       option, rc);
			return rc;
		}
	}

	if (rxbufsize) {
		option = rxbufsize;
		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
				       (char *)&option, sizeof(option));
		if (rc) {
			CERROR("Can't set receive buffer %d: %d\n",
			       option, rc);
			return rc;
		}
	}
	return 0;
}
Example #17
/* Allocate new fid on passed client @seq and save it to @fid. */
int seq_client_alloc_fid(const struct lu_env *env,
                         struct lu_client_seq *seq, struct lu_fid *fid)
{
        cfs_waitlink_t link;
        int rc;
        ENTRY;

        LASSERT(seq != NULL);
        LASSERT(fid != NULL);

        cfs_waitlink_init(&link);
	mutex_lock(&seq->lcs_mutex);

	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

        while (1) {
                seqno_t seqnr;

                if (!fid_is_zero(&seq->lcs_fid) &&
                    fid_oid(&seq->lcs_fid) < seq->lcs_width) {
                        /* Just bump last allocated fid and return to caller. */
                        seq->lcs_fid.f_oid += 1;
                        rc = 0;
                        break;
                }

                rc = seq_fid_alloc_prep(seq, &link);
                if (rc)
                        continue;

                rc = seq_client_alloc_seq(env, seq, &seqnr);
                if (rc) {
                        CERROR("%s: Can't allocate new sequence, "
                               "rc %d\n", seq->lcs_name, rc);
                        seq_fid_alloc_fini(seq);
			mutex_unlock(&seq->lcs_mutex);
                        RETURN(rc);
                }

                CDEBUG(D_INFO, "%s: Switch to sequence "
                       "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr);

                seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
                seq->lcs_fid.f_seq = seqnr;
                seq->lcs_fid.f_ver = 0;

                /*
                 * Inform caller that sequence switch is performed to allow it
                 * to setup FLD for it.
                 */
                rc = 1;

                seq_fid_alloc_fini(seq);
                break;
        }

        *fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

        CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name,  PFID(fid));
        RETURN(rc);
}
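
Note the tri-state return: negative on error, 0 for a FID from the current sequence, and 1 when a sequence switch happened so the caller can set up the FLD for it. A sketch of a caller honoring that convention; the FLD update is left as a placeholder since the exact call depends on the caller's context.

static int demo_alloc_fid(const struct lu_env *env,
			  struct lu_client_seq *seq, struct lu_fid *fid)
{
	int rc = seq_client_alloc_fid(env, seq, fid);

	if (rc < 0)
		return rc;	/* allocation failed */
	if (rc == 1) {
		/* new sequence started: register it with the FLD here */
	}
	return 0;
}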
Example #18
int
lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
{
	struct ifreq ifr;
	int nob;
	int rc;
	__u32 val;

	nob = strnlen(name, IFNAMSIZ);
	if (nob == IFNAMSIZ) {
		CERROR("Interface name %s too long\n", name);
		return -EINVAL;
	}

	CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);

	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
		return -E2BIG;
	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
	if (rc) {
		CERROR("Can't get flags for interface %s\n", name);
		return rc;
	}

	if (!(ifr.ifr_flags & IFF_UP)) {
		CDEBUG(D_NET, "Interface %s down\n", name);
		*up = 0;
		*ip = *mask = 0;
		return 0;
	}
	*up = 1;

	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
		return -E2BIG;
	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_addr.sa_family = AF_INET;
	rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
	if (rc) {
		CERROR("Can't get IP address for interface %s\n", name);
		return rc;
	}

	val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
	*ip = ntohl(val);

	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
		return -E2BIG;
	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_addr.sa_family = AF_INET;
	rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
	if (rc) {
		CERROR("Can't get netmask for interface %s\n", name);
		return rc;
	}

	val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
	*mask = ntohl(val);

	return 0;
}
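
Since *ip and *mask come back in host byte order (note the ntohl() calls above), the dotted-quad components can be extracted with plain shifts. A usage sketch; "eth0" is only an example interface name.

static void demo_show_interface(void)
{
	int up;
	__u32 ip, mask;

	if (lnet_ipif_query("eth0", &up, &ip, &mask) == 0 && up)
		CDEBUG(D_NET, "eth0: %u.%u.%u.%u/%u.%u.%u.%u\n",
		       (ip >> 24) & 0xff, (ip >> 16) & 0xff,
		       (ip >> 8) & 0xff, ip & 0xff,
		       (mask >> 24) & 0xff, (mask >> 16) & 0xff,
		       (mask >> 8) & 0xff, mask & 0xff);
}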
Example #19
static ssize_t lprocfs_lru_size_seq_write(struct file *file, const char *buffer,
				      size_t count, loff_t *off)
{
	struct ldlm_namespace *ns = ((struct seq_file *)file->private_data)->private;
	char dummy[MAX_STRING_SIZE + 1], *end;
	unsigned long tmp;
	int lru_resize;

	dummy[MAX_STRING_SIZE] = '\0';
	if (copy_from_user(dummy, buffer, MAX_STRING_SIZE))
		return -EFAULT;

	if (strncmp(dummy, "clear", 5) == 0) {
		CDEBUG(D_DLMTRACE,
		       "dropping all unused locks from namespace %s\n",
		       ldlm_ns_name(ns));
		if (ns_connect_lru_resize(ns)) {
			int canceled, unused  = ns->ns_nr_unused;

			/* Try to cancel all @ns_nr_unused locks. */
			canceled = ldlm_cancel_lru(ns, unused, 0,
						   LDLM_CANCEL_PASSED);
			if (canceled < unused) {
				CDEBUG(D_DLMTRACE,
				       "not all requested locks are canceled, "
				       "requested: %d, canceled: %d\n", unused,
				       canceled);
				return -EINVAL;
			}
		} else {
			tmp = ns->ns_max_unused;
			ns->ns_max_unused = 0;
			ldlm_cancel_lru(ns, 0, 0, LDLM_CANCEL_PASSED);
			ns->ns_max_unused = tmp;
		}
		return count;
	}

	tmp = simple_strtoul(dummy, &end, 0);
	if (dummy == end) {
		CERROR("invalid value written\n");
		return -EINVAL;
	}
	lru_resize = (tmp == 0);

	if (ns_connect_lru_resize(ns)) {
		if (!lru_resize)
			ns->ns_max_unused = (unsigned int)tmp;

		if (tmp > ns->ns_nr_unused)
			tmp = ns->ns_nr_unused;
		tmp = ns->ns_nr_unused - tmp;

		CDEBUG(D_DLMTRACE,
		       "changing namespace %s unused locks from %u to %u\n",
		       ldlm_ns_name(ns), ns->ns_nr_unused,
		       (unsigned int)tmp);
		ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_CANCEL_PASSED);

		if (!lru_resize) {
			CDEBUG(D_DLMTRACE,
			       "disable lru_resize for namespace %s\n",
			       ldlm_ns_name(ns));
			ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE;
		}
	} else {
		CDEBUG(D_DLMTRACE,
		       "changing namespace %s max_unused from %u to %u\n",
		       ldlm_ns_name(ns), ns->ns_max_unused,
		       (unsigned int)tmp);
		ns->ns_max_unused = (unsigned int)tmp;
		ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_CANCEL_PASSED);

		/* Make sure that LRU resize was originally supported before
		 * turning it on here. */
		if (lru_resize &&
		    (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) {
			CDEBUG(D_DLMTRACE,
			       "enable lru_resize for namespace %s\n",
			       ldlm_ns_name(ns));
			ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE;
		}
	}

	return count;
}
Example #20
/* Pack LOV object metadata for disk storage.  It is packed in LE byte
 * order and is opaque to the networking layer.
 *
 * XXX In the future, this will be enhanced to get the EA size from the
 *     underlying OSC device(s) to get their EA sizes so we can stack
 *     LOVs properly.  For now lov_mds_md_size() just assumes one obd_id
 *     per stripe.
 */
int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
               struct lov_stripe_md *lsm)
{
        struct obd_device *obd = class_exp2obd(exp);
        struct lov_obd *lov = &obd->u.lov;
        struct lov_mds_md_v1 *lmmv1;
        struct lov_mds_md_v3 *lmmv3;
        __u16 stripe_count;
        struct lov_ost_data_v1 *lmm_objects;
        int lmm_size, lmm_magic;
        int i;
        ENTRY;

        if (lsm) {
                lmm_magic = lsm->lsm_magic;
        } else {
                if (lmmp && *lmmp)
                        lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
                else
                        /* lsm == NULL and lmmp == NULL */
                        lmm_magic = LOV_MAGIC;
        }

        if ((lmm_magic != LOV_MAGIC_V1) &&
            (lmm_magic != LOV_MAGIC_V3)) {
                CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
                        lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
                RETURN(-EINVAL);

        }

        if (lsm) {
                /* If we are just sizing the EA, limit the stripe count
                 * to the actual number of OSTs in this filesystem. */
                if (!lmmp) {
                        stripe_count = lov_get_stripecnt(lov, lmm_magic,
                                                         lsm->lsm_stripe_count);
                        lsm->lsm_stripe_count = stripe_count;
                } else {
                        stripe_count = lsm->lsm_stripe_count;
                }
        } else {
                /* No need to allocate more than maximum supported stripes.
                 * Anyway, this is pretty inaccurate since ld_tgt_count now
                 * represents max index and we should rely on the actual number
                 * of OSTs instead */
                stripe_count = lov_mds_md_stripecnt(lov->lov_ocd.ocd_max_easize,
                                                    lmm_magic);
                if (stripe_count > lov->desc.ld_tgt_count)
                        stripe_count = lov->desc.ld_tgt_count;
        }

        /* XXX LOV STACKING call into osc for sizes */
        lmm_size = lov_mds_md_size(stripe_count, lmm_magic);

        if (!lmmp)
                RETURN(lmm_size);

        if (*lmmp && !lsm) {
                stripe_count = le16_to_cpu((*lmmp)->lmm_stripe_count);
                lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
                OBD_FREE_LARGE(*lmmp, lmm_size);
                *lmmp = NULL;
                RETURN(0);
        }

        if (!*lmmp) {
                OBD_ALLOC_LARGE(*lmmp, lmm_size);
                if (!*lmmp)
                        RETURN(-ENOMEM);
        }

        CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d \n",
               lmm_magic, lmm_size);

        lmmv1 = *lmmp;
        lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
        if (lmm_magic == LOV_MAGIC_V3)
                lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
        else
                lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);

        if (!lsm)
                RETURN(lmm_size);

        /* lmmv1 and lmmv3 point to the same struct and have the
         * same first fields
         */
        lmmv1->lmm_object_id = cpu_to_le64(lsm->lsm_object_id);
        lmmv1->lmm_object_seq = cpu_to_le64(lsm->lsm_object_seq);
        lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
        lmmv1->lmm_stripe_count = cpu_to_le16(stripe_count);
        lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
        lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen);
        if (lsm->lsm_magic == LOV_MAGIC_V3) {
                strncpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
                        LOV_MAXPOOLNAME);
                lmm_objects = lmmv3->lmm_objects;
        } else {
                lmm_objects = lmmv1->lmm_objects;
        }

        for (i = 0; i < stripe_count; i++) {
                struct lov_oinfo *loi = lsm->lsm_oinfo[i];
                /* XXX LOV STACKING call down to osc_packmd() to do packing */
                LASSERTF(loi->loi_id, "lmm_oid "LPU64" stripe %u/%u idx %u\n",
                         lmmv1->lmm_object_id, i, stripe_count, loi->loi_ost_idx);
                lmm_objects[i].l_object_id = cpu_to_le64(loi->loi_id);
                lmm_objects[i].l_object_seq = cpu_to_le64(loi->loi_seq);
                lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
                lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
        }

        RETURN(lmm_size);
}
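
The code above supports a two-step calling convention: passing lmmp == NULL returns just the required EA size, a pointer to a NULL buffer makes lov_packmd() allocate and pack it, and a non-NULL *lmmp with lsm == NULL frees it. A sketch of the sizing-then-packing pattern; error handling is trimmed and the helper name is illustrative.

static int demo_pack_lsm(struct obd_export *exp, struct lov_stripe_md *lsm,
			 struct lov_mds_md **lmmp)
{
	int size = lov_packmd(exp, NULL, lsm);	/* sizing call only */

	if (size < 0)
		return size;

	*lmmp = NULL;
	return lov_packmd(exp, lmmp, lsm);	/* allocates and packs */
}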
Example #21
File: obd_mount.c Project: 3null/linux
static int lustre_stop_mgc(struct super_block *sb)
{
	struct lustre_sb_info *lsi = s2lsi(sb);
	struct obd_device *obd;
	char *niduuid = NULL, *ptr = NULL;
	int i, rc = 0, len = 0;

	if (!lsi)
		return -ENOENT;
	obd = lsi->lsi_mgc;
	if (!obd)
		return -ENOENT;
	lsi->lsi_mgc = NULL;

	mutex_lock(&mgc_start_lock);
	LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
	if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
		/* This is not fatal, every client that stops
		   will call in here. */
		CDEBUG(D_MOUNT, "mgc still has %d references.\n",
		       atomic_read(&obd->u.cli.cl_mgc_refcount));
		rc = -EBUSY;
		goto out;
	}

	/* The MGC has no recoverable data in any case.
	 * force shutdown set in umount_begin */
	obd->obd_no_recov = 1;

	if (obd->u.cli.cl_mgc_mgsexp) {
		/* An error is not fatal; if we are unable to send the
		   disconnect, the mgs ping evictor cleans up the export */
		rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
		if (rc)
			CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
	}

	/* Save the obdname for cleaning the nid uuids, which are
	   obdname_XX */
	len = strlen(obd->obd_name) + 6;
	OBD_ALLOC(niduuid, len);
	if (niduuid) {
		strcpy(niduuid, obd->obd_name);
		ptr = niduuid + strlen(niduuid);
	}

	rc = class_manual_cleanup(obd);
	if (rc)
		goto out;

	/* Clean the nid uuids */
	if (!niduuid) {
		rc = -ENOMEM;
		goto out;
	}

	for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
		sprintf(ptr, "_%x", i);
		rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
			     niduuid, NULL, NULL, NULL);
		if (rc)
			CERROR("del MDC UUID %s failed: rc = %d\n",
			       niduuid, rc);
	}
out:
	if (niduuid)
		OBD_FREE(niduuid, len);

	/* class_import_put will get rid of the additional connections */
	mutex_unlock(&mgc_start_lock);
	return rc;
}
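The niduuid bookkeeping above names each failover-nid record "obdname_XX" and reserves strlen(obd_name) + 6 bytes: one byte for '_', up to four hex digits from the "_%x" format, and the trailing NUL. A small userspace sketch of that naming scheme; the obdname string is invented for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
        const char *obdname = "MGC192.168.0.1@tcp";  /* hypothetical name */
        size_t len = strlen(obdname) + 6;  /* '_' + hex index + NUL */
        char *niduuid = malloc(len);
        char *ptr;
        int i;

        if (!niduuid)
                return 1;
        strcpy(niduuid, obdname);
        ptr = niduuid + strlen(niduuid);

        for (i = 0; i < 3; i++) {
                /* same pattern as the sprintf(ptr, "_%x", i) above */
                snprintf(ptr, len - (ptr - niduuid), "_%x", i);
                printf("would delete nid uuid %s\n", niduuid);
        }
        free(niduuid);
        return 0;
}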
Example #22
0
static int __lov_setstripe(struct obd_export *exp, int max_lmm_size,
                           struct lov_stripe_md **lsmp,
                           struct lov_user_md *lump)
{
        struct obd_device *obd = class_exp2obd(exp);
        struct lov_obd *lov = &obd->u.lov;
        char buffer[sizeof(struct lov_user_md_v3)];
        struct lov_user_md_v3 *lumv3 = (struct lov_user_md_v3 *)&buffer[0];
        struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&buffer[0];
        int lmm_magic;
        __u16 stripe_count;
        int rc;
        ENTRY;

        rc = lov_lum_swab_if_needed(lumv3, &lmm_magic, lump);
        if (rc)
                RETURN(rc);

        /* in the rest of the tests, as *lumv1 and lumv3 have the same
         * fields, we use lumv1 to avoid code duplication */

        if (lumv1->lmm_pattern == 0) {
                lumv1->lmm_pattern = lov->desc.ld_pattern ?
                        lov->desc.ld_pattern : LOV_PATTERN_RAID0;
        }

        if (lumv1->lmm_pattern != LOV_PATTERN_RAID0) {
                CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
                       lumv1->lmm_pattern);
                RETURN(-EINVAL);
        }

        /* 64kB is the largest common page size we see (ia64), and matches the
         * check in lfs */
        if (lumv1->lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
                CDEBUG(D_IOCTL, "stripe size %u not multiple of %u, fixing\n",
                       lumv1->lmm_stripe_size, LOV_MIN_STRIPE_SIZE);
                lumv1->lmm_stripe_size = LOV_MIN_STRIPE_SIZE;
        }

        if ((lumv1->lmm_stripe_offset >= lov->desc.ld_tgt_count) &&
            (lumv1->lmm_stripe_offset !=
             (typeof(lumv1->lmm_stripe_offset))(-1))) {
                CDEBUG(D_IOCTL, "stripe offset %u > number of OSTs %u\n",
                       lumv1->lmm_stripe_offset, lov->desc.ld_tgt_count);
                RETURN(-EINVAL);
        }
        stripe_count = lov_get_stripecnt(lov, lmm_magic,
                                         lumv1->lmm_stripe_count);

        if (max_lmm_size) {
                int max_stripes = (max_lmm_size -
                                   lov_mds_md_size(0, lmm_magic)) /
                                   sizeof(struct lov_ost_data_v1);
                if (unlikely(max_stripes < stripe_count)) {
                        CDEBUG(D_IOCTL, "stripe count reset from %d to %d\n",
                               stripe_count, max_stripes);
                        stripe_count = max_stripes;
                }
        }

        if (lmm_magic == LOV_USER_MAGIC_V3) {
                struct pool_desc *pool;

                /* In the function below, .hs_keycmp resolves to
                 * pool_hashkey_keycmp() */
                /* coverity[overrun-buffer-val] */
                pool = lov_find_pool(lov, lumv3->lmm_pool_name);
                if (pool != NULL) {
                        if (lumv3->lmm_stripe_offset !=
                            (typeof(lumv3->lmm_stripe_offset))(-1)) {
                                rc = lov_check_index_in_pool(
                                        lumv3->lmm_stripe_offset, pool);
                                if (rc < 0) {
                                        lov_pool_putref(pool);
                                        RETURN(-EINVAL);
                                }
                        }

                        if (stripe_count > pool_tgt_count(pool))
                                stripe_count = pool_tgt_count(pool);

                        lov_pool_putref(pool);
                }
        }

        rc = lov_alloc_memmd(lsmp, stripe_count, lumv1->lmm_pattern, lmm_magic);

        if (rc >= 0) {
                (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lumv1->lmm_stripe_offset;
                (*lsmp)->lsm_stripe_size = lumv1->lmm_stripe_size;
                if (lmm_magic == LOV_USER_MAGIC_V3)
                        strncpy((*lsmp)->lsm_pool_name, lumv3->lmm_pool_name,
                                LOV_MAXPOOLNAME);
                rc = 0;
        }

        RETURN(rc);
}
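The max_lmm_size clamp in __lov_setstripe is plain arithmetic: subtract the fixed header returned by lov_mds_md_size(0, magic), divide by the per-stripe record size, and cap the requested count. A sketch with invented sizes; HDR_SIZE and PER_STRIPE are placeholders, not the real layout constants.

#include <stdio.h>

#define HDR_SIZE   32   /* stands in for lov_mds_md_size(0, lmm_magic) */
#define PER_STRIPE 24   /* stands in for sizeof(struct lov_ost_data_v1) */

int main(void)
{
        int max_lmm_size = 4096;
        int stripe_count = 200;
        int max_stripes = (max_lmm_size - HDR_SIZE) / PER_STRIPE;

        if (max_stripes < stripe_count) {
                printf("stripe count reset from %d to %d\n",
                       stripe_count, max_stripes);
                stripe_count = max_stripes;
        }
        printf("final stripe_count = %d\n", stripe_count);
        return 0;
}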
Example #23
0
static int out_xattr_get(struct tgt_session_info *tsi)
{
	const struct lu_env	   *env = tsi->tsi_env;
	struct tgt_thread_info	   *tti = tgt_th_info(env);
	struct object_update	   *update = tti->tti_u.update.tti_update;
	struct lu_buf		   *lbuf = &tti->tti_buf;
	struct object_update_reply *reply = tti->tti_u.update.tti_update_reply;
	struct dt_object           *obj = tti->tti_u.update.tti_dt_object;
	char			   *name;
	struct object_update_result *update_result;
	int			idx = tti->tti_u.update.tti_update_reply_index;
	int			   rc;

	ENTRY;

	if (!lu_object_exists(&obj->do_lu)) {
		set_bit(LU_OBJECT_HEARD_BANSHEE,
			&obj->do_lu.lo_header->loh_flags);
		RETURN(-ENOENT);
	}

	name = object_update_param_get(update, 0, NULL);
	if (name == NULL) {
		CERROR("%s: empty name for xattr get: rc = %d\n",
		       tgt_name(tsi->tsi_tgt), -EPROTO);
		RETURN(err_serious(-EPROTO));
	}

	update_result = object_update_result_get(reply, 0, NULL);
	if (update_result == NULL) {
		CERROR("%s: empty name for xattr get: rc = %d\n",
		       tgt_name(tsi->tsi_tgt), -EPROTO);
		RETURN(err_serious(-EPROTO));
	}

	lbuf->lb_buf = update_result->our_data;
	lbuf->lb_len = OUT_UPDATE_REPLY_SIZE -
		       cfs_size_round((unsigned long)update_result->our_data -
				      (unsigned long)update_result);
	dt_read_lock(env, obj, MOR_TGT_CHILD);
	rc = dt_xattr_get(env, obj, lbuf, name, NULL);
	dt_read_unlock(env, obj);
	if (rc < 0) {
		lbuf->lb_len = 0;
		GOTO(out, rc);
	}
	if (rc == 0) {
		lbuf->lb_len = 0;
		GOTO(out, rc = -ENOENT);
	}
	lbuf->lb_len = rc;
	rc = 0;
	CDEBUG(D_INFO, "%s: "DFID" get xattr %s len %d\n",
	       tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)),
	       name, (int)lbuf->lb_len);

	GOTO(out, rc);

out:
	object_update_result_insert(reply, lbuf->lb_buf, lbuf->lb_len, idx, rc);
	RETURN(rc);
}
Example #24
0
File: recover.c Project: 020gzh/linux
/**
 * Start recovery on disconnected import.
 * This is done by just attempting a connect
 */
void ptlrpc_initiate_recovery(struct obd_import *imp)
{
	CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
	ptlrpc_connect_import(imp);
}
Example #25
0
/*
 * Look-up a slave index file. If the slave index isn't found:
 * - if local is set to false, we allocate a FID from FID_SEQ_QUOTA sequence and
 *   create the index.
 * - otherwise, we create the index file with a local reserved FID (see
 *   lquota_local_oid)
 *
 * \param env - is the environment passed by the caller
 * \param dev - is the backend dt_device where to look-up/create the slave index
 * \param parent - is the parent directory where to create the slave index if
 *                 it does not exist already
 * \param glb_fid - is the fid of the global index file associated with this
 *                  slave index.
 * \param uuid    - is the uuid of slave which is (re)connecting to the master
 *                  target
 * \param local   - indicate whether to use local reserved FID (LQUOTA_USR_OID
 *                  & LQUOTA_GRP_OID) for the slave index creation or to
 *                  allocate a new fid from sequence FID_SEQ_QUOTA
 *
 * \retval     - pointer to the dt_object of the slave index on success,
 *               appropriate error on failure
 */
struct dt_object *lquota_disk_slv_find_create(const struct lu_env *env,
					      struct dt_device *dev,
					      struct dt_object *parent,
					      struct lu_fid *glb_fid,
					      struct obd_uuid *uuid,
					      bool local)
{
	struct lquota_thread_info	*qti = lquota_info(env);
	struct dt_object		*slv_idx;
	int				 rc;
	ENTRY;

	LASSERT(uuid != NULL);

	CDEBUG(D_QUOTA, "lookup/create slave index file for %s\n",
	       obd_uuid2str(uuid));

	/* generate filename associated with the slave */
	rc = lquota_disk_slv_filename(glb_fid, uuid, qti->qti_buf);
	if (rc)
		RETURN(ERR_PTR(rc));

	/* Slave indexes use the FID_SEQ_QUOTA sequence since they can be read
	 * through the network */
	qti->qti_fid.f_seq = FID_SEQ_QUOTA;
	qti->qti_fid.f_ver = 0;
	if (local) {
		int type;

		rc = lquota_extract_fid(glb_fid, NULL, NULL, &type);
		if (rc)
			RETURN(ERR_PTR(rc));

		/* use predefined fid in the reserved oid list */
		qti->qti_fid.f_oid = (type == USRQUOTA) ? LQUOTA_USR_OID
							: LQUOTA_GRP_OID;

		slv_idx = local_index_find_or_create_with_fid(env, dev,
							      &qti->qti_fid,
							      parent,
							      qti->qti_buf,
							      LQUOTA_MODE,
							&dt_quota_slv_features);
	} else {
		/* allocate fid dynamically if index does not exist already */
		qti->qti_fid.f_oid = LQUOTA_GENERATED_OID;

		/* lookup/create slave index file */
		slv_idx = lquota_disk_find_create(env, dev, parent,
						  &qti->qti_fid,
						  &dt_quota_slv_features,
						  qti->qti_buf);
	}

	if (IS_ERR(slv_idx))
		RETURN(slv_idx);

	/* install index operation vector */
	if (slv_idx->do_index_ops == NULL) {
		rc = slv_idx->do_ops->do_index_try(env, slv_idx,
						   &dt_quota_slv_features);
		if (rc) {
			CERROR("%s: failed to setup index operations for "DFID
			       " rc:%d\n", dev->dd_lu_dev.ld_obd->obd_name,
			       PFID(lu_object_fid(&slv_idx->do_lu)), rc);
			lu_object_put(env, &slv_idx->do_lu);
			slv_idx = ERR_PTR(rc);
		}
	}

	RETURN(slv_idx);
}
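The doc comment above boils down to a single branch when picking the slave-index FID: a reserved per-type oid for local indexes, or a dynamically allocated one from FID_SEQ_QUOTA otherwise. A compact sketch of that decision; the oid values here are placeholders for the real lquota_local_oid constants.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder values; the real ones come from the lquota headers. */
#define LQUOTA_USR_OID       1
#define LQUOTA_GRP_OID       2
#define LQUOTA_GENERATED_OID 3
#define USRQUOTA             0
#define GRPQUOTA             1

static uint32_t pick_slv_oid(bool local, int type)
{
        if (local)  /* well-known reserved oid, one per quota type */
                return type == USRQUOTA ? LQUOTA_USR_OID : LQUOTA_GRP_OID;
        /* otherwise a fid is allocated from the FID_SEQ_QUOTA sequence */
        return LQUOTA_GENERATED_OID;
}

int main(void)
{
        printf("local usr -> oid %u\n", (unsigned)pick_slv_oid(true, USRQUOTA));
        printf("local grp -> oid %u\n", (unsigned)pick_slv_oid(true, GRPQUOTA));
        printf("non-local -> oid %u\n", (unsigned)pick_slv_oid(false, USRQUOTA));
        return 0;
}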
Example #26
0
File: recover.c Project: 020gzh/linux
/**
 * Identify what request from replay list needs to be replayed next
 * (based on what we have already replayed) and send it to server.
 */
int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
{
	int rc = 0;
	struct list_head *tmp, *pos;
	struct ptlrpc_request *req = NULL;
	__u64 last_transno;

	*inflight = 0;

	/* It might have committed some after we last spoke, so make sure we
	 * get rid of them now.
	 */
	spin_lock(&imp->imp_lock);
	imp->imp_last_transno_checked = 0;
	ptlrpc_free_committed(imp);
	last_transno = imp->imp_last_replay_transno;
	spin_unlock(&imp->imp_lock);

	CDEBUG(D_HA, "import %p from %s committed %llu last %llu\n",
	       imp, obd2cli_tgt(imp->imp_obd),
	       imp->imp_peer_committed_transno, last_transno);

	/* Do I need to hold a lock across this iteration?  We shouldn't be
	 * racing with any additions to the list, because we're in recovery
	 * and are therefore not processing additional requests to add.  Calls
	 * to ptlrpc_free_committed might commit requests, but nothing "newer"
	 * than the one we're replaying (it can't be committed until it's
	 * replayed, and we're doing that here).  l_f_e_safe protects against
	 * problems with the current request being committed, in the unlikely
	 * event of that race.  So, in conclusion, I think that it's safe to
	 * perform this list-walk without the imp_lock held.
	 *
	 * But, the {mdc,osc}_replay_open callbacks both iterate
	 * request lists, and have comments saying they assume the
	 * imp_lock is being held by ptlrpc_replay, but it's not; it's
	 * just a little race...
	 */

	/* Replay all the committed open requests on committed_list first */
	if (!list_empty(&imp->imp_committed_list)) {
		tmp = imp->imp_committed_list.prev;
		req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);

		/* The last request on committed_list hasn't been replayed */
		if (req->rq_transno > last_transno) {
			/* Since the imp_committed_list is immutable before
			 * all of its requests have been replayed, it's safe
			 * to use a cursor to accelerate the search
			 */
			imp->imp_replay_cursor = imp->imp_replay_cursor->next;

			while (imp->imp_replay_cursor !=
			       &imp->imp_committed_list) {
				req = list_entry(imp->imp_replay_cursor,
						 struct ptlrpc_request,
						 rq_replay_list);
				if (req->rq_transno > last_transno)
					break;

				req = NULL;
				imp->imp_replay_cursor =
					imp->imp_replay_cursor->next;
			}
		} else {
Example #27
0
int llog_reverse_process(const struct lu_env *env,
			 struct llog_handle *loghandle, llog_cb_t cb,
			 void *data, void *catdata)
{
	struct llog_log_hdr *llh = loghandle->lgh_hdr;
	struct llog_process_cat_data *cd = catdata;
	void *buf;
	int rc = 0, first_index = 1, index, idx;

	OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
	if (!buf)
		return -ENOMEM;

	if (cd != NULL)
		first_index = cd->lpcd_first_idx + 1;
	if (cd != NULL && cd->lpcd_last_idx)
		index = cd->lpcd_last_idx;
	else
		index = LLOG_BITMAP_BYTES * 8 - 1;

	while (rc == 0) {
		struct llog_rec_hdr *rec;
		struct llog_rec_tail *tail;

		/* skip records not set in bitmap */
		while (index >= first_index &&
		       !ext2_test_bit(index, llh->llh_bitmap))
			--index;

		LASSERT(index >= first_index - 1);
		if (index == first_index - 1)
			break;

		/* get the buf with our target record; avoid old garbage */
		memset(buf, 0, LLOG_CHUNK_SIZE);
		rc = llog_prev_block(env, loghandle, index, buf,
				     LLOG_CHUNK_SIZE);
		if (rc)
			GOTO(out, rc);

		rec = buf;
		idx = rec->lrh_index;
		CDEBUG(D_RPCTRACE, "index %u : idx %u\n", index, idx);
		while (idx < index) {
			rec = (void *)rec + rec->lrh_len;
			if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
				lustre_swab_llog_rec(rec);
			idx++;
		}
		LASSERT(idx == index);
		tail = (void *)rec + rec->lrh_len - sizeof(*tail);

		/* process records in buffer, starting where we found one */
		while ((void *)tail > buf) {
			if (tail->lrt_index == 0)
				GOTO(out, rc = 0); /* no more records */

			/* if set, process the callback on this record */
			if (ext2_test_bit(index, llh->llh_bitmap)) {
				rec = (void *)tail - tail->lrt_len +
				      sizeof(*tail);

				rc = cb(env, loghandle, rec, data);
				if (rc == LLOG_PROC_BREAK) {
					GOTO(out, rc);
				} else if (rc == LLOG_DEL_RECORD) {
					llog_cancel_rec(env, loghandle,
							tail->lrt_index);
					rc = 0;
				}
				if (rc)
					GOTO(out, rc);
			}

			/* previous record, still in buffer? */
			--index;
			if (index < first_index)
				GOTO(out, rc = 0);
			tail = (void *)tail - tail->lrt_len;
		}
	}

out:
	if (buf)
		OBD_FREE(buf, LLOG_CHUNK_SIZE);
	return rc;
}
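The outer loop of llog_reverse_process repeatedly steps index down to the previous bit set in the llog header bitmap and stops once it passes first_index. A toy userspace version of that reverse scan, with a plain byte array standing in for llh_bitmap and ext2_test_bit():

#include <stdio.h>

/* Toy replacement for ext2_test_bit(): little-endian bit order. */
static int test_bit(int nr, const unsigned char *map)
{
        return (map[nr >> 3] >> (nr & 7)) & 1;
}

int main(void)
{
        unsigned char bitmap[8] = { 0x16 };  /* records 1, 2 and 4 exist */
        int first_index = 1;
        int index = 63;                      /* start from the end */

        for (;;) {
                /* skip records not set in bitmap, as in the loop above */
                while (index >= first_index && !test_bit(index, bitmap))
                        --index;
                if (index < first_index)
                        break;
                printf("process record %d\n", index);
                --index;  /* move to the previous record */
        }
        return 0;
}

This prints records 4, 2, 1 in that order, mirroring the newest-to-oldest processing of the real function.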
Example #28
0
/**
 * Implementation of ldlm_valblock_ops::lvbo_update for OFD.
 *
 * When a client generates a glimpse enqueue, it wants to get the current
 * file size and updated attributes for a stat() type operation, but these
 * attributes may be writeback cached on another client. The client with
 * the DLM extent lock at the highest offset is asked for its current
 * attributes via a glimpse callback on its extent lock, on the assumption
 * that it has the highest file size and the newest timestamps. The timestamps
 * are guaranteed to be correct if there is only a single writer on the file,
 * but may be slightly inaccurate if there are multiple concurrent writers on
 * the same object. In order to avoid race conditions between the glimpse AST
 * and the client cancelling the lock, ofd_lvbo_update() also updates
 * the attributes from the local object. If the last client hasn't done any
 * writes yet, or has already written its data and cancelled its lock before
 * it processed the glimpse, then the local inode will have more uptodate
 * information.
 *
 * This is called in two ways:
 *  \a req != NULL : called by the DLM itself after a glimpse callback
 *  \a req == NULL : called by the OFD after a disk write
 *
 * \param[in] env		execution environment
 * \param[in] res		LDLM resource
 * \param[in] lock		LDLM lock
 * \param[in] req		PTLRPC request
 * \param[in] increase_only	don't allow LVB values to decrease
 *
 * \retval		0 on successful update
 * \retval		negative value on error
 */
static int ofd_lvbo_update(const struct lu_env *env, struct ldlm_resource *res,
			   struct ldlm_lock *lock, struct ptlrpc_request *req,
			   int increase_only)
{
	struct ofd_thread_info	*info;
	struct ofd_device	*ofd;
	struct ofd_object	*fo;
	struct ost_lvb		*lvb;
	int			 rc = 0;

	ENTRY;

	LASSERT(env);
	info = ofd_info(env);
	LASSERT(res != NULL);

	ofd = ldlm_res_to_ns(res)->ns_lvbp;
	LASSERT(ofd != NULL);

	fid_extract_from_res_name(&info->fti_fid, &res->lr_name);

	lvb = res->lr_lvb_data;
	if (lvb == NULL) {
		CERROR("%s: no LVB data for "DFID"\n",
		       ofd_name(ofd), PFID(&info->fti_fid));
		GOTO(out, rc = 0);
	}

	/* Update the LVB from the network message */
	if (req != NULL) {
		struct ost_lvb *rpc_lvb;
		bool lvb_type;

		if (req->rq_import != NULL)
			lvb_type = imp_connect_lvb_type(req->rq_import);
		else
			lvb_type = exp_connect_lvb_type(req->rq_export);

		if (!lvb_type) {
			struct ost_lvb_v1 *lvb_v1;

			lvb_v1 = req_capsule_server_swab_get(&req->rq_pill,
					&RMF_DLM_LVB, lustre_swab_ost_lvb_v1);
			if (lvb_v1 == NULL)
				goto disk_update;

			rpc_lvb = &info->fti_lvb;
			memcpy(rpc_lvb, lvb_v1, sizeof *lvb_v1);
			rpc_lvb->lvb_mtime_ns = 0;
			rpc_lvb->lvb_atime_ns = 0;
			rpc_lvb->lvb_ctime_ns = 0;
		} else {
			rpc_lvb = req_capsule_server_swab_get(&req->rq_pill,
							      &RMF_DLM_LVB,
							lustre_swab_ost_lvb);
			if (rpc_lvb == NULL)
				goto disk_update;
		}

		lock_res(res);
		if (rpc_lvb->lvb_size > lvb->lvb_size || !increase_only) {
			CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb size: "
			       "%llu -> %llu\n", PFID(&info->fti_fid),
			       lvb->lvb_size, rpc_lvb->lvb_size);
			lvb->lvb_size = rpc_lvb->lvb_size;
		}
		if (rpc_lvb->lvb_mtime > lvb->lvb_mtime || !increase_only) {
			CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb mtime: "
			       "%llu -> %llu\n", PFID(&info->fti_fid),
			       lvb->lvb_mtime, rpc_lvb->lvb_mtime);
			lvb->lvb_mtime = rpc_lvb->lvb_mtime;
		}
		if (rpc_lvb->lvb_atime > lvb->lvb_atime || !increase_only) {
			CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb atime: "
			       "%llu -> %llu\n", PFID(&info->fti_fid),
			       lvb->lvb_atime, rpc_lvb->lvb_atime);
			lvb->lvb_atime = rpc_lvb->lvb_atime;
		}
		if (rpc_lvb->lvb_ctime > lvb->lvb_ctime || !increase_only) {
			CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb ctime: "
			       "%llu -> %llu\n", PFID(&info->fti_fid),
			       lvb->lvb_ctime, rpc_lvb->lvb_ctime);
			lvb->lvb_ctime = rpc_lvb->lvb_ctime;
		}
		if (rpc_lvb->lvb_blocks > lvb->lvb_blocks || !increase_only) {
			CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb blocks: "
			       "%llu -> %llu\n", PFID(&info->fti_fid),
			       lvb->lvb_blocks, rpc_lvb->lvb_blocks);
			lvb->lvb_blocks = rpc_lvb->lvb_blocks;
		}
		unlock_res(res);
	}

disk_update:
	/* Update the LVB from the disk inode */
	ost_fid_from_resid(&info->fti_fid, &res->lr_name,
			   ofd->ofd_lut.lut_lsd.lsd_osd_index);
	fo = ofd_object_find(env, ofd, &info->fti_fid);
	if (IS_ERR(fo))
		GOTO(out, rc = PTR_ERR(fo));

	rc = ofd_attr_get(env, fo, &info->fti_attr);
	if (rc)
		GOTO(out_obj, rc);

	lock_res(res);
	if (info->fti_attr.la_size > lvb->lvb_size || !increase_only) {
		CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb size from disk: "
		       "%llu -> %llu\n", PFID(&info->fti_fid),
		       lvb->lvb_size, info->fti_attr.la_size);
		lvb->lvb_size = info->fti_attr.la_size;
	}

	if (info->fti_attr.la_mtime > lvb->lvb_mtime || !increase_only) {
		CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb mtime from disk: "
		       "%llu -> %llu\n", PFID(&info->fti_fid),
		       lvb->lvb_mtime, info->fti_attr.la_mtime);
		lvb->lvb_mtime = info->fti_attr.la_mtime;
	}
	if (info->fti_attr.la_atime > lvb->lvb_atime || !increase_only) {
		CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb atime from disk: "
		       "%llu -> %llu\n", PFID(&info->fti_fid),
		       lvb->lvb_atime, info->fti_attr.la_atime);
		lvb->lvb_atime = info->fti_attr.la_atime;
	}
	if (info->fti_attr.la_ctime > lvb->lvb_ctime || !increase_only) {
		CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb ctime from disk: "
		       "%llu -> %llu\n", PFID(&info->fti_fid),
		       lvb->lvb_ctime, info->fti_attr.la_ctime);
		lvb->lvb_ctime = info->fti_attr.la_ctime;
	}
	if (info->fti_attr.la_blocks > lvb->lvb_blocks || !increase_only) {
		CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb blocks from disk: "
		       "%llu -> %llu\n", PFID(&info->fti_fid), lvb->lvb_blocks,
		       (unsigned long long)info->fti_attr.la_blocks);
		lvb->lvb_blocks = info->fti_attr.la_blocks;
	}
	unlock_res(res);

out_obj:
	ofd_object_put(env, fo);
out:
	return rc;
}
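Every field update in ofd_lvbo_update, in both the RPC branch and the disk branch, applies the same merge policy: accept the new value when it is larger, or unconditionally when increase_only is clear. A distilled sketch of that rule:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The merge rule used above for lvb size/mtime/atime/ctime/blocks. */
static void lvb_merge(uint64_t *cur, uint64_t new_val, bool increase_only)
{
        if (new_val > *cur || !increase_only)
                *cur = new_val;
}

int main(void)
{
        uint64_t size = 4096;

        lvb_merge(&size, 8192, true);   /* grows: accepted */
        printf("size=%llu\n", (unsigned long long)size);
        lvb_merge(&size, 1024, true);   /* shrink blocked by increase_only */
        printf("size=%llu\n", (unsigned long long)size);
        lvb_merge(&size, 1024, false);  /* shrink allowed */
        printf("size=%llu\n", (unsigned long long)size);
        return 0;
}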
Example #29
0
/*
 * Client's incoming reply callback
 */
void reply_in_callback(lnet_event_t *ev)
{
	struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
	struct ptlrpc_request *req = cbid->cbid_arg;

	DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);

	LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
	LASSERT(ev->md.start == req->rq_repbuf);
	LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len);
	/* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
	   for adaptive timeouts' early reply. */
	LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);

	spin_lock(&req->rq_lock);

	req->rq_receiving_reply = 0;
	req->rq_early = 0;
	if (ev->unlinked)
		req->rq_reply_unlink = 0;

	if (ev->status)
		goto out_wake;

	if (ev->type == LNET_EVENT_UNLINK) {
		LASSERT(ev->unlinked);
		DEBUG_REQ(D_NET, req, "unlink");
		goto out_wake;
	}

	if (ev->mlength < ev->rlength) {
		CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
		       req->rq_replen, ev->rlength, ev->offset);
		req->rq_reply_truncate = 1;
		req->rq_replied = 1;
		req->rq_status = -EOVERFLOW;
		req->rq_nob_received = ev->rlength + ev->offset;
		goto out_wake;
	}

	if ((ev->offset == 0) &&
	    ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) {
		/* Early reply */
		DEBUG_REQ(D_ADAPTTO, req,
			  "Early reply received: mlen=%u offset=%d replen=%d "
			  "replied=%d unlinked=%d", ev->mlength, ev->offset,
			  req->rq_replen, req->rq_replied, ev->unlinked);

		req->rq_early_count++; /* number received, client side */

		if (req->rq_replied)   /* already got the real reply */
			goto out_wake;

		req->rq_early = 1;
		req->rq_reply_off = ev->offset;
		req->rq_nob_received = ev->mlength;
		/* And we're still receiving */
		req->rq_receiving_reply = 1;
	} else {
		/* Real reply */
		req->rq_rep_swab_mask = 0;
		req->rq_replied = 1;
		/* Got reply, no resend required */
		req->rq_resend = 0;
		req->rq_reply_off = ev->offset;
		req->rq_nob_received = ev->mlength;
		/* LNetMDUnlink can't be called under the LNET_LOCK,
		   so we must unlink in ptlrpc_unregister_reply */
		DEBUG_REQ(D_INFO, req,
			  "reply in flags=%x mlen=%u offset=%d replen=%d",
			  lustre_msg_get_flags(req->rq_reqmsg),
			  ev->mlength, ev->offset, req->rq_replen);
	}

	req->rq_import->imp_last_reply_time = get_seconds();

out_wake:
	/* NB don't unlock till after wakeup; req can disappear under us
	 * since we don't have our own ref */
	ptlrpc_client_wake_req(req);
	spin_unlock(&req->rq_lock);
}
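The callback above classifies a buffer landing at offset 0 on a request that advertised MSGHDR_AT_SUPPORT as an adaptive-timeout early reply, and anything else as the real reply. A boiled-down sketch of just that classification; AT_SUPPORT is a stand-in bit, not the real flag value.

#include <stdio.h>

#define AT_SUPPORT 0x1  /* stand-in for MSGHDR_AT_SUPPORT */

/* Mirrors the early-vs-real branch in reply_in_callback(). */
static const char *classify_reply(int offset, unsigned int hdr_flags)
{
        if (offset == 0 && (hdr_flags & AT_SUPPORT))
                return "early reply: note the offset, keep receiving";
        return "real reply: record length, wake the waiter";
}

int main(void)
{
        printf("%s\n", classify_reply(0, AT_SUPPORT));
        printf("%s\n", classify_reply(128, AT_SUPPORT));
        return 0;
}

When adaptive timeouts are negotiated, the real reply deliberately lands at a nonzero offset so the reserved head of the buffer stays available for an early reply.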
Example #30
0
File: watchdog.c Project: DCteam/lustre
static int lcw_dispatch_main(void *data)
{
        int                 rc = 0;
        unsigned long       flags;
        struct lc_watchdog *lcw;
        CFS_LIST_HEAD      (zombies);

        ENTRY;

        cfs_daemonize("lc_watchdogd");

        SIGNAL_MASK_LOCK(current, flags);
        sigfillset(&current->blocked);
        RECALC_SIGPENDING;
        SIGNAL_MASK_UNLOCK(current, flags);

        cfs_complete(&lcw_start_completion);

        while (1) {
                int dumplog = 1;

                cfs_wait_event_interruptible(lcw_event_waitq,
                                             is_watchdog_fired(), rc);
                CDEBUG(D_INFO, "Watchdog got woken up...\n");
                if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) {
                        CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");

                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
                        rc = !cfs_list_empty(&lcw_pending_timers);
                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
                        if (rc) {
                                CERROR("pending timers list was not empty at "
                                       "time of watchdog dispatch shutdown\n");
                        }
                        break;
                }

                cfs_spin_lock_bh(&lcw_pending_timers_lock);
                while (!cfs_list_empty(&lcw_pending_timers)) {
                        int is_dumplog;

                        lcw = cfs_list_entry(lcw_pending_timers.next,
                                             struct lc_watchdog, lcw_list);
                        /* +1 ref for callback to make sure lcw wouldn't be
                         * deleted after releasing lcw_pending_timers_lock */
                        lcw->lcw_refcount++;
                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);

                        /* lock ordering */
                        cfs_spin_lock_bh(&lcw->lcw_lock);
                        cfs_spin_lock_bh(&lcw_pending_timers_lock);

                        if (cfs_list_empty(&lcw->lcw_list)) {
                                /* already removed from pending list */
                                lcw->lcw_refcount--; /* -1 ref for callback */
                                if (lcw->lcw_refcount == 0)
                                        cfs_list_add(&lcw->lcw_list, &zombies);
                                cfs_spin_unlock_bh(&lcw->lcw_lock);
                                /* still hold lcw_pending_timers_lock */
                                continue;
                        }

                        cfs_list_del_init(&lcw->lcw_list);
                        lcw->lcw_refcount--; /* -1 ref for pending list */

                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
                        cfs_spin_unlock_bh(&lcw->lcw_lock);

                        CDEBUG(D_INFO, "found lcw for pid " LPPID "\n",
                               lcw->lcw_pid);
                        lcw_dump_stack(lcw);

                        is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
                        if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
                            (dumplog || !is_dumplog)) {
                                lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
                                if (dumplog && is_dumplog)
                                        dumplog = 0;
                        }

                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
                        lcw->lcw_refcount--; /* -1 ref for callback */
                        if (lcw->lcw_refcount == 0)
                                cfs_list_add(&lcw->lcw_list, &zombies);
                }
                cfs_spin_unlock_bh(&lcw_pending_timers_lock);

                while (!cfs_list_empty(&zombies)) {
                        /* pull entries from the local zombies list, not the
                         * already-drained pending list */
                        lcw = cfs_list_entry(zombies.next,
                                             struct lc_watchdog, lcw_list);
                        cfs_list_del(&lcw->lcw_list);
                        LIBCFS_FREE(lcw, sizeof(*lcw));
                }
        }

        cfs_complete(&lcw_stop_completion);

        RETURN(rc);
}