Example #1
static int lfsck_namespace_checkpoint(const struct lu_env *env,
				      struct lfsck_component *com, bool init)
{
	struct lfsck_instance	*lfsck = com->lc_lfsck;
	struct lfsck_namespace	*ns    = com->lc_file_ram;
	int			 rc;

	if (com->lc_new_checked == 0 && !init)
		return 0;

	down_write(&com->lc_sem);

	if (init) {
		ns->ln_pos_latest_start = lfsck->li_pos_current;
	} else {
		ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
		ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		ns->ln_time_last_checkpoint = cfs_time_current_sec();
		ns->ln_items_checked += com->lc_new_checked;
		com->lc_new_checked = 0;
	}

	rc = lfsck_namespace_store(env, com, false);

	up_write(&com->lc_sem);
	return rc;
}
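
Example #1 adds HALF_SEC to the elapsed jiffies before converting to seconds, i.e. the phase-1 run time is rounded to the nearest second instead of being truncated. A minimal sketch of that rounding, assuming the usual jiffies-based cfs_time_t and HALF_SEC defined as half of cfs_time_seconds(1) (both assumptions, not shown on this page):

/* Sketch only: assumes cfs_time_t is jiffies-based and
 * HALF_SEC == cfs_time_seconds(1) / 2. */
static inline long elapsed_sec_rounded(unsigned long since)
{
	unsigned long delta = cfs_time_current() - since;

	/* adding half a second before the divide rounds to nearest */
	return cfs_duration_sec(delta + cfs_time_seconds(1) / 2);
}
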
Example #2
/**
 * Callback handler for receiving incoming glimpse ASTs.
 *
 * This can only happen on the client side.  After handling the glimpse AST
 * we also consider dropping the lock here if it has been unused locally for
 * a long time.
 */
static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
				    struct ldlm_namespace *ns,
				    struct ldlm_request *dlm_req,
				    struct ldlm_lock *lock)
{
	int rc = -ENOSYS;

	LDLM_DEBUG(lock, "client glimpse AST callback handler");

	if (lock->l_glimpse_ast != NULL)
		rc = lock->l_glimpse_ast(lock, req);

	if (req->rq_repmsg != NULL) {
		ptlrpc_reply(req);
	} else {
		req->rq_status = rc;
		ptlrpc_error(req);
	}

	lock_res_and_lock(lock);
	if (lock->l_granted_mode == LCK_PW &&
	    !lock->l_readers && !lock->l_writers &&
	    cfs_time_after(cfs_time_current(),
			   cfs_time_add(lock->l_last_used,
					cfs_time_seconds(10)))) {
		unlock_res_and_lock(lock);
		if (ldlm_bl_to_thread_lock(ns, NULL, lock))
			ldlm_handle_bl_callback(ns, NULL, lock);

		return;
	}
	unlock_res_and_lock(lock);
	LDLM_LOCK_RELEASE(lock);
}
Example #3
int osc_object_is_contended(struct osc_object *obj)
{
	struct osc_device *dev  = lu2osc_dev(obj->oo_cl.co_lu.lo_dev);
	int osc_contention_time = dev->od_contention_time;
	unsigned long cur_time     = cfs_time_current();
	unsigned long retry_time;

	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION))
		return 1;

	if (!obj->oo_contended)
		return 0;

	/*
	 * I like copy-paste. The code is copied from
	 * ll_file_is_contended.
	 */
	retry_time = cfs_time_add(obj->oo_contention_time,
				  cfs_time_seconds(osc_contention_time));
	if (cfs_time_after(cur_time, retry_time)) {
		osc_object_clear_contended(obj);
		return 0;
	}
	return 1;
}
Example #4
int cfs_timer_is_armed(cfs_timer_t *l)
{
        if (cfs_time_before(cfs_time_current(), l->expires))
                return 1;
        else
                return 0;
}
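
cfs_timer_is_armed() is the smallest example of the pattern used throughout this page: cfs_time_current() returns a monotonic tick count and the cfs_time_* helpers compare or offset it. On the kernel side these wrappers historically reduced to plain jiffies arithmetic; the sketch below lists the conventional mappings purely as an orientation aid (an assumption based on old libcfs headers, not verbatim definitions; the userspace builds used different types):

/* Sketch of the approximate kernel-side meanings of the libcfs time
 * wrappers used in these examples (not copied from libcfs). */
#include <linux/jiffies.h>	/* jiffies, HZ, time_before()/time_after() */
#include <linux/time.h>

typedef unsigned long cfs_time_t;			/* jiffies */

#define cfs_time_current()	((cfs_time_t)jiffies)
#define cfs_time_current_sec()	get_seconds()		/* wall-clock seconds */
#define cfs_time_seconds(s)	((cfs_time_t)((s) * HZ))
#define cfs_time_add(t, d)	((t) + (d))
#define cfs_time_sub(t1, t2)	((t1) - (t2))
#define cfs_duration_sec(d)	((d) / HZ)
#define cfs_time_before(t1, t2)	time_before(t1, t2)
#define cfs_time_beforeq(t1, t2) time_before_eq(t1, t2)
#define cfs_time_after(t1, t2)	time_after(t1, t2)
#define cfs_time_aftereq(t1, t2) time_after_eq(t1, t2)
#define cfs_time_shift(s)	cfs_time_add(cfs_time_current(), cfs_time_seconds(s))
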
Example #5
int
libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
        int           rc;
        struct pollfd pfd;
        cfs_time_t    start_time = cfs_time_current();

        pfd.fd = sock->s_fd;
        pfd.events = POLLOUT;
        pfd.revents = 0;

        /* poll(2) measures timeout in msec */
        timeout *= 1000;

        while (nob != 0 && timeout > 0) {
                cfs_time_t current_time;

                rc = poll(&pfd, 1, timeout);
                if (rc < 0)
                        return -errno;
                if (rc == 0)
                        return -ETIMEDOUT;
                if ((pfd.revents & POLLOUT) == 0)
                        return -EIO;

                rc = write(sock->s_fd, buffer, nob);
                if (rc < 0)
                        return -errno;
                if (rc == 0)
                        return -EIO;

                buffer = ((char *)buffer) + rc;
                nob -= rc;

                current_time = cfs_time_current();
                timeout -= 1000 *
                        cfs_duration_sec(cfs_time_sub(current_time,
                                                      start_time));
                start_time = current_time;
        }

        if (nob == 0)
                return 0;
        else
                return -ETIMEDOUT;
}
Example #6
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	struct task_struct		*task;

	ENTRY;

	OBD_ALLOC_PTR(d->opd_pre);
	if (d->opd_pre == NULL)
		RETURN(-ENOMEM);

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	init_waitqueue_head(&d->opd_pre_waitq);
	init_waitqueue_head(&d->opd_pre_user_waitq);
	init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	task = kthread_run(osp_precreate_thread, d,
			   "osp-pre-%u-%u", d->opd_index, d->opd_group);
	if (IS_ERR(task)) {
		CERROR("can't start precreate thread %ld\n", PTR_ERR(task));
		RETURN(PTR_ERR(task));
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
Example #7
File: user-tcpip.c  Project: pscedu/pfl
ssize_t
libcfs_sock_read(struct lnet_xport *lx, void *buffer, size_t nob,
    int timeout)
{
	ssize_t rc;
	struct pollfd pfd;
	cfs_time_t start_time = cfs_time_current();

	pfd.fd = lx->lx_fd;
	pfd.events = POLLIN;
	pfd.revents = 0;

	/* poll(2) measures timeout in msec */
	timeout *= 1000;

	while (nob != 0 && timeout > 0) {
		rc = poll(&pfd, 1, timeout);
		if (rc < 0)
			return -errno;
		if (rc == 0)
			return -ETIMEDOUT;
		if ((pfd.revents & POLLIN) == 0)
			return -EIO;

		rc = read(lx->lx_fd, buffer, nob);
		if (rc < 0)
			return -errno;
		if (rc == 0)
			return -EIO;

		buffer = ((char *)buffer) + rc;
		nob -= rc;

		/* timeout is in msec, cfs_duration_sec() returns seconds */
		timeout -= 1000 *
			cfs_duration_sec(cfs_time_sub(cfs_time_current(),
						      start_time));
		start_time = cfs_time_current();
	}

	if (nob == 0)
		return 0;
	else
		return -ETIMEDOUT;
}
Example #8
ssize_t
libcfs_ssl_sock_read(struct lnet_xport *lx, void *buf, size_t n,
    int timeout)
{
	cfs_time_t start_time = cfs_time_current();
	struct pollfd pfd;
	ssize_t rc;

	pfd.fd = lx->lx_fd;
	pfd.events = POLLIN;
	pfd.revents = 0;

	/* poll(2) measures timeout in msec */
	timeout *= 1000;

	while (n != 0 && timeout > 0) {
		rc = poll(&pfd, 1, timeout);
		if (rc < 0)
			return (-errno);
		if (rc == 0)
			return (-ETIMEDOUT);
		if ((pfd.revents & POLLIN) == 0)
			return (-EIO);

		rc = SSL_read(lx->lx_ssl, buf, n);
		if (rc < 0)
			return (-errno);
		if (rc == 0)
			return (-EIO);

		buf = (char *)buf + rc;
		n -= rc;

		/* timeout is in msec, cfs_duration_sec() returns seconds */
		timeout -= 1000 * cfs_duration_sec(cfs_time_sub(
		    cfs_time_current(), start_time));
		start_time = cfs_time_current();
	}

	if (n == 0)
		return (0);
	return (-ETIMEDOUT);
}
Example #9
static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
			struct lu_buf *buf, loff_t *pos)
{
	struct osd_object *obj  = osd_dt_obj(dt);
	struct osd_device *osd = osd_obj2dev(obj);
	uint64_t	   old_size;
	int		   size = buf->lb_len;
	int		   rc;
	unsigned long	   start;

	LASSERT(dt_object_exists(dt));
	LASSERT(obj->oo_db);

	start = cfs_time_current();

	read_lock(&obj->oo_attr_lock);
	old_size = obj->oo_attr.la_size;
	read_unlock(&obj->oo_attr_lock);

	if (*pos + size > old_size) {
		if (old_size < *pos)
			return 0;
		else
			size = old_size - *pos;
	}

	record_start_io(osd, READ, 0);

	rc = -dmu_read(osd->od_os, obj->oo_db->db_object, *pos, size,
			buf->lb_buf, DMU_READ_PREFETCH);

	record_end_io(osd, READ, cfs_time_current() - start, size,
		      size >> PAGE_CACHE_SHIFT);
	if (rc == 0) {
		rc = size;
		*pos += size;
	}
	return rc;
}
Example #10
File: handlers.c  Project: Lezval/lustre
/* Read as much data from the wire as possible.
 * Returns 0 or 1 on success, <0 on error or EOF.
 * 0 means a partial read, 1 means the read is complete. */
int
usocklnd_read_data(usock_conn_t *conn)
{
        struct iovec *iov;
        int           nob;
        cfs_time_t    t;

        LASSERT (conn->uc_rx_nob_wanted != 0);

        do {
                usock_peer_t *peer = conn->uc_peer;

                LASSERT (conn->uc_rx_niov > 0);

                nob = libcfs_sock_readv(conn->uc_sock,
                                        conn->uc_rx_iov, conn->uc_rx_niov);
                if (nob <= 0) {/* read nothing or error */
                        if (nob < 0)
                                conn->uc_errored = 1;
                        return nob;
                }

                LASSERT (nob <= conn->uc_rx_nob_wanted);
                conn->uc_rx_nob_wanted -= nob;
                conn->uc_rx_nob_left -= nob;
                t = cfs_time_current();
                conn->uc_rx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout));

                if(peer != NULL)
                        peer->up_last_alive = t;

                /* "consume" iov */
                iov = conn->uc_rx_iov;
                do {
                        LASSERT (conn->uc_rx_niov > 0);

                        if (nob < iov->iov_len) {
                                iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
                                iov->iov_len -= nob;
                                break;
                        }

                        nob -= iov->iov_len;
                        conn->uc_rx_iov = ++iov;
                        conn->uc_rx_niov--;
                } while (nob != 0);

        } while (conn->uc_rx_nob_wanted != 0);

        return 1; /* read complete */
}
Example #11
File: watchdog.c  Project: hpc/lustre
struct lc_watchdog *lc_watchdog_add(int timeout,
                                    void (*callback)(pid_t, void *),
                                    void *data)
{
        struct lc_watchdog *lcw = NULL;
        ENTRY;

        LIBCFS_ALLOC(lcw, sizeof(*lcw));
        if (lcw == NULL) {
                CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
                RETURN(ERR_PTR(-ENOMEM));
        }

        cfs_spin_lock_init(&lcw->lcw_lock);
        lcw->lcw_refcount = 1; /* refcount for owner */
        lcw->lcw_task     = cfs_current();
        lcw->lcw_pid      = cfs_curproc_pid();
        lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
        lcw->lcw_data     = data;
        lcw->lcw_state    = LC_WATCHDOG_DISABLED;

        CFS_INIT_LIST_HEAD(&lcw->lcw_list);
        cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);

        cfs_down(&lcw_refcount_sem);
        if (++lcw_refcount == 1)
                lcw_dispatch_start();
        cfs_up(&lcw_refcount_sem);

        /* Keep this working in case we enable them by default */
        if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
                lcw->lcw_last_touched = cfs_time_current();
                cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
                              cfs_time_current());
        }

        RETURN(lcw);
}
Example #12
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	int			 rc;

	ENTRY;

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	cfs_waitq_init(&d->opd_pre_waitq);
	cfs_waitq_init(&d->opd_pre_user_waitq);
	cfs_waitq_init(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	rc = cfs_create_thread(osp_precreate_thread, d, 0);
	if (rc < 0) {
		CERROR("can't start precreate thread %d\n", rc);
		RETURN(rc);
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
Example #13
static int lfsck_namespace_post(const struct lu_env *env,
				struct lfsck_component *com,
				int result, bool init)
{
	struct lfsck_instance	*lfsck = com->lc_lfsck;
	struct lfsck_namespace	*ns    = com->lc_file_ram;
	int			 rc;

	down_write(&com->lc_sem);

	spin_lock(&lfsck->li_lock);
	if (!init)
		ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
	if (result > 0) {
		ns->ln_status = LS_SCANNING_PHASE2;
		ns->ln_flags |= LF_SCANNED_ONCE;
		ns->ln_flags &= ~LF_UPGRADE;
		cfs_list_del_init(&com->lc_link);
		cfs_list_del_init(&com->lc_link_dir);
		cfs_list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
	} else if (result == 0) {
		ns->ln_status = lfsck->li_status;
		if (ns->ln_status == 0)
			ns->ln_status = LS_STOPPED;
		if (ns->ln_status != LS_PAUSED) {
			cfs_list_del_init(&com->lc_link);
			cfs_list_del_init(&com->lc_link_dir);
			cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
		}
	} else {
		ns->ln_status = LS_FAILED;
		cfs_list_del_init(&com->lc_link);
		cfs_list_del_init(&com->lc_link_dir);
		cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
	}
	spin_unlock(&lfsck->li_lock);

	if (!init) {
		ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		ns->ln_time_last_checkpoint = cfs_time_current_sec();
		ns->ln_items_checked += com->lc_new_checked;
		com->lc_new_checked = 0;
	}

	rc = lfsck_namespace_store(env, com, false);

	up_write(&com->lc_sem);
	return rc;
}
Example #14
File: handlers.c  Project: Lezval/lustre
/* Send as much tx data as possible.
 * Returns 0 or 1 on success, <0 on fatal error.
 * 0 means a partial send or a non-fatal error, 1 means the send is complete.
 * Rely on libcfs_sock_writev() to differentiate fatal from
 * non-fatal errors. An error should be considered non-fatal if:
 * 1) it still makes sense to continue reading, and
 * 2) poll() will set the POLLHUP|POLLERR flags anyway. */
int
usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx)
{
        struct iovec *iov;
        int           nob;
        cfs_time_t    t;

        LASSERT (tx->tx_resid != 0);

        do {
                usock_peer_t *peer = conn->uc_peer;

                LASSERT (tx->tx_niov > 0);

                nob = libcfs_sock_writev(conn->uc_sock,
                                         tx->tx_iov, tx->tx_niov);
                if (nob < 0)
                        conn->uc_errored = 1;
                if (nob <= 0) /* write queue is flow-controlled or error */
                        return nob;

                LASSERT (nob <= tx->tx_resid);
                tx->tx_resid -= nob;
                t = cfs_time_current();
                conn->uc_tx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout));

                if(peer != NULL)
                        peer->up_last_alive = t;

                /* "consume" iov */
                iov = tx->tx_iov;
                do {
                        LASSERT (tx->tx_niov > 0);

                        if (nob < iov->iov_len) {
                                iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
                                iov->iov_len -= nob;
                                break;
                        }

                        nob -= iov->iov_len;
                        tx->tx_iov = ++iov;
                        tx->tx_niov--;
                } while (nob != 0);

        } while (tx->tx_resid != 0);

        return 1; /* send complete */
}
Example #15
File: watchdog.c  Project: hpc/lustre
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
        ENTRY;
        LASSERT(lcw != NULL);

        lc_watchdog_del_pending(lcw);

        lcw_update_time(lcw, "resumed");
        lcw->lcw_state = LC_WATCHDOG_ENABLED;

        cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
                      cfs_time_seconds(timeout));

        EXIT;
}
Example #16
File: watchdog.c  Project: hpc/lustre
static void lcw_dump_stack(struct lc_watchdog *lcw)
{
        cfs_time_t      current_time;
        cfs_duration_t  delta_time;
        struct timeval  timediff;

        current_time = cfs_time_current();
        delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
        cfs_duration_usec(delta_time, &timediff);

        /*
         * Check to see if we should throttle the watchdog timer to avoid
         * too many dumps going to the console thus triggering an NMI.
         */
        delta_time = cfs_duration_sec(cfs_time_sub(current_time,
                                                   lcw_last_watchdog_time));

        if (delta_time < libcfs_watchdog_ratelimit &&
            lcw_recent_watchdog_count > 3) {
                LCONSOLE_WARN("Service thread pid %u was inactive for "
                              "%lu.%.02lus. Watchdog stack traces are limited "
                              "to 3 per %d seconds, skipping this one.\n",
                              (int)lcw->lcw_pid,
                              timediff.tv_sec,
                              timediff.tv_usec / 10000,
                              libcfs_watchdog_ratelimit);
        } else {
                if (delta_time < libcfs_watchdog_ratelimit) {
                        lcw_recent_watchdog_count++;
                } else {
                        memcpy(&lcw_last_watchdog_time, &current_time,
                               sizeof(current_time));
                        lcw_recent_watchdog_count = 0;
                }

                LCONSOLE_WARN("Service thread pid %u was inactive for "
                              "%lu.%.02lus. The thread might be hung, or it "
                              "might only be slow and will resume later. "
                              "Dumping the stack trace for debugging purposes:"
                              "\n",
                              (int)lcw->lcw_pid,
                              timediff.tv_sec,
                              timediff.tv_usec / 10000);
                lcw_dump(lcw);
        }
}
Example #17
static void ctx_start_timer_kr(struct ptlrpc_cli_ctx *ctx, long timeout)
{
	struct gss_cli_ctx_keyring *gctx_kr = ctx2gctx_keyring(ctx);
	struct timer_list          *timer = gctx_kr->gck_timer;

	LASSERT(timer);

	CDEBUG(D_SEC, "ctx %p: start timer %lds\n", ctx, timeout);
	timeout = msecs_to_jiffies(timeout * MSEC_PER_SEC) +
		  cfs_time_current();

	init_timer(timer);
	timer->expires = timeout;
	timer->data = (unsigned long ) ctx;
	timer->function = ctx_upcall_timeout_kr;

	add_timer(timer);
}
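
ctx_start_timer_kr() uses the pre-4.15 kernel timer interface (init_timer(), timer->data, timer->function), which was removed from later kernels. Below is a hedged sketch of the same arming logic written against the newer timer_setup()/from_timer() API; the structure and function names are hypothetical, and it assumes the timer is embedded in the per-context structure rather than pointed to:

#include <linux/timer.h>
#include <linux/jiffies.h>

/* Hypothetical wrapper: the timer is embedded so from_timer() can
 * recover the owning context. */
struct gss_ctx_timer_sketch {
	struct timer_list gck_timer;
	/* ... rest of the per-context state ... */
};

static void ctx_upcall_timeout_cb(struct timer_list *t)
{
	struct gss_ctx_timer_sketch *gctx = from_timer(gctx, t, gck_timer);

	/* handle the upcall timeout for the context owning gctx */
	(void)gctx;
}

static void ctx_start_timer_sketch(struct gss_ctx_timer_sketch *gctx,
				   long timeout_sec)
{
	timer_setup(&gctx->gck_timer, ctx_upcall_timeout_cb, 0);
	/* expire timeout_sec seconds from now */
	mod_timer(&gctx->gck_timer, jiffies + timeout_sec * HZ);
}
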
Example #18
File: watchdog.c  Project: hpc/lustre
static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
{
        cfs_time_t newtime = cfs_time_current();

        if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
                struct timeval timediff;
                cfs_time_t delta_time = cfs_time_sub(newtime,
                                                     lcw->lcw_last_touched);
                cfs_duration_usec(delta_time, &timediff);

                LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. "
                              "This indicates the system was overloaded (too "
                              "many service threads, or there were not enough "
                              "hardware resources).\n",
                              lcw->lcw_pid,
                              message,
                              timediff.tv_sec,
                              timediff.tv_usec / 10000);
        }
        lcw->lcw_last_touched = newtime;
}
Example #19
/**
 * Prepare buffers for read.
 *
 * The function maps the range described by \a off and \a len to the \a lnb
 * array. dmu_buf_hold_array_by_bonus() finds/creates the appropriate ARC
 * buffers, then we fill the \a lnb array with the pages storing those ARC
 * buffers. Notice the current implementation passes TRUE to
 * dmu_buf_hold_array_by_bonus() to fill the ARC buffers with actual data, so
 * I/O is done in the context of osd_bufs_get_read(). A better implementation
 * would just return the buffers (potentially unfilled) and a subsequent
 * osd_read_prep() would do I/O for many ranges concurrently.
 *
 * \param[in] env	environment
 * \param[in] obj	object
 * \param[in] off	offset in bytes
 * \param[in] len	the number of bytes to access
 * \param[out] lnb	array of local niobufs pointing to the buffers with data
 *
 * \retval		0 for success
 * \retval		negative error number on failure
 */
static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj,
				loff_t off, ssize_t len, struct niobuf_local *lnb)
{
	struct osd_device *osd = osd_obj2dev(obj);
	unsigned long	   start = cfs_time_current();
	int                rc, i, numbufs, npages = 0;
	dmu_buf_t	 **dbp;
	ENTRY;

	record_start_io(osd, READ, 0);

	/* grab buffers for read:
	 * the OSD API lets us grab the buffers first and then initiate the
	 * IO(s), so that all required IOs can be done in parallel, but at
	 * the moment the DMU doesn't provide us with a method to grab
	 * buffers. If we discover this is vital for good performance we
	 * can get our own replacement for dmu_buf_hold_array_by_bonus().
	 */
	while (len > 0) {
		rc = -dmu_buf_hold_array_by_bonus(obj->oo_db, off, len, TRUE,
						  osd_zerocopy_tag, &numbufs,
						  &dbp);
		if (unlikely(rc))
			GOTO(err, rc);

		for (i = 0; i < numbufs; i++) {
			int bufoff, tocpy, thispage;
			void *dbf = dbp[i];

			LASSERT(len > 0);

			atomic_inc(&osd->od_zerocopy_pin);

			bufoff = off - dbp[i]->db_offset;
			tocpy = min_t(int, dbp[i]->db_size - bufoff, len);

			/* kind of trick to differentiate dbuf vs. arcbuf */
			LASSERT(((unsigned long)dbp[i] & 1) == 0);
			dbf = (void *) ((unsigned long)dbp[i] | 1);

			while (tocpy > 0) {
				thispage = PAGE_CACHE_SIZE;
				thispage -= bufoff & (PAGE_CACHE_SIZE - 1);
				thispage = min(tocpy, thispage);

				lnb->lnb_rc = 0;
				lnb->lnb_file_offset = off;
				lnb->lnb_page_offset = bufoff & ~PAGE_MASK;
				lnb->lnb_len = thispage;
				lnb->lnb_page = kmem_to_page(dbp[i]->db_data +
							     bufoff);
				/* mark just a single slot: we need this
				 * reference to dbuf to be released once */
				lnb->lnb_data = dbf;
				dbf = NULL;

				tocpy -= thispage;
				len -= thispage;
				bufoff += thispage;
				off += thispage;

				npages++;
				lnb++;
			}

			/* steal dbuf so dmu_buf_rele_array() can't release
			 * it */
			dbp[i] = NULL;
		}

		dmu_buf_rele_array(dbp, numbufs, osd_zerocopy_tag);
	}

	record_end_io(osd, READ, cfs_time_current() - start,
		      npages * PAGE_SIZE, npages);

	RETURN(npages);

err:
	LASSERT(rc < 0);
	osd_bufs_put(env, &obj->oo_dt, lnb - npages, npages);
	RETURN(rc);
}
Example #20
File: handlers.c  Project: Lezval/lustre
/* All actions that we need after receiving hello on passive conn:
 * 1) Stash peer's nid, pid, incarnation and conn type
 * 2) Cope with easy case: conn[idx] is empty - just save conn there
 * 3) Resolve race:
 *    a) if our nid is higher - reply with CONN_NONE and make us zombie
 *    b) if peer's nid is higher - postpone race resolution till
 *       READY state
 * 4) Anyhow, send reply hello
 */
int
usocklnd_passiveconn_hellorecv(usock_conn_t *conn)
{
        ksock_hello_msg_t *hello = conn->uc_rx_hello;
        int                type;
        int                idx;
        int                rc;
        usock_peer_t      *peer;
        lnet_ni_t         *ni        = conn->uc_ni;
        __u32              peer_ip   = conn->uc_peer_ip;
        __u16              peer_port = conn->uc_peer_port;

        /* don't know parent peer yet and not zombie */
        LASSERT (conn->uc_peer == NULL &&
                 ni != NULL);

        /* don't know peer's nid and incarnation yet */
        if (peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
                /* do not trust liblustre clients */
                conn->uc_peerid.pid = peer_port | LNET_PID_USERFLAG;
                conn->uc_peerid.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
                                                 peer_ip);
                if (hello->kshm_ctype != SOCKLND_CONN_ANY) {
                        lnet_ni_decref(ni);
                        conn->uc_ni = NULL;
                        CERROR("Refusing to accept connection of type=%d from "
                               "userspace process %u.%u.%u.%u:%d\n", hello->kshm_ctype,
                               HIPQUAD(peer_ip), peer_port);
                        return -EINVAL;
                }
        } else {
                conn->uc_peerid.pid = hello->kshm_src_pid;
                conn->uc_peerid.nid = hello->kshm_src_nid;
        }
        conn->uc_type = type = usocklnd_invert_type(hello->kshm_ctype);

        rc = usocklnd_find_or_create_peer(ni, conn->uc_peerid, &peer);
        if (rc) {
                lnet_ni_decref(ni);
                conn->uc_ni = NULL;
                return rc;
        }

        peer->up_last_alive = cfs_time_current();

        idx = usocklnd_type2idx(conn->uc_type);

        /* safely check whether we're first */
        pthread_mutex_lock(&peer->up_lock);

        usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, NULL);

        if (peer->up_conns[idx] == NULL) {
                peer->up_last_alive = cfs_time_current();
                conn->uc_peer = peer;
                conn->uc_ni = NULL;
                usocklnd_link_conn_to_peer(conn, peer, idx);
                usocklnd_conn_addref(conn);
        } else {
                usocklnd_peer_decref(peer);

                /* Resolve race in favour of higher NID */
                if (conn->uc_peerid.nid < conn->uc_ni->ni_nid) {
                        /* make us zombie */
                        conn->uc_ni = NULL;
                        type = SOCKLND_CONN_NONE;
                }

                /* if conn->uc_peerid.nid > conn->uc_ni->ni_nid,
                 * postpone race resolution till READY state
                 * (hopefully that conn[idx] will die because of
                 * incoming hello of CONN_NONE type) */
        }
        pthread_mutex_unlock(&peer->up_lock);

        /* allocate and initialize fake tx with hello */
        conn->uc_tx_hello = usocklnd_create_hello_tx(ni, type,
                                                     conn->uc_peerid.nid);
        if (conn->uc_ni == NULL)
                lnet_ni_decref(ni);

        if (conn->uc_tx_hello == NULL)
                return -ENOMEM;

        /* rc == 0 */
        pthread_mutex_lock(&conn->uc_lock);
        if (conn->uc_state == UC_DEAD)
                goto passive_hellorecv_done;

        conn->uc_state = UC_SENDING_HELLO;
        conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout);
        conn->uc_tx_flag = 1;
        rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLOUT);

  passive_hellorecv_done:
        pthread_mutex_unlock(&conn->uc_lock);
        return rc;
}
Example #21
File: sec_bulk.c  Project: DCteam/lustre
/*
 * we allocate the requested pages atomically.
 */
int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
{
        cfs_waitlink_t  waitlink;
        unsigned long   this_idle = -1;
        cfs_time_t      tick = 0;
        long            now;
        int             p_idx, g_idx;
        int             i;

        LASSERT(desc->bd_iov_count > 0);
        LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);

        /* resent bulk, enc iov might have been allocated previously */
        if (desc->bd_enc_iov != NULL)
                return 0;

        OBD_ALLOC(desc->bd_enc_iov,
                  desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
        if (desc->bd_enc_iov == NULL)
                return -ENOMEM;

        cfs_spin_lock(&page_pools.epp_lock);

        page_pools.epp_st_access++;
again:
        if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
                if (tick == 0)
                        tick = cfs_time_current();

                now = cfs_time_current_sec();

                page_pools.epp_st_missings++;
                page_pools.epp_pages_short += desc->bd_iov_count;

                if (enc_pools_should_grow(desc->bd_iov_count, now)) {
                        page_pools.epp_growing = 1;

                        cfs_spin_unlock(&page_pools.epp_lock);
                        enc_pools_add_pages(page_pools.epp_pages_short / 2);
                        cfs_spin_lock(&page_pools.epp_lock);

                        page_pools.epp_growing = 0;

                        enc_pools_wakeup();
                } else {
                        if (++page_pools.epp_waitqlen >
                            page_pools.epp_st_max_wqlen)
                                page_pools.epp_st_max_wqlen =
                                                page_pools.epp_waitqlen;

                        cfs_set_current_state(CFS_TASK_UNINT);
                        cfs_waitlink_init(&waitlink);
                        cfs_waitq_add(&page_pools.epp_waitq, &waitlink);

                        cfs_spin_unlock(&page_pools.epp_lock);
                        cfs_waitq_wait(&waitlink, CFS_TASK_UNINT);
                        cfs_waitq_del(&page_pools.epp_waitq, &waitlink);
                        LASSERT(page_pools.epp_waitqlen > 0);
                        cfs_spin_lock(&page_pools.epp_lock);
                        page_pools.epp_waitqlen--;
                }

                LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
                page_pools.epp_pages_short -= desc->bd_iov_count;

                this_idle = 0;
                goto again;
        }

        /* record max wait time */
        if (unlikely(tick != 0)) {
                tick = cfs_time_current() - tick;
                if (tick > page_pools.epp_st_max_wait)
                        page_pools.epp_st_max_wait = tick;
        }

        /* proceed with rest of allocation */
        page_pools.epp_free_pages -= desc->bd_iov_count;

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;

        for (i = 0; i < desc->bd_iov_count; i++) {
                LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
                desc->bd_enc_iov[i].kiov_page =
                                        page_pools.epp_pools[p_idx][g_idx];
                page_pools.epp_pools[p_idx][g_idx] = NULL;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
                page_pools.epp_st_lowfree = page_pools.epp_free_pages;

        /*
         * new idle index = (old * weight + new) / (weight + 1)
         */
        if (this_idle == -1) {
                this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
                            page_pools.epp_total_pages;
        }
        page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
                                   this_idle) /
                                  (IDLE_IDX_WEIGHT + 1);

        page_pools.epp_last_access = cfs_time_current_sec();

        cfs_spin_unlock(&page_pools.epp_lock);
        return 0;
}
Example #22
int
libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
        size_t          rcvlen;
        int             rc;
        cfs_duration_t  to = cfs_time_seconds(timeout);
        cfs_time_t      then;
        struct timeval  tv;

        LASSERT(nob > 0);

        for (;;) {
                struct iovec  iov = {
                        .iov_base = buffer,
                        .iov_len  = nob
                };
                struct  msghdr  msg = {
                        .msg_name       = NULL,
                        .msg_namelen    = 0,
                        .msg_iov        = &iov,
                        .msg_iovlen     = 1,
                        .msg_control    = NULL,
                        .msg_controllen = 0,
                        .msg_flags      = 0,
                };
                cfs_duration_usec(to, &tv);
                rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO,
                                      &tv, sizeof(tv));
                if (rc != 0) {
                        CERROR("Can't set socket recv timeout "
                                        "%ld.%06d: %d\n",
                                        (long)tv.tv_sec, (int)tv.tv_usec, rc);
                        return rc;
                }

                then = cfs_time_current();
                rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen);
                to -= cfs_time_current() - then;

                if (rc != 0 && rc != -EWOULDBLOCK)
                        return rc;
                if (rcvlen == nob)
                        return 0;

                if (to <= 0)
                        return -EAGAIN;

                buffer = ((char *)buffer) + rcvlen;
                nob -= rcvlen;
        }
        return 0;
}

int
libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
        size_t          sndlen;
        int             rc;
        cfs_duration_t  to = cfs_time_seconds(timeout);
        cfs_time_t      then;
        struct timeval  tv;

        LASSERT(nob > 0);

        for (;;) {
                struct iovec  iov = {
                        .iov_base = buffer,
                        .iov_len  = nob
                };
                struct  msghdr  msg = {
                        .msg_name       = NULL,
                        .msg_namelen    = 0,
                        .msg_iov        = &iov,
                        .msg_iovlen     = 1,
                        .msg_control    = NULL,
                        .msg_controllen = 0,
                        .msg_flags      = (timeout == 0) ? MSG_DONTWAIT : 0,
                };

                if (timeout != 0) {
                        cfs_duration_usec(to, &tv);
                        rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO,
                                              &tv, sizeof(tv));
                        if (rc != 0) {
                                CERROR("Can't set socket send timeout "
                                       "%ld.%06d: %d\n",
                                       (long)tv.tv_sec, (int)tv.tv_usec, rc);
                                return rc;
                        }
                }

                then = cfs_time_current();
                rc = -sock_send(C2B_SOCK(sock), &msg, 
                                ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen);
                to -= cfs_time_current() - then;

                if (rc != 0 && rc != -EWOULDBLOCK)
                        return rc;
                if (sndlen == nob)
                        return 0;

                if (to <= 0)
                        return -EAGAIN;
                buffer = ((char *)buffer) + sndlen;
                nob -= sndlen;
        }
        return 0;

}

int
libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port)
{
        struct sockaddr_in sin;
        int                rc;

        if (remote != 0) 
                /* Get remote address */
                rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin));
        else 
                /* Get local address */
                rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin));
        if (rc != 0) {
                CERROR ("Error %d getting sock %s IP/port\n",
                         rc, remote ? "peer" : "local");
                return rc;
        }

        if (ip != NULL)
                *ip = ntohl (sin.sin_addr.s_addr);

        if (port != NULL)
                *port = ntohs (sin.sin_port);
        return 0;
}

int
libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize)
{
        int                 option;
        int                 rc;
        
        if (txbufsize != 0) {
                option = txbufsize;
                rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
                                     (char *)&option, sizeof (option));
                if (rc != 0) {
                        CERROR ("Can't set send buffer %d: %d\n",
                                option, rc);
                        return (rc);
                } 
        } 
        
        if (rxbufsize != 0) {
                option = rxbufsize;
                rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
                                      (char *)&option, sizeof (option));
                if (rc != 0) {
                        CERROR ("Can't set receive buffer %d: %d\n",
                                option, rc);
                        return (rc);
                }
        }
        return 0;
}
Example #23
File: remote_perm.c  Project: 3null/linux
int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_remote_perm *lrp = NULL, *tmp = NULL;
	struct hlist_head *head, *perm_hash = NULL;

	LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT);

#if 0
	if (perm->rp_uid != current->uid ||
	    perm->rp_gid != current->gid ||
	    perm->rp_fsuid != current->fsuid ||
	    perm->rp_fsgid != current->fsgid) {
		/* user might setxid in this small period */
		CDEBUG(D_SEC,
		       "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n",
		       perm->rp_uid, perm->rp_gid, perm->rp_fsuid,
		       perm->rp_fsgid, current->uid, current->gid,
		       current->fsuid, current->fsgid);
		return -EAGAIN;
	}
#endif

	if (!lli->lli_remote_perms) {
		perm_hash = alloc_rmtperm_hash();
		if (perm_hash == NULL) {
			CERROR("alloc lli_remote_perms failed!\n");
			return -ENOMEM;
		}
	}

	spin_lock(&lli->lli_lock);

	if (!lli->lli_remote_perms)
		lli->lli_remote_perms = perm_hash;
	else if (perm_hash)
		free_rmtperm_hash(perm_hash);

	head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid);

again:
	hlist_for_each_entry(tmp, head, lrp_list) {
		if (tmp->lrp_uid != perm->rp_uid)
			continue;
		if (tmp->lrp_gid != perm->rp_gid)
			continue;
		if (tmp->lrp_fsuid != perm->rp_fsuid)
			continue;
		if (tmp->lrp_fsgid != perm->rp_fsgid)
			continue;
		if (lrp)
			free_ll_remote_perm(lrp);
		lrp = tmp;
		break;
	}

	if (!lrp) {
		spin_unlock(&lli->lli_lock);
		lrp = alloc_ll_remote_perm();
		if (!lrp) {
			CERROR("alloc memory for ll_remote_perm failed!\n");
			return -ENOMEM;
		}
		spin_lock(&lli->lli_lock);
		goto again;
	}

	lrp->lrp_access_perm = perm->rp_access_perm;
	if (lrp != tmp) {
		lrp->lrp_uid	 = perm->rp_uid;
		lrp->lrp_gid	 = perm->rp_gid;
		lrp->lrp_fsuid       = perm->rp_fsuid;
		lrp->lrp_fsgid       = perm->rp_fsgid;
		hlist_add_head(&lrp->lrp_list, head);
	}
	lli->lli_rmtperm_time = cfs_time_current();
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n",
	       lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
	       lrp->lrp_access_perm);

	return 0;
}
Example #24
/*
 * statfs
 */
static inline int osp_statfs_need_update(struct osp_device *d)
{
	return !cfs_time_before(cfs_time_current(),
				d->opd_statfs_fresh_till);
}
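
osp_statfs_need_update() reports the cached statfs data as stale once cfs_time_current() reaches opd_statfs_fresh_till. Example #6 initializes that field to cfs_time_shift(-1000), i.e. far in the past, so the first check always forces an update. The refresh side is not shown on this page; a hedged sketch of what it would look like, reusing the field names from example #6 and assuming opd_statfs_maxage holds the interval in seconds:

/* Sketch only: renew the statfs freshness window after a successful
 * statfs reply; field names follow example #6, the logic is assumed. */
static void osp_statfs_mark_fresh(struct osp_device *d)
{
	/* cached results stay valid for the next opd_statfs_maxage seconds */
	d->opd_statfs_fresh_till = cfs_time_shift(d->opd_statfs_maxage);
}
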
Example #25
static inline int capa_is_to_expire(struct obd_capa *ocapa)
{
	return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
}
Example #26
void osc_object_set_contended(struct osc_object *obj)
{
	obj->oo_contention_time = cfs_time_current();
	/* mb(); */
	obj->oo_contended = 1;
}
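
Example #3 tests the contention window that osc_object_set_contended() opens here, and closes it through osc_object_clear_contended(), whose body is not included on this page. Given the pair above it presumably just resets the flag; a hedged sketch (an assumption, not copied from the Lustre tree):

/* Sketch: assumed counterpart of osc_object_set_contended() above;
 * the real implementation may differ. */
void osc_object_clear_contended(struct osc_object *obj)
{
	obj->oo_contended = 0;
}
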
Example #27
int
LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms,
            lnet_event_t *event, int *which)
{
        int              i;
        int              rc;
#ifdef __KERNEL__
        cfs_waitlink_t   wl;
        cfs_time_t       now;
#else
        struct timeval   then;
        struct timeval   now;
# ifdef HAVE_LIBPTHREAD
        struct timespec  ts;
# endif
        lnet_ni_t       *eqwaitni = the_lnet.ln_eqwaitni;
#endif
        ENTRY;

        LASSERT (the_lnet.ln_init);
        LASSERT (the_lnet.ln_refcount > 0);

        if (neq < 1)
                RETURN(-ENOENT);

        LNET_LOCK();

        for (;;) {
#ifndef __KERNEL__
                LNET_UNLOCK();

                /* Recursion breaker */
                if (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
                    !LNetHandleIsEqual(eventqs[0], the_lnet.ln_rc_eqh))
                        lnet_router_checker();

                LNET_LOCK();
#endif
                for (i = 0; i < neq; i++) {
                        lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]);

                        if (eq == NULL) {
                                LNET_UNLOCK();
                                RETURN(-ENOENT);
                        }

                        rc = lib_get_event (eq, event);
                        if (rc != 0) {
                                LNET_UNLOCK();
                                *which = i;
                                RETURN(rc);
                        }
                }

#ifdef __KERNEL__
                if (timeout_ms == 0) {
                        LNET_UNLOCK();
                        RETURN (0);
                }

                cfs_waitlink_init(&wl);
                set_current_state(TASK_INTERRUPTIBLE);
                cfs_waitq_add(&the_lnet.ln_waitq, &wl);

                LNET_UNLOCK();

                if (timeout_ms < 0) {
                        cfs_waitq_wait (&wl, CFS_TASK_INTERRUPTIBLE);
                } else {
                        struct timeval tv;

                        now = cfs_time_current();
                        cfs_waitq_timedwait(&wl, CFS_TASK_INTERRUPTIBLE,
                                            cfs_time_seconds(timeout_ms)/1000);
                        cfs_duration_usec(cfs_time_sub(cfs_time_current(), now),
                                          &tv);
                        timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000;
                        if (timeout_ms < 0)
                                timeout_ms = 0;
                }

                LNET_LOCK();
                cfs_waitq_del(&the_lnet.ln_waitq, &wl);
#else
                if (eqwaitni != NULL) {
                        /* I have a single NI that I have to call into, to get
                         * events queued, or to block. */
                        lnet_ni_addref_locked(eqwaitni);
                        LNET_UNLOCK();

                        if (timeout_ms <= 0) {
                                (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms);
                        } else {
                                gettimeofday(&then, NULL);

                                (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms);

                                gettimeofday(&now, NULL);
                                timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
                                              (now.tv_usec - then.tv_usec) / 1000;
                                if (timeout_ms < 0)
                                        timeout_ms = 0;
                        }

                        LNET_LOCK();
                        lnet_ni_decref_locked(eqwaitni);

                        /* don't call into eqwaitni again if timeout has
                         * expired */
                        if (timeout_ms == 0)
                                eqwaitni = NULL;

                        continue;               /* go back and check for events */
                }

                if (timeout_ms == 0) {
                        LNET_UNLOCK();
                        RETURN (0);
                }

# ifndef HAVE_LIBPTHREAD
                /* If I'm single-threaded, LNET fails at startup if it can't
                 * set the_lnet.ln_eqwaitni correctly.  */
                LBUG();
# else
                if (timeout_ms < 0) {
                        pthread_cond_wait(&the_lnet.ln_cond,
                                          &the_lnet.ln_lock);
                } else {
                        gettimeofday(&then, NULL);

                        ts.tv_sec = then.tv_sec + timeout_ms/1000;
                        ts.tv_nsec = then.tv_usec * 1000 +
                                     (timeout_ms%1000) * 1000000;
                        if (ts.tv_nsec >= 1000000000) {
                                ts.tv_sec++;
                                ts.tv_nsec -= 1000000000;
                        }

                        pthread_cond_timedwait(&the_lnet.ln_cond,
                                               &the_lnet.ln_lock, &ts);

                        gettimeofday(&now, NULL);
                        timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
                                      (now.tv_usec - then.tv_usec) / 1000;

                        if (timeout_ms < 0)
                                timeout_ms = 0;
                }
# endif
#endif
        }
}
Example #28
int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq,
                           struct lustre_quota_ctxt *qctxt)
{
        struct lustre_qunit_size *lqs = NULL;
        unsigned long *unit, *tune;
        signed long tmp = 0;
        cfs_time_t time_limit = 0, *shrink;
        int i, rc = 0;
        ENTRY;

        LASSERT(qctxt);
        lqs = quota_search_lqs(LQS_KEY(QAQ_IS_GRP(oqaq), oqaq->qaq_id),
                               qctxt, QAQ_IS_CREATE_LQS(oqaq) ? 1 : 0);
        if (lqs == NULL || IS_ERR(lqs)){
                CERROR("fail to find a lqs for %sid %u!\n",
                       QAQ_IS_GRP(oqaq) ? "g" : "u", oqaq->qaq_id);
                RETURN(PTR_ERR(lqs));
        }

        CDEBUG(D_QUOTA, "before: bunit: %lu, iunit: %lu.\n",
               lqs->lqs_bunit_sz, lqs->lqs_iunit_sz);
        cfs_spin_lock(&lqs->lqs_lock);
        for (i = 0; i < 2; i++) {
                if (i == 0 && !QAQ_IS_ADJBLK(oqaq))
                        continue;

                if (i == 1 && !QAQ_IS_ADJINO(oqaq))
                        continue;

                tmp = i ? (lqs->lqs_iunit_sz - oqaq->qaq_iunit_sz) :
                          (lqs->lqs_bunit_sz - oqaq->qaq_bunit_sz);
                shrink = i ? &lqs->lqs_last_ishrink :
                             &lqs->lqs_last_bshrink;
                time_limit = cfs_time_add(i ? lqs->lqs_last_ishrink :
                                              lqs->lqs_last_bshrink,
                                   cfs_time_seconds(qctxt->lqc_switch_seconds));
                unit = i ? &lqs->lqs_iunit_sz : &lqs->lqs_bunit_sz;
                tune = i ? &lqs->lqs_itune_sz : &lqs->lqs_btune_sz;

                /* quota master shrinks */
                if (qctxt->lqc_handler && tmp > 0)
                        *shrink = cfs_time_current();

                /* quota master enlarges */
                if (qctxt->lqc_handler && tmp < 0) {
                        /* in case of ping-pong effect, don't enlarge lqs
                         * in a short time */
                        if (*shrink &&
                            cfs_time_before(cfs_time_current(), time_limit))
                                tmp = 0;
                }

                /* when setquota, don't enlarge lqs b=18616 */
                if (QAQ_IS_CREATE_LQS(oqaq) && tmp < 0)
                        tmp = 0;

                if (tmp != 0) {
                        *unit = i ? oqaq->qaq_iunit_sz : oqaq->qaq_bunit_sz;
                        *tune = (*unit) / 2;
                }


                if (tmp > 0)
                        rc |= i ? LQS_INO_DECREASE : LQS_BLK_DECREASE;
                if (tmp < 0)
                        rc |= i ? LQS_INO_INCREASE : LQS_BLK_INCREASE;
        }
        cfs_spin_unlock(&lqs->lqs_lock);
        CDEBUG(D_QUOTA, "after: bunit: %lu, iunit: %lu.\n",
               lqs->lqs_bunit_sz, lqs->lqs_iunit_sz);

        lqs_putref(lqs);

        RETURN(rc);
}
Example #29
static int quota_chk_acq_common(struct obd_device *obd, struct obd_export *exp,
                                const unsigned int id[], int pending[],
                                int count, quota_acquire acquire,
                                struct obd_trans_info *oti, int isblk,
                                struct inode *inode, int frags)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        struct timeval work_start;
        struct timeval work_end;
        long timediff;
        struct l_wait_info lwi = { 0 };
        int rc = 0, cycle = 0, count_err = 1;
        ENTRY;

        if (!quota_is_set(obd, id, isblk ? QB_SET : QI_SET))
                RETURN(0);

        if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
                /* If the client has been evicted or if it
                 * timed out and tried to reconnect already,
                 * abort the request immediately */
                RETURN(-ENOTCONN);

        CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
        pending[USRQUOTA] = pending[GRPQUOTA] = 0;
        /* Unfortunately, if quota master is too busy to handle the
         * pre-dqacq in time and quota hash on ost is used up, we
         * have to wait for the completion of in flight dqacq/dqrel,
         * in order to get enough quota for write b=12588 */
        cfs_gettimeofday(&work_start);
        while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk,
                                        inode, frags)) &
               QUOTA_RET_ACQUOTA) {

                cfs_spin_lock(&qctxt->lqc_lock);
                if (!qctxt->lqc_import && oti) {
                        cfs_spin_unlock(&qctxt->lqc_lock);

                        LASSERT(oti && oti->oti_thread &&
                                oti->oti_thread->t_watchdog);

                        lc_watchdog_disable(oti->oti_thread->t_watchdog);
                        CDEBUG(D_QUOTA, "sleep for quota master\n");
                        l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
                                     &lwi);
                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
                        lc_watchdog_touch(oti->oti_thread->t_watchdog,
                                 CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
                } else {
                        cfs_spin_unlock(&qctxt->lqc_lock);
                }

                cycle++;
                if (isblk)
                        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
                /* after acquire(), we should run quota_check_common again
                 * so that we confirm there is enough quota to finish the write */
                rc = acquire(obd, id, oti, isblk);

                /* please reference to dqacq_completion for the below */
                /* a new request is finished, try again */
                if (rc == QUOTA_REQ_RETURNED) {
                        CDEBUG(D_QUOTA, "finish a quota req, try again\n");
                        continue;
                }

                /* it is out of quota already */
                if (rc == -EDQUOT) {
                        CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
                        break;
                }

                /* Related quota has been disabled by master, but enabled by
                 * slave, do not try again. */
                if (unlikely(rc == -ESRCH)) {
                        CERROR("mismatched quota configuration, stop try.\n");
                        break;
                }

                if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
                        /* The client has been evicted or has already tried
                         * to reconnect, abort the request */
                        RETURN(-ENOTCONN);

                /* -EBUSY and others, wait a second and try again */
                if (rc < 0) {
                        cfs_waitq_t        waitq;
                        struct l_wait_info lwi;

                        if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
                                lc_watchdog_touch(oti->oti_thread->t_watchdog,
                                       CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
                        CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
                               count_err++);

                        cfs_waitq_init(&waitq);
                        lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
                                          NULL);
                        l_wait_event(waitq, 0, &lwi);
                }

                if (rc < 0 || cycle % 10 == 0) {
                        cfs_spin_lock(&last_print_lock);
                        if (last_print == 0 ||
                            cfs_time_before((last_print + cfs_time_seconds(30)),
                                            cfs_time_current())) {
                                last_print = cfs_time_current();
                                cfs_spin_unlock(&last_print_lock);
                                CWARN("still haven't managed to acquire quota "
                                      "space from the quota master after %d "
                                      "retries (err=%d, rc=%d)\n",
                                      cycle, count_err - 1, rc);
                        } else {
                                cfs_spin_unlock(&last_print_lock);
                        }
                }

                CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
                       cycle);
        }
        cfs_gettimeofday(&work_end);
        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
        lprocfs_counter_add(qctxt->lqc_stats,
                            isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
                                    LQUOTA_WAIT_FOR_CHK_INO,
                            timediff);

        if (rc > 0)
                rc = 0;
        RETURN(rc);
}
Example #30
/*
 * Called to reserve an object in the pool.
 * Return codes:
 *  ENOSPC - no space on the corresponding OST
 *  EAGAIN - precreation is in progress, try again later
 *  EIO    - no access to the OST
 */
int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
{
	struct l_wait_info	 lwi;
	cfs_time_t		 expire = cfs_time_shift(obd_timeout);
	int			 precreated, rc;

	ENTRY;

	LASSERTF(osp_objs_precreated(env, d) >= 0, "Last created FID "DFID
		 "Next FID "DFID"\n", PFID(&d->opd_pre_last_created_fid),
		 PFID(&d->opd_pre_used_fid));

	/*
	 * wait till:
	 *  - preallocation is done
	 *  - no free space expected soon
	 *  - can't connect to OST for too long (obd_timeout)
	 *  - OST can allocate fid sequence.
	 */
	while ((rc = d->opd_pre_status) == 0 || rc == -ENOSPC ||
		rc == -ENODEV || rc == -EAGAIN || rc == -ENOTCONN) {

		/*
		 * increase number of precreations
		 */
		precreated = osp_objs_precreated(env, d);
		if (d->opd_pre_grow_count < d->opd_pre_max_grow_count &&
		    d->opd_pre_grow_slow == 0 &&
		    precreated <= (d->opd_pre_grow_count / 4 + 1)) {
			spin_lock(&d->opd_pre_lock);
			d->opd_pre_grow_slow = 1;
			d->opd_pre_grow_count *= 2;
			spin_unlock(&d->opd_pre_lock);
		}

		spin_lock(&d->opd_pre_lock);
		precreated = osp_objs_precreated(env, d);
		if (precreated > d->opd_pre_reserved &&
		    !d->opd_pre_recovering) {
			d->opd_pre_reserved++;
			spin_unlock(&d->opd_pre_lock);
			rc = 0;

			/* XXX: don't wake up if precreation is in progress */
			if (osp_precreate_near_empty_nolock(env, d) &&
			   !osp_precreate_end_seq_nolock(env, d))
				wake_up(&d->opd_pre_waitq);

			break;
		}
		spin_unlock(&d->opd_pre_lock);

		/*
		 * all precreated objects have been used and the no-space
		 * status leaves us no chance to succeed very soon,
		 * but if there is a destroy in progress, then we should
		 * wait till that is done - some space might be released
		 */
		if (unlikely(rc == -ENOSPC)) {
			if (d->opd_syn_changes) {
				/* force local commit to release space */
				dt_commit_async(env, d->opd_storage);
			}
			if (d->opd_syn_rpc_in_progress) {
				/* just wait till destroys are done */
				/* see l_wait_even() few lines below */
			}
			if (d->opd_syn_changes +
			    d->opd_syn_rpc_in_progress == 0) {
				/* no hope for free space */
				break;
			}
		}

		/* XXX: don't wake up if precreation is in progress */
		wake_up(&d->opd_pre_waitq);

		lwi = LWI_TIMEOUT(expire - cfs_time_current(),
				osp_precreate_timeout_condition, d);
		if (cfs_time_aftereq(cfs_time_current(), expire)) {
			rc = -ETIMEDOUT;
			break;
		}

		l_wait_event(d->opd_pre_user_waitq,
			     osp_precreate_ready_condition(env, d), &lwi);
	}

	RETURN(rc);
}