static int lfsck_namespace_checkpoint(const struct lu_env *env,
				      struct lfsck_component *com, bool init)
{
	struct lfsck_instance	*lfsck = com->lc_lfsck;
	struct lfsck_namespace	*ns    = com->lc_file_ram;
	int			 rc;

	if (com->lc_new_checked == 0 && !init)
		return 0;

	down_write(&com->lc_sem);
	if (init) {
		ns->ln_pos_latest_start = lfsck->li_pos_current;
	} else {
		ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
		ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		ns->ln_time_last_checkpoint = cfs_time_current_sec();
		ns->ln_items_checked += com->lc_new_checked;
		com->lc_new_checked = 0;
	}

	rc = lfsck_namespace_store(env, com, false);
	up_write(&com->lc_sem);
	return rc;
}
/**
 * Callback handler for receiving incoming glimpse ASTs.
 *
 * This can only happen on the client side. After handling the glimpse AST
 * we also consider dropping the lock here if it is unused locally for a
 * long time.
 */
static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
				    struct ldlm_namespace *ns,
				    struct ldlm_request *dlm_req,
				    struct ldlm_lock *lock)
{
	int rc = -ENOSYS;

	LDLM_DEBUG(lock, "client glimpse AST callback handler");

	if (lock->l_glimpse_ast != NULL)
		rc = lock->l_glimpse_ast(lock, req);

	if (req->rq_repmsg != NULL) {
		ptlrpc_reply(req);
	} else {
		req->rq_status = rc;
		ptlrpc_error(req);
	}

	lock_res_and_lock(lock);
	if (lock->l_granted_mode == LCK_PW &&
	    !lock->l_readers && !lock->l_writers &&
	    cfs_time_after(cfs_time_current(),
			   cfs_time_add(lock->l_last_used,
					cfs_time_seconds(10)))) {
		unlock_res_and_lock(lock);
		if (ldlm_bl_to_thread_lock(ns, NULL, lock))
			ldlm_handle_bl_callback(ns, NULL, lock);
		return;
	}
	unlock_res_and_lock(lock);
	LDLM_LOCK_RELEASE(lock);
}
int osc_object_is_contended(struct osc_object *obj)
{
	struct osc_device *dev = lu2osc_dev(obj->oo_cl.co_lu.lo_dev);
	int osc_contention_time = dev->od_contention_time;
	unsigned long cur_time = cfs_time_current();
	unsigned long retry_time;

	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION))
		return 1;

	if (!obj->oo_contended)
		return 0;

	/*
	 * I like copy-paste. The code is copied from
	 * ll_file_is_contended.
	 */
	retry_time = cfs_time_add(obj->oo_contention_time,
				  cfs_time_seconds(osc_contention_time));
	if (cfs_time_after(cur_time, retry_time)) {
		osc_object_clear_contended(obj);
		return 0;
	}
	return 1;
}
int cfs_timer_is_armed(cfs_timer_t *l)
{
	if (cfs_time_before(cfs_time_current(), l->expires))
		return 1;
	else
		return 0;
}
int
libcfs_sock_write(cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
	int		rc;
	struct pollfd	pfd;
	cfs_time_t	start_time = cfs_time_current();

	pfd.fd = sock->s_fd;
	pfd.events = POLLOUT;
	pfd.revents = 0;

	/* poll(2) measures timeout in msec */
	timeout *= 1000;

	while (nob != 0 && timeout > 0) {
		cfs_time_t current_time;

		rc = poll(&pfd, 1, timeout);
		if (rc < 0)
			return -errno;
		if (rc == 0)
			return -ETIMEDOUT;
		if ((pfd.revents & POLLOUT) == 0)
			return -EIO;

		rc = write(sock->s_fd, buffer, nob);
		if (rc < 0)
			return -errno;
		if (rc == 0)
			return -EIO;

		buffer = ((char *)buffer) + rc;
		nob -= rc;

		current_time = cfs_time_current();
		timeout -= 1000 * cfs_duration_sec(cfs_time_sub(current_time,
								start_time));
		start_time = current_time;
	}

	if (nob == 0)
		return 0;
	else
		return -ETIMEDOUT;
}
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	struct task_struct	*task;

	ENTRY;

	OBD_ALLOC_PTR(d->opd_pre);
	if (d->opd_pre == NULL)
		RETURN(-ENOMEM);

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	init_waitqueue_head(&d->opd_pre_waitq);
	init_waitqueue_head(&d->opd_pre_user_waitq);
	init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	task = kthread_run(osp_precreate_thread, d, "osp-pre-%u-%u",
			   d->opd_index, d->opd_group);
	if (IS_ERR(task)) {
		CERROR("can't start precreate thread %ld\n", PTR_ERR(task));
		RETURN(PTR_ERR(task));
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
ssize_t
libcfs_sock_read(struct lnet_xport *lx, void *buffer, size_t nob, int timeout)
{
	ssize_t		rc;
	struct pollfd	pfd;
	cfs_time_t	start_time = cfs_time_current();

	pfd.fd = lx->lx_fd;
	pfd.events = POLLIN;
	pfd.revents = 0;

	/* poll(2) measures timeout in msec */
	timeout *= 1000;

	while (nob != 0 && timeout > 0) {
		rc = poll(&pfd, 1, timeout);
		if (rc < 0)
			return -errno;
		if (rc == 0)
			return -ETIMEDOUT;
		if ((pfd.revents & POLLIN) == 0)
			return -EIO;

		rc = read(lx->lx_fd, buffer, nob);
		if (rc < 0)
			return -errno;
		if (rc == 0)
			return -EIO;

		buffer = ((char *)buffer) + rc;
		nob -= rc;

		timeout -= cfs_duration_sec(cfs_time_sub(cfs_time_current(),
							 start_time));
	}

	if (nob == 0)
		return 0;
	else
		return -ETIMEDOUT;
}
ssize_t
libcfs_ssl_sock_read(struct lnet_xport *lx, void *buf, size_t n, int timeout)
{
	cfs_time_t	start_time = cfs_time_current();
	struct pollfd	pfd;
	ssize_t		rc;

	pfd.fd = lx->lx_fd;
	pfd.events = POLLIN;
	pfd.revents = 0;

	/* poll(2) measures timeout in msec */
	timeout *= 1000;

	while (n != 0 && timeout > 0) {
		rc = poll(&pfd, 1, timeout);
		if (rc < 0)
			return (-errno);
		if (rc == 0)
			return (-ETIMEDOUT);
		if ((pfd.revents & POLLIN) == 0)
			return (-EIO);

		rc = SSL_read(lx->lx_ssl, buf, n);
		if (rc < 0)
			return (-errno);
		if (rc == 0)
			return (-EIO);

		buf = (char *)buf + rc;
		n -= rc;

		timeout -= cfs_duration_sec(cfs_time_sub(
		    cfs_time_current(), start_time));
	}

	if (n == 0)
		return (0);
	return (-ETIMEDOUT);
}
static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
			struct lu_buf *buf, loff_t *pos)
{
	struct osd_object *obj = osd_dt_obj(dt);
	struct osd_device *osd = osd_obj2dev(obj);
	uint64_t	   old_size;
	int		   size = buf->lb_len;
	int		   rc;
	unsigned long	   start;

	LASSERT(dt_object_exists(dt));
	LASSERT(obj->oo_db);

	start = cfs_time_current();

	read_lock(&obj->oo_attr_lock);
	old_size = obj->oo_attr.la_size;
	read_unlock(&obj->oo_attr_lock);

	if (*pos + size > old_size) {
		if (old_size < *pos)
			return 0;
		else
			size = old_size - *pos;
	}

	record_start_io(osd, READ, 0);

	rc = -dmu_read(osd->od_os, obj->oo_db->db_object, *pos, size,
		       buf->lb_buf, DMU_READ_PREFETCH);

	record_end_io(osd, READ, cfs_time_current() - start, size,
		      size >> PAGE_CACHE_SHIFT);
	if (rc == 0) {
		rc = size;
		*pos += size;
	}
	return rc;
}
/* Read from wire as much data as possible.
 * Returns 0 or 1 on success, <0 if error or EOF.
 * 0 means partial read, 1 - complete */
int
usocklnd_read_data(usock_conn_t *conn)
{
	struct iovec *iov;
	int	      nob;
	cfs_time_t    t;

	LASSERT (conn->uc_rx_nob_wanted != 0);

	do {
		usock_peer_t *peer = conn->uc_peer;

		LASSERT (conn->uc_rx_niov > 0);

		nob = libcfs_sock_readv(conn->uc_sock,
					conn->uc_rx_iov, conn->uc_rx_niov);
		if (nob <= 0) { /* read nothing or error */
			if (nob < 0)
				conn->uc_errored = 1;
			return nob;
		}

		LASSERT (nob <= conn->uc_rx_nob_wanted);
		conn->uc_rx_nob_wanted -= nob;
		conn->uc_rx_nob_left -= nob;
		t = cfs_time_current();
		conn->uc_rx_deadline = cfs_time_add(t,
				cfs_time_seconds(usock_tuns.ut_timeout));
		if (peer != NULL)
			peer->up_last_alive = t;

		/* "consume" iov */
		iov = conn->uc_rx_iov;
		do {
			LASSERT (conn->uc_rx_niov > 0);

			if (nob < iov->iov_len) {
				iov->iov_base = (void *)(((unsigned long)
						(iov->iov_base)) + nob);
				iov->iov_len -= nob;
				break;
			}

			nob -= iov->iov_len;
			conn->uc_rx_iov = ++iov;
			conn->uc_rx_niov--;
		} while (nob != 0);

	} while (conn->uc_rx_nob_wanted != 0);

	return 1; /* read complete */
}
struct lc_watchdog *lc_watchdog_add(int timeout,
				    void (*callback)(pid_t, void *),
				    void *data)
{
	struct lc_watchdog *lcw = NULL;
	ENTRY;

	LIBCFS_ALLOC(lcw, sizeof(*lcw));
	if (lcw == NULL) {
		CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
		RETURN(ERR_PTR(-ENOMEM));
	}

	cfs_spin_lock_init(&lcw->lcw_lock);
	lcw->lcw_refcount = 1; /* refcount for owner */
	lcw->lcw_task     = cfs_current();
	lcw->lcw_pid      = cfs_curproc_pid();
	lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
	lcw->lcw_data     = data;
	lcw->lcw_state    = LC_WATCHDOG_DISABLED;

	CFS_INIT_LIST_HEAD(&lcw->lcw_list);
	cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);

	cfs_down(&lcw_refcount_sem);
	if (++lcw_refcount == 1)
		lcw_dispatch_start();
	cfs_up(&lcw_refcount_sem);

	/* Keep this working in case we enable them by default */
	if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
		lcw->lcw_last_touched = cfs_time_current();
		cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
			      cfs_time_current());
	}

	RETURN(lcw);
}
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	int			 rc;

	ENTRY;

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	cfs_waitq_init(&d->opd_pre_waitq);
	cfs_waitq_init(&d->opd_pre_user_waitq);
	cfs_waitq_init(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	rc = cfs_create_thread(osp_precreate_thread, d, 0);
	if (rc < 0) {
		CERROR("can't start precreate thread %d\n", rc);
		RETURN(rc);
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
static int lfsck_namespace_post(const struct lu_env *env,
				struct lfsck_component *com,
				int result, bool init)
{
	struct lfsck_instance	*lfsck = com->lc_lfsck;
	struct lfsck_namespace	*ns    = com->lc_file_ram;
	int			 rc;

	down_write(&com->lc_sem);

	spin_lock(&lfsck->li_lock);
	if (!init)
		ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
	if (result > 0) {
		ns->ln_status = LS_SCANNING_PHASE2;
		ns->ln_flags |= LF_SCANNED_ONCE;
		ns->ln_flags &= ~LF_UPGRADE;
		cfs_list_del_init(&com->lc_link);
		cfs_list_del_init(&com->lc_link_dir);
		cfs_list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
	} else if (result == 0) {
		ns->ln_status = lfsck->li_status;
		if (ns->ln_status == 0)
			ns->ln_status = LS_STOPPED;
		if (ns->ln_status != LS_PAUSED) {
			cfs_list_del_init(&com->lc_link);
			cfs_list_del_init(&com->lc_link_dir);
			cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
		}
	} else {
		ns->ln_status = LS_FAILED;
		cfs_list_del_init(&com->lc_link);
		cfs_list_del_init(&com->lc_link_dir);
		cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
	}
	spin_unlock(&lfsck->li_lock);

	if (!init) {
		ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		ns->ln_time_last_checkpoint = cfs_time_current_sec();
		ns->ln_items_checked += com->lc_new_checked;
		com->lc_new_checked = 0;
	}

	rc = lfsck_namespace_store(env, com, false);

	up_write(&com->lc_sem);

	return rc;
}
/* Send as much tx data as possible.
 * Returns 0 or 1 on success, <0 if fatal error.
 * 0 means partial send or non-fatal error, 1 - complete.
 * Rely on libcfs_sock_writev() for differentiating fatal and
 * non-fatal errors. An error should be considered as non-fatal if:
 * 1) it still makes sense to continue reading &&
 * 2) anyway, poll() will set up POLLHUP|POLLERR flags */
int
usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx)
{
	struct iovec *iov;
	int	      nob;
	cfs_time_t    t;

	LASSERT (tx->tx_resid != 0);

	do {
		usock_peer_t *peer = conn->uc_peer;

		LASSERT (tx->tx_niov > 0);

		nob = libcfs_sock_writev(conn->uc_sock,
					 tx->tx_iov, tx->tx_niov);
		if (nob < 0)
			conn->uc_errored = 1;
		if (nob <= 0) /* write queue is flow-controlled or error */
			return nob;

		LASSERT (nob <= tx->tx_resid);
		tx->tx_resid -= nob;
		t = cfs_time_current();
		conn->uc_tx_deadline = cfs_time_add(t,
				cfs_time_seconds(usock_tuns.ut_timeout));
		if (peer != NULL)
			peer->up_last_alive = t;

		/* "consume" iov */
		iov = tx->tx_iov;
		do {
			LASSERT (tx->tx_niov > 0);

			if (nob < iov->iov_len) {
				iov->iov_base = (void *)(((unsigned long)
						(iov->iov_base)) + nob);
				iov->iov_len -= nob;
				break;
			}

			nob -= iov->iov_len;
			tx->tx_iov = ++iov;
			tx->tx_niov--;
		} while (nob != 0);

	} while (tx->tx_resid != 0);

	return 1; /* send complete */
}
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
	ENTRY;
	LASSERT(lcw != NULL);

	lc_watchdog_del_pending(lcw);

	lcw_update_time(lcw, "resumed");
	lcw->lcw_state = LC_WATCHDOG_ENABLED;

	cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
		      cfs_time_seconds(timeout));

	EXIT;
}
static void lcw_dump_stack(struct lc_watchdog *lcw)
{
	cfs_time_t	current_time;
	cfs_duration_t	delta_time;
	struct timeval	timediff;

	current_time = cfs_time_current();
	delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
	cfs_duration_usec(delta_time, &timediff);

	/*
	 * Check to see if we should throttle the watchdog timer to avoid
	 * too many dumps going to the console thus triggering an NMI.
	 */
	delta_time = cfs_duration_sec(cfs_time_sub(current_time,
						   lcw_last_watchdog_time));
	if (delta_time < libcfs_watchdog_ratelimit &&
	    lcw_recent_watchdog_count > 3) {
		LCONSOLE_WARN("Service thread pid %u was inactive for "
			      "%lu.%.02lus. Watchdog stack traces are limited "
			      "to 3 per %d seconds, skipping this one.\n",
			      (int)lcw->lcw_pid,
			      timediff.tv_sec,
			      timediff.tv_usec / 10000,
			      libcfs_watchdog_ratelimit);
	} else {
		if (delta_time < libcfs_watchdog_ratelimit) {
			lcw_recent_watchdog_count++;
		} else {
			memcpy(&lcw_last_watchdog_time, &current_time,
			       sizeof(current_time));
			lcw_recent_watchdog_count = 0;
		}

		LCONSOLE_WARN("Service thread pid %u was inactive for "
			      "%lu.%.02lus. The thread might be hung, or it "
			      "might only be slow and will resume later. "
			      "Dumping the stack trace for debugging purposes:"
			      "\n",
			      (int)lcw->lcw_pid,
			      timediff.tv_sec,
			      timediff.tv_usec / 10000);
		lcw_dump(lcw);
	}
}
static void ctx_start_timer_kr(struct ptlrpc_cli_ctx *ctx, long timeout)
{
	struct gss_cli_ctx_keyring *gctx_kr = ctx2gctx_keyring(ctx);
	struct timer_list	   *timer = gctx_kr->gck_timer;

	LASSERT(timer);

	CDEBUG(D_SEC, "ctx %p: start timer %lds\n", ctx, timeout);
	timeout = msecs_to_jiffies(timeout * MSEC_PER_SEC) +
		  cfs_time_current();

	init_timer(timer);
	timer->expires = timeout;
	timer->data = (unsigned long)ctx;
	timer->function = ctx_upcall_timeout_kr;

	add_timer(timer);
}
static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
{
	cfs_time_t newtime = cfs_time_current();

	if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
		struct timeval timediff;
		cfs_time_t delta_time = cfs_time_sub(newtime,
						     lcw->lcw_last_touched);
		cfs_duration_usec(delta_time, &timediff);

		LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. "
			      "This indicates the system was overloaded (too "
			      "many service threads, or there were not enough "
			      "hardware resources).\n",
			      lcw->lcw_pid,
			      message,
			      timediff.tv_sec,
			      timediff.tv_usec / 10000);
	}
	lcw->lcw_last_touched = newtime;
}
/**
 * Prepare buffers for read.
 *
 * The function maps the range described by \a off and \a len to \a lnb array.
 * dmu_buf_hold_array_by_bonus() finds/creates appropriate ARC buffers, then
 * we fill \a lnb array with the pages storing ARC buffers. Notice the current
 * implementation passes TRUE to dmu_buf_hold_array_by_bonus() to fill ARC
 * buffers with actual data, I/O is done in the context of osd_bufs_get_read().
 * A better implementation would just return the buffers (potentially unfilled)
 * and subsequent osd_read_prep() would do I/O for many ranges concurrently.
 *
 * \param[in] env	environment
 * \param[in] obj	object
 * \param[in] off	offset in bytes
 * \param[in] len	the number of bytes to access
 * \param[out] lnb	array of local niobufs pointing to the buffers with data
 *
 * \retval		0 for success
 * \retval		negative error number of failure
 */
static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj,
			     loff_t off, ssize_t len, struct niobuf_local *lnb)
{
	struct osd_device *osd = osd_obj2dev(obj);
	unsigned long	   start = cfs_time_current();
	int		   rc, i, numbufs, npages = 0;
	dmu_buf_t	 **dbp;
	ENTRY;

	record_start_io(osd, READ, 0);

	/* grab buffers for read:
	 * OSD API lets us grab buffers first, then initiate IO(s)
	 * so that all required IOs will be done in parallel, but at the
	 * moment DMU doesn't provide us with a method to grab buffers.
	 * If we discover this is vital for good performance we
	 * can get our own replacement for dmu_buf_hold_array_by_bonus(). */
	while (len > 0) {
		rc = -dmu_buf_hold_array_by_bonus(obj->oo_db, off, len, TRUE,
						  osd_zerocopy_tag, &numbufs,
						  &dbp);
		if (unlikely(rc))
			GOTO(err, rc);

		for (i = 0; i < numbufs; i++) {
			int bufoff, tocpy, thispage;
			void *dbf = dbp[i];

			LASSERT(len > 0);

			atomic_inc(&osd->od_zerocopy_pin);

			bufoff = off - dbp[i]->db_offset;
			tocpy = min_t(int, dbp[i]->db_size - bufoff, len);

			/* kind of trick to differentiate dbuf vs. arcbuf */
			LASSERT(((unsigned long)dbp[i] & 1) == 0);
			dbf = (void *) ((unsigned long)dbp[i] | 1);

			while (tocpy > 0) {
				thispage = PAGE_CACHE_SIZE;
				thispage -= bufoff & (PAGE_CACHE_SIZE - 1);
				thispage = min(tocpy, thispage);

				lnb->lnb_rc = 0;
				lnb->lnb_file_offset = off;
				lnb->lnb_page_offset = bufoff & ~PAGE_MASK;
				lnb->lnb_len = thispage;
				lnb->lnb_page = kmem_to_page(dbp[i]->db_data +
							     bufoff);
				/* mark just a single slot: we need this
				 * reference to dbuf to be released once */
				lnb->lnb_data = dbf;
				dbf = NULL;

				tocpy -= thispage;
				len -= thispage;
				bufoff += thispage;
				off += thispage;

				npages++;
				lnb++;
			}

			/* steal dbuf so dmu_buf_rele_array() can't release
			 * it */
			dbp[i] = NULL;
		}

		dmu_buf_rele_array(dbp, numbufs, osd_zerocopy_tag);
	}

	record_end_io(osd, READ, cfs_time_current() - start,
		      npages * PAGE_SIZE, npages);

	RETURN(npages);

err:
	LASSERT(rc < 0);
	osd_bufs_put(env, &obj->oo_dt, lnb - npages, npages);
	RETURN(rc);
}
/* All actions that we need after receiving hello on passive conn:
 * 1) Stash peer's nid, pid, incarnation and conn type
 * 2) Cope with easy case: conn[idx] is empty - just save conn there
 * 3) Resolve race:
 *    a) if our nid is higher - reply with CONN_NONE and make us zombie
 *    b) if peer's nid is higher - postpone race resolution till
 *       READY state
 * 4) Anyhow, send reply hello
 */
int
usocklnd_passiveconn_hellorecv(usock_conn_t *conn)
{
	ksock_hello_msg_t *hello = conn->uc_rx_hello;
	int		   type;
	int		   idx;
	int		   rc;
	usock_peer_t	  *peer;
	lnet_ni_t	  *ni        = conn->uc_ni;
	__u32		   peer_ip   = conn->uc_peer_ip;
	__u16		   peer_port = conn->uc_peer_port;

	/* don't know parent peer yet and not zombie */
	LASSERT (conn->uc_peer == NULL &&
		 ni != NULL);

	/* don't know peer's nid and incarnation yet */
	if (peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
		/* do not trust liblustre clients */
		conn->uc_peerid.pid = peer_port | LNET_PID_USERFLAG;
		conn->uc_peerid.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
						 peer_ip);
		if (hello->kshm_ctype != SOCKLND_CONN_ANY) {
			lnet_ni_decref(ni);
			conn->uc_ni = NULL;
			CERROR("Refusing to accept connection of type=%d from "
			       "userspace process %u.%u.%u.%u:%d\n",
			       hello->kshm_ctype,
			       HIPQUAD(peer_ip), peer_port);
			return -EINVAL;
		}
	} else {
		conn->uc_peerid.pid = hello->kshm_src_pid;
		conn->uc_peerid.nid = hello->kshm_src_nid;
	}

	conn->uc_type = type = usocklnd_invert_type(hello->kshm_ctype);

	rc = usocklnd_find_or_create_peer(ni, conn->uc_peerid, &peer);
	if (rc) {
		lnet_ni_decref(ni);
		conn->uc_ni = NULL;
		return rc;
	}

	peer->up_last_alive = cfs_time_current();

	idx = usocklnd_type2idx(conn->uc_type);

	/* safely check whether we're first */
	pthread_mutex_lock(&peer->up_lock);

	usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, NULL);

	if (peer->up_conns[idx] == NULL) {
		peer->up_last_alive = cfs_time_current();
		conn->uc_peer = peer;
		conn->uc_ni = NULL;
		usocklnd_link_conn_to_peer(conn, peer, idx);
		usocklnd_conn_addref(conn);
	} else {
		usocklnd_peer_decref(peer);

		/* Resolve race in favour of higher NID */
		if (conn->uc_peerid.nid < conn->uc_ni->ni_nid) {
			/* make us zombie */
			conn->uc_ni = NULL;
			type = SOCKLND_CONN_NONE;
		}

		/* if conn->uc_peerid.nid > conn->uc_ni->ni_nid,
		 * postpone race resolution till READY state
		 * (hopefully that conn[idx] will die because of
		 * incoming hello of CONN_NONE type) */
	}

	pthread_mutex_unlock(&peer->up_lock);

	/* allocate and initialize fake tx with hello */
	conn->uc_tx_hello = usocklnd_create_hello_tx(ni, type,
						     conn->uc_peerid.nid);
	if (conn->uc_ni == NULL)
		lnet_ni_decref(ni);

	if (conn->uc_tx_hello == NULL)
		return -ENOMEM;

	/* rc == 0 */
	pthread_mutex_lock(&conn->uc_lock);
	if (conn->uc_state == UC_DEAD)
		goto passive_hellorecv_done;

	conn->uc_state = UC_SENDING_HELLO;
	conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout);
	conn->uc_tx_flag = 1;
	rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLOUT);

  passive_hellorecv_done:
	pthread_mutex_unlock(&conn->uc_lock);
	return rc;
}
/*
 * we allocate the requested pages atomically.
 */
int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
{
	cfs_waitlink_t	waitlink;
	unsigned long	this_idle = -1;
	cfs_time_t	tick = 0;
	long		now;
	int		p_idx, g_idx;
	int		i;

	LASSERT(desc->bd_iov_count > 0);
	LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);

	/* resent bulk, enc iov might have been allocated previously */
	if (desc->bd_enc_iov != NULL)
		return 0;

	OBD_ALLOC(desc->bd_enc_iov,
		  desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
	if (desc->bd_enc_iov == NULL)
		return -ENOMEM;

	cfs_spin_lock(&page_pools.epp_lock);

	page_pools.epp_st_access++;
again:
	if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
		if (tick == 0)
			tick = cfs_time_current();

		now = cfs_time_current_sec();

		page_pools.epp_st_missings++;
		page_pools.epp_pages_short += desc->bd_iov_count;

		if (enc_pools_should_grow(desc->bd_iov_count, now)) {
			page_pools.epp_growing = 1;

			cfs_spin_unlock(&page_pools.epp_lock);
			enc_pools_add_pages(page_pools.epp_pages_short / 2);
			cfs_spin_lock(&page_pools.epp_lock);

			page_pools.epp_growing = 0;

			enc_pools_wakeup();
		} else {
			if (++page_pools.epp_waitqlen >
			    page_pools.epp_st_max_wqlen)
				page_pools.epp_st_max_wqlen =
						page_pools.epp_waitqlen;

			cfs_set_current_state(CFS_TASK_UNINT);
			cfs_waitlink_init(&waitlink);
			cfs_waitq_add(&page_pools.epp_waitq, &waitlink);

			cfs_spin_unlock(&page_pools.epp_lock);
			cfs_waitq_wait(&waitlink, CFS_TASK_UNINT);
			cfs_waitq_del(&page_pools.epp_waitq, &waitlink);
			LASSERT(page_pools.epp_waitqlen > 0);
			cfs_spin_lock(&page_pools.epp_lock);
			page_pools.epp_waitqlen--;
		}

		LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
		page_pools.epp_pages_short -= desc->bd_iov_count;

		this_idle = 0;
		goto again;
	}

	/* record max wait time */
	if (unlikely(tick != 0)) {
		tick = cfs_time_current() - tick;
		if (tick > page_pools.epp_st_max_wait)
			page_pools.epp_st_max_wait = tick;
	}

	/* proceed with rest of allocation */
	page_pools.epp_free_pages -= desc->bd_iov_count;

	p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
	g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;

	for (i = 0; i < desc->bd_iov_count; i++) {
		LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
		desc->bd_enc_iov[i].kiov_page =
					page_pools.epp_pools[p_idx][g_idx];
		page_pools.epp_pools[p_idx][g_idx] = NULL;

		if (++g_idx == PAGES_PER_POOL) {
			p_idx++;
			g_idx = 0;
		}
	}

	if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
		page_pools.epp_st_lowfree = page_pools.epp_free_pages;

	/*
	 * new idle index = (old * weight + new) / (weight + 1)
	 */
	if (this_idle == -1) {
		this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
			    page_pools.epp_total_pages;
	}
	page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
				   this_idle) /
				  (IDLE_IDX_WEIGHT + 1);

	page_pools.epp_last_access = cfs_time_current_sec();

	cfs_spin_unlock(&page_pools.epp_lock);
	return 0;
}
int
libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
	size_t		rcvlen;
	int		rc;
	cfs_duration_t	to = cfs_time_seconds(timeout);
	cfs_time_t	then;
	struct timeval	tv;

	LASSERT(nob > 0);

	for (;;) {
		struct iovec  iov = {
			.iov_base = buffer,
			.iov_len  = nob
		};
		struct msghdr msg = {
			.msg_name	= NULL,
			.msg_namelen	= 0,
			.msg_iov	= &iov,
			.msg_iovlen	= 1,
			.msg_control	= NULL,
			.msg_controllen	= 0,
			.msg_flags	= 0,
		};

		cfs_duration_usec(to, &tv);
		rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO,
				      &tv, sizeof(tv));
		if (rc != 0) {
			CERROR("Can't set socket recv timeout "
			       "%ld.%06d: %d\n",
			       (long)tv.tv_sec, (int)tv.tv_usec, rc);
			return rc;
		}

		then = cfs_time_current();
		rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen);
		to -= cfs_time_current() - then;

		if (rc != 0 && rc != -EWOULDBLOCK)
			return rc;
		if (rcvlen == nob)
			return 0;

		if (to <= 0)
			return -EAGAIN;

		buffer = ((char *)buffer) + rcvlen;
		nob -= rcvlen;
	}
	return 0;
}

int
libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout)
{
	size_t		sndlen;
	int		rc;
	cfs_duration_t	to = cfs_time_seconds(timeout);
	cfs_time_t	then;
	struct timeval	tv;

	LASSERT(nob > 0);

	for (;;) {
		struct iovec  iov = {
			.iov_base = buffer,
			.iov_len  = nob
		};
		struct msghdr msg = {
			.msg_name	= NULL,
			.msg_namelen	= 0,
			.msg_iov	= &iov,
			.msg_iovlen	= 1,
			.msg_control	= NULL,
			.msg_controllen	= 0,
			.msg_flags	= (timeout == 0) ? MSG_DONTWAIT : 0,
		};

		if (timeout != 0) {
			cfs_duration_usec(to, &tv);
			rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET,
					      SO_SNDTIMEO, &tv, sizeof(tv));
			if (rc != 0) {
				CERROR("Can't set socket send timeout "
				       "%ld.%06d: %d\n",
				       (long)tv.tv_sec, (int)tv.tv_usec, rc);
				return rc;
			}
		}

		then = cfs_time_current();
		rc = -sock_send(C2B_SOCK(sock), &msg,
				((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen);
		to -= cfs_time_current() - then;

		if (rc != 0 && rc != -EWOULDBLOCK)
			return rc;
		if (sndlen == nob)
			return 0;

		if (to <= 0)
			return -EAGAIN;

		buffer = ((char *)buffer) + sndlen;
		nob -= sndlen;
	}
	return 0;
}

int
libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port)
{
	struct sockaddr_in sin;
	int		   rc;

	if (remote != 0)
		/* Get remote address */
		rc = -sock_getpeername(C2B_SOCK(sock),
				       (struct sockaddr *)&sin, sizeof(sin));
	else
		/* Get local address */
		rc = -sock_getsockname(C2B_SOCK(sock),
				       (struct sockaddr *)&sin, sizeof(sin));
	if (rc != 0) {
		CERROR ("Error %d getting sock %s IP/port\n",
			rc, remote ? "peer" : "local");
		return rc;
	}

	if (ip != NULL)
		*ip = ntohl (sin.sin_addr.s_addr);

	if (port != NULL)
		*port = ntohs (sin.sin_port);

	return 0;
}

int
libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize)
{
	int	option;
	int	rc;

	if (txbufsize != 0) {
		option = txbufsize;
		rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
				      (char *)&option, sizeof(option));
		if (rc != 0) {
			CERROR ("Can't set send buffer %d: %d\n",
				option, rc);
			return (rc);
		}
	}

	if (rxbufsize != 0) {
		option = rxbufsize;
		rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
				       (char *)&option, sizeof(option));
		if (rc != 0) {
			CERROR ("Can't set receive buffer %d: %d\n",
				option, rc);
			return (rc);
		}
	}
	return 0;
}
int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_remote_perm *lrp = NULL, *tmp = NULL;
	struct hlist_head *head, *perm_hash = NULL;

	LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT);

#if 0
	if (perm->rp_uid != current->uid ||
	    perm->rp_gid != current->gid ||
	    perm->rp_fsuid != current->fsuid ||
	    perm->rp_fsgid != current->fsgid) {
		/* user might setxid in this small period */
		CDEBUG(D_SEC,
		       "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n",
		       perm->rp_uid, perm->rp_gid, perm->rp_fsuid,
		       perm->rp_fsgid, current->uid, current->gid,
		       current->fsuid, current->fsgid);
		return -EAGAIN;
	}
#endif

	if (!lli->lli_remote_perms) {
		perm_hash = alloc_rmtperm_hash();
		if (perm_hash == NULL) {
			CERROR("alloc lli_remote_perms failed!\n");
			return -ENOMEM;
		}
	}

	spin_lock(&lli->lli_lock);

	if (!lli->lli_remote_perms)
		lli->lli_remote_perms = perm_hash;
	else if (perm_hash)
		free_rmtperm_hash(perm_hash);

	head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid);

again:
	hlist_for_each_entry(tmp, head, lrp_list) {
		if (tmp->lrp_uid != perm->rp_uid)
			continue;
		if (tmp->lrp_gid != perm->rp_gid)
			continue;
		if (tmp->lrp_fsuid != perm->rp_fsuid)
			continue;
		if (tmp->lrp_fsgid != perm->rp_fsgid)
			continue;
		if (lrp)
			free_ll_remote_perm(lrp);
		lrp = tmp;
		break;
	}

	if (!lrp) {
		spin_unlock(&lli->lli_lock);
		lrp = alloc_ll_remote_perm();
		if (!lrp) {
			CERROR("alloc memory for ll_remote_perm failed!\n");
			return -ENOMEM;
		}
		spin_lock(&lli->lli_lock);
		goto again;
	}

	lrp->lrp_access_perm = perm->rp_access_perm;
	if (lrp != tmp) {
		lrp->lrp_uid	= perm->rp_uid;
		lrp->lrp_gid	= perm->rp_gid;
		lrp->lrp_fsuid	= perm->rp_fsuid;
		lrp->lrp_fsgid	= perm->rp_fsgid;
		hlist_add_head(&lrp->lrp_list, head);
	}
	lli->lli_rmtperm_time = cfs_time_current();
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n",
	       lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid,
	       lrp->lrp_fsgid, lrp->lrp_access_perm);

	return 0;
}
/*
 * statfs
 */
static inline int osp_statfs_need_update(struct osp_device *d)
{
	return !cfs_time_before(cfs_time_current(),
				d->opd_statfs_fresh_till);
}
static inline int capa_is_to_expire(struct obd_capa *ocapa)
{
	return cfs_time_beforeq(capa_renewal_time(ocapa),
				cfs_time_current());
}
void osc_object_set_contended(struct osc_object *obj)
{
	obj->oo_contention_time = cfs_time_current();
	/* mb(); */
	obj->oo_contended = 1;
}
int
LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms,
	    lnet_event_t *event, int *which)
{
	int		i;
	int		rc;
#ifdef __KERNEL__
	cfs_waitlink_t	wl;
	cfs_time_t	now;
#else
	struct timeval	then;
	struct timeval	now;
# ifdef HAVE_LIBPTHREAD
	struct timespec	ts;
# endif
	lnet_ni_t	*eqwaitni = the_lnet.ln_eqwaitni;
#endif
	ENTRY;

	LASSERT (the_lnet.ln_init);
	LASSERT (the_lnet.ln_refcount > 0);

	if (neq < 1)
		RETURN(-ENOENT);

	LNET_LOCK();

	for (;;) {
#ifndef __KERNEL__
		LNET_UNLOCK();

		/* Recursion breaker */
		if (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
		    !LNetHandleIsEqual(eventqs[0], the_lnet.ln_rc_eqh))
			lnet_router_checker();

		LNET_LOCK();
#endif
		for (i = 0; i < neq; i++) {
			lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]);

			if (eq == NULL) {
				LNET_UNLOCK();
				RETURN(-ENOENT);
			}

			rc = lib_get_event (eq, event);
			if (rc != 0) {
				LNET_UNLOCK();
				*which = i;
				RETURN(rc);
			}
		}

#ifdef __KERNEL__
		if (timeout_ms == 0) {
			LNET_UNLOCK();
			RETURN (0);
		}

		cfs_waitlink_init(&wl);
		set_current_state(TASK_INTERRUPTIBLE);
		cfs_waitq_add(&the_lnet.ln_waitq, &wl);

		LNET_UNLOCK();

		if (timeout_ms < 0) {
			cfs_waitq_wait (&wl, CFS_TASK_INTERRUPTIBLE);
		} else {
			struct timeval tv;

			now = cfs_time_current();
			cfs_waitq_timedwait(&wl, CFS_TASK_INTERRUPTIBLE,
					    cfs_time_seconds(timeout_ms)/1000);
			cfs_duration_usec(cfs_time_sub(cfs_time_current(),
						       now), &tv);
			timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000;
			if (timeout_ms < 0)
				timeout_ms = 0;
		}

		LNET_LOCK();
		cfs_waitq_del(&the_lnet.ln_waitq, &wl);
#else
		if (eqwaitni != NULL) {
			/* I have a single NI that I have to call into, to get
			 * events queued, or to block. */
			lnet_ni_addref_locked(eqwaitni);
			LNET_UNLOCK();

			if (timeout_ms <= 0) {
				(eqwaitni->ni_lnd->lnd_wait)(eqwaitni,
							     timeout_ms);
			} else {
				gettimeofday(&then, NULL);

				(eqwaitni->ni_lnd->lnd_wait)(eqwaitni,
							     timeout_ms);

				gettimeofday(&now, NULL);
				timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
					      (now.tv_usec - then.tv_usec) / 1000;
				if (timeout_ms < 0)
					timeout_ms = 0;
			}

			LNET_LOCK();
			lnet_ni_decref_locked(eqwaitni);

			/* don't call into eqwaitni again if timeout has
			 * expired */
			if (timeout_ms == 0)
				eqwaitni = NULL;

			continue; /* go back and check for events */
		}

		if (timeout_ms == 0) {
			LNET_UNLOCK();
			RETURN (0);
		}

# ifndef HAVE_LIBPTHREAD
		/* If I'm single-threaded, LNET fails at startup if it can't
		 * set the_lnet.ln_eqwaitni correctly. */
		LBUG();
# else
		if (timeout_ms < 0) {
			pthread_cond_wait(&the_lnet.ln_cond,
					  &the_lnet.ln_lock);
		} else {
			gettimeofday(&then, NULL);

			ts.tv_sec = then.tv_sec + timeout_ms/1000;
			ts.tv_nsec = then.tv_usec * 1000 +
				     (timeout_ms%1000) * 1000000;
			if (ts.tv_nsec >= 1000000000) {
				ts.tv_sec++;
				ts.tv_nsec -= 1000000000;
			}

			pthread_cond_timedwait(&the_lnet.ln_cond,
					       &the_lnet.ln_lock, &ts);

			gettimeofday(&now, NULL);
			timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
				      (now.tv_usec - then.tv_usec) / 1000;

			if (timeout_ms < 0)
				timeout_ms = 0;
		}
# endif
#endif
	}
}
int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq,
			   struct lustre_quota_ctxt *qctxt)
{
	struct lustre_qunit_size *lqs = NULL;
	unsigned long		 *unit, *tune;
	signed long		  tmp = 0;
	cfs_time_t		  time_limit = 0, *shrink;
	int			  i, rc = 0;
	ENTRY;

	LASSERT(qctxt);
	lqs = quota_search_lqs(LQS_KEY(QAQ_IS_GRP(oqaq), oqaq->qaq_id),
			       qctxt, QAQ_IS_CREATE_LQS(oqaq) ? 1 : 0);
	if (lqs == NULL || IS_ERR(lqs)) {
		CERROR("fail to find a lqs for %sid %u!\n",
		       QAQ_IS_GRP(oqaq) ? "g" : "u", oqaq->qaq_id);
		RETURN(PTR_ERR(lqs));
	}

	CDEBUG(D_QUOTA, "before: bunit: %lu, iunit: %lu.\n",
	       lqs->lqs_bunit_sz, lqs->lqs_iunit_sz);
	cfs_spin_lock(&lqs->lqs_lock);
	for (i = 0; i < 2; i++) {
		if (i == 0 && !QAQ_IS_ADJBLK(oqaq))
			continue;

		if (i == 1 && !QAQ_IS_ADJINO(oqaq))
			continue;

		tmp = i ? (lqs->lqs_iunit_sz - oqaq->qaq_iunit_sz) :
			  (lqs->lqs_bunit_sz - oqaq->qaq_bunit_sz);
		shrink = i ? &lqs->lqs_last_ishrink :
			     &lqs->lqs_last_bshrink;
		time_limit = cfs_time_add(i ? lqs->lqs_last_ishrink :
					      lqs->lqs_last_bshrink,
				cfs_time_seconds(qctxt->lqc_switch_seconds));
		unit = i ? &lqs->lqs_iunit_sz : &lqs->lqs_bunit_sz;
		tune = i ? &lqs->lqs_itune_sz : &lqs->lqs_btune_sz;

		/* quota master shrinks */
		if (qctxt->lqc_handler && tmp > 0)
			*shrink = cfs_time_current();

		/* quota master enlarges */
		if (qctxt->lqc_handler && tmp < 0) {
			/* in case of ping-pong effect, don't enlarge lqs
			 * in a short time */
			if (*shrink &&
			    cfs_time_before(cfs_time_current(), time_limit))
				tmp = 0;
		}

		/* when setquota, don't enlarge lqs b=18616 */
		if (QAQ_IS_CREATE_LQS(oqaq) && tmp < 0)
			tmp = 0;

		if (tmp != 0) {
			*unit = i ? oqaq->qaq_iunit_sz : oqaq->qaq_bunit_sz;
			*tune = (*unit) / 2;
		}

		if (tmp > 0)
			rc |= i ? LQS_INO_DECREASE : LQS_BLK_DECREASE;
		if (tmp < 0)
			rc |= i ? LQS_INO_INCREASE : LQS_BLK_INCREASE;
	}
	cfs_spin_unlock(&lqs->lqs_lock);
	CDEBUG(D_QUOTA, "after: bunit: %lu, iunit: %lu.\n",
	       lqs->lqs_bunit_sz, lqs->lqs_iunit_sz);

	lqs_putref(lqs);

	RETURN(rc);
}
static int quota_chk_acq_common(struct obd_device *obd, struct obd_export *exp,
				const unsigned int id[], int pending[],
				int count, quota_acquire acquire,
				struct obd_trans_info *oti, int isblk,
				struct inode *inode, int frags)
{
	struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
	struct timeval		  work_start;
	struct timeval		  work_end;
	long			  timediff;
	struct l_wait_info	  lwi = { 0 };
	int			  rc = 0, cycle = 0, count_err = 1;
	ENTRY;

	if (!quota_is_set(obd, id, isblk ? QB_SET : QI_SET))
		RETURN(0);

	if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
		/* If the client has been evicted or if it
		 * timed out and tried to reconnect already,
		 * abort the request immediately */
		RETURN(-ENOTCONN);

	CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
	pending[USRQUOTA] = pending[GRPQUOTA] = 0;
	/* Unfortunately, if quota master is too busy to handle the
	 * pre-dqacq in time and quota hash on ost is used up, we
	 * have to wait for the completion of in flight dqacq/dqrel,
	 * in order to get enough quota for write b=12588 */
	cfs_gettimeofday(&work_start);
	while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk,
					inode, frags)) &
	       QUOTA_RET_ACQUOTA) {

		cfs_spin_lock(&qctxt->lqc_lock);
		if (!qctxt->lqc_import && oti) {
			cfs_spin_unlock(&qctxt->lqc_lock);
			LASSERT(oti && oti->oti_thread &&
				oti->oti_thread->t_watchdog);
			lc_watchdog_disable(oti->oti_thread->t_watchdog);
			CDEBUG(D_QUOTA, "sleep for quota master\n");
			l_wait_event(qctxt->lqc_wait_for_qmaster,
				     check_qm(qctxt), &lwi);
			CDEBUG(D_QUOTA, "wake up when quota master is back\n");
			lc_watchdog_touch(oti->oti_thread->t_watchdog,
				CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
		} else {
			cfs_spin_unlock(&qctxt->lqc_lock);
		}

		cycle++;
		if (isblk)
			OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
		/* after acquire(), we should run quota_check_common again
		 * so that we confirm there are enough quota to finish write */
		rc = acquire(obd, id, oti, isblk);

		/* please reference to dqacq_completion for the below */
		/* a new request is finished, try again */
		if (rc == QUOTA_REQ_RETURNED) {
			CDEBUG(D_QUOTA, "finish a quota req, try again\n");
			continue;
		}

		/* it is out of quota already */
		if (rc == -EDQUOT) {
			CDEBUG(D_QUOTA, "out of quota, return -EDQUOT\n");
			break;
		}

		/* Related quota has been disabled by master, but enabled by
		 * slave, do not try again. */
		if (unlikely(rc == -ESRCH)) {
			CERROR("mismatched quota configuration, stop try.\n");
			break;
		}

		if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
			/* The client has been evicted or tried to
			 * reconnect already, abort the request */
			RETURN(-ENOTCONN);

		/* -EBUSY and others, wait a second and try again */
		if (rc < 0) {
			cfs_waitq_t	   waitq;
			struct l_wait_info lwi;

			if (oti && oti->oti_thread &&
			    oti->oti_thread->t_watchdog)
				lc_watchdog_touch(oti->oti_thread->t_watchdog,
					CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
			CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
			       count_err++);

			cfs_waitq_init(&waitq);
			lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)),
					  NULL, NULL);
			l_wait_event(waitq, 0, &lwi);
		}

		if (rc < 0 || cycle % 10 == 0) {
			cfs_spin_lock(&last_print_lock);
			if (last_print == 0 ||
			    cfs_time_before((last_print + cfs_time_seconds(30)),
					    cfs_time_current())) {
				last_print = cfs_time_current();
				cfs_spin_unlock(&last_print_lock);
				CWARN("still haven't managed to acquire quota "
				      "space from the quota master after %d "
				      "retries (err=%d, rc=%d)\n",
				      cycle, count_err - 1, rc);
			} else {
				cfs_spin_unlock(&last_print_lock);
			}
		}

		CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
		       cycle);
	}
	cfs_gettimeofday(&work_end);
	timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
	lprocfs_counter_add(qctxt->lqc_stats,
			    isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
				    LQUOTA_WAIT_FOR_CHK_INO,
			    timediff);

	if (rc > 0)
		rc = 0;
	RETURN(rc);
}
/*
 * called to reserve object in the pool
 * return codes:
 *  ENOSPC - no space on the corresponding OST
 *  EAGAIN - precreation is in progress, try later
 *  EIO    - no access to OST
 */
int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
{
	struct l_wait_info	 lwi;
	cfs_time_t		 expire = cfs_time_shift(obd_timeout);
	int			 precreated, rc;
	ENTRY;

	LASSERTF(osp_objs_precreated(env, d) >= 0, "Last created FID "DFID
		 "Next FID "DFID"\n", PFID(&d->opd_pre_last_created_fid),
		 PFID(&d->opd_pre_used_fid));

	/*
	 * wait till:
	 *  - preallocation is done
	 *  - no free space expected soon
	 *  - can't connect to OST for too long (obd_timeout)
	 *  - OST can allocate fid sequence.
	 */
	while ((rc = d->opd_pre_status) == 0 || rc == -ENOSPC ||
		rc == -ENODEV || rc == -EAGAIN || rc == -ENOTCONN) {

		/*
		 * increase number of precreations
		 */
		precreated = osp_objs_precreated(env, d);
		if (d->opd_pre_grow_count < d->opd_pre_max_grow_count &&
		    d->opd_pre_grow_slow == 0 &&
		    precreated <= (d->opd_pre_grow_count / 4 + 1)) {
			spin_lock(&d->opd_pre_lock);
			d->opd_pre_grow_slow = 1;
			d->opd_pre_grow_count *= 2;
			spin_unlock(&d->opd_pre_lock);
		}

		spin_lock(&d->opd_pre_lock);
		precreated = osp_objs_precreated(env, d);
		if (precreated > d->opd_pre_reserved &&
		    !d->opd_pre_recovering) {
			d->opd_pre_reserved++;
			spin_unlock(&d->opd_pre_lock);
			rc = 0;

			/* XXX: don't wake up if precreation is in progress */
			if (osp_precreate_near_empty_nolock(env, d) &&
			    !osp_precreate_end_seq_nolock(env, d))
				wake_up(&d->opd_pre_waitq);

			break;
		}
		spin_unlock(&d->opd_pre_lock);

		/*
		 * all precreated objects have been used and no-space
		 * status leaves us no chance to succeed very soon
		 * but if there is destroy in progress, then we should
		 * wait till that is done - some space might be released
		 */
		if (unlikely(rc == -ENOSPC)) {
			if (d->opd_syn_changes) {
				/* force local commit to release space */
				dt_commit_async(env, d->opd_storage);
			}
			if (d->opd_syn_rpc_in_progress) {
				/* just wait till destroys are done */
				/* see l_wait_event() few lines below */
			}
			if (d->opd_syn_changes +
			    d->opd_syn_rpc_in_progress == 0) {
				/* no hope for free space */
				break;
			}
		}

		/* XXX: don't wake up if precreation is in progress */
		wake_up(&d->opd_pre_waitq);

		lwi = LWI_TIMEOUT(expire - cfs_time_current(),
				  osp_precreate_timeout_condition, d);
		if (cfs_time_aftereq(cfs_time_current(), expire)) {
			rc = -ETIMEDOUT;
			break;
		}

		l_wait_event(d->opd_pre_user_waitq,
			     osp_precreate_ready_condition(env, d), &lwi);
	}

	RETURN(rc);
}