/**
 * Server side bulk abort. Idempotent. Not thread-safe (i.e. only
 * serialises with completion callback).
 */
void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
{
        struct l_wait_info lwi;
        int rc;

        LASSERT(!in_interrupt());               /* might sleep */

        if (!ptlrpc_server_bulk_active(desc))   /* completed or */
                return;                         /* never started */

        /* We used to poison the pages with 0xab here because we did not want
         * to send any meaningful data over the wire for evicted clients
         * (bug 9297).  However, this is no longer safe now that we use the
         * page cache on the OSS (bug 20560). */

        /* The unlink ensures the callback happens ASAP and is the last one.
         * If it fails, it must be because completion just happened, but we
         * must still l_wait_event() in this case, to give liblustre a chance
         * to run server_bulk_callback(). */
        mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

        for (;;) {
                /* Network access will complete in finite time but the HUGE
                 * timeout lets us CWARN for visibility of sluggish NALs. */
                lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
                                           cfs_time_seconds(1), NULL, NULL);
                rc = l_wait_event(desc->bd_waitq,
                                  !ptlrpc_server_bulk_active(desc), &lwi);
                if (rc == 0)
                        return;

                LASSERT(rc == -ETIMEDOUT);
                CWARN("Unexpectedly long timeout: desc %p\n", desc);
        }
}
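/*
 * Editor's sketch (not part of the original source): the loop above is the
 * common "unlink, then wait with a bounded, noisy timeout" idiom.  A minimal
 * hypothetical helper showing the same LWI_TIMEOUT_INTERVAL / l_wait_event
 * pattern; wait_secs, the waitq and the done() predicate are illustrative
 * assumptions only.
 */
static void example_wait_until_done(wait_queue_head_t *waitq,
                                    int (*done)(void *arg), void *arg,
                                    int wait_secs)
{
        struct l_wait_info lwi;
        int rc;

        for (;;) {
                /* wake up every second to re-check; warn after wait_secs */
                lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(wait_secs),
                                           cfs_time_seconds(1), NULL, NULL);
                rc = l_wait_event(*waitq, done(arg), &lwi);
                if (rc == 0)
                        return;         /* condition became true */

                LASSERT(rc == -ETIMEDOUT);
                CWARN("still waiting after %d seconds\n", wait_secs);
        }
}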
/** * Disconnect a bulk desc from the network. Idempotent. Not * thread-safe (i.e. only interlocks with completion callback). * Returns 1 on success or 0 if network unregistration failed for whatever * reason. */ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; wait_queue_head_t *wq; struct l_wait_info lwi; int rc; LASSERT(!in_interrupt()); /* might sleep */ /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0) req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK; if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ return 1; /* never registered */ LASSERT(desc->bd_req == req); /* bd_req NULL until registered */ /* the unlink ensures the callback happens ASAP and is the last * one. If it fails, it must be because completion just happened, * but we must still l_wait_event() in this case to give liblustre * a chance to run client_bulk_callback() */ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ return 1; /* never registered */ /* Move to "Unregistering" phase as bulk was not unlinked yet. */ ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK); /* Do not wait for unlink to finish. */ if (async) return 0; if (req->rq_set) wq = &req->rq_set->set_waitq; else wq = &req->rq_reply_waitq; for (;;) { /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish LNDs */ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK), cfs_time_seconds(1), NULL, NULL); rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi); if (rc == 0) { ptlrpc_rqphase_move(req, req->rq_next_phase); return 1; } LASSERT(rc == -ETIMEDOUT); DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p", desc); } return 0; }
/**
 * Callback handler for receiving incoming glimpse ASTs.
 *
 * This can only happen on the client side.  After handling the glimpse AST,
 * we also consider dropping the lock here if it has been unused locally for
 * a long time.
 */
static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
                                    struct ldlm_namespace *ns,
                                    struct ldlm_request *dlm_req,
                                    struct ldlm_lock *lock)
{
        int rc = -ENOSYS;

        LDLM_DEBUG(lock, "client glimpse AST callback handler");
        if (lock->l_glimpse_ast != NULL)
                rc = lock->l_glimpse_ast(lock, req);

        if (req->rq_repmsg != NULL) {
                ptlrpc_reply(req);
        } else {
                req->rq_status = rc;
                ptlrpc_error(req);
        }

        lock_res_and_lock(lock);
        if (lock->l_granted_mode == LCK_PW && !lock->l_readers &&
            !lock->l_writers &&
            cfs_time_after(cfs_time_current(),
                           cfs_time_add(lock->l_last_used,
                                        cfs_time_seconds(10)))) {
                unlock_res_and_lock(lock);
                if (ldlm_bl_to_thread_lock(ns, NULL, lock))
                        ldlm_handle_bl_callback(ns, NULL, lock);
                return;
        }
        unlock_res_and_lock(lock);
        LDLM_LOCK_RELEASE(lock);
}
static int proc_trigger_stack_reset(struct ctl_table *table, int write,
                                    void __user *buffer, size_t *lenp,
                                    loff_t *ppos)
{
        int rc = 0;
        int i = 1;
        kgn_device_t *dev;
        ENTRY;

        if (!write) {
                /* read */
                rc = proc_dointvec(table, write, buffer, lenp, ppos);
                RETURN(rc);
        }

        /* only device 0 gets the handle, see kgnilnd_dev_init */
        dev = &kgnilnd_data.kgn_devices[0];
        LASSERTF(dev != NULL, "dev 0 is NULL\n");

        kgnilnd_critical_error(dev->gnd_err_handle);

        /* Wait for the reset to complete.  This prevents any races in
         * testing where we'd immediately try to send traffic again. */
        while (kgnilnd_data.kgn_needs_reset != 0) {
                i++;
                LCONSOLE((((i) & (-i)) == i) ? D_WARNING : D_NET,
                         "Waiting for stack reset request to clear\n");
                cfs_pause(cfs_time_seconds(1 * i));
        }

        RETURN(rc);
}
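/*
 * Editor's note (sketch, not from the original source): the expression
 * (((i) & (-i)) == i) above is true only when i is a power of two, so the
 * console warning fires on iterations 2, 4, 8, 16, ... while the loop keeps
 * polling every iteration.  The test isolated as a hypothetical helper:
 */
static inline int example_is_power_of_two(int i)
{
        /* i & -i isolates the lowest set bit; it equals i only for 2^n */
        return i > 0 && (i & -i) == i;
}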
int osc_object_is_contended(struct osc_object *obj)
{
        struct osc_device *dev = lu2osc_dev(obj->oo_cl.co_lu.lo_dev);
        int osc_contention_time = dev->od_contention_time;
        unsigned long cur_time = cfs_time_current();
        unsigned long retry_time;

        if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION))
                return 1;

        if (!obj->oo_contended)
                return 0;

        /*
         * I like copy-paste.  The code is copied from
         * ll_file_is_contended.
         */
        retry_time = cfs_time_add(obj->oo_contention_time,
                                  cfs_time_seconds(osc_contention_time));
        if (cfs_time_after(cur_time, retry_time)) {
                osc_object_clear_contended(obj);
                return 0;
        }
        return 1;
}
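/*
 * Editor's sketch (not part of the original source): the check above keeps
 * an object marked contended for od_contention_time seconds after the last
 * contention event, then clears the flag.  The same window test written as a
 * hypothetical helper over jiffies values:
 */
static inline int example_window_expired(unsigned long marked_at,
                                         int window_seconds)
{
        unsigned long deadline = cfs_time_add(marked_at,
                                              cfs_time_seconds(window_seconds));

        /* true once "now" is strictly past marked_at + window */
        return cfs_time_after(cfs_time_current(), deadline);
}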
static int proc_console_min_delay_cs(struct ctl_table *table, int write,
                                     void __user *buffer, size_t *lenp,
                                     loff_t *ppos)
{
        int rc, min_delay_cs;
        struct ctl_table dummy = *table;
        long d;

        dummy.data = &min_delay_cs;
        dummy.proc_handler = &proc_dointvec;

        if (!write) { /* read */
                min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100);
                rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
                return rc;
        }

        /* write */
        min_delay_cs = 0;
        rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
        if (rc < 0)
                return rc;
        if (min_delay_cs <= 0)
                return -EINVAL;

        d = cfs_time_seconds(min_delay_cs) / 100;
        if (d == 0 || d > libcfs_console_max_delay)
                return -EINVAL;

        libcfs_console_min_delay = d;

        return rc;
}
void ptlrpc_ni_fini(void)
{
        wait_queue_head_t waitq;
        struct l_wait_info lwi;
        int rc;
        int retries;

        /* Wait for the event queue to become idle since there may still be
         * messages in flight with pending events (i.e. the fire-and-forget
         * messages == client requests and "non-difficult" server replies). */
        for (retries = 0;; retries++) {
                rc = LNetEQFree(ptlrpc_eq_h);
                switch (rc) {
                default:
                        LBUG();

                case 0:
                        LNetNIFini();
                        return;

                case -EBUSY:
                        if (retries != 0)
                                CWARN("Event queue still busy\n");

                        /* Wait for a bit */
                        init_waitqueue_head(&waitq);
                        lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL);
                        l_wait_event(waitq, 0, &lwi);
                        break;
                }
        }
        /* notreached */
}
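/*
 * Editor's sketch (not from the original source): l_wait_event() on a
 * freshly initialised, never-signalled waitqueue with an always-false
 * condition is the libcfs idiom for "sleep for N seconds"; the LWI_TIMEOUT
 * argument is the only thing that wakes the caller.  As a hypothetical
 * helper:
 */
static void example_sleep_seconds(int secs)
{
        wait_queue_head_t waitq;
        struct l_wait_info lwi;

        init_waitqueue_head(&waitq);            /* nobody ever wakes this */
        lwi = LWI_TIMEOUT(cfs_time_seconds(secs), NULL, NULL);
        l_wait_event(waitq, 0, &lwi);           /* returns after the timeout */
}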
void cfs_wi_shutdown(void)
{
        int i;

        if (cfs_wi_data.wi_scheds == NULL)
                return;

        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_shutdown(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        cfs_spin_lock(&cfs_wi_data.wi_glock);
        i = 2;
        while (cfs_wi_data.wi_nthreads != 0) {
                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
                       "waiting for %d threads to terminate\n",
                       cfs_wi_data.wi_nthreads);
                cfs_spin_unlock(&cfs_wi_data.wi_glock);
                cfs_pause(cfs_time_seconds(1));
                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
#endif

        LIBCFS_FREE(cfs_wi_data.wi_scheds,
                    cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
        return;
}
int LL_PROC_PROTO(proc_console_min_delay_cs)
{
        int rc, min_delay_cs;
        cfs_sysctl_table_t dummy = *table;
        cfs_duration_t d;

        dummy.data = &min_delay_cs;
        dummy.proc_handler = &proc_dointvec;

        if (!write) { /* read */
                min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100);
                rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
                return rc;
        }

        /* write */
        min_delay_cs = 0;
        rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
        if (rc < 0)
                return rc;
        if (min_delay_cs <= 0)
                return -EINVAL;

        d = cfs_time_seconds(min_delay_cs) / 100;
        if (d == 0 || d > libcfs_console_max_delay)
                return -EINVAL;

        libcfs_console_min_delay = d;

        return rc;
}
static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev, struct lu_fid *fid, uint64_t *oid) { struct l_wait_info lwi = { 0 }; struct lustre_scrub *scrub = &dev->od_scrub; struct ptlrpc_thread *thread = &scrub->os_thread; struct osd_otable_it *it = dev->od_otable_it; struct lustre_mdt_attrs *lma = NULL; nvlist_t *nvbuf = NULL; int size = 0; int rc = 0; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) { lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL); if (likely(lwi.lwi_timeout > 0)) { l_wait_event(thread->t_ctl_waitq, !list_empty(&scrub->os_inconsistent_items) || !thread_is_running(thread), &lwi); if (unlikely(!thread_is_running(thread))) RETURN(SCRUB_NEXT_EXIT); } } if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) { spin_lock(&scrub->os_lock); thread_set_flags(thread, SVC_STOPPING); spin_unlock(&scrub->os_lock); RETURN(SCRUB_NEXT_CRASH); } if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_FATAL)) RETURN(SCRUB_NEXT_FATAL); again: if (nvbuf) { nvlist_free(nvbuf); nvbuf = NULL; lma = NULL; } if (!list_empty(&scrub->os_inconsistent_items)) { spin_lock(&scrub->os_lock); if (likely(!list_empty(&scrub->os_inconsistent_items))) { struct osd_inconsistent_item *oii; oii = list_entry(scrub->os_inconsistent_items.next, struct osd_inconsistent_item, oii_list); *fid = oii->oii_cache.oic_fid; *oid = oii->oii_cache.oic_dnode; scrub->os_in_prior = 1; spin_unlock(&scrub->os_lock); GOTO(out, rc = 0); } spin_unlock(&scrub->os_lock); }
/* Read from wire as much data as possible.
 * Returns 0 or 1 on success, <0 if error or EOF.
 * 0 means a partial read, 1 means complete. */
int usocklnd_read_data(usock_conn_t *conn)
{
        struct iovec *iov;
        int nob;
        cfs_time_t t;

        LASSERT(conn->uc_rx_nob_wanted != 0);

        do {
                usock_peer_t *peer = conn->uc_peer;

                LASSERT(conn->uc_rx_niov > 0);

                nob = libcfs_sock_readv(conn->uc_sock,
                                        conn->uc_rx_iov, conn->uc_rx_niov);
                if (nob <= 0) { /* read nothing or error */
                        if (nob < 0)
                                conn->uc_errored = 1;
                        return nob;
                }

                LASSERT(nob <= conn->uc_rx_nob_wanted);
                conn->uc_rx_nob_wanted -= nob;
                conn->uc_rx_nob_left -= nob;
                t = cfs_time_current();
                conn->uc_rx_deadline =
                        cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout));
                if (peer != NULL)
                        peer->up_last_alive = t;

                /* "consume" iov */
                iov = conn->uc_rx_iov;
                do {
                        LASSERT(conn->uc_rx_niov > 0);

                        if (nob < iov->iov_len) {
                                iov->iov_base = (void *)(((unsigned long)
                                                (iov->iov_base)) + nob);
                                iov->iov_len -= nob;
                                break;
                        }

                        nob -= iov->iov_len;
                        conn->uc_rx_iov = ++iov;
                        conn->uc_rx_niov--;
                } while (nob != 0);
        } while (conn->uc_rx_nob_wanted != 0);

        return 1; /* read complete */
}
int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
{
        int ret;

        ret = __cfs_fail_check_set(id, value, set);
        if (ret && likely(ms > 0)) {
                CERROR("cfs_fail_timeout id %x sleeping for %dms\n", id, ms);
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(cfs_time_seconds(ms) / 1000);
                CERROR("cfs_fail_timeout id %x awake\n", id);
        }
        return ret;
}
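/*
 * Editor's note (sketch, not from the original source): cfs_time_seconds()
 * converts a count of seconds into the scheduler's internal units (jiffies),
 * so cfs_time_seconds(ms) / 1000 above yields roughly "ms" milliseconds
 * worth of jiffies.  Assuming the classical "seconds * HZ" definition:
 *
 *      cfs_time_seconds(ms) / 1000 == (ms * HZ) / 1000 ~= msecs_to_jiffies(ms)
 *
 * A hypothetical helper making the intent explicit:
 */
static inline long example_ms_to_ticks(int ms)
{
        /* granularity is limited by HZ; very small values round down */
        return cfs_time_seconds(ms) / 1000;
}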
/* Send as much tx data as possible.
 * Returns 0 or 1 on success, <0 if fatal error.
 * 0 means a partial send or a non-fatal error, 1 means complete.
 * Rely on libcfs_sock_writev() for differentiating fatal and non-fatal
 * errors.  An error should be considered non-fatal if:
 * 1) it still makes sense to continue reading, and
 * 2) poll() will set up the POLLHUP|POLLERR flags anyway. */
int usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx)
{
        struct iovec *iov;
        int nob;
        cfs_time_t t;

        LASSERT(tx->tx_resid != 0);

        do {
                usock_peer_t *peer = conn->uc_peer;

                LASSERT(tx->tx_niov > 0);

                nob = libcfs_sock_writev(conn->uc_sock,
                                         tx->tx_iov, tx->tx_niov);
                if (nob < 0)
                        conn->uc_errored = 1;
                if (nob <= 0) /* write queue is flow-controlled or error */
                        return nob;

                LASSERT(nob <= tx->tx_resid);
                tx->tx_resid -= nob;
                t = cfs_time_current();
                conn->uc_tx_deadline =
                        cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout));
                if (peer != NULL)
                        peer->up_last_alive = t;

                /* "consume" iov */
                iov = tx->tx_iov;
                do {
                        LASSERT(tx->tx_niov > 0);

                        if (nob < iov->iov_len) {
                                iov->iov_base = (void *)(((unsigned long)
                                                (iov->iov_base)) + nob);
                                iov->iov_len -= nob;
                                break;
                        }

                        nob -= iov->iov_len;
                        tx->tx_iov = ++iov;
                        tx->tx_niov--;
                } while (nob != 0);
        } while (tx->tx_resid != 0);

        return 1; /* send complete */
}
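/*
 * Editor's sketch (not from the original source): both usocklnd_read_data()
 * and usocklnd_send_tx() push the connection deadline forward after every
 * successful transfer, so a connection times out only after
 * usock_tuns.ut_timeout seconds of *no* progress rather than after a fixed
 * total duration.  The idiom, as a hypothetical helper:
 */
static inline void example_refresh_deadline(cfs_time_t *deadline,
                                            int timeout_seconds)
{
        /* re-arm the idle timer relative to "now" on every bit of progress */
        *deadline = cfs_time_add(cfs_time_current(),
                                 cfs_time_seconds(timeout_seconds));
}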
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
        ENTRY;
        LASSERT(lcw != NULL);

        lc_watchdog_del_pending(lcw);

        lcw_update_time(lcw, "resumed");
        lcw->lcw_state = LC_WATCHDOG_ENABLED;

        cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
                      cfs_time_seconds(timeout));
        EXIT;
}
int __obd_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
{
        int ret = 0;

        ret = __obd_fail_check_set(id, value, set);
        if (ret) {
                CERROR("obd_fail_timeout id %x sleeping for %dms\n", id, ms);
                cfs_schedule_timeout_and_set_state(CFS_TASK_UNINT,
                                                   cfs_time_seconds(ms) / 1000);
                cfs_set_current_state(CFS_TASK_RUNNING);
                CERROR("obd_fail_timeout id %x awake\n", id);
        }
        return ret;
}
static int param_set_delay_minmax(const char *val,
                                  const struct kernel_param *kp,
                                  long min, long max)
{
        long d;
        int sec;
        int rc;

        rc = kstrtoint(val, 0, &sec);
        if (rc)
                return -EINVAL;

        d = cfs_time_seconds(sec) / 100;
        if (d < min || d > max)
                return -EINVAL;

        *((unsigned int *)kp->arg) = d;

        return 0;
}
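/*
 * Editor's sketch (not from the original source): the *_delay_cs proc
 * handlers above and param_set_delay_minmax() treat the user-supplied value
 * as centiseconds and convert it to internal ticks with
 * cfs_time_seconds(value) / 100.  Assuming cfs_time_seconds() scales seconds
 * to jiffies, the round trip looks like this hypothetical pair:
 */
static inline long example_cs_to_ticks(int centiseconds)
{
        return cfs_time_seconds(centiseconds) / 100;
}

static inline int example_ticks_to_cs(long ticks)
{
        /* inverse used on the read path: cfs_duration_sec(ticks * 100) */
        return cfs_duration_sec(ticks * 100);
}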
struct lc_watchdog *lc_watchdog_add(int timeout,
                                    void (*callback)(pid_t, void *),
                                    void *data)
{
        struct lc_watchdog *lcw = NULL;
        ENTRY;

        LIBCFS_ALLOC(lcw, sizeof(*lcw));
        if (lcw == NULL) {
                CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
                RETURN(ERR_PTR(-ENOMEM));
        }

        cfs_spin_lock_init(&lcw->lcw_lock);
        lcw->lcw_refcount = 1; /* refcount for owner */
        lcw->lcw_task     = cfs_current();
        lcw->lcw_pid      = cfs_curproc_pid();
        lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
        lcw->lcw_data     = data;
        lcw->lcw_state    = LC_WATCHDOG_DISABLED;

        CFS_INIT_LIST_HEAD(&lcw->lcw_list);
        cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);

        cfs_down(&lcw_refcount_sem);
        if (++lcw_refcount == 1)
                lcw_dispatch_start();
        cfs_up(&lcw_refcount_sem);

        /* Keep this working in case we enable them by default */
        if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
                lcw->lcw_last_touched = cfs_time_current();
                cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
                              cfs_time_current());
        }

        RETURN(lcw);
}
/* Request sequence-controller node to allocate new meta-sequence. */
static int seq_client_alloc_meta(const struct lu_env *env,
                                 struct lu_client_seq *seq)
{
        int rc;
        ENTRY;

        if (seq->lcs_srv) {
#ifdef HAVE_SEQ_SERVER
                LASSERT(env != NULL);
                rc = seq_server_alloc_meta(seq->lcs_srv, &seq->lcs_space, env);
#else
                rc = 0;
#endif
        } else {
                do {
                        /* If the meta server returns -EINPROGRESS or -EAGAIN,
                         * it might not be ready yet to allocate a super
                         * sequence from the sequence controller (MDT0). */
                        rc = seq_client_rpc(seq, &seq->lcs_space,
                                            SEQ_ALLOC_META, "meta");
                        if (rc == -EINPROGRESS || rc == -EAGAIN) {
                                wait_queue_head_t waitq;
                                struct l_wait_info lwi;

                                /* MDT0 is not ready, wait for 2 seconds
                                 * and then retry. */
                                init_waitqueue_head(&waitq);
                                lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL,
                                                  NULL);
                                l_wait_event(waitq, 0, &lwi);
                        }
                } while (rc == -EINPROGRESS || rc == -EAGAIN);
        }

        RETURN(rc);
}
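/*
 * Editor's sketch (not part of the original source): the do/while loop above
 * is a retry-with-fixed-backoff pattern: transient errors (-EINPROGRESS,
 * -EAGAIN) trigger a 2-second sleep and another attempt, while any other
 * result ends the loop.  A hypothetical skeleton of the same shape, where
 * try_once() stands in for the real RPC:
 */
static int example_retry_until_ready(int (*try_once)(void *arg), void *arg)
{
        int rc;

        do {
                rc = try_once(arg);
                if (rc == -EINPROGRESS || rc == -EAGAIN) {
                        wait_queue_head_t waitq;
                        struct l_wait_info lwi;

                        /* peer not ready yet: back off, then retry */
                        init_waitqueue_head(&waitq);
                        lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL);
                        l_wait_event(waitq, 0, &lwi);
                }
        } while (rc == -EINPROGRESS || rc == -EAGAIN);

        return rc;
}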
/** * Callback handler for receiving incoming completion ASTs. * * This only can happen on client side. */ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, struct ldlm_namespace *ns, struct ldlm_request *dlm_req, struct ldlm_lock *lock) { int lvb_len; LIST_HEAD(ast_list); int rc = 0; LDLM_DEBUG(lock, "client completion callback handler START"); if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) { int to = cfs_time_seconds(1); while (to > 0) { schedule_timeout_and_set_state( TASK_INTERRUPTIBLE, to); if (lock->l_granted_mode == lock->l_req_mode || lock->l_flags & LDLM_FL_DESTROYED) break; } } lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT); if (lvb_len < 0) { LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", lvb_len); GOTO(out, rc = lvb_len); } else if (lvb_len > 0) { if (lock->l_lvb_len > 0) { /* for extent lock, lvb contains ost_lvb{}. */ LASSERT(lock->l_lvb_data != NULL); if (unlikely(lock->l_lvb_len < lvb_len)) { LDLM_ERROR(lock, "Replied LVB is larger than " "expectation, expected = %d, " "replied = %d", lock->l_lvb_len, lvb_len); GOTO(out, rc = -EINVAL); } } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has * variable length */ void *lvb_data; OBD_ALLOC(lvb_data, lvb_len); if (lvb_data == NULL) { LDLM_ERROR(lock, "No memory: %d.\n", lvb_len); GOTO(out, rc = -ENOMEM); } lock_res_and_lock(lock); LASSERT(lock->l_lvb_data == NULL); lock->l_lvb_data = lvb_data; lock->l_lvb_len = lvb_len; unlock_res_and_lock(lock); } } lock_res_and_lock(lock); if ((lock->l_flags & LDLM_FL_DESTROYED) || lock->l_granted_mode == lock->l_req_mode) { /* bug 11300: the lock has already been granted */ unlock_res_and_lock(lock); LDLM_DEBUG(lock, "Double grant race happened"); GOTO(out, rc = 0); } /* If we receive the completion AST before the actual enqueue returned, * then we might need to switch lock modes, resources, or extents. */ if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) { lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; LDLM_DEBUG(lock, "completion AST, new lock mode"); } if (lock->l_resource->lr_type != LDLM_PLAIN) { ldlm_convert_policy_to_local(req->rq_export, dlm_req->lock_desc.l_resource.lr_type, &dlm_req->lock_desc.l_policy_data, &lock->l_policy_data); LDLM_DEBUG(lock, "completion AST, new policy data"); } ldlm_resource_unlink_lock(lock); if (memcmp(&dlm_req->lock_desc.l_resource.lr_name, &lock->l_resource->lr_name, sizeof(lock->l_resource->lr_name)) != 0) { unlock_res_and_lock(lock); rc = ldlm_lock_change_resource(ns, lock, &dlm_req->lock_desc.l_resource.lr_name); if (rc < 0) { LDLM_ERROR(lock, "Failed to allocate resource"); GOTO(out, rc); } LDLM_DEBUG(lock, "completion AST, new resource"); CERROR("change resource!\n"); lock_res_and_lock(lock); } if (dlm_req->lock_flags & LDLM_FL_AST_SENT) { /* BL_AST locks are not needed in LRU. * Let ldlm_cancel_lru() be fast. 
*/ ldlm_lock_remove_from_lru(lock); lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; LDLM_DEBUG(lock, "completion AST includes blocking AST"); } if (lock->l_lvb_len > 0) { rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT, lock->l_lvb_data, lvb_len); if (rc < 0) { unlock_res_and_lock(lock); GOTO(out, rc); } } ldlm_grant_lock(lock, &ast_list); unlock_res_and_lock(lock); LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work"); /* Let Enqueue to call osc_lock_upcall() and initialize * l_ast_data */ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2); ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST); LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)", lock); GOTO(out, rc); out: if (rc < 0) { lock_res_and_lock(lock); lock->l_flags |= LDLM_FL_FAILED; unlock_res_and_lock(lock); wake_up(&lock->l_waitq); } LDLM_LOCK_RELEASE(lock); }
/* * Release qsd_qtype_info structure which contains data associated with a * given quota type. This releases the accounting objects. * It's called on OSD cleanup when the qsd instance is released. * * \param env - is the environment passed by the caller * \param qsd - is the qsd instance managing the qsd_qtype_info structure * to be released * \param qtype - is the quota type to be shutdown */ static void qsd_qtype_fini(const struct lu_env *env, struct qsd_instance *qsd, int qtype) { struct qsd_qtype_info *qqi; int repeat = 0; ENTRY; if (qsd->qsd_type_array[qtype] == NULL) RETURN_EXIT; qqi = qsd->qsd_type_array[qtype]; qsd->qsd_type_array[qtype] = NULL; /* all deferred work lists should be empty */ LASSERT(cfs_list_empty(&qqi->qqi_deferred_glb)); LASSERT(cfs_list_empty(&qqi->qqi_deferred_slv)); /* shutdown lquota site */ if (qqi->qqi_site != NULL && !IS_ERR(qqi->qqi_site)) { lquota_site_free(env, qqi->qqi_site); qqi->qqi_site = NULL; } /* The qqi may still be holding by global locks which are being * canceled asynchronously (LU-4365), see the following steps: * * - On server umount, we try to clear all quota locks first by * disconnecting LWP (which will invalidate import and cleanup * all locks on it), however, if quota reint process is holding * the global lock for reintegration at that time, global lock * will fail to be cleared on LWP disconnection. * * - Umount process goes on and stops reint process, the global * lock will be dropped on reint process exit, however, the lock * cancel in done in asynchronous way, so the * qsd_glb_blocking_ast() might haven't been called yet when we * get here. */ while (atomic_read(&qqi->qqi_ref) > 1) { CDEBUG(D_QUOTA, "qqi reference count %u, repeat: %d\n", atomic_read(&qqi->qqi_ref), repeat); repeat++; schedule_timeout_and_set_state(TASK_INTERRUPTIBLE, cfs_time_seconds(1)); } /* by now, all qqi users should have gone away */ LASSERT(atomic_read(&qqi->qqi_ref) == 1); lu_ref_fini(&qqi->qqi_reference); /* release accounting object */ if (qqi->qqi_acct_obj != NULL && !IS_ERR(qqi->qqi_acct_obj)) { lu_object_put(env, &qqi->qqi_acct_obj->do_lu); qqi->qqi_acct_obj = NULL; } /* release slv index */ if (qqi->qqi_slv_obj != NULL && !IS_ERR(qqi->qqi_slv_obj)) { lu_object_put(env, &qqi->qqi_slv_obj->do_lu); qqi->qqi_slv_obj = NULL; qqi->qqi_slv_ver = 0; } /* release global index */ if (qqi->qqi_glb_obj != NULL && !IS_ERR(qqi->qqi_glb_obj)) { lu_object_put(env, &qqi->qqi_glb_obj->do_lu); qqi->qqi_glb_obj = NULL; qqi->qqi_glb_ver = 0; } OBD_FREE_PTR(qqi); EXIT; }
static int lfsck_master_oit_engine(const struct lu_env *env, struct lfsck_instance *lfsck) { struct lfsck_thread_info *info = lfsck_env_info(env); const struct dt_it_ops *iops = &lfsck->li_obj_oit->do_index_ops->dio_it; struct dt_it *di = lfsck->li_di_oit; struct lu_fid *fid = &info->lti_fid; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct ptlrpc_thread *thread = &lfsck->li_thread; __u32 idx = lfsck_dev_idx(lfsck->li_bottom); int rc; ENTRY; do { struct dt_object *target; bool update_lma = false; if (lfsck->li_di_dir != NULL) { rc = lfsck_master_dir_engine(env, lfsck); if (rc <= 0) RETURN(rc); } if (unlikely(lfsck->li_oit_over)) RETURN(1); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) && cfs_fail_val > 0) { struct l_wait_info lwi; lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL); l_wait_event(thread->t_ctl_waitq, !thread_is_running(thread), &lwi); } if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH)) RETURN(0); lfsck->li_current_oit_processed = 1; lfsck->li_new_scanned++; rc = iops->rec(env, di, (struct dt_rec *)fid, 0); if (rc != 0) { lfsck_fail(env, lfsck, true); if (rc < 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); else goto checkpoint; } if (fid_is_idif(fid)) { __u32 idx1 = fid_idif_ost_idx(fid); LASSERT(!lfsck->li_master); /* It is an old format device, update the LMA. */ if (idx != idx1) { struct ost_id *oi = &info->lti_oi; fid_to_ostid(fid, oi); ostid_to_fid(fid, oi, idx); update_lma = true; } } else if (!fid_is_norm(fid) && !fid_is_igif(fid) && !fid_is_last_id(fid) && !fid_is_root(fid) && !fid_seq_is_dot(fid_seq(fid))) { /* If the FID/object is only used locally and invisible * to external nodes, then LFSCK will not handle it. */ goto checkpoint; } target = lfsck_object_find(env, lfsck, fid); if (target == NULL) { goto checkpoint; } else if (IS_ERR(target)) { lfsck_fail(env, lfsck, true); if (bk->lb_param & LPF_FAILOUT) RETURN(PTR_ERR(target)); else goto checkpoint; } /* XXX: Currently, skip remote object, the consistency for * remote object will be processed in LFSCK phase III. */ if (dt_object_exists(target) && !dt_object_remote(target)) { if (update_lma) rc = lfsck_update_lma(env, lfsck, target); if (rc == 0) rc = lfsck_exec_oit(env, lfsck, target); } lfsck_object_put(env, target); if (rc != 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); checkpoint: rc = lfsck_checkpoint(env, lfsck); if (rc != 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); /* Rate control. */ lfsck_control_speed(lfsck); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) { spin_lock(&lfsck->li_lock); thread_set_flags(thread, SVC_STOPPING); spin_unlock(&lfsck->li_lock); RETURN(-EINVAL); } rc = iops->next(env, di); if (unlikely(rc > 0)) lfsck->li_oit_over = 1; else if (likely(rc == 0)) lfsck->li_current_oit_processed = 0; if (unlikely(!thread_is_running(thread))) RETURN(0); } while (rc == 0 || lfsck->li_di_dir != NULL); RETURN(rc); }
int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask, const char *file, const char *fn, const int line, const char *format1, va_list args, const char *format2, ...) { struct cfs_trace_cpu_data *tcd = NULL; struct ptldebug_header header = {0}; struct cfs_trace_page *tage; /* string_buf is used only if tcd != NULL, and is always set then */ char *string_buf = NULL; char *debug_buf; int known_size; int needed = 85; /* average message length */ int max_nob; va_list ap; int depth; int i; int remain; if (strchr(file, '/')) file = strrchr(file, '/') + 1; tcd = cfs_trace_get_tcd(); /* cfs_trace_get_tcd() grabs a lock, which disables preemption and * pins us to a particular CPU. This avoids an smp_processor_id() * warning on Linux when debugging is enabled. */ cfs_set_ptldebug_header(&header, subsys, mask, line, CDEBUG_STACK()); if (tcd == NULL) /* arch may not log in IRQ context */ goto console; if (tcd->tcd_cur_pages == 0) header.ph_flags |= PH_FLAG_FIRST_RECORD; if (tcd->tcd_shutting_down) { cfs_trace_put_tcd(tcd); tcd = NULL; goto console; } depth = __current_nesting_level(); known_size = strlen(file) + 1 + depth; if (fn) known_size += strlen(fn) + 1; if (libcfs_debug_binary) known_size += sizeof(header); /*/ * '2' used because vsnprintf return real size required for output * _without_ terminating NULL. * if needed is to small for this format. */ for (i = 0; i < 2; i++) { tage = cfs_trace_get_tage(tcd, needed + known_size + 1); if (tage == NULL) { if (needed + known_size > CFS_PAGE_SIZE) mask |= D_ERROR; cfs_trace_put_tcd(tcd); tcd = NULL; goto console; } string_buf = (char *)cfs_page_address(tage->page) + tage->used + known_size; max_nob = CFS_PAGE_SIZE - tage->used - known_size; if (max_nob <= 0) { printk(CFS_KERN_EMERG "negative max_nob: %d\n", max_nob); mask |= D_ERROR; cfs_trace_put_tcd(tcd); tcd = NULL; goto console; } needed = 0; if (format1) { va_copy(ap, args); needed = vsnprintf(string_buf, max_nob, format1, ap); va_end(ap); } if (format2) { remain = max_nob - needed; if (remain < 0) remain = 0; va_start(ap, format2); needed += vsnprintf(string_buf + needed, remain, format2, ap); va_end(ap); } if (needed < max_nob) /* well. printing ok.. 
*/ break; } if (*(string_buf+needed-1) != '\n') printk(CFS_KERN_INFO "format at %s:%d:%s doesn't end in newline\n", file, line, fn); header.ph_len = known_size + needed; debug_buf = (char *)cfs_page_address(tage->page) + tage->used; if (libcfs_debug_binary) { memcpy(debug_buf, &header, sizeof(header)); tage->used += sizeof(header); debug_buf += sizeof(header); } /* indent message according to the nesting level */ while (depth-- > 0) { *(debug_buf++) = '.'; ++ tage->used; } strcpy(debug_buf, file); tage->used += strlen(file) + 1; debug_buf += strlen(file) + 1; if (fn) { strcpy(debug_buf, fn); tage->used += strlen(fn) + 1; debug_buf += strlen(fn) + 1; } __LASSERT(debug_buf == string_buf); tage->used += needed; __LASSERT (tage->used <= CFS_PAGE_SIZE); console: if ((mask & libcfs_printk) == 0) { /* no console output requested */ if (tcd != NULL) cfs_trace_put_tcd(tcd); return 1; } if (cdls != NULL) { if (libcfs_console_ratelimit && cdls->cdls_next != 0 && /* not first time ever */ !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { /* skipping a console message */ cdls->cdls_count++; if (tcd != NULL) cfs_trace_put_tcd(tcd); return 1; } if (cfs_time_after(cfs_time_current(), cdls->cdls_next + libcfs_console_max_delay + cfs_time_seconds(10))) { /* last timeout was a long time ago */ cdls->cdls_delay /= libcfs_console_backoff * 4; } else { cdls->cdls_delay *= libcfs_console_backoff; if (cdls->cdls_delay < libcfs_console_min_delay) cdls->cdls_delay = libcfs_console_min_delay; else if (cdls->cdls_delay > libcfs_console_max_delay) cdls->cdls_delay = libcfs_console_max_delay; } /* ensure cdls_next is never zero after it's been seen */ cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; } if (tcd != NULL) { cfs_print_to_console(&header, mask, string_buf, needed, file, fn); cfs_trace_put_tcd(tcd); } else { string_buf = cfs_trace_get_console_buffer(); needed = 0; if (format1 != NULL) { va_copy(ap, args); needed = vsnprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, format1, ap); va_end(ap); } if (format2 != NULL) { remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed; if (remain > 0) { va_start(ap, format2); needed += vsnprintf(string_buf+needed, remain, format2, ap); va_end(ap); } } cfs_print_to_console(&header, mask, string_buf, needed, file, fn); cfs_trace_put_console_buffer(string_buf); } if (cdls != NULL && cdls->cdls_count != 0) { string_buf = cfs_trace_get_console_buffer(); needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, "Skipped %d previous similar message%s\n", cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); cfs_print_to_console(&header, mask, string_buf, needed, file, fn); cfs_trace_put_console_buffer(string_buf); cdls->cdls_count = 0; } return 0; }
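/*
 * Editor's sketch (not from the original source): the cdls block in
 * libcfs_debug_vmsg2() above implements multiplicative backoff for console
 * messages: repeated messages stretch the delay by libcfs_console_backoff
 * (clamped between libcfs_console_min_delay and libcfs_console_max_delay),
 * while a long quiet period shrinks it again.  The core update, isolated
 * into a hypothetical helper over the same globals:
 */
static cfs_time_t example_next_console_slot(cfs_duration_t *delay,
                                            cfs_time_t last_deadline)
{
        if (cfs_time_after(cfs_time_current(),
                           last_deadline + libcfs_console_max_delay +
                           cfs_time_seconds(10))) {
                /* last burst was long ago: relax the throttle */
                *delay /= libcfs_console_backoff * 4;
        } else {
                /* still bursting: back off harder, but stay in bounds */
                *delay *= libcfs_console_backoff;
                if (*delay < libcfs_console_min_delay)
                        *delay = libcfs_console_min_delay;
                else if (*delay > libcfs_console_max_delay)
                        *delay = libcfs_console_max_delay;
        }

        /* "| 1" keeps the deadline non-zero so "never printed" stays distinct */
        return (cfs_time_current() + *delay) | 1;
}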
int libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout) { size_t rcvlen; int rc; cfs_duration_t to = cfs_time_seconds(timeout); cfs_time_t then; struct timeval tv; LASSERT(nob > 0); for (;;) { struct iovec iov = { .iov_base = buffer, .iov_len = nob }; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, .msg_control = NULL, .msg_controllen = 0, .msg_flags = 0, }; cfs_duration_usec(to, &tv); rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); if (rc != 0) { CERROR("Can't set socket recv timeout " "%ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; } then = cfs_time_current(); rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen); to -= cfs_time_current() - then; if (rc != 0 && rc != -EWOULDBLOCK) return rc; if (rcvlen == nob) return 0; if (to <= 0) return -EAGAIN; buffer = ((char *)buffer) + rcvlen; nob -= rcvlen; } return 0; } int libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout) { size_t sndlen; int rc; cfs_duration_t to = cfs_time_seconds(timeout); cfs_time_t then; struct timeval tv; LASSERT(nob > 0); for (;;) { struct iovec iov = { .iov_base = buffer, .iov_len = nob }; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, .msg_control = NULL, .msg_controllen = 0, .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0, }; if (timeout != 0) { cfs_duration_usec(to, &tv); rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); if (rc != 0) { CERROR("Can't set socket send timeout " "%ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; } } then = cfs_time_current(); rc = -sock_send(C2B_SOCK(sock), &msg, ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen); to -= cfs_time_current() - then; if (rc != 0 && rc != -EWOULDBLOCK) return rc; if (sndlen == nob) return 0; if (to <= 0) return -EAGAIN; buffer = ((char *)buffer) + sndlen; nob -= sndlen; } return 0; } int libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port) { struct sockaddr_in sin; int rc; if (remote != 0) /* Get remote address */ rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); else /* Get local address */ rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); if (rc != 0) { CERROR ("Error %d getting sock %s IP/port\n", rc, remote ? "peer" : "local"); return rc; } if (ip != NULL) *ip = ntohl (sin.sin_addr.s_addr); if (port != NULL) *port = ntohs (sin.sin_port); return 0; } int libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize) { int option; int rc; if (txbufsize != 0) { option = txbufsize; rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, (char *)&option, sizeof (option)); if (rc != 0) { CERROR ("Can't set send buffer %d: %d\n", option, rc); return (rc); } } if (rxbufsize != 0) { option = rxbufsize; rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, (char *)&option, sizeof (option)); if (rc != 0) { CERROR ("Can't set receive buffer %d: %d\n", option, rc); return (rc); } } return 0; }
static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
{
        return cfs_time_sub(ocapa->c_expiry,
                            cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
}
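/*
 * Editor's note (sketch, not from the original source): the renewal point is
 * placed half a capability lifetime before c_expiry, so renewal starts
 * around the midpoint of the validity window and there is ample slack before
 * the capability actually expires.  The same arithmetic as a hypothetical
 * helper:
 */
static inline cfs_time_t example_midpoint_renewal(cfs_time_t expiry,
                                                  int lifetime_seconds)
{
        /* renew once half of the lifetime remains before expiry */
        return cfs_time_sub(expiry, cfs_time_seconds(lifetime_seconds) / 2);
}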
int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) { #if SOCKNAL_SINGLE_FRAG_TX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_niov; #endif struct socket *sock = conn->ksnc_sock; int nob; int rc; int i; struct uio suio = { .uio_iov = scratchiov, .uio_iovcnt = niov, .uio_offset = 0, .uio_resid = 0, /* This will be valued after a while */ .uio_segflg = UIO_SYSSPACE, .uio_rw = UIO_WRITE, .uio_procp = NULL }; int flags = MSG_DONTWAIT; CFS_DECL_NET_DATA; for (nob = i = 0; i < niov; i++) { scratchiov[i] = tx->tx_iov[i]; nob += scratchiov[i].iov_len; } suio.uio_resid = nob; CFS_NET_IN; rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags); CFS_NET_EX; /* NB there is no return value can indicate how many * have been sent and how many resid, we have to get * sent bytes from suio. */ if (rc != 0) { if (suio.uio_resid != nob &&\ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) /* We have sent something */ rc = nob - suio.uio_resid; else if ( rc == EWOULDBLOCK ) /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */ rc = -EAGAIN; else rc = -rc; } else /* rc == 0 */ rc = nob - suio.uio_resid; return rc; } int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) { #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_nkiov; #endif struct socket *sock = conn->ksnc_sock; lnet_kiov_t *kiov = tx->tx_kiov; int nob; int rc; int i; struct uio suio = { .uio_iov = scratchiov, .uio_iovcnt = niov, .uio_offset = 0, .uio_resid = 0, /* It should be valued after a while */ .uio_segflg = UIO_SYSSPACE, .uio_rw = UIO_WRITE, .uio_procp = NULL }; int flags = MSG_DONTWAIT; CFS_DECL_NET_DATA; for (nob = i = 0; i < niov; i++) { scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; nob += scratchiov[i].iov_len = kiov[i].kiov_len; } suio.uio_resid = nob; CFS_NET_IN; rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags); CFS_NET_EX; for (i = 0; i < niov; i++) cfs_kunmap(kiov[i].kiov_page); if (rc != 0) { if (suio.uio_resid != nob &&\ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) /* We have sent something */ rc = nob - suio.uio_resid; else if ( rc == EWOULDBLOCK ) /* EAGAIN and EWOULD BLOCK have same value in OSX */ rc = -EAGAIN; else rc = -rc; } else /* rc == 0 */ rc = nob - suio.uio_resid; return rc; } /* * liang: Hack of inpcb and tcpcb. * To get tcpcb of a socket, and call tcp_output * to send quick ack. */ struct ks_tseg_qent{ int foo; }; struct ks_tcptemp{ int foo; }; LIST_HEAD(ks_tsegqe_head, ks_tseg_qent); struct ks_tcpcb { struct ks_tsegqe_head t_segq; int t_dupacks; struct ks_tcptemp *unused; int t_timer[4]; struct inpcb *t_inpcb; int t_state; u_int t_flags; /* * There are more fields but we dont need * ...... */ }; #define TF_ACKNOW 0x00001 #define TF_DELACK 0x00002 struct ks_inpcb { LIST_ENTRY(ks_inpcb) inp_hash; struct in_addr reserved1; struct in_addr reserved2; u_short inp_fport; u_short inp_lport; LIST_ENTRY(inpcb) inp_list; caddr_t inp_ppcb; /* * There are more fields but we dont need * ...... 
*/ }; #define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb) #define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb) #define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so))) void ksocknal_lib_eager_ack (ksock_conn_t *conn) { struct socket *sock = conn->ksnc_sock; struct ks_inpcb *inp = ks_sotoinpcb(sock); struct ks_tcpcb *tp = ks_intotcpcb(inp); int s; CFS_DECL_NET_DATA; extern int tcp_output(register struct ks_tcpcb *tp); CFS_NET_IN; s = splnet(); /* * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo * to send immediate ACK. */ if (tp && tp->t_flags & TF_DELACK){ tp->t_flags &= ~TF_DELACK; tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); } splx(s); CFS_NET_EX; return; } int ksocknal_lib_recv_iov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_niov; #endif struct iovec *iov = conn->ksnc_rx_iov; int nob; int rc; int i; struct uio ruio = { .uio_iov = scratchiov, .uio_iovcnt = niov, .uio_offset = 0, .uio_resid = 0, /* It should be valued after a while */ .uio_segflg = UIO_SYSSPACE, .uio_rw = UIO_READ, .uio_procp = NULL }; int flags = MSG_DONTWAIT; CFS_DECL_NET_DATA; for (nob = i = 0; i < niov; i++) { scratchiov[i] = iov[i]; nob += scratchiov[i].iov_len; } LASSERT (nob <= conn->ksnc_rx_nob_wanted); ruio.uio_resid = nob; CFS_NET_IN; rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags); CFS_NET_EX; if (rc){ if (ruio.uio_resid != nob && \ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN)) /* data particially received */ rc = nob - ruio.uio_resid; else if (rc == EWOULDBLOCK) /* EAGAIN and EWOULD BLOCK have same value in OSX */ rc = -EAGAIN; else rc = -rc; } else rc = nob - ruio.uio_resid; return (rc); } int ksocknal_lib_recv_kiov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK struct iovec scratch; struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_nkiov; #endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; int nob; int rc; int i; struct uio ruio = { .uio_iov = scratchiov, .uio_iovcnt = niov, .uio_offset = 0, .uio_resid = 0, .uio_segflg = UIO_SYSSPACE, .uio_rw = UIO_READ, .uio_procp = NULL }; int flags = MSG_DONTWAIT; CFS_DECL_NET_DATA; for (nob = i = 0; i < niov; i++) { scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; nob += scratchiov[i].iov_len = kiov[i].kiov_len; } LASSERT (nob <= conn->ksnc_rx_nob_wanted); ruio.uio_resid = nob; CFS_NET_IN; rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags); CFS_NET_EX; for (i = 0; i < niov; i++) cfs_kunmap(kiov[i].kiov_page); if (rc){ if (ruio.uio_resid != nob && \ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) /* data particially received */ rc = nob - ruio.uio_resid; else if (rc == EWOULDBLOCK) /* receive blocked, EWOULDBLOCK == EAGAIN */ rc = -EAGAIN; else rc = -rc; } else rc = nob - ruio.uio_resid; return (rc); } int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) { struct socket *sock = conn->ksnc_sock; int rc; rc = ksocknal_connsock_addref(conn); if (rc != 0) { LASSERT (conn->ksnc_closing); *txmem = *rxmem = *nagle = 0; return -ESHUTDOWN; } rc = libcfs_sock_getbuf(sock, txmem, rxmem); if (rc == 0) { struct sockopt sopt; 
int len; CFS_DECL_NET_DATA; len = sizeof(*nagle); bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_GET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = nagle; sopt.sopt_valsize = len; CFS_NET_IN; rc = -sogetopt(sock, &sopt); CFS_NET_EX; } ksocknal_connsock_decref(conn); if (rc == 0) *nagle = !*nagle; else *txmem = *rxmem = *nagle = 0; return (rc); } int ksocknal_lib_setup_sock (struct socket *so) { struct sockopt sopt; int rc; int option; int keep_idle; int keep_intvl; int keep_count; int do_keepalive; struct linger linger; CFS_DECL_NET_DATA; rc = libcfs_sock_setbuf(so, *ksocknal_tunables.ksnd_tx_buffer_size, *ksocknal_tunables.ksnd_rx_buffer_size); if (rc != 0) { CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", *ksocknal_tunables.ksnd_tx_buffer_size, *ksocknal_tunables.ksnd_rx_buffer_size, rc); return (rc); } /* Ensure this socket aborts active sends immediately when we close * it. */ bzero(&sopt, sizeof sopt); linger.l_onoff = 0; linger.l_linger = 0; sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_LINGER; sopt.sopt_val = &linger; sopt.sopt_valsize = sizeof(linger); CFS_NET_IN; rc = -sosetopt(so, &sopt); if (rc != 0) { CERROR ("Can't set SO_LINGER: %d\n", rc); goto out; } if (!*ksocknal_tunables.ksnd_nagle) { option = 1; bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = &option; sopt.sopt_valsize = sizeof(option); rc = -sosetopt(so, &sopt); if (rc != 0) { CERROR ("Can't disable nagle: %d\n", rc); goto out; } } /* snapshot tunables */ keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; keep_count = *ksocknal_tunables.ksnd_keepalive_count; keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); option = (do_keepalive ? 
1 : 0); bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_KEEPALIVE; sopt.sopt_val = &option; sopt.sopt_valsize = sizeof(option); rc = -sosetopt(so, &sopt); if (rc != 0) { CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); goto out; } if (!do_keepalive) { /* no more setting, just return */ rc = 0; goto out; } bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_KEEPALIVE; sopt.sopt_val = &keep_idle; sopt.sopt_valsize = sizeof(keep_idle); rc = -sosetopt(so, &sopt); if (rc != 0) { CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc); goto out; } out: CFS_NET_EX; return (rc); } void ksocknal_lib_push_conn(ksock_conn_t *conn) { struct socket *sock; struct sockopt sopt; int val = 1; int rc; CFS_DECL_NET_DATA; rc = ksocknal_connsock_addref(conn); if (rc != 0) /* being shut down */ return; sock = conn->ksnc_sock; bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof val; CFS_NET_IN; sosetopt(sock, &sopt); CFS_NET_EX; ksocknal_connsock_decref(conn); return; } extern void ksocknal_read_callback (ksock_conn_t *conn); extern void ksocknal_write_callback (ksock_conn_t *conn); static void ksocknal_upcall(struct socket *so, caddr_t arg, int waitf) { ksock_conn_t *conn = (ksock_conn_t *)arg; ENTRY; read_lock (&ksocknal_data.ksnd_global_lock); if (conn == NULL) goto out; if (so->so_rcv.sb_flags & SB_UPCALL) { extern int soreadable(struct socket *so); if (conn->ksnc_rx_nob_wanted && soreadable(so)) /* To verify whether the upcall is for receive */ ksocknal_read_callback (conn); } /* go foward? */ if (so->so_snd.sb_flags & SB_UPCALL){ extern int sowriteable(struct socket *so); if (sowriteable(so)) /* socket is writable */ ksocknal_write_callback(conn); } out: read_unlock (&ksocknal_data.ksnd_global_lock); EXIT; } void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) { /* No callback need to save in osx */ return; } void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) { CFS_DECL_NET_DATA; CFS_NET_IN; sock->so_upcallarg = (void *)conn; sock->so_upcall = ksocknal_upcall; sock->so_snd.sb_timeo = 0; sock->so_rcv.sb_timeo = cfs_time_seconds(2); sock->so_rcv.sb_flags |= SB_UPCALL; sock->so_snd.sb_flags |= SB_UPCALL; CFS_NET_EX; return; } void ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn) { CFS_DECL_NET_DATA; CFS_NET_IN; ksocknal_upcall (sock, (void *)conn, 0); CFS_NET_EX; }
int LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms, lnet_event_t *event, int *which) { int i; int rc; #ifdef __KERNEL__ cfs_waitlink_t wl; cfs_time_t now; #else struct timeval then; struct timeval now; # ifdef HAVE_LIBPTHREAD struct timespec ts; # endif lnet_ni_t *eqwaitni = the_lnet.ln_eqwaitni; #endif ENTRY; LASSERT (the_lnet.ln_init); LASSERT (the_lnet.ln_refcount > 0); if (neq < 1) RETURN(-ENOENT); LNET_LOCK(); for (;;) { #ifndef __KERNEL__ LNET_UNLOCK(); /* Recursion breaker */ if (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING && !LNetHandleIsEqual(eventqs[0], the_lnet.ln_rc_eqh)) lnet_router_checker(); LNET_LOCK(); #endif for (i = 0; i < neq; i++) { lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]); if (eq == NULL) { LNET_UNLOCK(); RETURN(-ENOENT); } rc = lib_get_event (eq, event); if (rc != 0) { LNET_UNLOCK(); *which = i; RETURN(rc); } } #ifdef __KERNEL__ if (timeout_ms == 0) { LNET_UNLOCK(); RETURN (0); } cfs_waitlink_init(&wl); set_current_state(TASK_INTERRUPTIBLE); cfs_waitq_add(&the_lnet.ln_waitq, &wl); LNET_UNLOCK(); if (timeout_ms < 0) { cfs_waitq_wait (&wl, CFS_TASK_INTERRUPTIBLE); } else { struct timeval tv; now = cfs_time_current(); cfs_waitq_timedwait(&wl, CFS_TASK_INTERRUPTIBLE, cfs_time_seconds(timeout_ms)/1000); cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv); timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000; if (timeout_ms < 0) timeout_ms = 0; } LNET_LOCK(); cfs_waitq_del(&the_lnet.ln_waitq, &wl); #else if (eqwaitni != NULL) { /* I have a single NI that I have to call into, to get * events queued, or to block. */ lnet_ni_addref_locked(eqwaitni); LNET_UNLOCK(); if (timeout_ms <= 0) { (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms); } else { gettimeofday(&then, NULL); (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms); gettimeofday(&now, NULL); timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + (now.tv_usec - then.tv_usec) / 1000; if (timeout_ms < 0) timeout_ms = 0; } LNET_LOCK(); lnet_ni_decref_locked(eqwaitni); /* don't call into eqwaitni again if timeout has * expired */ if (timeout_ms == 0) eqwaitni = NULL; continue; /* go back and check for events */ } if (timeout_ms == 0) { LNET_UNLOCK(); RETURN (0); } # ifndef HAVE_LIBPTHREAD /* If I'm single-threaded, LNET fails at startup if it can't * set the_lnet.ln_eqwaitni correctly. */ LBUG(); # else if (timeout_ms < 0) { pthread_cond_wait(&the_lnet.ln_cond, &the_lnet.ln_lock); } else { gettimeofday(&then, NULL); ts.tv_sec = then.tv_sec + timeout_ms/1000; ts.tv_nsec = then.tv_usec * 1000 + (timeout_ms%1000) * 1000000; if (ts.tv_nsec >= 1000000000) { ts.tv_sec++; ts.tv_nsec -= 1000000000; } pthread_cond_timedwait(&the_lnet.ln_cond, &the_lnet.ln_lock, &ts); gettimeofday(&now, NULL); timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + (now.tv_usec - then.tv_usec) / 1000; if (timeout_ms < 0) timeout_ms = 0; } # endif #endif } }
static int quota_chk_acq_common(struct obd_device *obd, struct obd_export *exp, const unsigned int id[], int pending[], int count, quota_acquire acquire, struct obd_trans_info *oti, int isblk, struct inode *inode, int frags) { struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; struct timeval work_start; struct timeval work_end; long timediff; struct l_wait_info lwi = { 0 }; int rc = 0, cycle = 0, count_err = 1; ENTRY; if (!quota_is_set(obd, id, isblk ? QB_SET : QI_SET)) RETURN(0); if (isblk && (exp->exp_failed || exp->exp_abort_active_req)) /* If the client has been evicted or if it * timed out and tried to reconnect already, * abort the request immediately */ RETURN(-ENOTCONN); CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name); pending[USRQUOTA] = pending[GRPQUOTA] = 0; /* Unfortunately, if quota master is too busy to handle the * pre-dqacq in time and quota hash on ost is used up, we * have to wait for the completion of in flight dqacq/dqrel, * in order to get enough quota for write b=12588 */ cfs_gettimeofday(&work_start); while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk, inode, frags)) & QUOTA_RET_ACQUOTA) { cfs_spin_lock(&qctxt->lqc_lock); if (!qctxt->lqc_import && oti) { cfs_spin_unlock(&qctxt->lqc_lock); LASSERT(oti && oti->oti_thread && oti->oti_thread->t_watchdog); lc_watchdog_disable(oti->oti_thread->t_watchdog); CDEBUG(D_QUOTA, "sleep for quota master\n"); l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt), &lwi); CDEBUG(D_QUOTA, "wake up when quota master is back\n"); lc_watchdog_touch(oti->oti_thread->t_watchdog, CFS_GET_TIMEOUT(oti->oti_thread->t_svc)); } else { cfs_spin_unlock(&qctxt->lqc_lock); } cycle++; if (isblk) OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90); /* after acquire(), we should run quota_check_common again * so that we confirm there are enough quota to finish write */ rc = acquire(obd, id, oti, isblk); /* please reference to dqacq_completion for the below */ /* a new request is finished, try again */ if (rc == QUOTA_REQ_RETURNED) { CDEBUG(D_QUOTA, "finish a quota req, try again\n"); continue; } /* it is out of quota already */ if (rc == -EDQUOT) { CDEBUG(D_QUOTA, "out of quota, return -EDQUOT\n"); break; } /* Related quota has been disabled by master, but enabled by * slave, do not try again. 
*/ if (unlikely(rc == -ESRCH)) { CERROR("mismatched quota configuration, stop try.\n"); break; } if (isblk && (exp->exp_failed || exp->exp_abort_active_req)) /* The client has been evicted or tried to * to reconnect already, abort the request */ RETURN(-ENOTCONN); /* -EBUSY and others, wait a second and try again */ if (rc < 0) { cfs_waitq_t waitq; struct l_wait_info lwi; if (oti && oti->oti_thread && oti->oti_thread->t_watchdog) lc_watchdog_touch(oti->oti_thread->t_watchdog, CFS_GET_TIMEOUT(oti->oti_thread->t_svc)); CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc, count_err++); cfs_waitq_init(&waitq); lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL, NULL); l_wait_event(waitq, 0, &lwi); } if (rc < 0 || cycle % 10 == 0) { cfs_spin_lock(&last_print_lock); if (last_print == 0 || cfs_time_before((last_print + cfs_time_seconds(30)), cfs_time_current())) { last_print = cfs_time_current(); cfs_spin_unlock(&last_print_lock); CWARN("still haven't managed to acquire quota " "space from the quota master after %d " "retries (err=%d, rc=%d)\n", cycle, count_err - 1, rc); } else { cfs_spin_unlock(&last_print_lock); } } CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc, cycle); } cfs_gettimeofday(&work_end); timediff = cfs_timeval_sub(&work_end, &work_start, NULL); lprocfs_counter_add(qctxt->lqc_stats, isblk ? LQUOTA_WAIT_FOR_CHK_BLK : LQUOTA_WAIT_FOR_CHK_INO, timediff); if (rc > 0) rc = 0; RETURN(rc); }
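/*
 * Editor's sketch (not from the original source): the retry path in
 * quota_chk_acq_common() above combines a bounded backoff (sleep
 * min(cycle, 10) seconds between acquire attempts) with a 30-second throttle
 * on the "still haven't managed to acquire quota space" warning.  A
 * hypothetical reduction of just the backoff sleep:
 */
static void example_bounded_backoff(int cycle)
{
        cfs_waitq_t waitq;
        struct l_wait_info lwi;

        cfs_waitq_init(&waitq);
        /* never sleep longer than 10 seconds, however many retries so far */
        lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL, NULL);
        l_wait_event(waitq, 0, &lwi);
}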
int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq, struct lustre_quota_ctxt *qctxt) { struct lustre_qunit_size *lqs = NULL; unsigned long *unit, *tune; signed long tmp = 0; cfs_time_t time_limit = 0, *shrink; int i, rc = 0; ENTRY; LASSERT(qctxt); lqs = quota_search_lqs(LQS_KEY(QAQ_IS_GRP(oqaq), oqaq->qaq_id), qctxt, QAQ_IS_CREATE_LQS(oqaq) ? 1 : 0); if (lqs == NULL || IS_ERR(lqs)){ CERROR("fail to find a lqs for %sid %u!\n", QAQ_IS_GRP(oqaq) ? "g" : "u", oqaq->qaq_id); RETURN(PTR_ERR(lqs)); } CDEBUG(D_QUOTA, "before: bunit: %lu, iunit: %lu.\n", lqs->lqs_bunit_sz, lqs->lqs_iunit_sz); cfs_spin_lock(&lqs->lqs_lock); for (i = 0; i < 2; i++) { if (i == 0 && !QAQ_IS_ADJBLK(oqaq)) continue; if (i == 1 && !QAQ_IS_ADJINO(oqaq)) continue; tmp = i ? (lqs->lqs_iunit_sz - oqaq->qaq_iunit_sz) : (lqs->lqs_bunit_sz - oqaq->qaq_bunit_sz); shrink = i ? &lqs->lqs_last_ishrink : &lqs->lqs_last_bshrink; time_limit = cfs_time_add(i ? lqs->lqs_last_ishrink : lqs->lqs_last_bshrink, cfs_time_seconds(qctxt->lqc_switch_seconds)); unit = i ? &lqs->lqs_iunit_sz : &lqs->lqs_bunit_sz; tune = i ? &lqs->lqs_itune_sz : &lqs->lqs_btune_sz; /* quota master shrinks */ if (qctxt->lqc_handler && tmp > 0) *shrink = cfs_time_current(); /* quota master enlarges */ if (qctxt->lqc_handler && tmp < 0) { /* in case of ping-pong effect, don't enlarge lqs * in a short time */ if (*shrink && cfs_time_before(cfs_time_current(), time_limit)) tmp = 0; } /* when setquota, don't enlarge lqs b=18616 */ if (QAQ_IS_CREATE_LQS(oqaq) && tmp < 0) tmp = 0; if (tmp != 0) { *unit = i ? oqaq->qaq_iunit_sz : oqaq->qaq_bunit_sz; *tune = (*unit) / 2; } if (tmp > 0) rc |= i ? LQS_INO_DECREASE : LQS_BLK_DECREASE; if (tmp < 0) rc |= i ? LQS_INO_INCREASE : LQS_BLK_INCREASE; } cfs_spin_unlock(&lqs->lqs_lock); CDEBUG(D_QUOTA, "after: bunit: %lu, iunit: %lu.\n", lqs->lqs_bunit_sz, lqs->lqs_iunit_sz); lqs_putref(lqs); RETURN(rc); }
inline cfs_time_t round_timeout(cfs_time_t timeout)
{
        return cfs_time_seconds((int)cfs_duration_sec(cfs_time_sub(timeout, 0))
                                + 1);
}
int lnet_acceptor(void *arg) { socket_t *newsock; int rc; __u32 magic; __u32 peer_ip; int peer_port; int secure = (int)((long_ptr_t)arg); LASSERT(lnet_acceptor_state.pta_sock == NULL); cfs_block_allsigs(); rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port, accept_backlog); if (rc != 0) { if (rc == -EADDRINUSE) LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n", accept_port); else LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n", accept_port, rc); lnet_acceptor_state.pta_sock = NULL; } else { LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port); } /* set init status and unblock parent */ lnet_acceptor_state.pta_shutdown = rc; complete(&lnet_acceptor_state.pta_signal); if (rc != 0) return rc; while (!lnet_acceptor_state.pta_shutdown) { rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock); if (rc != 0) { if (rc != -EAGAIN) { CWARN("Accept error %d: pausing...\n", rc); cfs_pause(cfs_time_seconds(1)); } continue; } /* maybe we're waken up with libcfs_sock_abort_accept() */ if (lnet_acceptor_state.pta_shutdown) { libcfs_sock_release(newsock); break; } rc = libcfs_sock_getaddr(newsock, 1, &peer_ip, &peer_port); if (rc != 0) { CERROR("Can't determine new connection's address\n"); goto failed; } if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { CERROR("Refusing connection from %pI4h: insecure port %d\n", &peer_ip, peer_port); goto failed; } rc = libcfs_sock_read(newsock, &magic, sizeof(magic), accept_timeout); if (rc != 0) { CERROR("Error %d reading connection request from %pI4h\n", rc, &peer_ip); goto failed; } rc = lnet_accept(newsock, magic); if (rc != 0) goto failed; continue; failed: libcfs_sock_release(newsock); } libcfs_sock_release(lnet_acceptor_state.pta_sock); lnet_acceptor_state.pta_sock = NULL; CDEBUG(D_NET, "Acceptor stopping\n"); /* unblock lnet_acceptor_stop() */ complete(&lnet_acceptor_state.pta_signal); return 0; }