void *osl_malloc(osl_t *osh, uint size)
{
    void *addr;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25))
    gfp_t flags;

    if (osh)
        ASSERT(osh->magic == OS_HANDLE_MAGIC);

    flags = (in_atomic()) ? GFP_ATOMIC : GFP_KERNEL;
    if ((addr = kmalloc(size, flags)) == NULL) {
#else
    if ((addr = kmalloc(size, GFP_ATOMIC)) == NULL) {
#endif
        if (osh)
            osh->failed++;
        return (NULL);
    }
    if (osh)
        atomic_add(size, &osh->malloced);
    return (addr);
}

void osl_mfree(osl_t *osh, void *addr, uint size)
{
    if (osh) {
        ASSERT(osh->magic == OS_HANDLE_MAGIC);
        atomic_sub(size, &osh->malloced);
    }
    kfree(addr);
}

uint osl_malloced(osl_t *osh)
{
    ASSERT((osh && (osh->magic == OS_HANDLE_MAGIC)));
    return (atomic_read(&osh->malloced));
}
/**
 * kernfs_activate - activate a node which started deactivated
 * @kn: kernfs_node whose subtree is to be activated
 *
 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
 * needs to be explicitly activated.  A node which hasn't been activated
 * isn't visible to userland and deactivation is skipped during its
 * removal.  This is useful to construct atomic init sequences where
 * creation of multiple nodes should either succeed or fail atomically.
 *
 * The caller is responsible for ensuring that this function is not called
 * after kernfs_remove*() is invoked on @kn.
 */
void kernfs_activate(struct kernfs_node *kn)
{
    struct kernfs_node *pos;

    mutex_lock(&kernfs_mutex);

    pos = NULL;
    while ((pos = kernfs_next_descendant_post(pos, kn))) {
        if (!pos || (pos->flags & KERNFS_ACTIVATED))
            continue;

        WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
        WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);

        atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
        pos->flags |= KERNFS_ACTIVATED;
    }

    mutex_unlock(&kernfs_mutex);
}
/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from the message queue ID array, and cleans
 * up all the messages associated with this queue.
 *
 * msg_ids.sem and the spinlock for this message queue are held
 * before freeque() is called.  msg_ids.sem remains locked on exit.
 */
static void freeque(struct msg_queue *msq, int id)
{
    struct list_head *tmp;

    expunge_all(msq, -EIDRM);
    ss_wakeup(&msq->q_senders, 1);
    msq = msg_rmid(id);
    msg_unlock(msq);

    tmp = msq->q_messages.next;
    while (tmp != &msq->q_messages) {
        struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);

        tmp = tmp->next;
        atomic_dec(&msg_hdrs);
        free_msg(msg);
    }
    atomic_sub(msq->q_cbytes, &msg_bytes);
    security_msg_queue_free(msq);
    ipc_rcu_free(msq, sizeof(struct msg_queue));
}
void *amiga_chip_alloc_res(unsigned long size, struct resource *res)
{
    int error;

    /* round up */
    size = PAGE_ALIGN(size);

    pr_debug("amiga_chip_alloc_res: allocate %lu bytes\n", size);
    error = allocate_resource(&chipram_res, res, size, 0, UINT_MAX,
                              PAGE_SIZE, NULL, NULL);
    if (error < 0) {
        pr_err("amiga_chip_alloc_res: allocate_resource() failed %d!\n",
               error);
        return NULL;
    }

    atomic_sub(size, &chipavail);
    pr_debug("amiga_chip_alloc_res: returning %pR\n", res);
    return (void *)ZTWO_VADDR(res->start);
}
static void kcm_rfree(struct sk_buff *skb)
{
    struct sock *sk = skb->sk;
    struct kcm_sock *kcm = kcm_sk(sk);
    struct kcm_mux *mux = kcm->mux;
    unsigned int len = skb->truesize;

    sk_mem_uncharge(sk, len);
    atomic_sub(len, &sk->sk_rmem_alloc);

    /* For reading rx_wait and rx_psock without holding lock */
    smp_mb__after_atomic();

    if (!kcm->rx_wait && !kcm->rx_psock &&
        sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
        spin_lock_bh(&mux->rx_lock);
        kcm_rcv_ready(kcm);
        spin_unlock_bh(&mux->rx_lock);
    }
}
int det_poll_cq(struct det_cq * const cq,
                __u32 * const num_wc,
                struct det_wc * const wc_array)
{
    u32 i, n, count;
    struct det_wq *wq;

    cq_lock_bh(&cq->lock);
    n = min(*num_wc, (__u32)atomic_read(&cq->depth));
    if (n) {
        det_kcopy_cqes(cq, wc_array, n);
        cq->head = (cq->head + n) % cq->attr.size;
        atomic_sub(n, &cq->depth);
    }
    cq_unlock_bh(&cq->lock);

    /*
     * Retire WQEs for polled CQEs.  Coalesce reap counts
     * from back-to-back completions on the same work queue.
     * This reduces the number of calls to det_retire_wqes.
     */
    if (n) {
        wq = (struct det_wq *)(unsigned long)wc_array[0].reserved;
        count = wc_array[0].reap_cnt;
        for (i = 1; i < n; i++) {
            if (wq == (struct det_wq *)(unsigned long)wc_array[i].reserved) {
                count += wc_array[i].reap_cnt;
            } else {
                det_retire_wqes(wq, count);
                wq = (struct det_wq *)(unsigned long)wc_array[i].reserved;
                count = wc_array[i].reap_cnt;
            }
        }
        det_retire_wqes(wq, count);
    }

    *num_wc = n;
    return (n) ? 0 : -EAGAIN;
}
static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
                                      struct rdma_conn_param *conn_param,
                                      struct rds_ib_connect_private *dp,
                                      u32 protocol_version,
                                      u32 max_responder_resources,
                                      u32 max_initiator_depth)
{
    struct rds_ib_connection *ic = conn->c_transport_data;
    struct rds_ib_device *rds_ibdev = ic->rds_ibdev;

    memset(conn_param, 0, sizeof(struct rdma_conn_param));

    conn_param->responder_resources =
        min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
    conn_param->initiator_depth =
        min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
    conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
    conn_param->rnr_retry_count = 7;

    if (dp) {
        memset(dp, 0, sizeof(*dp));
        dp->dp_saddr = conn->c_laddr;
        dp->dp_daddr = conn->c_faddr;
        dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
        dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
        dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
        dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic));

        /* Advertise flow control */
        if (ic->i_flowctl) {
            unsigned int credits;

            credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
            dp->dp_credit = cpu_to_be32(credits);
            atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
        }

        conn_param->private_data = dp;
        conn_param->private_data_len = sizeof(*dp);
    }
}
int det_destroy_cq(struct det_cq * const cq)
{
    struct det_device *detdev = cq->detdev;

    assert(atomic_read(&cq->refcnt) == 1);

    atomic_dec(&cq->nic->refcnt);
    det_free_wc_array(cq->wc_array);
    atomic_sub(cq->page_cnt, &det_page_count);
    det_remove_events(cq->event, cq);

    write_lock(&detdev->lock);
    list_del(&cq->entry);
    detdev->cq_cnt--;
    write_unlock(&detdev->lock);

    return 0;
}
/**
 * @return true if relaxed (flow.status == DOWN)
 */
bool FlowControllerImpl::AddUp(Flow &flow, int size)
{
    if (atomic_read(&flow.curt_quantity) + size < 0) {
        // integer overflow
        flow.status = UP;
    } else {
        int curt = atomic_add_return(size, &flow.curt_quantity);
        if (curt < 0) {
            atomic_sub(size, &flow.curt_quantity);
            flow.status = UP;  // integer overflow
        }
        if (curt / cal_interval_second_ >= atomic_read(&flow.upper_bound))
            flow.status = UP;
    }
    // curt / spend
    return flow.status == DOWN;
}
/*
 * Advance the clean counter.  When the clean period has expired,
 * clean an entry.
 *
 * This is implemented in atomics to avoid locking.  Because multiple
 * variables are involved, it can be racy which can lead to slightly
 * inaccurate information.  Since this is only a heuristic, this is
 * OK.  Any inaccuracies will clean themselves out as the counter
 * advances.  That said, it is unlikely the entry clean operation will
 * race - the next possible racer will not start until the next clean
 * period.
 *
 * The clean counter is implemented as a decrement to zero.  When zero
 * is reached an entry is cleaned.
 */
static void wss_advance_clean_counter(void)
{
    int entry;
    int weight;
    unsigned long bits;

    /* become the cleaner if we decrement the counter to zero */
    if (atomic_dec_and_test(&wss.clean_counter)) {
        /*
         * Set, not add, the clean period.  This avoids an issue
         * where the counter could decrement below the clean period.
         * Doing a set can result in lost decrements, slowing the
         * clean advance.  Since this is a heuristic, this possible
         * slowdown is OK.
         *
         * An alternative is to loop, advancing the counter by a
         * clean period until the result is > 0.  However, this could
         * lead to several threads keeping another in the clean loop.
         * This could be mitigated by limiting the number of times
         * we stay in the loop.
         */
        atomic_set(&wss.clean_counter, wss_clean_period);

        /*
         * Uniquely grab the entry to clean and move to next.
         * The current entry is always the lower bits of
         * wss.clean_entry.  The table size, wss.num_entries,
         * is always a power-of-2.
         */
        entry = (atomic_inc_return(&wss.clean_entry) - 1) &
                (wss.num_entries - 1);

        /* clear the entry and count the bits */
        bits = xchg(&wss.entries[entry], 0);
        weight = hweight64((u64)bits);
        /* only adjust the contended total count if needed */
        if (weight)
            atomic_sub(weight, &wss.total_count);
    }
}
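/*
 * A minimal user-space sketch (not the driver's API) of the decrement-to-zero
 * clean-period heuristic described above.  C11 <stdatomic.h> stands in for the
 * kernel's atomic_t; the names and constants below are illustrative only.
 */
#include <stdatomic.h>
#include <stdint.h>

#define NUM_ENTRIES  64      /* must be a power of two, like wss.num_entries */
#define CLEAN_PERIOD 256     /* plays the role of wss_clean_period */

static atomic_int clean_counter = CLEAN_PERIOD;
static atomic_uint clean_entry;
static _Atomic uint64_t entries[NUM_ENTRIES];
static atomic_int total_count;

static void advance_clean_counter(void)
{
    /* become the cleaner when the counter reaches zero */
    if (atomic_fetch_sub(&clean_counter, 1) == 1) {
        /* set, not add, so lost decrements only slow the advance */
        atomic_store(&clean_counter, CLEAN_PERIOD);

        /* uniquely grab the next entry; the table size is a power of two */
        unsigned int e = atomic_fetch_add(&clean_entry, 1) & (NUM_ENTRIES - 1);

        /* clear the entry and subtract its bit count from the total */
        uint64_t bits = atomic_exchange(&entries[e], 0);
        int weight = __builtin_popcountll(bits);

        if (weight)
            atomic_fetch_sub(&total_count, weight);
    }
}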
/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from message queue ID IDR, and cleans up all the
 * messages associated with this queue.
 *
 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
 * before freeque() is called.  msg_ids.rw_mutex remains locked on exit.
 */
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
    struct list_head *tmp;
    struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

    expunge_all(msq, -EIDRM);
    ss_wakeup(&msq->q_senders, 1);
    msg_rmid(ns, msq);
    msg_unlock(msq);

    tmp = msq->q_messages.next;
    while (tmp != &msq->q_messages) {
        struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);

        tmp = tmp->next;
        atomic_dec(&ns->msg_hdrs);
        free_msg(msg);
    }
    atomic_sub(msq->q_cbytes, &ns->msg_bytes);
    security_msg_queue_free(msq);
    ipc_rcu_putref(msq);
}
FlowStatus FlowControllerImpl::CalCurrentFlow(Flow &flow)
{
    // Step 1: read the time and the quantity as close to atomically as possible.
    int test_v1 = 0;
    int test_v2 = 0;
    timeval val;
    do {
        test_v1 = atomic_read(&flow.curt_quantity);
        gettimeofday(&val, NULL);
        test_v2 = atomic_read(&flow.curt_quantity);
    } while (test_v1 != test_v2 && false);  // close enough; never retries
    int quantity = test_v1 + (test_v2 - test_v1) / 2;
    // End of the "atomic" read.

    int now = (val.tv_sec % 100000) * 1000 + val.tv_usec / 1000;  // 32-bit, so mod 100000
    int last_cal_time = atomic_read(&flow.last_cal_time);
    int spend = last_cal_time > 0 ? now - last_cal_time : cal_interval_second_ * 1000;
    int last_per_second = static_cast<int>(spend < 1000.00001 ?
        quantity : static_cast<double>(quantity) / spend * 1000);

    // store the last flow rate for query
    atomic_set(&flow.last_per_second, last_per_second);
    // last update time
    atomic_set(&flow.last_cal_time, now);
    // reset quantity
    atomic_sub(quantity, &flow.curt_quantity);

    if (last_per_second >= atomic_read(&flow.upper_bound))
        flow.status = UP;
    else if (flow.status == UP && last_per_second >= atomic_read(&flow.lower_bound))
        flow.status = KEEP;
    else if (last_per_second < atomic_read(&flow.lower_bound))
        flow.status = DOWN;
    return flow.status;
}
/*
 * Signal to userspace that an interrupt has occurred.
 */
static ssize_t irq_proc_read(struct file *filp, char __user *bufp,
                             size_t len, loff_t *ppos)
{
    struct irq_proc *ip = (struct irq_proc *)filp->private_data;
    irq_desc_t *idp = irq_desc + ip->irq;
    int pending;

    DEFINE_WAIT(wait);

    if (len < sizeof(int))
        return -EINVAL;

    pending = atomic_read(&ip->count);
    if (pending == 0) {
        if (idp->status & IRQ_DISABLED)
            enable_irq(ip->irq);
        if (filp->f_flags & O_NONBLOCK)
            return -EWOULDBLOCK;
    }

    while (pending == 0) {
        prepare_to_wait(&ip->q, &wait, TASK_INTERRUPTIBLE);
        pending = atomic_read(&ip->count);
        if (pending == 0)
            schedule();
        finish_wait(&ip->q, &wait);
        if (signal_pending(current))
            return -ERESTARTSYS;
    }

    if (copy_to_user(bufp, &pending, sizeof pending))
        return -EFAULT;

    *ppos += sizeof pending;
    atomic_sub(pending, &ip->count);
    return sizeof pending;
}
void __quadd_task_sched_out(struct task_struct *prev,
                            struct task_struct *next)
{
    int n;
    struct pt_regs *user_regs;
    struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
    struct quadd_ctx *ctx = hrt.quadd_ctx;
    /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */

    if (likely(!hrt.active))
        return;
    /*
    if (__ratelimit(&ratelimit_state))
        pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
                smp_processor_id(), (unsigned int)prev->pid,
                (unsigned int)prev->tgid, (unsigned int)next->pid,
                (unsigned int)next->tgid);
    */

    if (is_profile_process(prev)) {
        user_regs = task_pt_regs(prev);
        if (user_regs)
            read_all_sources(user_regs, prev);

        n = remove_active_thread(cpu_ctx, prev->pid);
        atomic_sub(n, &cpu_ctx->nr_active);

        if (n && atomic_read(&cpu_ctx->nr_active) == 0) {
            cancel_hrtimer(cpu_ctx);
            atomic_dec(&hrt.nr_active_all_core);

            if (ctx->pmu)
                ctx->pmu->stop();
        }
        put_sched_sample(prev, 0);
    }
}
/*
 * Special call, doesn't claim any locks.  This is only to be called
 * at panic or halt time, in run-to-completion mode, when the caller
 * is the only CPU and the only thing that will be going is these IPMI
 * calls.
 */
static void panic_halt_ipmi_set_timeout(void)
{
    int send_heartbeat_now;
    int rv;

    /* Wait for the messages to be free. */
    while (atomic_read(&panic_done_count) != 0)
        ipmi_poll_interface(watchdog_user);
    atomic_add(2, &panic_done_count);
    rv = i_ipmi_set_timeout(&panic_halt_smi_msg,
                            &panic_halt_recv_msg,
                            &send_heartbeat_now);
    if (rv) {
        atomic_sub(2, &panic_done_count);
        printk(KERN_WARNING PFX
               "Unable to extend the watchdog timeout.");
    } else {
        if (send_heartbeat_now)
            panic_halt_ipmi_heartbeat();
    }
    while (atomic_read(&panic_done_count) != 0)
        ipmi_poll_interface(watchdog_user);
}
static void raw_unlock_tx(cronyx_binder_item_t *h, unsigned long flags)
{
    raw_t *p = h->sw;
    struct sk_buff *skb;
    int flip;

    if ((flip = atomic_read(&p->tx_done)) != 0)
        for (;;) {
            skb = skb_dequeue(&p->txdata);
            if (!skb) {
                atomic_set(&p->tx_done, 1);
                break;
            }
            if (!h->dispatch.transmit(h, skb)) {
                atomic_sub(flip, &p->tx_done);
                skb_queue_head(&p->txdata, skb);
                break;
            }
            dev_kfree_skb_any(skb);
        }
    spin_unlock_irqrestore(&p->tx_queue_lock, flags);
}
/**
 * zfcp_qdio_send - set PCI flag in first SBALE and send req to QDIO
 * @qdio: pointer to struct zfcp_qdio
 * @q_req: pointer to struct zfcp_queue_req
 * Returns: 0 on success, error otherwise
 */
int zfcp_qdio_send(struct zfcp_qdio *qdio, struct zfcp_queue_req *q_req)
{
    struct zfcp_qdio_queue *req_q = &qdio->req_q;
    int first = q_req->sbal_first;
    int count = q_req->sbal_number;
    int retval;
    unsigned int qdio_flags = QDIO_FLAG_SYNC_OUTPUT;

    zfcp_qdio_account(qdio);

    retval = do_QDIO(qdio->adapter->ccw_device, qdio_flags, 0, first, count);
    if (unlikely(retval)) {
        zfcp_qdio_zero_sbals(req_q->sbal, first, count);
        return retval;
    }

    /* account for transferred buffers */
    atomic_sub(count, &req_q->count);
    req_q->first += count;
    req_q->first %= QDIO_MAX_BUFFERS_PER_Q;
    return 0;
}
/*
 * This function assumes that the caller will free the mr->pages array.
 */
int det_dereg_mr(struct det_mr * const mr)
{
    struct det_device *detdev = mr->base.detdev;
    int i;

    if (atomic_read(&mr->windows))
        return -EBUSY;

    /* Remove the key from the map. */
    wiremap_write_lock_bh();
    idr_remove(&det_wire_map, mr->attr.l_key);
    wiremap_write_unlock_bh();

    /* Release an MR reference. */
    if (atomic_dec_and_test(&mr->base.refcnt))
        complete(&mr->base.done);

    /* Wait for all MR references to go away. */
    det_user_unlock();
    wait_for_completion(&mr->base.done);
    det_user_lock();

    atomic_sub(mr->page_cnt, &det_page_count);

    /* Drop a reference on the pages. */
    for (i = 0; i < mr->page_cnt; i++)
        put_page(mr->pages[i]);

    atomic_dec(&mr->attr.base.pd->refcnt);

    write_lock(&detdev->lock);
    list_del(&mr->base.entry);
    detdev->mr_cnt--;
    write_unlock(&detdev->lock);

    return 0;
}
/**
 * queued_read_lock_slowpath - acquire read lock of a queue rwlock
 * @lock: Pointer to queue rwlock structure
 * @cnts: Current qrwlock lock value
 */
void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
{
    /*
     * Readers come here when they cannot get the lock without waiting
     */
    if (unlikely(in_interrupt())) {
        /*
         * Readers in interrupt context will get the lock immediately
         * if the writer is just waiting (not holding the lock yet).
         * The rspin_until_writer_unlock() function returns immediately
         * in this case.  Otherwise, they will spin (with ACQUIRE
         * semantics) until the lock is available without waiting in
         * the queue.
         */
        rspin_until_writer_unlock(lock, cnts);
        return;
    }
    atomic_sub(_QR_BIAS, &lock->cnts);

    /*
     * Put the reader into the wait queue
     */
    arch_spin_lock(&lock->lock);

    /*
     * The ACQUIRE semantics of the following spinning code ensure
     * that accesses can't leak upwards out of our subsequent critical
     * section in the case that the lock is currently held for write.
     */
    cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
    rspin_until_writer_unlock(lock, cnts);

    /*
     * Signal the next one in queue to become queue head
     */
    arch_spin_unlock(&lock->lock);
}
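/*
 * User-space sketch of the reader-bias hand-off that the slow path above
 * undoes with atomic_sub(_QR_BIAS, ...): the fast path optimistically adds
 * the bias and gives it back when a writer is seen.  The constants, the
 * qrw_cnts variable and read_trylock_sketch() are illustrative assumptions,
 * not the kernel's qrwlock definitions.
 */
#include <stdatomic.h>

#define QR_BIAS  (1u << 9)   /* one reader */
#define QW_MASK  0x1ffu      /* writer waiting or holding the lock */

static atomic_uint qrw_cnts;

static int read_trylock_sketch(void)
{
    unsigned int cnts = atomic_fetch_add(&qrw_cnts, QR_BIAS) + QR_BIAS;

    if (!(cnts & QW_MASK))
        return 1;                         /* no writer: read lock acquired */

    atomic_fetch_sub(&qrw_cnts, QR_BIAS); /* back off; caller takes the slow path */
    return 0;
}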
void mm_physpage_free(int page)
{
    mm_phys_page *p;
    int flags;

    p = &(mm_phys_pages.pages[page]);
    if (atomic_sub(&(p->refcount), 1) > 0)
        return;

    flags = int_disable();
    spinlock_grab(&mm_phys_pages_lock);

    p->next = mm_phys_pages.head;
    mm_phys_pages.head = p;
    mm_phys_pages.free_pagecount++;
    p->type = PHYSPAGE_FREE;
    p->refcount = 0;

    spinlock_release(&mm_phys_pages_lock);
    int_enable(flags);
}
bool should_fail(struct fault_attr *attr, ssize_t size)
{
    /* No need to check any other properties if the probability is 0 */
    if (attr->probability == 0)
        return false;

    if (attr->task_filter && !fail_task(attr, current))
        return false;

    if (atomic_read(&attr->times) == 0)
        return false;

    if (atomic_read(&attr->space) > size) {
        atomic_sub(size, &attr->space);
        return false;
    }

    if (attr->interval > 1) {
        attr->count++;
        if (attr->count % attr->interval)
            return false;
    }

    if (attr->probability <= prandom_u32() % 100)
        return false;

    if (!fail_stacktrace(attr))
        return false;

    fail_dump(attr);

    if (atomic_read(&attr->times) != -1)
        atomic_dec_not_zero(&attr->times);

    return true;
}
/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
    struct sock *sk = skb->sk;

    atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}
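/*
 * sock_rfree() is only the uncharge half of a pair: the receive path charges
 * skb->truesize to sk_rmem_alloc when the buffer is queued, and kfree_skb()
 * runs the destructor to give it back.  A hedged sketch of that pairing,
 * assuming the usual skb_set_owner_r() helper; the two functions below are
 * illustrative, not taken from the original source.
 */
static void queue_to_socket(struct sock *sk, struct sk_buff *skb)
{
    skb_set_owner_r(skb, sk);                 /* charges truesize, sets destructor */
    skb_queue_tail(&sk->sk_receive_queue, skb);
}

static void consume_from_socket(struct sock *sk)
{
    struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);

    if (skb)
        kfree_skb(skb);                       /* destructor -> sock_rfree() */
}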
static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb)
{
    struct mpoa_client *mpc = find_mpc_by_vcc(vcc);
    struct k_message *mesg = (struct k_message *)skb->data;

    atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);

    if (mpc == NULL) {
        printk("mpoa: msg_from_mpoad: no mpc found\n");
        return 0;
    }
    dprintk("mpoa: (%s) msg_from_mpoad:",
            (mpc->dev) ? mpc->dev->name : "<unknown>");
    switch (mesg->type) {
    case MPOA_RES_REPLY_RCVD:
        dprintk(" mpoa_res_reply_rcvd\n");
        MPOA_res_reply_rcvd(mesg, mpc);
        break;
    case MPOA_TRIGGER_RCVD:
        dprintk(" mpoa_trigger_rcvd\n");
        MPOA_trigger_rcvd(mesg, mpc);
        break;
    case INGRESS_PURGE_RCVD:
        dprintk(" nhrp_purge_rcvd\n");
        ingress_purge_rcvd(mesg, mpc);
        break;
    case EGRESS_PURGE_RCVD:
        dprintk(" egress_purge_reply_rcvd\n");
        egress_purge_rcvd(mesg, mpc);
        break;
    case MPS_DEATH:
        dprintk(" mps_death\n");
        mps_death(mesg, mpc);
        break;
    case CACHE_IMPOS_RCVD:
        dprintk(" cache_impos_rcvd\n");
        MPOA_cache_impos_rcvd(mesg, mpc);
        break;
    case SET_MPC_CTRL_ADDR:
        dprintk(" set_mpc_ctrl_addr\n");
        set_mpc_ctrl_addr_rcvd(mesg, mpc);
        break;
    case SET_MPS_MAC_ADDR:
        dprintk(" set_mps_mac_addr\n");
        set_mps_mac_addr_rcvd(mesg, mpc);
        break;
    case CLEAN_UP_AND_EXIT:
        dprintk(" clean_up_and_exit\n");
        clean_up(mesg, mpc, DIE);
        break;
    case RELOAD:
        dprintk(" reload\n");
        clean_up(mesg, mpc, RELOAD);
        break;
    case SET_MPC_PARAMS:
        dprintk(" set_mpc_params\n");
        mpc->parameters = mesg->content.params;
        break;
    default:
        dprintk(" unknown message %d\n", mesg->type);
        break;
    }
    kfree_skb(skb);
    return 0;
}
/***
 * rt_socket_common_ioctl
 */
int rt_socket_common_ioctl(struct rtdm_dev_context *context, int call_flags,
                           int request, void *arg)
{
    struct rtsocket *sock = (struct rtsocket *)&context->dev_private;
    int ret = 0;
    struct rtnet_callback *callback = arg;
    unsigned int rtskbs;
    unsigned long flags;

    switch (request) {
    case RTNET_RTIOC_PRIORITY:
        sock->priority = *(unsigned int *)arg;
        break;

    case RTNET_RTIOC_TIMEOUT:
        rtos_spin_lock_irqsave(&sock->param_lock, flags);
        rtos_nanosecs_to_time(*(nanosecs_t *)arg, &sock->timeout);
        rtos_spin_unlock_irqrestore(&sock->param_lock, flags);
        break;

    case RTNET_RTIOC_CALLBACK:
        if (test_bit(RTDM_USER_MODE_CALL, &context->context_flags))
            return -EACCES;

        rtos_spin_lock_irqsave(&sock->param_lock, flags);
        sock->callback_func = callback->func;
        sock->callback_arg = callback->arg;
        rtos_spin_unlock_irqrestore(&sock->param_lock, flags);
        break;

    case RTNET_RTIOC_NONBLOCK:
        if (*(unsigned int *)arg != 0)
            set_bit(RT_SOCK_NONBLOCK, &context->context_flags);
        else
            clear_bit(RT_SOCK_NONBLOCK, &context->context_flags);
        break;

    case RTNET_RTIOC_EXTPOOL:
        rtskbs = *(unsigned int *)arg;

        rtos_spin_lock_irqsave(&sock->param_lock, flags);
        if (test_bit(SKB_POOL_CLOSED, &context->context_flags)) {
            rtos_spin_unlock_irqrestore(&sock->param_lock, flags);
            return -EBADF;
        }
        atomic_add(rtskbs, &sock->pool_size);
        rtos_spin_unlock_irqrestore(&sock->param_lock, flags);

        if (test_bit(RTDM_CREATED_IN_NRT, &context->context_flags)) {
            if (!(call_flags & RTDM_NRT_CALL))
                return -EACCES;
            ret = rtskb_pool_extend(&sock->skb_pool, rtskbs);
        } else
            ret = rtskb_pool_extend_rt(&sock->skb_pool, rtskbs);
        atomic_sub(rtskbs - ret, &sock->pool_size);
        break;

    case RTNET_RTIOC_SHRPOOL:
        rtskbs = *(unsigned int *)arg;

        rtos_spin_lock_irqsave(&sock->param_lock, flags);
        if (test_bit(SKB_POOL_CLOSED, &context->context_flags)) {
            rtos_spin_unlock_irqrestore(&sock->param_lock, flags);
            return -EBADF;
        }
        atomic_sub(rtskbs, &sock->pool_size);
        rtos_spin_unlock_irqrestore(&sock->param_lock, flags);

        if (test_bit(RTDM_CREATED_IN_NRT, &context->context_flags)) {
            if (!(call_flags & RTDM_NRT_CALL))
                return -EACCES;
            ret = rtskb_pool_shrink(&sock->skb_pool, *(unsigned int *)arg);
        } else
            ret = rtskb_pool_shrink_rt(&sock->skb_pool, *(unsigned int *)arg);
        atomic_add(rtskbs - ret, &sock->pool_size);
        break;

    default:
        ret = -EOPNOTSUPP;
        break;
    }

    return ret;
}
/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
    kfree(mem);
    atomic_sub(size, &sk->sk_omem_alloc);
}
inline void ATOMIC_SUB(ATOMIC_T *v, int i)
{
    atomic_sub(i, v);
}

inline void atomic_dec(atomic_t *v)
{
    atomic_sub(1, v);
}
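/*
 * Illustrative only: how such a wrapper layer is typically consumed.  The
 * pending_frames counter and frame_completed() are hypothetical and not part
 * of the original driver; they merely show that ATOMIC_SUB()/atomic_dec()
 * keep the same calling convention as the primitives they forward to.
 */
static ATOMIC_T pending_frames;

static void frame_completed(void)
{
    ATOMIC_SUB(&pending_frames, 1);   /* same effect as atomic_dec(&pending_frames) */
}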
static int aio_event_thread(void *data)
{
    struct aio_threadinfo *tinfo = data;
    struct aio_output *output = tinfo->output;
    struct aio_threadinfo *other = &output->tinfo[2];
    int err = -ENOMEM;

    MARS_DBG("event thread has started.\n");
    //set_user_nice(current, -20);

    use_fake_mm();
    if (!current->mm)
        goto err;

    err = aio_start_thread(output, &output->tinfo[2], aio_sync_thread, 'y');
    if (unlikely(err < 0))
        goto err;

    while (!brick_thread_should_stop() ||
           atomic_read(&tinfo->queued_sum) > 0) {
        mm_segment_t oldfs;
        int count;
        int i;
        struct timespec timeout = {
            .tv_sec = 1,
        };
        struct io_event events[MARS_MAX_AIO_READ];

        oldfs = get_fs();
        set_fs(get_ds());
        /* TODO: don't timeout upon termination.
         * Probably we should submit a dummy request.
         */
        count = sys_io_getevents(output->ctxp, 1, MARS_MAX_AIO_READ,
                                 events, &timeout);
        set_fs(oldfs);

        if (likely(count > 0)) {
            atomic_sub(count, &output->submit_count);
        }

        for (i = 0; i < count; i++) {
            struct aio_mref_aspect *mref_a = (void *)events[i].data;
            struct mref_object *mref;
            int err = events[i].res;

            if (!mref_a) {
                continue; // this was a dummy request
            }
            mref = mref_a->object;

            MARS_IO("AIO done %p pos = %lld len = %d rw = %d\n",
                    mref, mref->ref_pos, mref->ref_len, mref->ref_rw);

            mapfree_set(output->mf, mref->ref_pos,
                        mref->ref_pos + mref->ref_len);

            if (output->brick->o_fdsync &&
                err >= 0 &&
                mref->ref_rw != READ &&
                !mref->ref_skip_sync &&
                !mref_a->resubmit++) {
                // workaround for non-implemented AIO FSYNC operation
                if (output->mf &&
                    output->mf->mf_filp &&
                    output->mf->mf_filp->f_op &&
                    !output->mf->mf_filp->f_op->aio_fsync) {
                    mars_trace(mref, "aio_fsync");
                    _enqueue(other, mref_a, mref->ref_prio, true);
                    continue;
                }
                err = aio_submit(output, mref_a, true);
                if (likely(err >= 0))
                    continue;
            }

            _complete(output, mref_a, err);
        }
    }
    err = 0;

err:
    MARS_DBG("event thread has stopped, err = %d\n", err);

    aio_stop_thread(output, 2, false);

    unuse_fake_mm();

    tinfo->terminated = true;
    wake_up_interruptible_all(&tinfo->terminate_event);
    return err;
}

#if 1
/* This should go to fs/open.c (as long as vfs_submit() is not implemented) */
#include <linux/fdtable.h>
void fd_uninstall(unsigned int fd)
{
    struct files_struct *files = current->files;
    struct fdtable *fdt;

    MARS_DBG("fd = %d\n", fd);
    if (unlikely(fd < 0)) {
        MARS_ERR("bad fd = %d\n", fd);
        return;
    }
    spin_lock(&files->file_lock);
    fdt = files_fdtable(files);
    rcu_assign_pointer(fdt->fd[fd], NULL);
    spin_unlock(&files->file_lock);
}
EXPORT_SYMBOL(fd_uninstall);
#endif

static atomic_t ioctx_count = ATOMIC_INIT(0);

static void _destroy_ioctx(struct aio_output *output)
{
    if (unlikely(!output))
        goto done;

    aio_stop_thread(output, 1, true);

    use_fake_mm();

    if (likely(output->ctxp)) {
        mm_segment_t oldfs;
        int err;

        MARS_DBG("ioctx count = %d destroying %p\n",
                 atomic_read(&ioctx_count), (void *)output->ctxp);
        oldfs = get_fs();
        set_fs(get_ds());
        err = sys_io_destroy(output->ctxp);
        set_fs(oldfs);
        atomic_dec(&ioctx_count);
        MARS_DBG("ioctx count = %d status = %d\n",
                 atomic_read(&ioctx_count), err);
        output->ctxp = 0;
    }

    if (likely(output->fd >= 0)) {
        MARS_DBG("destroying fd %d\n", output->fd);
        fd_uninstall(output->fd);
        put_unused_fd(output->fd);
        output->fd = -1;
    }

done:
    if (likely(current->mm)) {
        unuse_fake_mm();
    }
}

static int _create_ioctx(struct aio_output *output)
{
    struct file *file;
    mm_segment_t oldfs;
    int err = -EINVAL;

    CHECK_PTR_NULL(output, done);
    CHECK_PTR_NULL(output->mf, done);
    file = output->mf->mf_filp;
    CHECK_PTR_NULL(file, done);

    /* TODO: this is provisional.  We only need it for sys_io_submit()
     * which uses userspace concepts like file handles.
     * This should be accompanied by a future kernelspace vfs_submit() or
     * do_submit() which currently does not exist :(
     */
    err = get_unused_fd();
    MARS_DBG("file %p '%s' new fd = %d\n", file, output->mf->mf_name, err);
    if (unlikely(err < 0)) {
        MARS_ERR("cannot get fd, err=%d\n", err);
        goto done;
    }
    output->fd = err;
    fd_install(err, file);

    use_fake_mm();

    err = -ENOMEM;
    if (unlikely(!current->mm)) {
        MARS_ERR("cannot fake mm\n");
        goto done;
    }

    MARS_DBG("ioctx count = %d old = %p\n",
             atomic_read(&ioctx_count), (void *)output->ctxp);
    output->ctxp = 0;

    oldfs = get_fs();
    set_fs(get_ds());
    err = sys_io_setup(MARS_MAX_AIO, &output->ctxp);
    set_fs(oldfs);

    if (likely(output->ctxp))
        atomic_inc(&ioctx_count);

    MARS_DBG("ioctx count = %d new = %p status = %d\n",
             atomic_read(&ioctx_count), (void *)output->ctxp, err);
    if (unlikely(err < 0)) {
        MARS_ERR("io_setup failed, err=%d\n", err);
        goto done;
    }

    err = aio_start_thread(output, &output->tinfo[1], aio_event_thread, 'e');
    if (unlikely(err < 0)) {
        MARS_ERR("could not start event thread\n");
        goto done;
    }

done:
    if (likely(current->mm)) {
        unuse_fake_mm();
    }
    return err;
}

static int aio_submit_thread(void *data)
{
    struct aio_threadinfo *tinfo = data;
    struct aio_output *output = tinfo->output;
    struct file *file;
    int err = -EINVAL;

    MARS_DBG("submit thread has started.\n");

    file = output->mf->mf_filp;

    use_fake_mm();

    while (!brick_thread_should_stop() ||
           atomic_read(&output->read_count) +
           atomic_read(&output->write_count) +
           atomic_read(&tinfo->queued_sum) > 0) {
        struct aio_mref_aspect *mref_a;
        struct mref_object *mref;
        int sleeptime;
        int status;

        wait_event_interruptible_timeout(
            tinfo->event,
            atomic_read(&tinfo->queued_sum) > 0,
            HZ / 4);

        mref_a = _dequeue(tinfo);
        if (!mref_a) {
            continue;
        }

        mref = mref_a->object;
        status = -EINVAL;
        CHECK_PTR(mref, error);

        mapfree_set(output->mf, mref->ref_pos, -1);

        if (mref->ref_rw) {
            insert_dirty(output, mref_a);
        }

        // check for reads exactly at EOF (special case)
        if (mref->ref_pos == mref->ref_total_size &&
            !mref->ref_rw &&
            mref->ref_timeout > 0) {
            loff_t total_size = i_size_read(file->f_mapping->host);
            loff_t len = total_size - mref->ref_pos;

            if (len > 0) {
                mref->ref_total_size = total_size;
                mref->ref_len = len;
            } else {
                if (!mref_a->start_jiffies) {
                    mref_a->start_jiffies = jiffies;
                }
                if ((long long)jiffies - mref_a->start_jiffies <= mref->ref_timeout) {
                    if (atomic_read(&tinfo->queued_sum) <= 0) {
                        atomic_inc(&output->total_msleep_count);
                        brick_msleep(1000 * 4 / HZ);
                    }
                    _enqueue(tinfo, mref_a, MARS_PRIO_LOW, true);
                    continue;
                }
                MARS_DBG("ENODATA %lld\n", len);
                _complete(output, mref_a, -ENODATA);
                continue;
            }
        }

        sleeptime = 1;
        for (;;) {
            status = aio_submit(output, mref_a, false);
            if (likely(status != -EAGAIN)) {
                break;
            }
            atomic_inc(&output->total_delay_count);
            brick_msleep(sleeptime);
            if (sleeptime < 100) {
                sleeptime++;
            }
        }

    error:
        if (unlikely(status < 0)) {
            MARS_IO("submit_count = %d status = %d\n",
                    atomic_read(&output->submit_count), status);
            _complete_mref(output, mref, status);
        }
    }

    MARS_DBG("submit thread has stopped, status = %d.\n", err);

    if (likely(current->mm)) {
        unuse_fake_mm();
    }

    tinfo->terminated = true;
    wake_up_interruptible_all(&tinfo->terminate_event);
    return err;
}

static int aio_get_info(struct aio_output *output, struct mars_info *info)
{
    struct file *file;
    loff_t min;
    loff_t max;

    if (unlikely(!output ||
                 !output->mf ||
                 !(file = output->mf->mf_filp) ||
                 !file->f_mapping ||
                 !file->f_mapping->host))
        return -EINVAL;

    info->tf_align = 1;
    info->tf_min_size = 1;

    /* Workaround for races in the page cache.
     *
     * It appears that concurrent reads and writes seem to
     * result in inconsistent reads in some very rare cases, due to
     * races.  Sometimes, the inode claims that the file has already been
     * appended by a write operation, but the data has not actually hit
     * the page cache, such that a concurrent read gets NULL blocks.
     */
    min = i_size_read(file->f_mapping->host);
    max = 0;

    if (!output->brick->is_static_device) {
        get_dirty(output, &min, &max);
    }

    info->current_size = min;
    MARS_DBG("determined file size = %lld\n", info->current_size);
    return 0;
}

//////////////// informational / statistics ///////////////

static noinline char *aio_statistics(struct aio_brick *brick, int verbose)
{
    struct aio_output *output = brick->outputs[0];
    char *res = brick_string_alloc(4096);
    char *sync = NULL;
    int pos = 0;

    if (!res)
        return NULL;

    pos += report_timing(&timings[0], res + pos, 4096 - pos);
    pos += report_timing(&timings[1], res + pos, 4096 - pos);
    pos += report_timing(&timings[2], res + pos, 4096 - pos);

    snprintf(res + pos, 4096 - pos,
             "total "
             "reads = %d "
             "writes = %d "
             "allocs = %d "
             "submits = %d "
             "again = %d "
             "delays = %d "
             "msleeps = %d "
             "fdsyncs = %d "
             "fdsync_waits = %d "
             "map_free = %d | "
             "flying reads = %d "
             "writes = %d "
             "allocs = %d "
             "submits = %d "
             "q0 = %d "
             "q1 = %d "
             "q2 = %d "
             "| total "
             "q0 = %d "
             "q1 = %d "
             "q2 = %d "
             "%s\n",
             atomic_read(&output->total_read_count),
             atomic_read(&output->total_write_count),
             atomic_read(&output->total_alloc_count),
             atomic_read(&output->total_submit_count),
             atomic_read(&output->total_again_count),
             atomic_read(&output->total_delay_count),
             atomic_read(&output->total_msleep_count),
             atomic_read(&output->total_fdsync_count),
             atomic_read(&output->total_fdsync_wait_count),
             atomic_read(&output->total_mapfree_count),
             atomic_read(&output->read_count),
             atomic_read(&output->write_count),
             atomic_read(&output->alloc_count),
             atomic_read(&output->submit_count),
             atomic_read(&output->tinfo[0].queued_sum),
             atomic_read(&output->tinfo[1].queued_sum),
             atomic_read(&output->tinfo[2].queued_sum),
             atomic_read(&output->tinfo[0].total_enqueue_count),
             atomic_read(&output->tinfo[1].total_enqueue_count),
             atomic_read(&output->tinfo[2].total_enqueue_count),
             sync ? sync : "");

    if (sync)
        brick_string_free(sync);

    return res;
}

static noinline void aio_reset_statistics(struct aio_brick *brick)
{
    struct aio_output *output = brick->outputs[0];
    int i;

    atomic_set(&output->total_read_count, 0);
    atomic_set(&output->total_write_count, 0);
    atomic_set(&output->total_alloc_count, 0);
    atomic_set(&output->total_submit_count, 0);
    atomic_set(&output->total_again_count, 0);
    atomic_set(&output->total_delay_count, 0);
    atomic_set(&output->total_msleep_count, 0);
    atomic_set(&output->total_fdsync_count, 0);
    atomic_set(&output->total_fdsync_wait_count, 0);
    atomic_set(&output->total_mapfree_count, 0);
    for (i = 0; i < 3; i++) {
        struct aio_threadinfo *tinfo = &output->tinfo[i];

        atomic_set(&tinfo->total_enqueue_count, 0);
    }
}

//////////////// object / aspect constructors / destructors ///////////////

static int aio_mref_aspect_init_fn(struct generic_aspect *_ini)
{
    struct aio_mref_aspect *ini = (void *)_ini;

    INIT_LIST_HEAD(&ini->io_head);
    INIT_LIST_HEAD(&ini->dirty_head);
    return 0;
}

static void aio_mref_aspect_exit_fn(struct generic_aspect *_ini)
{
    struct aio_mref_aspect *ini = (void *)_ini;

    CHECK_HEAD_EMPTY(&ini->dirty_head);
    CHECK_HEAD_EMPTY(&ini->io_head);
}

MARS_MAKE_STATICS(aio);

////////////////////// brick constructors / destructors ////////////////////

static int aio_brick_construct(struct aio_brick *brick)
{
    return 0;
}

static int aio_switch(struct aio_brick *brick)
{
    static int index;
    struct aio_output *output = brick->outputs[0];
    const char *path = output->brick->brick_path;
    int flags = O_RDWR | O_LARGEFILE;
    int status = 0;

    MARS_DBG("power.button = %d\n", brick->power.button);
    if (!brick->power.button)
        goto cleanup;

    if (brick->power.led_on || output->mf)
        goto done;

    mars_power_led_off((void *)brick, false);

    if (brick->o_creat) {
        flags |= O_CREAT;
        MARS_DBG("using O_CREAT on %s\n", path);
    }
    if (brick->o_direct) {
        flags |= O_DIRECT;
        MARS_DBG("using O_DIRECT on %s\n", path);
    }

    output->mf = mapfree_get(path, flags);
    if (unlikely(!output->mf)) {
        MARS_ERR("could not open file = '%s' flags = %d\n", path, flags);
        status = -ENOENT;
        goto err;
    }

    output->index = ++index;

    status = _create_ioctx(output);
    if (unlikely(status < 0)) {
        MARS_ERR("could not create ioctx, status = %d\n", status);
        goto err;
    }

    status = aio_start_thread(output, &output->tinfo[0], aio_submit_thread, 's');
    if (unlikely(status < 0)) {
        MARS_ERR("could not start threads, status = %d\n", status);
        goto err;
    }

    MARS_DBG("opened file '%s'\n", path);
    mars_power_led_on((void *)brick, true);

done:
    return 0;

err:
    MARS_ERR("status = %d\n", status);
cleanup:
    if (brick->power.led_off) {
        goto done;
    }

    mars_power_led_on((void *)brick, false);

    aio_stop_thread(output, 0, false);

    _destroy_ioctx(output);

    mars_power_led_off((void *)brick,
                       (output->tinfo[0].thread == NULL &&
                        output->tinfo[1].thread == NULL &&
                        output->tinfo[2].thread == NULL));

    MARS_DBG("switch off led_off = %d status = %d\n",
             brick->power.led_off, status);

    if (brick->power.led_off) {
        if (output->mf) {
            MARS_DBG("closing file = '%s'\n", output->mf->mf_name);
            mapfree_put(output->mf);
            output->mf = NULL;
        }
    }
    return status;
}

static int aio_output_construct(struct aio_output *output)
{
    INIT_LIST_HEAD(&output->dirty_anchor);
    spin_lock_init(&output->dirty_lock);
    init_waitqueue_head(&output->fdsync_event);
    output->fd = -1;
    return 0;
}
long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
               size_t msgsz, long msgtyp, int msgflg)
{
    struct msg_queue *msq;
    struct msg_msg *msg;
    int mode;
    struct ipc_namespace *ns;

    if (msqid < 0 || (long) msgsz < 0)
        return -EINVAL;
    mode = convert_mode(&msgtyp, msgflg);
    ns = current->nsproxy->ipc_ns;

    msq = msg_lock_check(ns, msqid);
    if (IS_ERR(msq))
        return PTR_ERR(msq);

    for (;;) {
        struct msg_receiver msr_d;
        struct list_head *tmp;

        msg = ERR_PTR(-EACCES);
        if (ipcperms(&msq->q_perm, S_IRUGO))
            goto out_unlock;

        msg = ERR_PTR(-EAGAIN);
        tmp = msq->q_messages.next;
        while (tmp != &msq->q_messages) {
            struct msg_msg *walk_msg;

            walk_msg = list_entry(tmp, struct msg_msg, m_list);
            if (testmsg(walk_msg, msgtyp, mode) &&
                !security_msg_queue_msgrcv(msq, walk_msg, current,
                                           msgtyp, mode)) {

                msg = walk_msg;
                if (mode == SEARCH_LESSEQUAL &&
                    walk_msg->m_type != 1) {
                    msg = walk_msg;
                    msgtyp = walk_msg->m_type - 1;
                } else {
                    msg = walk_msg;
                    break;
                }
            }
            tmp = tmp->next;
        }
        if (!IS_ERR(msg)) {
            /*
             * Found a suitable message.
             * Unlink it from the queue.
             */
            if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
                msg = ERR_PTR(-E2BIG);
                goto out_unlock;
            }
            list_del(&msg->m_list);
            msq->q_qnum--;
            msq->q_rtime = get_seconds();
            msq->q_lrpid = task_tgid_vnr(current);
            msq->q_cbytes -= msg->m_ts;
            atomic_sub(msg->m_ts, &ns->msg_bytes);
            atomic_dec(&ns->msg_hdrs);
            ss_wakeup(&msq->q_senders, 0);
            msg_unlock(msq);
            break;
        }
        /* No message waiting.  Wait for a message. */
        if (msgflg & IPC_NOWAIT) {
            msg = ERR_PTR(-ENOMSG);
            goto out_unlock;
        }
        list_add_tail(&msr_d.r_list, &msq->q_receivers);
        msr_d.r_tsk = current;
        msr_d.r_msgtype = msgtyp;
        msr_d.r_mode = mode;
        if (msgflg & MSG_NOERROR)
            msr_d.r_maxsize = INT_MAX;
        else
            msr_d.r_maxsize = msgsz;
        msr_d.r_msg = ERR_PTR(-EAGAIN);
        current->state = TASK_INTERRUPTIBLE;
        msg_unlock(msq);

        schedule();

        /* Lockless receive, part 1:
         * Disable preemption.  We don't hold a reference to the queue
         * and getting a reference would defeat the idea of a lockless
         * operation, thus the code relies on rcu to guarantee the
         * existence of msq:
         * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
         * Thus if r_msg is -EAGAIN, then the queue is not yet destroyed.
         * rcu_read_lock() prevents preemption between reading r_msg
         * and the spin_lock() inside ipc_lock_by_ptr().
         */
        rcu_read_lock();

        /* Lockless receive, part 2:
         * Wait until pipelined_send or expunge_all are outside of
         * wake_up_process().  There is a race with exit(), see
         * ipc/mqueue.c for the details.
         */
        msg = (struct msg_msg *)msr_d.r_msg;
        while (msg == NULL) {
            cpu_relax();
            msg = (struct msg_msg *)msr_d.r_msg;
        }

        /* Lockless receive, part 3:
         * If there is a message or an error then accept it without
         * locking.
         */
        if (msg != ERR_PTR(-EAGAIN)) {
            rcu_read_unlock();
            break;
        }

        /* Lockless receive, part 3:
         * Acquire the queue spinlock.
         */
        ipc_lock_by_ptr(&msq->q_perm);
        rcu_read_unlock();

        /* Lockless receive, part 4:
         * Repeat test after acquiring the spinlock.
         */
        msg = (struct msg_msg *)msr_d.r_msg;
        if (msg != ERR_PTR(-EAGAIN))
            goto out_unlock;

        list_del(&msr_d.r_list);
        if (signal_pending(current)) {
            msg = ERR_PTR(-ERESTARTNOHAND);
out_unlock:
            msg_unlock(msq);
            break;
        }
    }
    if (IS_ERR(msg))
        return PTR_ERR(msg);

    msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
    *pmtype = msg->m_type;
    if (store_msg(mtext, msg, msgsz))
        msgsz = -EFAULT;

    free_msg(msg);

    return msgsz;
}
static __inline__ void frag_kfree_s(void *ptr, int len)
{
    atomic_sub(len, &ip_frag_mem);
    kfree(ptr);
}
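/*
 * Sketch of the allocation-side counterpart implied by frag_kfree_s(): charge
 * ip_frag_mem when fragment memory is allocated so that the atomic_sub()
 * above balances it.  Hedged reconstruction from context, not copied from the
 * original file.
 */
static __inline__ void *frag_kmalloc(int size, int pri)
{
    void *vp = kmalloc(size, pri);

    if (!vp)
        return NULL;
    atomic_add(size, &ip_frag_mem);
    return vp;
}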