Example #1
/* If a kernel subsystem allows eBPF programs to call this function,
 * it should return &bpf_map_lookup_elem_proto from its own
 * verifier_ops->get_func_proto() callback so that the verifier can
 * properly check the arguments.
 *
 * The map implementations all rely on RCU in their
 * lookup/update/delete methods, so eBPF programs that are allowed
 * to access maps must run under the RCU read lock; hence the
 * rcu_read_lock_held() check in all three functions.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}
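
The header comment above describes the contract: a subsystem opts its programs into this helper by returning the prototype from its get_func_proto() callback. A minimal sketch of such a callback, assuming the era in which the callback takes only the function ID (the "example" program type is invented):

/* Sketch: expose the map helpers to a hypothetical program type.
 * The bpf_map_*_proto objects are the ones declared in linux/bpf.h.
 */
static const struct bpf_func_proto *
example_get_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	default:
		return NULL;
	}
}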
Example #2
static enum htc_send_queue_result htc_try_send(struct htc_target *target,
					       struct htc_endpoint *ep,
					       struct list_head *txq)
{
	struct list_head send_queue;	/* temp queue to hold packets */
	struct htc_packet *packet, *tmp_pkt;
	struct ath6kl *ar = target->dev->ar;
	enum htc_send_full_action action;
	int tx_resources, overflow, txqueue_depth, i, good_pkts;
	u8 pipeid;

	ath6kl_dbg(ATH6KL_DBG_HTC, "%s: (queue:0x%p depth:%d)\n",
		   __func__, txq,
		   (txq == NULL) ? 0 : get_queue_depth(txq));

	/* init the local send queue */
	INIT_LIST_HEAD(&send_queue);

	/*
	 * A NULL txq means the caller didn't provide a queue and just
	 * wants us to check the endpoint queues and send what is
	 * already pending.
	 */
	if (txq != NULL) {
		if (list_empty(txq)) {
			/* empty queue */
			return HTC_SEND_QUEUE_DROP;
		}

		spin_lock_bh(&target->tx_lock);
		txqueue_depth = get_queue_depth(&ep->txq);
		spin_unlock_bh(&target->tx_lock);

		if (txqueue_depth >= ep->max_txq_depth) {
			/* we've already overflowed */
			overflow = get_queue_depth(txq);
		} else {
			/* compute how much we will overflow the TX queue by */
			overflow = txqueue_depth + get_queue_depth(txq) -
				   ep->max_txq_depth;
		}

		/* if overflow is negative or zero, we are okay */
		if (overflow > 0) {
			ath6kl_dbg(ATH6KL_DBG_HTC,
				   "%s: Endpoint %d, TX queue will overflow :%d, Tx Depth:%d, Max:%d\n",
				   __func__, ep->eid, overflow, txqueue_depth,
				   ep->max_txq_depth);
		}
		if ((overflow <= 0) ||
		    (ep->ep_cb.tx_full == NULL)) {
			/*
			 * all packets will fit or caller did not provide send
			 * full indication handler -- just move all of them
			 * to the local send_queue object
			 */
			list_splice_tail_init(txq, &send_queue);
		} else {
			good_pkts = get_queue_depth(txq) - overflow;
			if (good_pkts < 0) {
				WARN_ON_ONCE(1);
				return HTC_SEND_QUEUE_DROP;
			}

			/* we have overflowed, and a callback is provided */
			/* dequeue all non-overflow packets to the sendqueue */
			for (i = 0; i < good_pkts; i++) {
				/* pop off caller's queue */
				packet = list_first_entry(txq,
							  struct htc_packet,
							  list);
				list_del(&packet->list);
				/* insert into local queue */
				list_add_tail(&packet->list, &send_queue);
			}

			/*
			 * the caller's queue has all the packets that won't fit
			 * walk through the caller's queue and indicate each to
			 * the send full handler
			 */
			list_for_each_entry_safe(packet, tmp_pkt,
						 txq, list) {

				ath6kl_dbg(ATH6KL_DBG_HTC,
					   "%s: Indicat overflowed TX pkts: %p\n",
					   __func__, packet);
				action = ep->ep_cb.tx_full(ep->target, packet);
				if (action == HTC_SEND_FULL_DROP) {
					/* callback wants the packet dropped */
					ep->ep_st.tx_dropped += 1;

					/* leave this one in the caller's queue
					 * for cleanup */
				} else {
					/* callback wants to keep this packet,
					 * remove from caller's queue */
					list_del(&packet->list);
					/* put it in the send queue */
					list_add_tail(&packet->list,
						      &send_queue);
				}

			}

			if (list_empty(&send_queue)) {
				/* no packets made it in, caller will cleanup */
				return HTC_SEND_QUEUE_DROP;
			}
		}
	}
Example #3
static void dwc3_ep0_xfernotready(struct dwc3 *dwc,
		const struct dwc3_event_depevt *event)
{
	dwc->setup_packet_pending = true;

	/*
	 * This part is very tricky: If we have just handled
	 * XferNotReady(Setup) and we're now expecting a
	 * XferComplete but, instead, we receive another
	 * XferNotReady(Setup), we should STALL and restart
	 * the state machine.
	 *
	 * In all other cases, we just continue waiting
	 * for the XferComplete event.
	 *
	 * We are a little bit unsafe here because we're
	 * not trying to ensure that the last event was, indeed,
	 * XferNotReady(Setup).
	 *
	 * Still, we don't expect any condition where that
	 * should happen and, even if it does, it would be
	 * another error condition.
	 */
	if (dwc->ep0_next_event == DWC3_EP0_COMPLETE) {
		switch (event->status) {
		case DEPEVT_STATUS_CONTROL_SETUP:
			dev_vdbg(dwc->dev, "Unexpected XferNotReady(Setup)\n");
			dwc3_ep0_stall_and_restart(dwc);
			break;
		case DEPEVT_STATUS_CONTROL_DATA:
			/* FALLTHROUGH */
		case DEPEVT_STATUS_CONTROL_STATUS:
			/* FALLTHROUGH */
		default:
			dev_vdbg(dwc->dev, "waiting for XferComplete\n");
		}

		return;
	}

	switch (event->status) {
	case DEPEVT_STATUS_CONTROL_SETUP:
		dev_vdbg(dwc->dev, "Control Setup\n");

		dwc->ep0state = EP0_SETUP_PHASE;

		dwc3_ep0_do_control_setup(dwc, event);
		break;

	case DEPEVT_STATUS_CONTROL_DATA:
		dev_vdbg(dwc->dev, "Control Data\n");

		dwc->ep0state = EP0_DATA_PHASE;

		if (dwc->ep0_next_event != DWC3_EP0_NRDY_DATA) {
			dev_vdbg(dwc->dev, "Expected %d got %d\n",
					dwc->ep0_next_event,
					DWC3_EP0_NRDY_DATA);

			dwc3_ep0_stall_and_restart(dwc);
			return;
		}

		/*
		 * One of the possible error cases is when Host _does_
		 * request for Data Phase, but it does so on the wrong
		 * direction.
		 *
		 * Here, we already know ep0_next_event is DATA (see above),
		 * so we only need to check for direction.
		 */
		if (dwc->ep0_expect_in != event->endpoint_number) {
			dev_vdbg(dwc->dev, "Wrong direction for Data phase\n");
			dwc3_ep0_stall_and_restart(dwc);
			return;
		}

		dwc3_ep0_do_control_data(dwc, event);
		break;

	case DEPEVT_STATUS_CONTROL_STATUS:
		dev_vdbg(dwc->dev, "Control Status\n");

		dwc->ep0state = EP0_STATUS_PHASE;

		if (dwc->ep0_next_event != DWC3_EP0_NRDY_STATUS) {
			dev_vdbg(dwc->dev, "Expected %d got %d\n",
					dwc->ep0_next_event,
					DWC3_EP0_NRDY_STATUS);

			dwc3_ep0_stall_and_restart(dwc);
			return;
		}

		if (dwc->delayed_status) {
			WARN_ON_ONCE(event->endpoint_number != 1);
			dev_vdbg(dwc->dev, "Mass Storage delayed status\n");
			return;
		}

		dwc3_ep0_do_control_status(dwc, event->endpoint_number);
	}
}
Example #4
/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_ mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from.
 *
 * Description:
 *   If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
 *   backed by the @bs's mempool.
 *
 *   When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
 *   able to allocate a bio. This is due to the mempool guarantees. To make this
 *   work, callers must never allocate more than 1 bio at a time from this pool.
 *   Callers that need to allocate more than 1 bio must always submit the
 *   previously allocated bio for IO before attempting to allocate a new one.
 *   Failure to do so can cause deadlocks under memory pressure.
 *
 *   Note that when running under generic_make_request() (i.e. any block
 *   driver), bios are not submitted until after you return - see the code in
 *   generic_make_request() that converts recursion into iteration, to prevent
 *   stack overflows.
 *
 *   This would normally mean allocating multiple bios under
 *   generic_make_request() would be susceptible to deadlocks, but we have
 *   deadlock avoidance code that resubmits any blocked bios from a rescuer
 *   thread.
 *
 *   However, we do not guarantee forward progress for allocations from other
 *   mempools. Doing multiple allocations from the same mempool under
 *   generic_make_request() should be avoided - instead, use bio_set's front_pad
 *   for per bio allocations.
 *
 *   RETURNS:
 *   Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
	gfp_t saved_gfp = gfp_mask;
	unsigned front_pad;
	unsigned inline_vecs;
	unsigned long idx = BIO_POOL_NONE;
	struct bio_vec *bvl = NULL;
	struct bio *bio;
	void *p;

	if (!bs) {
		if (nr_iovecs > UIO_MAXIOV)
			return NULL;

		p = kmalloc(sizeof(struct bio) +
			    nr_iovecs * sizeof(struct bio_vec),
			    gfp_mask);
		front_pad = 0;
		inline_vecs = nr_iovecs;
	} else {
		/* should not use nobvec bioset for nr_iovecs > 0 */
		if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
			return NULL;
		/*
		 * generic_make_request() converts recursion to iteration; this
		 * means if we're running beneath it, any bios we allocate and
		 * submit will not be submitted (and thus freed) until after we
		 * return.
		 *
		 * This exposes us to a potential deadlock if we allocate
		 * multiple bios from the same bio_set() while running
		 * underneath generic_make_request(). If we were to allocate
		 * multiple bios (say a stacking block driver that was splitting
		 * bios), we would deadlock if we exhausted the mempool's
		 * reserve.
		 *
		 * We solve this, and guarantee forward progress, with a rescuer
		 * workqueue per bio_set. If we go to allocate and there are
		 * bios on current->bio_list, we first try the allocation
		 * without __GFP_WAIT; if that fails, we punt those bios we
		 * would be blocking to the rescuer workqueue before we retry
		 * with the original gfp_flags.
		 */

		if (current->bio_list && !bio_list_empty(current->bio_list))
			gfp_mask &= ~__GFP_WAIT;

		p = mempool_alloc(bs->bio_pool, gfp_mask);
		if (!p && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			p = mempool_alloc(bs->bio_pool, gfp_mask);
		}

		front_pad = bs->front_pad;
		inline_vecs = BIO_INLINE_VECS;
	}

	if (unlikely(!p))
		return NULL;

	bio = p + front_pad;
	bio_init(bio);

	if (nr_iovecs > inline_vecs) {
		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
		if (!bvl && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
		}

		if (unlikely(!bvl))
			goto err_free;

		bio_set_flag(bio, BIO_OWNS_VEC);
	} else if (nr_iovecs) {
		bvl = bio->bi_inline_vecs;
	}

	bio->bi_pool = bs;
	bio->bi_flags |= idx << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bvl;
	return bio;

err_free:
	mempool_free(p, bs->bio_pool);
	return NULL;
}
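
The front_pad advice in the comment above is how stacking drivers attach per-bio state without a second mempool allocation. A hedged sketch (struct my_bio_private, my_bs and the pool size are invented for illustration):

/* Reserve room for private data in front of every bio allocated
 * from this bio_set; one mempool allocation covers both.
 */
struct my_bio_private {
	void *owner;
};

static struct bio_set *my_bs;

static int my_init(void)
{
	my_bs = bioset_create(64, sizeof(struct my_bio_private));
	return my_bs ? 0 : -ENOMEM;
}

static struct bio *my_alloc_bio(gfp_t gfp, int nr_vecs)
{
	struct bio *bio = bio_alloc_bioset(gfp, nr_vecs, my_bs);
	struct my_bio_private *priv;

	if (!bio)
		return NULL;
	/* the front_pad area sits immediately before the bio itself */
	priv = (void *)bio - sizeof(struct my_bio_private);
	priv->owner = NULL;
	return bio;
}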
Example #5
/*
 * Process a backchannel RPC request that arrived over an existing
 * outbound connection
 */
int
bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
	       struct svc_rqst *rqstp)
{
	struct kvec	*argv = &rqstp->rq_arg.head[0];
	struct kvec	*resv = &rqstp->rq_res.head[0];
	struct rpc_task *task;
	int proc_error;
	int error;

	dprintk("svc: %s(%p)\n", __func__, req);

	/* Build the svc_rqst used by the common processing routine */
	rqstp->rq_xprt = serv->sv_bc_xprt;
	rqstp->rq_xid = req->rq_xid;
	rqstp->rq_prot = req->rq_xprt->prot;
	rqstp->rq_server = serv;

	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));

	/* reset result send buffer "put" position */
	resv->iov_len = 0;

	if (rqstp->rq_prot != IPPROTO_TCP) {
		printk(KERN_ERR "No support for Non-TCP transports!\n");
		BUG();
	}

	/*
	 * Skip the next two words because they've already been
	 * processed in the transport
	 */
	svc_getu32(argv);	/* XID */
	svc_getnl(argv);	/* CALLDIR */

	/* Parse and execute the bc call */
	proc_error = svc_process_common(rqstp, argv, resv);

	atomic_inc(&req->rq_xprt->bc_free_slots);
	if (!proc_error) {
		/* Processing error: drop the request */
		xprt_free_bc_request(req);
		return 0;
	}

	/* Finally, send the reply synchronously */
	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
	task = rpc_run_bc_task(req);
	if (IS_ERR(task)) {
		error = PTR_ERR(task);
		goto out;
	}

	WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
	error = task->tk_status;
	rpc_put_task(task);

out:
	dprintk("svc: %s(), error=%d\n", __func__, error);
	return error;
}
Example #6
/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
    WARN_ON_ONCE(in_irq());
    __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
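
A brief, illustrative sketch of the pairing this variant belongs to: generic code should use local_bh_enable(), which also runs softirqs that became pending inside the critical section, while _local_bh_enable() above is the cheap variant for callers known not to have raised any.

static void example_touch_percpu_state(void)
{
	local_bh_disable();
	/* ... manipulate state that softirq handlers also touch ... */
	local_bh_enable();	/* runs any now-pending softirqs */
}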
Example #7
static void __kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_node *pos;

	lockdep_assert_held(&kernfs_mutex);

	/*
	 * Short-circuit if non-root @kn has already finished removal.
	 * This is for kernfs_remove_self() which plays with active ref
	 * after removal.
	 */
	if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
		return;

	pr_debug("kernfs %s: removing\n", kn->name);

	/* prevent any new usage under @kn by deactivating all nodes */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn)))
		if (kernfs_active(pos))
			atomic_add(KN_DEACTIVATED_BIAS, &pos->active);

	/* deactivate and unlink the subtree node-by-node */
	do {
		pos = kernfs_leftmost_descendant(kn);

		/*
		 * kernfs_drain() drops kernfs_mutex temporarily and @pos's
		 * base ref could have been put by someone else by the time
		 * the function returns.  Make sure it doesn't go away
		 * underneath us.
		 */
		kernfs_get(pos);

		/*
		 * Drain iff @kn was activated.  This avoids draining and
		 * its lockdep annotations for nodes which have never been
		 * activated and allows embedding kernfs_remove() in create
		 * error paths without worrying about draining.
		 */
		if (kn->flags & KERNFS_ACTIVATED)
			kernfs_drain(pos);
		else
			WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);

		/*
		 * kernfs_unlink_sibling() succeeds once per node.  Use it
		 * to decide who's responsible for cleanups.
		 */
		if (!pos->parent || kernfs_unlink_sibling(pos)) {
			struct kernfs_iattrs *ps_iattr =
				pos->parent ? pos->parent->iattr : NULL;

			/* update timestamps on the parent */
			if (ps_iattr) {
				ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
				ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
			}

			kernfs_put(pos);
		}

		kernfs_put(pos);
	} while (pos != kn);
}
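
For context, the public entry point is essentially a locked wrapper around the function above, which is what the lockdep assertion checks; roughly:

void kernfs_remove(struct kernfs_node *kn)
{
	mutex_lock(&kernfs_mutex);
	__kernfs_remove(kn);
	mutex_unlock(&kernfs_mutex);
}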
Example #8
int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
				  u16 timeout)
{
	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
	struct ieee80211_sub_if_data *sdata = sta->sdata;
	struct ieee80211_local *local = sdata->local;
	struct tid_ampdu_tx *tid_tx;
	int ret = 0;

	if (WARN(sta->reserved_tid == tid,
		 "Requested to start BA session on reserved tid=%d", tid))
		return -EINVAL;

	trace_api_start_tx_ba_session(pubsta, tid);

	if (WARN_ON_ONCE(!local->ops->ampdu_action))
		return -EINVAL;

	if ((tid >= IEEE80211_NUM_TIDS) ||
	    !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) ||
	    (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW))
		return -EINVAL;

	ht_dbg(sdata, "Open BA session requested for %pM tid %u\n",
	       pubsta->addr, tid);

	if (sdata->vif.type != NL80211_IFTYPE_STATION &&
	    sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
	    sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
	    sdata->vif.type != NL80211_IFTYPE_AP &&
	    sdata->vif.type != NL80211_IFTYPE_ADHOC)
		return -EINVAL;

	if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) {
		ht_dbg(sdata,
		       "BA sessions blocked - Denying BA session request %pM tid %d\n",
		       sta->sta.addr, tid);
		return -EINVAL;
	}

	/*
	 * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a
	 * member of an IBSS, and has no other existing Block Ack agreement
	 * with the recipient STA, then the initiating STA shall transmit a
	 * Probe Request frame to the recipient STA and shall not transmit an
	 * ADDBA Request frame unless it receives a Probe Response frame
	 * from the recipient within dot11ADDBAFailureTimeout.
	 *
	 * The probe request mechanism for ADDBA is currently not implemented,
	 * but we only build up Block Ack sessions with HT STAs. This
	 * information is set when we receive the BSS info from a probe
	 * response or a beacon.
	 */
	if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC &&
	    !sta->sta.ht_cap.ht_supported) {
		ht_dbg(sdata,
		       "BA request denied - IBSS STA %pM does not advertise HT support\n",
		       pubsta->addr);
		return -EINVAL;
	}

	spin_lock_bh(&sta->lock);

	/* we have tried too many times, receiver does not want A-MPDU */
	if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
		ret = -EBUSY;
		goto err_unlock_sta;
	}

	/*
	 * if we have tried more than HT_AGG_BURST_RETRIES times we
	 * will spread our requests in time to avoid stalling connection
	 * for too long
	 */
	if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_BURST_RETRIES &&
	    time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] +
			HT_AGG_RETRIES_PERIOD)) {
		ht_dbg(sdata,
		       "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n",
		       sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid);
		ret = -EBUSY;
		goto err_unlock_sta;
	}

	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
	/* check if the TID is not in aggregation flow already */
	if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) {
		ht_dbg(sdata,
		       "BA request denied - session is not idle on %pM tid %u\n",
		       sta->sta.addr, tid);
		ret = -EAGAIN;
		goto err_unlock_sta;
	}

	/* prepare A-MPDU MLME for Tx aggregation */
	tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
	if (!tid_tx) {
		ret = -ENOMEM;
		goto err_unlock_sta;
	}

	skb_queue_head_init(&tid_tx->pending);
	__set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);

	tid_tx->timeout = timeout;

	/* response timer */
	tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired;
	tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid];
	init_timer(&tid_tx->addba_resp_timer);

	/* tx timer */
	tid_tx->session_timer.function = sta_tx_agg_session_timer_expired;
	tid_tx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
	init_timer_deferrable(&tid_tx->session_timer);

	/* assign a dialog token */
	sta->ampdu_mlme.dialog_token_allocator++;
	tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator;

	/*
	 * Finally, assign it to the start array; the work item will
	 * collect it and move it to the normal array.
	 */
	sta->ampdu_mlme.tid_start_tx[tid] = tid_tx;

	ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);

	/* this flow continues off the work */
 err_unlock_sta:
	spin_unlock_bh(&sta->lock);
	return ret;
}
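
A hedged sketch of a caller: drivers or rate-control code typically invoke ieee80211_start_tx_ba_session() once a station looks aggregation-worthy, treating -EBUSY/-EAGAIN as "retry later" (the function and the timeout value here are illustrative):

static void example_maybe_start_ampdu(struct ieee80211_sta *sta, u16 tid)
{
	int ret;

	ret = ieee80211_start_tx_ba_session(sta, tid, 5000 /* timeout */);
	if (ret && ret != -EBUSY && ret != -EAGAIN)
		pr_debug("BA session start failed for tid %u: %d\n",
			 tid, ret);
	/* -EBUSY/-EAGAIN: blocked or not idle yet, try again later */
}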
Example #9
/**
 * get_vaddr_frames() - map virtual addresses to pfns
 * @start:	starting user address
 * @nr_frames:	number of pages / pfns from start to map
 * @gup_flags:	flags modifying lookup behaviour
 * @vec:	structure which receives pages / pfns of the addresses mapped.
 *		It should have space for at least nr_frames entries.
 *
 * This function maps virtual addresses from @start and fills @vec structure
 * with page frame numbers or page pointers to corresponding pages (choice
 * depends on the type of the vma underlying the virtual address). If @start
 * belongs to a normal vma, the function grabs reference to each of the pages
 * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't
 * touch page structures and the caller must make sure pfns aren't reused for
 * anything else while he is using them.
 *
 * The function returns number of pages mapped which may be less than
 * @nr_frames. In particular we stop mapping if there are more vmas of
 * different type underlying the specified range of virtual addresses.
 * When the function isn't able to map a single page, it returns error.
 *
 * This function takes care of grabbing mmap_sem as necessary.
 */
int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
		     unsigned int gup_flags, struct frame_vector *vec)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int ret = 0;
	int err;
	int locked;

	if (nr_frames == 0)
		return 0;

	if (WARN_ON_ONCE(nr_frames > vec->nr_allocated))
		nr_frames = vec->nr_allocated;

	down_read(&mm->mmap_sem);
	locked = 1;
	vma = find_vma_intersection(mm, start, start + 1);
	if (!vma) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * While get_vaddr_frames() could be used for transient (kernel
	 * controlled lifetime) pinning of memory pages all current
	 * users establish long term (userspace controlled lifetime)
	 * page pinning. Treat get_vaddr_frames() like
	 * get_user_pages_longterm() and disallow it for filesystem-dax
	 * mappings.
	 */
	if (vma_is_fsdax(vma)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
		vec->got_ref = true;
		vec->is_pfns = false;
		ret = get_user_pages_locked(start, nr_frames,
			gup_flags, (struct page **)(vec->ptrs), &locked);
		goto out;
	}

	vec->got_ref = false;
	vec->is_pfns = true;
	do {
		unsigned long *nums = frame_vector_pfns(vec);

		while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
			err = follow_pfn(vma, start, &nums[ret]);
			if (err) {
				if (ret == 0)
					ret = err;
				goto out;
			}
			start += PAGE_SIZE;
			ret++;
		}
		/*
		 * We stop if we have enough pages or if VMA doesn't completely
		 * cover the tail page.
		 */
		if (ret >= nr_frames || start < vma->vm_end)
			break;
		vma = find_vma_intersection(mm, start, start + 1);
	} while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP));
out:
	if (locked)
		up_read(&mm->mmap_sem);
	if (!ret)
		ret = -EFAULT;
	if (ret > 0)
		vec->nr_frames = ret;
	return ret;
}
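
The usual call sequence around get_vaddr_frames() is: create a frame_vector, map, convert to pages when the vma was a normal one, use, release. A sketch assuming the frame_vector helpers from mm/frame_vector.c (error handling abbreviated):

static int example_pin_user_range(unsigned long start, unsigned int nr)
{
	struct frame_vector *vec;
	int ret;

	vec = frame_vector_create(nr);
	if (!vec)
		return -ENOMEM;

	ret = get_vaddr_frames(start, nr, FOLL_WRITE, vec);
	if (ret < 0)
		goto out_destroy;

	if (frame_vector_to_pages(vec) < 0) {
		/* VM_IO/VM_PFNMAP range: only pfns are available */
		ret = -EINVAL;
		goto out_put;
	}

	/* ... use frame_vector_pages(vec)[0 .. ret - 1] ... */

out_put:
	put_vaddr_frames(vec);
out_destroy:
	frame_vector_destroy(vec);
	return ret;
}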
Example #10
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func_* to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 *
		 * (This is far from being a fully correct recovery.
		 *  Architecturally, the CPU could prefetch something
		 *  back into an incorrect ASID slot and leave it there
		 *  to cause trouble down the road.  It's better than
		 *  nothing, though.)
		 */
		__flush_tlb_all();
	}
#endif
	this_cpu_write(cpu_tlbstate.is_lazy, false);

	if (real_prev == next) {
		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			  next->context.ctx_id);

		/*
		 * We don't currently support having a real mm loaded without
		 * our cpu set in mm_cpumask().  We have all the bookkeeping
		 * in place to figure out whether we would need to flush
		 * if our cpu were cleared in mm_cpumask(), but we don't
		 * currently use it.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		return;
	} else {
		u16 new_asid;
		bool need_flush;

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Forcibly
			 * map it.
			 */
			unsigned int index = pgd_index(current_stack_pointer);
			pgd_t *pgd = next->pgd + index;

			if (unlikely(pgd_none(*pgd)))
				set_pgd(pgd, init_mm.pgd[index]);
		}

		/* Stop remote flushes for the previous mm */
		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
				real_prev != &init_mm);
		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		if (need_flush) {
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
			write_cr3(build_cr3(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
					TLB_FLUSH_ALL);
		} else {
			/* The new ASID is already up to date. */
			write_cr3(build_cr3_noflush(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
		}

		this_cpu_write(cpu_tlbstate.loaded_mm, next);
		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
	}

	load_mm_cr4(next);
	switch_ldt(real_prev, next);
}
Example #11
int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
				    enum ieee80211_agg_stop_reason reason)
{
	struct ieee80211_local *local = sta->local;
	struct tid_ampdu_tx *tid_tx;
	enum ieee80211_ampdu_mlme_action action;
	int ret;

	lockdep_assert_held(&sta->ampdu_mlme.mtx);

	switch (reason) {
	case AGG_STOP_DECLINED:
	case AGG_STOP_LOCAL_REQUEST:
	case AGG_STOP_PEER_REQUEST:
		action = IEEE80211_AMPDU_TX_STOP_CONT;
		break;
	case AGG_STOP_DESTROY_STA:
		action = IEEE80211_AMPDU_TX_STOP_FLUSH;
		break;
	default:
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	spin_lock_bh(&sta->lock);

	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
	if (!tid_tx) {
		spin_unlock_bh(&sta->lock);
		return -ENOENT;
	}

	/*
	 * If we're already stopping, ignore any new requests to stop,
	 * unless we're destroying the session, in which case notify
	 * the driver.
	 */
	if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
		spin_unlock_bh(&sta->lock);
		if (reason != AGG_STOP_DESTROY_STA)
			return -EALREADY;
		ret = drv_ampdu_action(local, sta->sdata,
				       IEEE80211_AMPDU_TX_STOP_FLUSH_CONT,
				       &sta->sta, tid, NULL, 0);
		WARN_ON_ONCE(ret);
		return 0;
	}

	if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
		/* not even started yet! */
		ieee80211_assign_tid_tx(sta, tid, NULL);
		spin_unlock_bh(&sta->lock);
		kfree_rcu(tid_tx, rcu_head);
		return 0;
	}

	set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);

	spin_unlock_bh(&sta->lock);

	ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n",
	       sta->sta.addr, tid);

	del_timer_sync(&tid_tx->addba_resp_timer);
	del_timer_sync(&tid_tx->session_timer);

	/*
	 * After this, packets are no longer handed right through
	 * to the driver but are put onto tid_tx->pending instead,
	 * with locking to ensure proper access.
	 */
	clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);

	/*
	 * There might be a few packets being processed right now (on
	 * another CPU) that have already gotten past the aggregation
	 * check when it was still OPERATIONAL and consequently have
	 * IEEE80211_TX_CTL_AMPDU set. In that case, this code might
	 * call into the driver at the same time or even before the
	 * TX path calls into it, which could confuse the driver.
	 *
	 * Wait for all currently running TX paths to finish before
	 * telling the driver. New packets will not go through since
	 * the aggregation session is no longer OPERATIONAL.
	 */
	synchronize_net();

	tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ?
					WLAN_BACK_RECIPIENT :
					WLAN_BACK_INITIATOR;
	tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST;

	ret = drv_ampdu_action(local, sta->sdata, action,
			       &sta->sta, tid, NULL, 0);

	/* HW shall not deny going back to legacy */
	if (WARN_ON(ret)) {
		/*
		 * We may have pending packets get stuck in this case...
		 * Not bothering with a workaround for now.
		 */
	}

	/*
	 * In the case of AGG_STOP_DESTROY_STA, the driver won't
	 * necessarily call ieee80211_stop_tx_ba_cb(), so this may
	 * seem like we can leave the tid_tx data pending forever.
	 * This is true, in a way, but "forever" is only until the
	 * station struct is actually destroyed. In the meantime,
	 * leaving it around ensures that we don't transmit packets
	 * to the driver on this TID which might confuse it.
	 */

	return 0;
}
Example #12
/*
 * flush_tlb_func_common()'s memory ordering requirement is that any
 * TLB fills that happen after we flush the TLB are ordered after we
 * read active_mm's tlb_gen.  We don't need any explicit barriers
 * because all x86 flush operations are serializing and the
 * atomic64_read operation won't be reordered by the compiler.
 */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
				  bool local, enum tlb_flush_reason reason)
{
	/*
	 * We have three different tlb_gen values in here.  They are:
	 *
	 * - mm_tlb_gen:     the latest generation.
	 * - local_tlb_gen:  the generation that this CPU has already caught
	 *                   up to.
	 * - f->new_tlb_gen: the generation that the requester of the flush
	 *                   wants us to catch up to.
	 */
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

	/* This code cannot presently handle being reentered. */
	VM_WARN_ON(!irqs_disabled());

	if (unlikely(loaded_mm == &init_mm))
		return;

	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
		   loaded_mm->context.ctx_id);

	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
		/*
		 * We're in lazy mode.  We need to at least flush our
		 * paging-structure cache to avoid speculatively reading
		 * garbage into our TLB.  Since switching to init_mm is barely
		 * slower than a minimal flush, just switch to init_mm.
		 */
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
	}

	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
		/*
		 * There's nothing to do: we're already up to date.  This can
		 * happen if two concurrent flushes happen -- the first flush to
		 * be handled can catch us all the way up, leaving no work for
		 * the second flush.
		 */
		trace_tlb_flush(reason, 0);
		return;
	}

	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

	/*
	 * If we get to this point, we know that our TLB is out of date.
	 * This does not strictly imply that we need to flush (it's
	 * possible that f->new_tlb_gen <= local_tlb_gen), but we're
	 * going to need to flush in the very near future, so we might
	 * as well get it over with.
	 *
	 * The only question is whether to do a full or partial flush.
	 *
	 * We do a partial flush if requested and two extra conditions
	 * are met:
	 *
	 * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
	 *    we've always done all needed flushes to catch up to
	 *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
	 *    f->new_tlb_gen == 3, then we know that the flush needed to bring
	 *    us up to date for tlb_gen 3 is the partial flush we're
	 *    processing.
	 *
	 *    As an example of why this check is needed, suppose that there
	 *    are two concurrent flushes.  The first is a full flush that
	 *    changes context.tlb_gen from 1 to 2.  The second is a partial
	 *    flush that changes context.tlb_gen from 2 to 3.  If they get
	 *    processed on this CPU in reverse order, we'll see
	 *     local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
	 *    If we were to use __flush_tlb_single() and set local_tlb_gen to
	 *    3, we'd break the invariant: we'd update local_tlb_gen above
	 *    1 without the full flush that's needed for tlb_gen 2.
	 *
	 * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization.
	 *    Partial TLB flushes are not all that much cheaper than full TLB
	 *    flushes, so it seems unlikely that it would be a performance win
	 *    to do a partial flush if that won't bring our TLB fully up to
	 *    date.  By doing a full flush instead, we can increase
	 *    local_tlb_gen all the way to mm_tlb_gen and we can probably
	 *    avoid another flush in the very near future.
	 */
	if (f->end != TLB_FLUSH_ALL &&
	    f->new_tlb_gen == local_tlb_gen + 1 &&
	    f->new_tlb_gen == mm_tlb_gen) {
		/* Partial flush */
		unsigned long addr;
		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;

		addr = f->start;
		while (addr < f->end) {
			__flush_tlb_single(addr);
			addr += PAGE_SIZE;
		}
		if (local)
			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
		trace_tlb_flush(reason, nr_pages);
	} else {
		/* Full flush. */
		local_flush_tlb();
		if (local)
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
		trace_tlb_flush(reason, TLB_FLUSH_ALL);
	}

	/* Both paths above update our state to mm_tlb_gen. */
	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}
Example #13
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_delete_elem(map, key);
}
Example #14
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_update_elem(map, key, value, flags);
}
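
From the program side, these three kernel functions are what the bpf_map_lookup_elem()/bpf_map_update_elem()/bpf_map_delete_elem() helper calls resolve to. An illustrative fragment in the style of samples/bpf (map name and section are invented; the helper wrappers come from bpf_helpers.h), running under the RCU read lock exactly as the comments above require:

#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") counters = {
	.type        = BPF_MAP_TYPE_HASH,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(__u64),
	.max_entries = 1024,
};

SEC("kprobe/example")
int count_event(struct pt_regs *ctx)
{
	__u32 key = 0;
	__u64 one = 1, *val;

	val = bpf_map_lookup_elem(&counters, &key);
	if (val)
		__sync_fetch_and_add(val, 1);	/* element is RCU-protected */
	else
		bpf_map_update_elem(&counters, &key, &one, BPF_ANY);
	return 0;
}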
Example #15
static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
			  struct hrtimer_clock_base *base,
			  struct hrtimer *timer, ktime_t *now)
{
	enum hrtimer_restart (*fn)(struct hrtimer *);
	int restart;

	lockdep_assert_held(&cpu_base->lock);

	debug_deactivate(timer);
	cpu_base->running = timer;

	/*
	 * Separate the ->running assignment from the ->state assignment.
	 *
	 * As with a regular write barrier, this ensures the read side in
	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
	 * timer->state == INACTIVE.
	 */
	raw_write_seqcount_barrier(&cpu_base->seq);

	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
	timer_stats_account_hrtimer(timer);
	fn = timer->function;

	/*
	 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
	 * timer is restarted with a period then it becomes an absolute
	 * timer. If it's not restarted, it does not matter.
	 */
	if (IS_ENABLED(CONFIG_TIME_LOW_RES))
		timer->is_rel = false;

	/*
	 * Because we run timers from hardirq context, there is no chance
	 * they get migrated to another CPU, therefore it's safe to unlock
	 * the timer base.
	 */
	raw_spin_unlock(&cpu_base->lock);
	trace_hrtimer_expire_entry(timer, now);
	restart = fn(timer);
	trace_hrtimer_expire_exit(timer);
	raw_spin_lock(&cpu_base->lock);

	/*
	 * Note: We clear the running state after enqueue_hrtimer and
	 * we do not reprogram the event hardware. Reprogramming happens
	 * either in hrtimer_start_range_ns() or in hrtimer_interrupt().
	 *
	 * Note: Because we dropped the cpu_base->lock above,
	 * hrtimer_start_range_ns() can have popped in and enqueued the timer
	 * for us already.
	 */
	if (restart != HRTIMER_NORESTART &&
	    !(timer->state & HRTIMER_STATE_ENQUEUED))
		enqueue_hrtimer(timer, base);

	/*
	 * Separate the ->running assignment from the ->state assignment.
	 *
	 * As with a regular write barrier, this ensures the read side in
	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
	 * timer->state == INACTIVE.
	 */
	raw_write_seqcount_barrier(&cpu_base->seq);

	WARN_ON_ONCE(cpu_base->running != timer);
	cpu_base->running = NULL;
}
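
The two raw_write_seqcount_barrier() calls pair with a seqcount read loop on the query side; roughly, hrtimer_active() in the same kernel looks like this (abridged sketch):

bool hrtimer_active(const struct hrtimer *timer)
{
	struct hrtimer_clock_base *base;
	unsigned int seq;

	do {
		base = READ_ONCE(timer->base);
		seq = raw_read_seqcount_begin(&base->cpu_base->seq);

		/* active == enqueued or callback currently running */
		if (timer->state != HRTIMER_STATE_INACTIVE ||
		    base->cpu_base->running == timer)
			return true;

	} while (read_seqcount_retry(&base->cpu_base->seq, seq) ||
		 base != READ_ONCE(timer->base));

	return false;
}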
Example #16
/* Firmware functions */
static char *rt2800soc_get_firmware_name(struct rt2x00_dev *rt2x00dev)
{
	WARN_ON_ONCE(1);
	return NULL;
}
Example #17
static void cpufreq_interactive_timer(unsigned long data)
{
	u64 now;
	unsigned int delta_time;
	u64 cputime_speedadj;
	int cpu_load;
	struct cpufreq_interactive_cpuinfo *pcpu =
		&per_cpu(cpuinfo, data);
	struct cpufreq_interactive_tunables *tunables =
		pcpu->policy->governor_data;
	unsigned int new_freq;
	unsigned int loadadjfreq;
	unsigned int index;
	unsigned long flags;
	bool boosted;
	unsigned long mod_min_sample_time;
	int i, max_load;
	unsigned int max_freq;
	struct cpufreq_interactive_cpuinfo *picpu;

	if (!down_read_trylock(&pcpu->enable_sem))
		return;
	if (!pcpu->governor_enabled)
		goto exit;

	spin_lock_irqsave(&pcpu->load_lock, flags);
	now = update_load(data);
	delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp);
	cputime_speedadj = pcpu->cputime_speedadj;
	spin_unlock_irqrestore(&pcpu->load_lock, flags);

	if (WARN_ON_ONCE(!delta_time))
		goto rearm;

	do_div(cputime_speedadj, delta_time);
	loadadjfreq = (unsigned int)cputime_speedadj * 100;
	cpu_load = loadadjfreq / pcpu->policy->cur;
	pcpu->prev_load = cpu_load;
	boosted = tunables->boost_val || now < tunables->boostpulse_endtime;

	if (cpu_load >= tunables->go_hispeed_load || boosted) {
		if (pcpu->target_freq < tunables->hispeed_freq) {
			new_freq = tunables->hispeed_freq;
		} else {
			new_freq = choose_freq(pcpu, loadadjfreq);

			if (new_freq < tunables->hispeed_freq)
				new_freq = tunables->hispeed_freq;
		}
	} else {
		new_freq = choose_freq(pcpu, loadadjfreq);

		if (tunables->sync_freq && new_freq < tunables->sync_freq) {

			max_load = 0;
			max_freq = 0;

			for_each_online_cpu(i) {
				picpu = &per_cpu(cpuinfo, i);

				if (i == data || picpu->prev_load <
					tunables->up_threshold_any_cpu_load)
					continue;

				max_load = max(max_load, picpu->prev_load);
				max_freq = max(max_freq, picpu->target_freq);
			}

			if (max_freq > tunables->up_threshold_any_cpu_freq &&
			    max_load >= tunables->up_threshold_any_cpu_load)
				new_freq = tunables->sync_freq;
		}
	}
Example #18
static int rt2800soc_write_firmware(struct rt2x00_dev *rt2x00dev,
				    const u8 *data, const size_t len)
{
	WARN_ON_ONCE(1);
	return 0;
}
Example #19
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
    unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
    unsigned long old_flags = current->flags;
    int max_restart = MAX_SOFTIRQ_RESTART;
    struct softirq_action *h;
    bool in_hardirq;
    __u32 pending;
    int softirq_bit;

    /*
     * Mask out PF_MEMALLOC as the current task context is borrowed for
     * the softirq. A softirq handler, such as network RX, might set
     * PF_MEMALLOC again if the socket is related to swap.
     */
    current->flags &= ~PF_MEMALLOC;

    pending = local_softirq_pending();
    account_irq_enter_time(current);

    __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
    in_hardirq = lockdep_softirq_start();

restart:
    /* Reset the pending bitmask before enabling irqs */
    set_softirq_pending(0);

    local_irq_enable();

    h = softirq_vec;

    while ((softirq_bit = ffs(pending))) {
        unsigned int vec_nr;
        int prev_count;

        h += softirq_bit - 1;

        vec_nr = h - softirq_vec;
        prev_count = preempt_count();

        kstat_incr_softirqs_this_cpu(vec_nr);

        trace_softirq_entry(vec_nr);
        h->action(h);
        trace_softirq_exit(vec_nr);
        if (unlikely(prev_count != preempt_count())) {
            pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
                   vec_nr, softirq_to_name[vec_nr], h->action,
                   prev_count, preempt_count());
            preempt_count_set(prev_count);
        }
        h++;
        pending >>= softirq_bit;
    }

    rcu_bh_qs();
    local_irq_disable();

    pending = local_softirq_pending();
    if (pending) {
        if (time_before(jiffies, end) && !need_resched() &&
                --max_restart)
            goto restart;

        wakeup_softirqd();
    }

    lockdep_softirq_end(in_hardirq);
    account_irq_exit_time(current);
    __local_bh_enable(SOFTIRQ_OFFSET);
    WARN_ON_ONCE(in_interrupt());
    tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}
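
The ffs()-based walk above visits only the set bits of the pending mask, so a sparse mask costs one iteration per pending softirq rather than one per possible vector. The same pattern in isolation (illustrative only):

static void example_walk_pending(__u32 pending)
{
	struct softirq_action *h = softirq_vec;
	int bit;

	while ((bit = ffs(pending))) {
		h += bit - 1;		/* jump to the lowest pending vector */
		/* handle vector number (h - softirq_vec) here */
		h++;			/* step past it ... */
		pending >>= bit;	/* ... and drop the consumed bits */
	}
}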
Example #20
static int
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
{
	unsigned long head, space, next_space, pad, gap, skip, wakeup;
	unsigned int next_buf;
	struct bts_phys *phys, *next_phys;
	int ret;

	if (buf->snapshot)
		return 0;

	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
	if (WARN_ON_ONCE(head != local_read(&buf->head)))
		return -EINVAL;

	phys = &buf->buf[buf->cur_buf];
	space = phys->offset + phys->displacement + phys->size - head;
	pad = space;
	if (space > handle->size) {
		space = handle->size;
		space -= space % BTS_RECORD_SIZE;
	}
	if (space <= BTS_SAFETY_MARGIN) {
		/* See if next phys buffer has more space */
		next_buf = buf->cur_buf + 1;
		if (next_buf >= buf->nr_bufs)
			next_buf = 0;
		next_phys = &buf->buf[next_buf];
		gap = buf_size(phys->page) - phys->displacement - phys->size +
		      next_phys->displacement;
		skip = pad + gap;
		if (handle->size >= skip) {
			next_space = next_phys->size;
			if (next_space + skip > handle->size) {
				next_space = handle->size - skip;
				next_space -= next_space % BTS_RECORD_SIZE;
			}
			if (next_space > space || !space) {
				if (pad)
					bts_buffer_pad_out(phys, head);
				ret = perf_aux_output_skip(handle, skip);
				if (ret)
					return ret;
				/* Advance to next phys buffer */
				phys = next_phys;
				space = next_space;
				head = phys->offset + phys->displacement;
				/*
				 * After this, cur_buf and head won't match ds
				 * anymore, so we must not be racing with
				 * bts_update().
				 */
				buf->cur_buf = next_buf;
				local_set(&buf->head, head);
			}
		}
	}

	/* Don't go far beyond wakeup watermark */
	wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
		 handle->head;
	if (space > wakeup) {
		space = wakeup;
		space -= space % BTS_RECORD_SIZE;
	}

	buf->end = head + space;

	/*
	 * If we have no space, the lost notification would have been sent when
	 * we hit absolute_maximum - see bts_update()
	 */
	if (!space)
		return -ENOSPC;

	return 0;
}
Example #21
static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw,
                               enum set_key_cmd cmd,
                               struct ieee80211_vif *vif,
                               struct ieee80211_sta *sta,
                               struct ieee80211_key_conf *key)
{
    struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
    int ret;

    if (iwlwifi_mod_params.sw_crypto) {
        IWL_DEBUG_MAC80211(mvm, "leave - hwcrypto disabled\n");
        return -EOPNOTSUPP;
    }

    switch (key->cipher) {
    case WLAN_CIPHER_SUITE_TKIP:
        key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC;
    /* fall-through */
    case WLAN_CIPHER_SUITE_CCMP:
        key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV;
        break;
    case WLAN_CIPHER_SUITE_AES_CMAC:
        WARN_ON_ONCE(!(hw->flags & IEEE80211_HW_MFP_CAPABLE));
        break;
    case WLAN_CIPHER_SUITE_WEP40:
    case WLAN_CIPHER_SUITE_WEP104:
        /*
         * Support for TX only, at least for now, so accept
         * the key and do nothing else. Then mac80211 will
         * pass it for TX but we don't have to use it for RX.
         */
        return 0;
    default:
        return -EOPNOTSUPP;
    }

    mutex_lock(&mvm->mutex);

    switch (cmd) {
    case SET_KEY:
        IWL_DEBUG_MAC80211(mvm, "set hwcrypto key\n");
        ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, false);
        if (ret) {
            IWL_WARN(mvm, "set key failed\n");
            /*
             * can't add key for RX, but we don't need it
             * in the device for TX so still return 0
             */
            ret = 0;
        }

        break;
    case DISABLE_KEY:
        IWL_DEBUG_MAC80211(mvm, "disable hwcrypto key\n");
        ret = iwl_mvm_remove_sta_key(mvm, vif, sta, key);
        break;
    default:
        ret = -EINVAL;
    }

    mutex_unlock(&mvm->mutex);
    return ret;
}
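
Like the rest of the mac80211 entry points, this callback is wired up through the driver's ieee80211_ops table; abridged sketch (the neighbouring entries are only representative):

static const struct ieee80211_ops example_mvm_ops = {
	.tx		= iwl_mvm_mac_tx,
	.ampdu_action	= iwl_mvm_mac_ampdu_action,
	.set_key	= iwl_mvm_mac_set_key,
	/* ... */
};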
Example #22
/*
 * Issue a new request to a device.
 */
void do_ide_request(struct request_queue *q)
{
	ide_drive_t	*drive = q->queuedata;
	ide_hwif_t	*hwif = drive->hwif;
	struct ide_host *host = hwif->host;
	struct request	*rq = NULL;
	ide_startstop_t	startstop;
	unsigned long queue_run_ms = 3; /* old plug delay */

	spin_unlock_irq(q->queue_lock);

	/* HLD do_request() callback might sleep, make sure it's okay */
	might_sleep();

	if (ide_lock_host(host, hwif))
		goto plug_device_2;

	spin_lock_irq(&hwif->lock);

	if (!ide_lock_port(hwif)) {
		ide_hwif_t *prev_port;

		WARN_ON_ONCE(hwif->rq);
repeat:
		prev_port = hwif->host->cur_port;
		if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
		    time_after(drive->sleep, jiffies)) {
			unsigned long left = drive->sleep - jiffies;

			queue_run_ms = jiffies_to_msecs(left + 1);
			ide_unlock_port(hwif);
			goto plug_device;
		}

		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
		    hwif != prev_port) {
			ide_drive_t *cur_dev =
				prev_port ? prev_port->cur_dev : NULL;

			/*
			 * set nIEN for previous port, drives in the
			 * quirk list may not like intr setups/cleanups
			 */
			if (cur_dev &&
			    (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
				prev_port->tp_ops->write_devctl(prev_port,
								ATA_NIEN |
								ATA_DEVCTL_OBS);

			hwif->host->cur_port = hwif;
		}
		hwif->cur_dev = drive;
		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);

		spin_unlock_irq(&hwif->lock);
		spin_lock_irq(q->queue_lock);
		/*
		 * we know that the queue isn't empty, but this can happen
		 * if the q->prep_rq_fn() decides to kill a request
		 */
		if (!rq)
			rq = blk_fetch_request(drive->queue);

		spin_unlock_irq(q->queue_lock);
		spin_lock_irq(&hwif->lock);

		if (!rq) {
			ide_unlock_port(hwif);
			goto out;
		}

		/*
		 * Sanity: don't accept a request that isn't a PM request
		 * if we are currently power managed. This is very important as
		 * blk_stop_queue() doesn't prevent the blk_fetch_request()
		 * above to return us whatever is in the queue. Since we call
		 * ide_do_request() ourselves, we end up taking requests while
		 * the queue is blocked...
		 *
		 * We do allow requests forced to the head of the queue with
		 * ide-preempt, though. I hope that doesn't happen too much,
		 * hopefully not unless the subdriver triggers such a thing
		 * in its own PM state machine.
		 */
		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
		    blk_pm_request(rq) == 0 &&
		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
			/* there should be no pending command at this point */
			ide_unlock_port(hwif);
			goto plug_device;
		}

		hwif->rq = rq;

		spin_unlock_irq(&hwif->lock);
		startstop = start_request(drive, rq);
		spin_lock_irq(&hwif->lock);

		if (startstop == ide_stopped) {
			rq = hwif->rq;
			hwif->rq = NULL;
			goto repeat;
		}
	} else
		goto plug_device;
out:
	spin_unlock_irq(&hwif->lock);
	if (rq == NULL)
		ide_unlock_host(host);
	spin_lock_irq(q->queue_lock);
	return;

plug_device:
	spin_unlock_irq(&hwif->lock);
	ide_unlock_host(host);
plug_device_2:
	spin_lock_irq(q->queue_lock);
	__ide_requeue_and_plug(q, rq);
}
Example #23
struct ieee80211_regdomain *
iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
		       int num_of_ch, __le32 *channels, u16 fw_mcc)
{
	int ch_idx;
	u16 ch_flags, prev_ch_flags = 0;
	const u8 *nvm_chan = cfg->device_family == IWL_DEVICE_FAMILY_8000 ?
			     iwl_nvm_channels_family_8000 : iwl_nvm_channels;
	struct ieee80211_regdomain *regd;
	int size_of_regd;
	struct ieee80211_reg_rule *rule;
	enum ieee80211_band band;
	int center_freq, prev_center_freq = 0;
	int valid_rules = 0;
	bool new_rule;
	int max_num_ch = cfg->device_family == IWL_DEVICE_FAMILY_8000 ?
			 IWL_NUM_CHANNELS_FAMILY_8000 : IWL_NUM_CHANNELS;

	if (WARN_ON_ONCE(num_of_ch > NL80211_MAX_SUPP_REG_RULES))
		return ERR_PTR(-EINVAL);

	if (WARN_ON(num_of_ch > max_num_ch))
		num_of_ch = max_num_ch;

	IWL_DEBUG_DEV(dev, IWL_DL_LAR, "building regdom for %d channels\n",
		      num_of_ch);

	/* build a regdomain rule for every valid channel */
	size_of_regd =
		sizeof(struct ieee80211_regdomain) +
		num_of_ch * sizeof(struct ieee80211_reg_rule);

	regd = kzalloc(size_of_regd, GFP_KERNEL);
	if (!regd)
		return ERR_PTR(-ENOMEM);

	for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
		ch_flags = (u16)__le32_to_cpup(channels + ch_idx);
		band = (ch_idx < NUM_2GHZ_CHANNELS) ?
		       IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ;
		center_freq = ieee80211_channel_to_frequency(nvm_chan[ch_idx],
							     band);
		new_rule = false;

		if (!(ch_flags & NVM_CHANNEL_VALID)) {
			IWL_DEBUG_DEV(dev, IWL_DL_LAR,
				      "Ch. %d Flags %x [%sGHz] - No traffic\n",
				      nvm_chan[ch_idx],
				      ch_flags,
				      (ch_idx >= NUM_2GHZ_CHANNELS) ?
				      "5.2" : "2.4");
			continue;
		}

		/* we can't continue the same rule */
		if (ch_idx == 0 || prev_ch_flags != ch_flags ||
		    center_freq - prev_center_freq > 20) {
			valid_rules++;
			new_rule = true;
		}

		rule = &regd->reg_rules[valid_rules - 1];

		if (new_rule)
			rule->freq_range.start_freq_khz =
						MHZ_TO_KHZ(center_freq - 10);

		rule->freq_range.end_freq_khz = MHZ_TO_KHZ(center_freq + 10);

		/* this doesn't matter - not used by FW */
		rule->power_rule.max_antenna_gain = DBI_TO_MBI(6);
		rule->power_rule.max_eirp =
			DBM_TO_MBM(IWL_DEFAULT_MAX_TX_POWER);

		rule->flags = iwl_nvm_get_regdom_bw_flags(nvm_chan, ch_idx,
							  ch_flags, cfg);

		/* rely on auto-calculation to merge BW of contiguous chans */
		rule->flags |= NL80211_RRF_AUTO_BW;
		rule->freq_range.max_bandwidth_khz = 0;

		prev_ch_flags = ch_flags;
		prev_center_freq = center_freq;

		IWL_DEBUG_DEV(dev, IWL_DL_LAR,
			      "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s(0x%02x): Ad-Hoc %ssupported\n",
			      center_freq,
			      band == IEEE80211_BAND_5GHZ ? "5.2" : "2.4",
			      CHECK_AND_PRINT_I(VALID),
			      CHECK_AND_PRINT_I(ACTIVE),
			      CHECK_AND_PRINT_I(RADAR),
			      CHECK_AND_PRINT_I(WIDE),
			      CHECK_AND_PRINT_I(40MHZ),
			      CHECK_AND_PRINT_I(80MHZ),
			      CHECK_AND_PRINT_I(160MHZ),
			      CHECK_AND_PRINT_I(INDOOR_ONLY),
			      CHECK_AND_PRINT_I(GO_CONCURRENT),
			      ch_flags,
			      ((ch_flags & NVM_CHANNEL_ACTIVE) &&
			       !(ch_flags & NVM_CHANNEL_RADAR))
					 ? "" : "not ");
	}

	regd->n_reg_rules = valid_rules;

	/* set alpha2 from FW. */
	regd->alpha2[0] = fw_mcc >> 8;
	regd->alpha2[1] = fw_mcc & 0xff;

	return regd;
}
Example #24
static int intel_runtime_suspend(struct device *device)
{
	struct pci_dev *pdev = to_pci_dev(device);
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6(dev))))
		return -ENODEV;

	WARN_ON(!HAS_RUNTIME_PM(dev));
	assert_force_wake_inactive(dev_priv);

	DRM_DEBUG_KMS("Suspending device\n");

	/*
	 * We could deadlock here in case another thread holding struct_mutex
	 * calls RPM suspend concurrently, since the RPM suspend will wait
	 * first for this RPM suspend to finish. In this case the concurrent
	 * RPM resume will be followed by its RPM suspend counterpart. Still
	 * for consistency return -EAGAIN, which will reschedule this suspend.
	 */
	if (!mutex_trylock(&dev->struct_mutex)) {
		DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
		/*
		 * Bump the expiration timestamp, otherwise the suspend won't
		 * be rescheduled.
		 */
		pm_runtime_mark_last_busy(device);

		return -EAGAIN;
	}
	/*
	 * We are safe here against re-faults, since the fault handler takes
	 * an RPM reference.
	 */
	i915_gem_release_all_mmaps(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	/*
	 * rps.work can't be rearmed here, since we get here only after making
	 * sure the GPU is idle and the RPS freq is set to the minimum. See
	 * intel_mark_idle().
	 */
	cancel_work_sync(&dev_priv->rps.work);
	intel_runtime_pm_disable_interrupts(dev);

	if (IS_GEN6(dev)) {
		ret = 0;
	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
		ret = hsw_runtime_suspend(dev_priv);
	} else if (IS_VALLEYVIEW(dev)) {
		ret = vlv_runtime_suspend(dev_priv);
	} else {
		ret = -ENODEV;
		WARN_ON(1);
	}

	if (ret) {
		DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret);
		intel_runtime_pm_restore_interrupts(dev);

		return ret;
	}

	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
	dev_priv->pm.suspended = true;

	/*
	 * current versions of firmware which depend on this opregion
	 * notification have repurposed the D1 definition to mean
	 * "runtime suspended" vs. what you would normally expect (D3)
	 * to distinguish it from notifications that might be sent
	 * via the suspend path.
	 */
	intel_opregion_notify_adapter(dev, PCI_D1);

	DRM_DEBUG_KMS("Device suspended\n");
	return 0;
}
Example #25
static int htc_issue_packets(struct htc_target *target,
			     struct htc_endpoint *ep,
			     struct list_head *pkt_queue)
{
	int status = 0;
	u16 payload_len;
	struct sk_buff *skb;
	struct htc_frame_hdr *htc_hdr;
	struct htc_packet *packet;

	ath6kl_dbg(ATH6KL_DBG_HTC,
		   "%s: queue: 0x%p, pkts %d\n", __func__,
		   pkt_queue, get_queue_depth(pkt_queue));

	while (!list_empty(pkt_queue)) {
		packet = list_first_entry(pkt_queue, struct htc_packet, list);
		list_del(&packet->list);

		skb = packet->skb;
		if (!skb) {
			WARN_ON_ONCE(1);
			status = -EINVAL;
			break;
		}

		payload_len = packet->act_len;

		/* setup HTC frame header */
		htc_hdr = (struct htc_frame_hdr *) skb_push(skb,
							    sizeof(*htc_hdr));
		if (!htc_hdr) {
			WARN_ON_ONCE(1);
			status = -EINVAL;
			break;
		}

		packet->info.tx.flags |= HTC_FLAGS_TX_FIXUP_NETBUF;

		/* Endianness? */
		put_unaligned((u16) payload_len, &htc_hdr->payld_len);
		htc_hdr->flags = packet->info.tx.flags;
		htc_hdr->eid = (u8) packet->endpoint;
		htc_hdr->ctrl[0] = 0;
		htc_hdr->ctrl[1] = (u8) packet->info.tx.seqno;

		spin_lock_bh(&target->tx_lock);

		/* store in look up queue to match completions */
		list_add_tail(&packet->list, &ep->pipe.tx_lookup_queue);
		ep->ep_st.tx_issued += 1;
		spin_unlock_bh(&target->tx_lock);

		status = ath6kl_hif_pipe_send(target->dev->ar,
					      ep->pipe.pipeid_ul, NULL, skb);

		if (status != 0) {
			if (status != -ENOMEM) {
				/* TODO: if more than 1 endpoint maps to the
				 * same PipeID, it is possible to run out of
				 * resources in the HIF layer.
				 * Don't emit the error
				 */
				ath6kl_dbg(ATH6KL_DBG_HTC,
					   "%s: failed status:%d\n",
					   __func__, status);
			}
			spin_lock_bh(&target->tx_lock);
			list_del(&packet->list);

			/* reclaim credits */
			ep->cred_dist.credits += packet->info.tx.cred_used;
			spin_unlock_bh(&target->tx_lock);

			/* put it back into the caller's queue */
			list_add(&packet->list, pkt_queue);
			break;
		}

	}

	if (status != 0) {
		while (!list_empty(pkt_queue)) {
			if (status != -ENOMEM) {
				ath6kl_dbg(ATH6KL_DBG_HTC,
					   "%s: failed pkt:0x%p status:%d\n",
					   __func__, packet, status);
			}

			packet = list_first_entry(pkt_queue,
						  struct htc_packet, list);
			list_del(&packet->list);
			packet->status = status;
			send_packet_completion(target, packet);
		}
	}

	return status;
}
Example #26
static int copy_user_bh(struct page *to, struct inode *inode,
		struct buffer_head *bh, unsigned long vaddr)
{
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	struct block_device *bdev = bh->b_bdev;
	void *vto;

	if (dax_map_atomic(bdev, &dax) < 0)
		return PTR_ERR(dax.addr);
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)dax.addr, vaddr, to);
	kunmap_atomic(vto);
	dax_unmap_atomic(bdev, &dax);
	return 0;
}

#define NO_SECTOR -1
#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))

static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
		sector_t sector, bool pmd_entry, bool dirty)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	pgoff_t pmd_index = DAX_PMD_INDEX(index);
	int type, error = 0;
	void *entry;

	WARN_ON_ONCE(pmd_entry && !dirty);
	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	spin_lock_irq(&mapping->tree_lock);

	entry = radix_tree_lookup(page_tree, pmd_index);
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
		index = pmd_index;
		goto dirty;
	}

	entry = radix_tree_lookup(page_tree, index);
	if (entry) {
		type = RADIX_DAX_TYPE(entry);
		if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
					type != RADIX_DAX_PMD)) {
			error = -EIO;
			goto unlock;
		}

		if (!pmd_entry || type == RADIX_DAX_PMD)
			goto dirty;

		/*
		 * We only insert dirty PMD entries into the radix tree.  This
		 * means we don't need to worry about removing a dirty PTE
		 * entry and inserting a clean PMD entry, thus reducing the
		 * range we would flush with a follow-up fsync/msync call.
		 */
		radix_tree_delete(&mapping->page_tree, index);
		mapping->nrexceptional--;
	}

	if (sector == NO_SECTOR) {
		/*
		 * This can happen during correct operation if our pfn_mkwrite
		 * fault raced against a hole punch operation.  If this
		 * happens the pte that was hole punched will have been
		 * unmapped and the radix tree entry will have been removed by
		 * the time we are called, but the call will still happen.  We
		 * will return all the way up to wp_pfn_shared(), where the
		 * pte_same() check will fail, eventually causing page fault
		 * to be retried by the CPU.
		 */
		goto unlock;
	}

	error = radix_tree_insert(page_tree, index,
			RADIX_DAX_ENTRY(sector, pmd_entry));
	if (error)
		goto unlock;

	mapping->nrexceptional++;
 dirty:
	if (dirty)
		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return error;
}

static int dax_writeback_one(struct block_device *bdev,
		struct address_space *mapping, pgoff_t index, void *entry)
{
	struct radix_tree_root *page_tree = &mapping->page_tree;
	int type = RADIX_DAX_TYPE(entry);
	struct radix_tree_node *node;
	struct blk_dax_ctl dax;
	void **slot;
	int ret = 0;

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * Regular page slots are stabilized by the page lock even
	 * without the tree itself locked.  These unlocked entries
	 * need verification under the tree lock.
	 */
	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
		goto unlock;
	if (*slot != entry)
		goto unlock;

	/* another fsync thread may have already written back this entry */
	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
		goto unlock;

	if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
		ret = -EIO;
		goto unlock;
	}

	dax.sector = RADIX_DAX_SECTOR(entry);
	dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
	spin_unlock_irq(&mapping->tree_lock);

	/*
	 * We cannot hold tree_lock while calling dax_map_atomic() because it
	 * eventually calls cond_resched().
	 */
	ret = dax_map_atomic(bdev, &dax);
	if (ret < 0)
		return ret;

	if (WARN_ON_ONCE(ret < dax.size)) {
		ret = -EIO;
		goto unmap;
	}

	wb_cache_pmem(dax.addr, dax.size);

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
	spin_unlock_irq(&mapping->tree_lock);
 unmap:
	dax_unmap_atomic(bdev, &dax);
	return ret;

 unlock:
	spin_unlock_irq(&mapping->tree_lock);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	pgoff_t start_index, end_index, pmd_index;
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	bool done = false;
	int i, ret = 0;
	void *entry;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
	end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
	pmd_index = DAX_PMD_INDEX(start_index);

	rcu_read_lock();
	entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
	rcu_read_unlock();

	/* see if the start of our range is covered by a PMD entry */
	if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
		start_index = pmd_index;

	tag_pages_for_writeback(mapping, start_index, end_index);

	pagevec_init(&pvec, 0);
	while (!done) {
		pvec.nr = find_get_entries_tag(mapping, start_index,
				PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
				pvec.pages, indices);

		if (pvec.nr == 0)
			break;

		for (i = 0; i < pvec.nr; i++) {
			if (indices[i] > end_index) {
				done = true;
				break;
			}

			ret = dax_writeback_one(bdev, mapping, indices[i],
					pvec.pages[i]);
			if (ret < 0)
				return ret;
		}
	}
	wmb_pmem();
	return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
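
dax_writeback_mapping_range() is designed to be driven from a filesystem's ->writepages method. A minimal sketch of that wiring, assuming the dax_mapping() helper of this kernel era and a hypothetical my_fs_find_bdev() standing in for however the filesystem resolves its block device:

/*
 * Hedged sketch of a ->writepages hook: route DAX address spaces to
 * dax_writeback_mapping_range() and everything else to the generic
 * path. my_fs_find_bdev() is hypothetical, not a kernel API.
 */
static int my_fs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	if (dax_mapping(mapping))
		return dax_writeback_mapping_range(mapping,
				my_fs_find_bdev(mapping->host), wbc);

	return generic_writepages(mapping, wbc);
}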

static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	struct address_space *mapping = inode->i_mapping;
	struct block_device *bdev = bh->b_bdev;
	struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
	};
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file.  We can't tell the filesystem to free it because we can't
	 * take i_mutex here.  In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	if (dax_map_atomic(bdev, &dax) < 0) {
		error = PTR_ERR(dax.addr);
		goto out;
	}

	if (buffer_unwritten(bh) || buffer_new(bh)) {
		clear_pmem(dax.addr, PAGE_SIZE);
		wmb_pmem();
	}
	dax_unmap_atomic(bdev, &dax);

	error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
			vmf->flags & FAULT_FLAG_WRITE);
	if (error)
		goto out;

	error = vm_insert_mixed(vma, vaddr, dax.pfn);

 out:
	i_mmap_unlock_read(mapping);

	return error;
}

/**
 * __dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @complete_unwritten: The filesystem method used to convert unwritten blocks
 *	to written so the data written to them is exposed.  This is required
 *	by write faults for filesystems that will return unwritten extent
 *	mappings from @get_block, but it is optional for reads as
 *	dax_insert_mapping() will always zero unwritten blocks.  If the fs
 *	does not support unwritten extents, it should pass NULL.
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files. __dax_fault() assumes the caller has done all
 * the necessary locking for the page fault to proceed successfully.
 */
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block, dax_iodone_t complete_unwritten)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_bdev = inode->i_sb->s_bdev;
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			page_cache_release(page);
			return VM_FAULT_RETRY;
		}
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			page_cache_release(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
			error = copy_user_bh(new_page, inode, &bh, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_CACHE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		page_cache_release(page);
		page = NULL;
	}

	/*
	 * If we successfully insert the new mapping over an unwritten extent,
	 * we need to ensure we convert the unwritten extent. If there is an
	 * error inserting the mapping, the filesystem needs to leave it as
	 * unwritten to prevent exposure of the stale underlying data to
	 * userspace, but we still need to call the completion function so
	 * the private resources on the mapping buffer can be released. We
	 * indicate what the callback should do via the uptodate variable, same
	 * as for normal BH based IO completions.
	 */
	error = dax_insert_mapping(inode, &bh, vma, vmf);
	if (buffer_unwritten(&bh)) {
		if (complete_unwritten)
			complete_unwritten(&bh, !error);
		else
			WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
	}

 out:
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	goto out;
}
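
A filesystem typically wraps __dax_fault() in its vm_operations_struct ->fault handler, taking its own truncate-serialising lock first. A minimal sketch, with my_fs_get_block as a hypothetical get_block_t and NULL for @complete_unwritten (i.e. assuming the fs never returns unwritten extents):

static int my_fs_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	int ret;

	/* write faults must be visible to freeze and update mtime */
	if (vmf->flags & FAULT_FLAG_WRITE) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vma->vm_file);
	}

	/* real callers also hold an fs-level lock against truncate here */
	ret = __dax_fault(vma, vmf, my_fs_get_block, NULL);

	if (vmf->flags & FAULT_FLAG_WRITE)
		sb_end_pagefault(inode->i_sb);

	return ret;
}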
Example #27
0
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
	struct task_struct *tsk;
	unsigned long caller;
	int vsyscall_nr, syscall_nr, tmp;
	int prev_sig_on_uaccess_err;
	long ret;
	unsigned long orig_dx;

	/*
	 * No point in checking CS -- the only way to get here is a user mode
	 * trap to a high address, which means that we're in 64-bit user code.
	 */

	WARN_ON_ONCE(address != regs->ip);

	if (vsyscall_mode == NONE) {
		warn_bad_vsyscall(KERN_INFO, regs,
				  "vsyscall attempted with vsyscall=none");
		return false;
	}

	vsyscall_nr = addr_to_vsyscall_nr(address);

	trace_emulate_vsyscall(vsyscall_nr);

	if (vsyscall_nr < 0) {
		warn_bad_vsyscall(KERN_WARNING, regs,
				  "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
		goto sigsegv;
	}

	if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
		warn_bad_vsyscall(KERN_WARNING, regs,
				  "vsyscall with bad stack (exploit attempt?)");
		goto sigsegv;
	}

	tsk = current;

	/*
	 * Check for access_ok violations and find the syscall nr.
	 *
	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
	 * 64-bit, so we don't need to special-case it here.  For all the
	 * vsyscalls, NULL means "don't write anything" not "write it at
	 * address 0".
	 */
	switch (vsyscall_nr) {
	case 0:
		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_gettimeofday;
		break;

	case 1:
		if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_time;
		break;

	case 2:
		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
		    !write_ok_or_segv(regs->si, sizeof(unsigned))) {
			ret = -EFAULT;
			goto check_fault;
		}

		syscall_nr = __NR_getcpu;
		break;
	}

	/*
	 * Handle seccomp.  regs->ip must be the original value.
	 * See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst.
	 *
	 * We could optimize the seccomp disabled case, but performance
	 * here doesn't matter.
	 */
	regs->orig_ax = syscall_nr;
	regs->ax = -ENOSYS;
	tmp = secure_computing(NULL);
	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
		warn_bad_vsyscall(KERN_DEBUG, regs,
				  "seccomp tried to change syscall nr or ip");
		do_exit(SIGSYS);
	}
	regs->orig_ax = -1;
	if (tmp)
		goto do_ret;  /* skip requested */

	/*
	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
	 * preserve that behavior to make writing exploits harder.
	 */
	prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
	current->thread.sig_on_uaccess_err = 1;

	ret = -EFAULT;
	switch (vsyscall_nr) {
	case 0:
		/* this decodes regs->di and regs->si on its own */
		ret = __x64_sys_gettimeofday(regs);
		break;

	case 1:
		/* this decodes regs->di on its own */
		ret = __x64_sys_time(regs);
		break;

	case 2:
		/* while we could clobber regs->dx, we didn't in the past... */
		orig_dx = regs->dx;
		regs->dx = 0;
		/* this decodes regs->di, regs->si and regs->dx on its own */
		ret = __x64_sys_getcpu(regs);
		regs->dx = orig_dx;
		break;
	}

	current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;

check_fault:
	if (ret == -EFAULT) {
		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
		warn_bad_vsyscall(KERN_INFO, regs,
				  "vsyscall fault (exploit attempt?)");

		/*
		 * If we failed to generate a signal for any reason,
		 * generate one here.  (This should be impossible.)
		 */
		if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
				 !sigismember(&tsk->pending.signal, SIGSEGV)))
			goto sigsegv;

		return true;  /* Don't emulate the ret. */
	}

	regs->ax = ret;

do_ret:
	/* Emulate a ret instruction. */
	regs->ip = caller;
	regs->sp += 8;
	return true;

sigsegv:
	force_sig(SIGSEGV, current);
	return true;
}
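
For context, addr_to_vsyscall_nr() is what maps the faulting address to the 0/1/2 index switched on above: the legacy vsyscall page lays out gettimeofday, time and getcpu at 1024-byte strides from VSYSCALL_ADDR. The helper looks roughly like this (reconstructed from the same kernel source, not part of this listing):

static int addr_to_vsyscall_nr(unsigned long addr)
{
	int nr;

	/* everything but the two stride bits must match the page base */
	if ((addr & ~0xC00UL) != VSYSCALL_ADDR)
		return -EINVAL;

	nr = (addr & 0xC00UL) >> 10;
	if (nr >= 3)
		return -EINVAL;

	return nr;
}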
Example #28
0
int msm_irq_enter_sleep2(bool arm9_wake, int from_idle)
{
	int limit = 10;
	uint32_t pending[VIC_NUM_BANKS];
	int i;
	uint32_t any = 0;

	if (from_idle && !arm9_wake)
		return 0;

	/* edge triggered interrupt may get lost if this mode is used */
	WARN_ON_ONCE(!arm9_wake && !from_idle);

	if (msm_irq_debug_mask & IRQ_DEBUG_SLEEP) {
		printk(KERN_INFO "%s: change irq, pend", __func__);
		print_vic_irq_stat();
	}

	for (i = 0; i < VIC_NUM_BANKS; ++i) {
		pending[i] = readl(VIC_IRQ_STATUS(i));
		pending[i] &= msm_irq_shadow_reg[i].int_en[!from_idle];
		/* Clear INT_A9_M2A_5 since requesting sleep triggers it */
		if (i == (INT_A9_M2A_5 / 32))
			pending[i] &= ~(1U << (INT_A9_M2A_5 % 32));
		any |= pending[i];
	}

	if (any) {
		if (msm_irq_debug_mask & IRQ_DEBUG_SLEEP_ABORT) {
			printk(KERN_INFO "%s abort", __func__);
			print_irq_array(pending, VIC_NUM_BANKS);
		}
		return -EAGAIN;
	}

	for (i = 0; i < VIC_NUM_BANKS; ++i)
		writel(0, VIC_INT_EN(i));

	while (limit-- > 0) {
		int pend_irq;
		int irq = readl(VIC_IRQ_VEC_RD);
		if (irq == -1)
			break;
		pend_irq = readl(VIC_IRQ_VEC_PEND_RD);
		if (msm_irq_debug_mask & IRQ_DEBUG_SLEEP_INT)
			printk(KERN_INFO "msm_irq_enter_sleep cleared "
			       "int %d (%d)\n", irq, pend_irq);
	}

	if (arm9_wake) {
		msm_irq_set_type(INT_A9_M2A_6, IRQF_TRIGGER_RISING);
		msm_irq_ack(INT_A9_M2A_6);
		writel(1U << INT_A9_M2A_6, VIC_INT_ENSET(0));
	} else {
		for (i = 0; i < VIC_NUM_BANKS; ++i)
			writel(msm_irq_shadow_reg[i].int_en[1],
			       VIC_INT_ENSET(i));
	}

	return 0;
}
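
The INT_A9_M2A_5 masking above is the usual bank/bit split for flat IRQ numbers spread across 32-bit VIC registers. A hypothetical helper that names the idiom:

/*
 * Hedged sketch: clear one flat IRQ number from a multi-bank pending
 * array; bank index is irq / 32, bit position is irq % 32, the same
 * arithmetic the INT_A9_M2A_5 special case performs inline.
 */
static void vic_clear_pending_bit(uint32_t *pending, unsigned int irq)
{
	pending[irq / 32] &= ~(1U << (irq % 32));
}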
Example #29
0
STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = iov_iter_count(to);
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;
	loff_t			pos = iocb->ki_pos;

	XFS_STATS_INC(xs_read_calls);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= XFS_IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;

	if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		/* DIO must be aligned to device logical sector size */
		if ((pos | size) & target->bt_logical_sectormask) {
			if (pos == i_size_read(inode))
				return 0;
			return -EINVAL;
		}
	}

	n = mp->m_super->s_maxbytes - pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock
	 * for direct IO, we effectively serialise all new concurrent
	 * read IO to this file and block it behind IO that is currently in
	 * progress because IO in progress holds the IO lock shared. We only
	 * need to hold the lock exclusive to blow away the page cache, so
	 * only take lock exclusively if the page cache needs invalidation.
	 * This allows the normal direct IO case of no page cache pages to
	 * proceed concurrently without serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		if (inode->i_mapping->nrpages) {
			ret = filemap_write_and_wait_range(
							VFS_I(ip)->i_mapping,
							pos, pos + size - 1);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}

			/*
			 * Invalidate whole pages. This can return an error if
			 * we fail to invalidate a page, but this should never
			 * happen on XFS. Warn if it does fail.
			 */
			ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
					pos >> PAGE_CACHE_SHIFT,
					(pos + size - 1) >> PAGE_CACHE_SHIFT);
			WARN_ON_ONCE(ret);
			ret = 0;
		}
Example #30
0
struct nat64_session *
nat64_ipv4_tcp_session(struct iphdr *ip4, struct tcphdr *th, int recur)
{
	struct nat64_binding	 bkey;
	struct nat64_session	 skey;
	struct nat64_session	*s;

	memset(&bkey, 0, sizeof(bkey));
	memset(&skey, 0, sizeof(skey));

	skey.s_binding = &bkey;

	bkey.b_proto  = IPPROTO_TCP;
	bkey.b_saddr4.s_addr = recur % 2 ? ip4->saddr : ip4->daddr;
	bkey.b_sport4 = recur % 2 ? th->source : th->dest;
	skey.s_daddr.s_addr  = recur % 2 ? ip4->daddr : ip4->saddr;
	skey.s_dport  = recur % 2 ? th->dest : th->source;

	s = nat64_session_by_ipv4_find(&skey);

	if (recur)
		return s;

	if (!s) {
		if (!th->syn)
			return NULL;

		skey.s_state = nat64_session_state_tcp_v4_syn_rcv;
		if (!(skey.s_binding = nat64_binding_by_ipv4_find(&bkey)))
			/* TODO send ICMP unreachable */
			return NULL;

		if (!(s = nat64_session_create(&skey)))
			return NULL;

		nat64_session_refresh(s, 6);

		return NULL;
	}

	switch (s->s_state) {

	case nat64_session_state_tcp_v4_syn_rcv:
		return s;

	case nat64_session_state_tcp_v6_syn_rcv:
		if (!th->syn)
			return s;

		s->s_state = nat64_session_state_tcp_established;
		nat64_session_refresh(s, 2*60*60 + 4*60);
		return s;

	case nat64_session_state_tcp_established:
		if (th->rst) {
			s->s_state = nat64_session_state_tcp_rst_rcv;
			nat64_session_refresh(s, 4*60);
		} else {
			if (th->fin)
				s->s_state = nat64_session_state_tcp_v4_fin_rcv;
			nat64_session_refresh(s, 2*60*60 + 4*60);
		}
		return s;

	case nat64_session_state_tcp_v4_fin_rcv:
		nat64_session_refresh(s, 2*60*60 + 4*60);
		return s;

	case nat64_session_state_tcp_v6_fin_rcv:
		if (th->fin) {
			s->s_state = nat64_session_state_tcp_v6_v4_fin_rcv;
			nat64_session_refresh(s, 4*60);
		} else
			nat64_session_refresh(s, 2*60*60 + 4*60);
		return s;

	case nat64_session_state_tcp_v6_v4_fin_rcv:
		return s;

	case nat64_session_state_tcp_rst_rcv:
		s->s_state = nat64_session_state_tcp_established;
		nat64_session_refresh(s, 2*60*60 + 4*60);
		return s;

	default:
		WARN_ON_ONCE(1);
	}

	return NULL;  /* DROP */
}
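
The refresh values above track RFC 6146's minimum TCP session lifetimes: 2*60*60 + 4*60 seconds for established sessions, 4*60 seconds for the transitory states, and 6 seconds for an unanswered IPv4-initiated SYN. Hypothetical named constants would make the state machine self-documenting:

/*
 * Hypothetical constants for the timeouts used above (not in the
 * original source); the values follow RFC 6146's minimums.
 */
#define NAT64_TCP_EST_TIMEOUT	(2*60*60 + 4*60)	/* 7440 s established */
#define NAT64_TCP_TRANS_TIMEOUT	(4*60)			/* 240 s transitory */
#define NAT64_TCP_SYN_TIMEOUT	6			/* incoming v4 SYN */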