Example #1
0
void blkfront_sync(struct blkfront_dev *dev)
{
    unsigned long flags;
    DEFINE_WAIT(w);

    if (dev->info.mode == O_RDWR) {
        if (dev->info.barrier == 1)
            blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);

        if (dev->info.flush == 1)
            blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
    }

    /* Note: This won't finish if another thread enqueues requests.  */
    local_irq_save(flags);
    while (1) {
	blkfront_aio_poll(dev);
	if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
	    break;

	add_waiter(w, blkfront_queue);
	local_irq_restore(flags);
	schedule();
	local_irq_save(flags);
    }
    remove_waiter(w, blkfront_queue);
    local_irq_restore(flags);
}
Example #2
0
/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, NULL if none. The hrtimer should already
 *			have been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Complete the lock acquisition started on our behalf by another thread.
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter,
			       int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}
Example #3
0
int avs_condvar_wait(avs_condvar_t *condvar,
                     avs_mutex_t *mutex,
                     avs_time_monotonic_t deadline) {
    // Precondition: mutex is locked by the current thread
    // although we can't check if it's the current thread that locked it :(
    AVS_ASSERT(atomic_flag_test_and_set(&mutex->locked),
               "attempted to use a condition variable with an unlocked mutex");

    bool use_deadline = avs_time_monotonic_valid(deadline);
    bool flag_value;
    condvar_waiter_node_t waiter;
    insert_new_waiter(condvar, &waiter);

    avs_mutex_unlock(mutex);
    do {
        flag_value = atomic_flag_test_and_set(&waiter.waiting);
    } while (flag_value
            && (!use_deadline
                    || avs_time_monotonic_before(avs_time_monotonic_now(),
                                                 deadline)));
    avs_mutex_lock(mutex);

    remove_waiter(condvar, &waiter);

    // flag_value == 0 -> the flag was cleared, so we have been woken up
    // flag_value == 1 -> it was not cleared, so the deadline passed (timeout)
    return flag_value ? AVS_CONDVAR_TIMEOUT : 0;
}
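The wait loop above spins on atomic_flag_test_and_set() until the flag comes back clear, so the notify side only has to clear each waiter's flag. A minimal sketch of that side, assuming the condvar keeps a singly linked list of waiter nodes (the first_waiter and next fields are illustrative, not taken from the code above):

static void condvar_notify_all_sketch(avs_condvar_t *condvar) {
    // Assumed layout: condvar->first_waiter heads a list of
    // condvar_waiter_node_t, each holding the atomic_flag "waiting"
    // that avs_condvar_wait() polls.
    condvar_waiter_node_t *node = condvar->first_waiter;
    while (node) {
        // Clearing the flag makes the test-and-set in the wait loop
        // return false, which the waiter interprets as a wakeup.
        atomic_flag_clear(&node->waiting);
        node = node->next;
    }
}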
Example #4
0
/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for FUTEX_REQUEUE_PI support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task, int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 1;
	}

	ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain.  Let the waiter sort it out.
		 */
		ret = 0;
	}

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	raw_spin_unlock(&lock->wait_lock);

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}
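Taken together, the two proxy-lock calls split one acquisition across two tasks: the requeueing task enqueues the sleeper with rt_mutex_start_proxy_lock(), and the sleeper completes or aborts the acquisition with rt_mutex_finish_proxy_lock() once it runs again. A rough sketch of the two halves, shown side by side for illustration only (in the real PI-futex requeue path they run in different tasks and the waiter lives in per-task futex state):

/* Waker / requeueing task: enqueue "task" on the lock, or take it for it. */
static int proxy_lock_waker_side(struct rt_mutex *lock,
				 struct rt_mutex_waiter *waiter,
				 struct task_struct *task)
{
	/*
	 * 1: lock was acquired for task, so the caller should wake it up;
	 * 0: task is now blocked on the lock; <0: error (e.g. -EDEADLK).
	 */
	return rt_mutex_start_proxy_lock(lock, waiter, task, 0);
}

/* Woken task, later: complete (or abort) the acquisition started above. */
static int proxy_lock_waiter_side(struct rt_mutex *lock,
				  struct hrtimer_sleeper *to,
				  struct rt_mutex_waiter *waiter)
{
	/* 0 on success, or -EINTR/-ETIMEDOUT/-EDEADLK as documented above. */
	return rt_mutex_finish_proxy_lock(lock, to, waiter, 0);
}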
Example #5
0
/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	struct rt_mutex_waiter waiter;
	int ret = 0;

	debug_rt_mutex_init_waiter(&waiter);
	waiter.task = NULL;

	raw_spin_lock(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock)) {
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout)) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
				  detect_deadlock);

	set_current_state(TASK_RUNNING);

	if (unlikely(waiter.task))
		remove_waiter(lock, &waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	/*
	 * Readjust priority, when we did not get the lock. We might
	 * have been the pending owner and boosted. Since we did not
	 * take the lock, the PI boost has to go.
	 */
	if (unlikely(ret))
		rt_mutex_adjust_prio(current);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}
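For context, a slow path like the one above is normally reached only after a lock-free fast path fails; the usual shape is a cmpxchg on the owner field with a fallback to the slow function. The rt_mutex_fastlock()/rt_mutex_cmpxchg() names below follow the kernel's convention but do not appear in the snippets here, so treat this as a hedged sketch rather than the exact source:

static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state, int detect_deadlock,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
				int detect_deadlock))
{
	/* Fast path: atomically claim an unowned lock for current. */
	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current)))
		return 0;
	/* Contended (or deadlock detection requested): take the slow path. */
	return slowfn(lock, state, NULL, detect_deadlock);
}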
Example #6
0
bool
EventSync::wait( UInt32 msTimeout ) const  // nofail
{
  EventWaiter waiter;
  add_waiter(&waiter);

  // check the state
  if (!get_state()) {
    pthread_mutex_lock(&waiter.lock);
    struct timespec timeout;
    struct timeval tv;
    gettimeofday(&tv, NULL);
    // convert ms to s and ns
    UInt32 s = msTimeout / 1000;
    msTimeout = msTimeout % 1000;
    UInt32 ns = msTimeout * 1000000;
    // convert timeval to timespec
    timeout.tv_nsec = tv.tv_usec * 1000;
    timeout.tv_sec = tv.tv_sec;

    // add the time
    timeout.tv_nsec += (suseconds_t)ns;
    timeout.tv_sec += (time_t)s;
    // carry the nanoseconds over into seconds on overflow
    if (timeout.tv_nsec >= 1000000000) {
      timeout.tv_sec ++;
      timeout.tv_nsec -= 1000000000;
    }
    pthread_cond_timedwait(&waiter.condvar, &waiter.lock, &timeout);
    pthread_mutex_unlock(&waiter.lock);
  }
  remove_waiter(&waiter);
  return get_state();
}
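The middle of wait() above is mostly the conversion from a relative millisecond timeout to the absolute timespec that pthread_cond_timedwait() expects. The same arithmetic as a standalone helper (the function name is illustrative, not part of the code above):

#include <sys/time.h>
#include <time.h>

/* Convert "ms from now" into an absolute realtime timespec, using
 * gettimeofday() just as wait() above does. */
static void abs_timeout_from_ms(unsigned long ms, struct timespec *out)
{
  struct timeval tv;
  gettimeofday(&tv, NULL);

  out->tv_sec = tv.tv_sec + (time_t)(ms / 1000);
  out->tv_nsec = (long)tv.tv_usec * 1000 + (long)(ms % 1000) * 1000000L;

  /* Carry nanoseconds into seconds on overflow. */
  if (out->tv_nsec >= 1000000000L) {
    out->tv_sec++;
    out->tv_nsec -= 1000000000L;
  }
}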
Example #7
0
/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	struct rt_mutex_waiter waiter;
	int ret = 0;

	debug_rt_mutex_init_waiter(&waiter);
	RB_CLEAR_NODE(&waiter.pi_tree_entry);
	RB_CLEAR_NODE(&waiter.tree_entry);

	raw_spin_lock(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout)) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);

	if (likely(!ret))
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret))
		remove_waiter(lock, &waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}
Example #8
0
static void fbfront_out_event(struct fbfront_dev *dev, union xenfb_out_event *event)
{
    struct xenfb_page *page = dev->page;
    uint32_t prod;
    DEFINE_WAIT(w);

    add_waiter(w, fbfront_queue);
    while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN)
        schedule();
    remove_waiter(w, fbfront_queue);

    prod = page->out_prod;
    mb(); /* ensure ring space available */
    XENFB_OUT_RING_REF(page, prod) = *event;
    wmb(); /* ensure ring contents visible */
    page->out_prod = prod + 1;
    notify_remote_via_evtchn(dev->evtchn);
}
Example #9
0
static void blkfront_wait_slot(struct blkfront_dev *dev)
{
    /* Wait for a slot */
    if (RING_FULL(&dev->ring)) {
	unsigned long flags;
	DEFINE_WAIT(w);
	local_irq_save(flags);
	while (1) {
	    blkfront_aio_poll(dev);
	    if (!RING_FULL(&dev->ring))
		break;
	    /* Really no slot, go to sleep. */
	    add_waiter(w, blkfront_queue);
	    local_irq_restore(flags);
	    schedule();
	    local_irq_save(flags);
	}
	remove_waiter(w, blkfront_queue);
	local_irq_restore(flags);
    }
}
Example #10
0
/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, NULL if none. The hrtimer should already
 *			have been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Complete the lock acquisition started on our behalf by another thread.
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter,
			       int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
				  detect_deadlock);

	set_current_state(TASK_RUNNING);

	if (unlikely(waiter->task))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	/*
	 * Readjust priority, when we did not get the lock. We might have been
	 * the pending owner and boosted. Since we did not take the lock, the
	 * PI boost has to go.
	 */
	if (unlikely(ret))
		rt_mutex_adjust_prio(current);

	return ret;
}
Example #11
0
/* Send a message to xenbus, in the same fashion as xb_write, and
   block waiting for a reply.  The reply is malloced and should be
   freed by the caller. */
struct xsd_sockmsg *
xenbus_msg_reply(int type,
		 xenbus_transaction_t trans,
		 struct write_req *io,
		 int nr_reqs)
{
    int id;
    DEFINE_WAIT(w);
    struct xsd_sockmsg *rep;

    id = allocate_xenbus_id();
    add_waiter(w, req_info[id].waitq);

    xb_write(type, id, trans, io, nr_reqs);

    schedule();
    remove_waiter(w, req_info[id].waitq);
    wake(current);

    rep = req_info[id].reply;
    BUG_ON(rep->req_id != id);
    release_xenbus_id(id);
    return rep;
}
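A short usage sketch for xenbus_msg_reply(), illustrating the ownership rule stated in the comment above: the reply is heap-allocated and must be freed by the caller. The single-element write_req carrying the path (including its trailing NUL) follows the usual Mini-OS pattern, but treat the details and the helper name as assumptions; XS_ERROR replies are not handled here:

static char *xenbus_read_sketch(xenbus_transaction_t trans, const char *path)
{
    /* One request chunk: the path string including its trailing NUL. */
    struct write_req req[] = { { path, strlen(path) + 1 } };
    struct xsd_sockmsg *rep;
    char *value;

    rep = xenbus_msg_reply(XS_READ, trans, req, ARRAY_SIZE(req));

    /* The payload follows the header; copy it out and NUL-terminate. */
    value = malloc(rep->len + 1);
    memcpy(value, rep + 1, rep->len);
    value[rep->len] = '\0';

    free(rep);  /* the caller owns the reply and must free it */
    return value;
}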
Example #12
0
void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
{
    unsigned long flags;
    DEFINE_WAIT(w);

    ASSERT(!aiocbp->aio_cb);
    aiocbp->aio_cb = blkfront_aio_cb;
    blkfront_aio(aiocbp, write);
    aiocbp->data = NULL;

    local_irq_save(flags);
    while (1) {
	blkfront_aio_poll(aiocbp->aio_dev);
	if (aiocbp->data)
	    break;

	add_waiter(w, blkfront_queue);
	local_irq_restore(flags);
	schedule();
	local_irq_save(flags);
    }
    remove_waiter(w, blkfront_queue);
    local_irq_restore(flags);
}
Example #13
0
int read(int fd, void *buf, size_t nbytes)
{
    if (fd < 0 || fd >= NOFILE) {
	errno = EBADF;
	return -1;
    }
    switch (files[fd].type) {
        case FTYPE_SAVEFILE:
	case FTYPE_CONSOLE: {
	    int ret;
            DEFINE_WAIT(w);
            while(1) {
                add_waiter(w, console_queue);
                ret = xencons_ring_recv(files[fd].cons.dev, buf, nbytes);
                if (ret)
                    break;
                schedule();
            }
            remove_waiter(w);
            return ret;
        }
#ifdef HAVE_LWIP
	case FTYPE_SOCKET:
	    return lwip_read(files[fd].socket.fd, buf, nbytes);
#endif
	case FTYPE_TAP: {
	    ssize_t ret;
	    ret = netfront_receive(files[fd].tap.dev, buf, nbytes);
	    if (ret <= 0) {
		errno = EAGAIN;
		return -1;
	    }
	    return ret;
	}
        case FTYPE_KBD: {
            int ret, n;
            n = nbytes / sizeof(union xenkbd_in_event);
            ret = kbdfront_receive(files[fd].kbd.dev, buf, n);
	    if (ret <= 0) {
		errno = EAGAIN;
		return -1;
	    }
	    return ret * sizeof(union xenkbd_in_event);
        }
        case FTYPE_FB: {
            int ret, n;
            n = nbytes / sizeof(union xenfb_in_event);
            ret = fbfront_receive(files[fd].fb.dev, buf, n);
	    if (ret <= 0) {
		errno = EAGAIN;
		return -1;
	    }
	    return ret * sizeof(union xenfb_in_event);
        }
	case FTYPE_COMPILED_FILE: {
	    int n;
	    if (files[fd].compiled_file.offset >= files[fd].compiled_file.size)
		    n = 0;
	    else
		    n = files[fd].compiled_file.size - files[fd].compiled_file.offset;
	    if (n >= nbytes)
		    n = nbytes;
	    printf("Request %d on %d, get %d\n", nbytes, fd, n);
	    memcpy(buf, files[fd].compiled_file.content + files[fd].compiled_file.offset, n);
	    files[fd].compiled_file.offset += n;
	    return n;
	}
	default:
	    break;
    }
    printk("read(%d): Bad descriptor\n", fd);
    errno = EBADF;
    return -1;
}
Example #14
0
/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	struct rt_mutex_waiter waiter;
	int ret = 0;

	debug_rt_mutex_init_waiter(&waiter);
	waiter.task = NULL;

	spin_lock(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock)) {
		spin_unlock(&lock->wait_lock);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout))
		hrtimer_start(&timeout->timer, timeout->timer.expires,
			      HRTIMER_MODE_ABS);

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		/*
		 * waiter.task is NULL the first time we come here and
		 * when we have been woken up by the previous owner
		 * but the lock got stolen by a higher prio task.
		 */
		if (!waiter.task) {
			ret = task_blocks_on_rt_mutex(lock, &waiter,
						      detect_deadlock);
			/*
			 * If we got woken up by the owner then start loop
			 * all over without going into schedule to try
			 * to get the lock now:
			 */
			if (unlikely(!waiter.task)) {
				/*
				 * Reset the return value. We might
				 * have returned with -EDEADLK and the
				 * owner released the lock while we
				 * were walking the pi chain.
				 */
				ret = 0;
				continue;
			}
			if (unlikely(ret))
				break;
		}

		spin_unlock(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(&waiter);

		if (waiter.task)
			schedule_rt_mutex(lock);

		spin_lock(&lock->wait_lock);
		set_current_state(state);
	}

	set_current_state(TASK_RUNNING);

	if (unlikely(waiter.task))
		remove_waiter(lock, &waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	spin_unlock(&lock->wait_lock);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	/*
	 * Readjust priority, when we did not get the lock. We might
	 * have been the pending owner and boosted. Since we did not
	 * take the lock, the PI boost has to go.
	 */
	if (unlikely(ret))
		rt_mutex_adjust_prio(current);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}
Example #15
0
/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	int ret = 0, saved_lock_depth = -1;
	struct rt_mutex_waiter waiter;
	unsigned long flags;

	debug_rt_mutex_init_waiter(&waiter);

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	init_lists(lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout)) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock, flags, 0);

	/*
	 * We drop the BKL here before we go into the wait loop to avoid a
	 * possible deadlock in the scheduler.
	 *
	 * Note: This must be done after we call task_blocks_on_rt_mutex
	 *  because rt_release_bkl() releases the wait_lock and will
	 *  cause a race between setting the mark waiters flag in
	 *  the owner field and adding this task to the wait list. Those
	 *  two must be done within the protection of the wait_lock.
	 */
	if (unlikely(current->lock_depth >= 0))
		saved_lock_depth = rt_release_bkl(lock, flags);

	if (likely(!ret))
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, flags);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret))
		remove_waiter(lock, &waiter, flags);
	BUG_ON(!plist_node_empty(&waiter.list_entry));

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	/* Must we reacquire the BKL? */
	if (unlikely(saved_lock_depth >= 0))
		rt_reacquire_bkl(saved_lock_depth);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}
Example #16
0
/**
 * Transmit function for pbufs; handles checksum and segmentation offloading for TCPv4 and TCPv6
 */
err_t netfront_xmit_pbuf(struct netfront_dev *dev, struct pbuf *p, int co_type, int push)
{
	struct netif_tx_request *first_tx;
	struct netif_extra_info *gso;
	int slots;
	int used = 0;
#ifdef CONFIG_NETFRONT_GSO
	int sego;
#endif /* CONFIG_NETFRONT_GSO */
#ifdef CONFIG_NETFRONT_WAITFORTX
	unsigned long flags;
	DEFINE_WAIT(w);
#endif /* CONFIG_NETFRONT_WAITFORTX */

	/* Counts how many slots we require for this buf */
	slots = netfront_count_pbuf_slots(dev, p);
#ifdef CONFIG_NETFRONT_GSO
#if TCP_GSO /* GSO flag is only available if lwIP is built with GSO support */
	sego = (p->flags & PBUF_FLAG_GSO) ? 1 : 0;
#else
	sego = 0;
#endif
	/* GSO requires checksum offloading set */
	BUG_ON(sego && !(co_type & (XEN_NETIF_GSO_TYPE_TCPV4 | XEN_NETIF_GSO_TYPE_TCPV6)));
#endif /* CONFIG_NETFRONT_GSO */

	/* Checks if there are enough requests for this many slots (gso requires one slot more) */
#ifdef CONFIG_NETFRONT_GSO
	BUG_ON(!netfront_tx_possible(dev, slots + sego));
#else
	BUG_ON(!netfront_tx_possible(dev, slots));
#endif /* CONFIG_NETFRONT_GSO */

#ifdef CONFIG_NETFRONT_WAITFORTX
	local_irq_save(flags);
#endif /* CONFIG_NETFRONT_WAITFORTX */
#ifdef CONFIG_NETFRONT_GSO
	if (unlikely(!netfront_tx_available(dev, slots + sego))) {
#else
	if (unlikely(!netfront_tx_available(dev, slots))) {
#endif /* CONFIG_NETFRONT_GSO */
		netfront_xmit_push(dev);
#ifdef CONFIG_NETFRONT_WAITFORTX
 try_again:
#ifdef CONFIG_NETFRONT_GSO
		if (!netfront_tx_available(dev, slots + sego)) {
#else
		if (!netfront_tx_available(dev, slots)) {
#endif /* CONFIG_NETFRONT_GSO */
#ifndef CONFIG_NETFRONT_WAITFORTX_BUSYLOOP
			add_waiter(w, netfront_txqueue); /* sleep until tx space is freed */
			local_irq_restore(flags);
			schedule();
			local_irq_save(flags);
#endif /* !CONFIG_NETFRONT_WAITFORTX_BUSYLOOP */
			netfront_tx_buf_gc(dev);
			goto try_again;
		}
		remove_waiter(w, netfront_txqueue); /* done waiting for tx space */
#else
		return ERR_MEM;
#endif /* CONFIG_NETFRONT_WAITFORTX */
	}
#ifdef CONFIG_NETFRONT_WAITFORTX
	local_irq_restore(flags);
#endif /* CONFIG_NETFRONT_WAITFORTX */

	/* Set extras if packet is GSO kind */
	first_tx = netfront_get_page(dev);
	ASSERT(first_tx != NULL);
#if defined CONFIG_NETFRONT_GSO && TCP_GSO
	if (sego) {
		gso = (struct netif_extra_info *) RING_GET_REQUEST(&dev->tx, dev->tx.req_prod_pvt++);

		first_tx->flags |= NETTXF_extra_info;
		gso->u.gso.size = p->gso_size; /* segmentation size */
		gso->u.gso.type = co_type; /* XEN_NETIF_GSO_TYPE_TCPV4, XEN_NETIF_GSO_TYPE_TCPV6 */
		gso->u.gso.pad = 0;
		gso->u.gso.features = 0;

		gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
		gso->flags = 0;

		used++;
	}
#endif /* CONFIG_NETFRONT_GSO */

	/* Make TX requests for the pbuf */
#ifdef CONFIG_NETFRONT_PERSISTENT_GRANTS
	netfront_make_txreqs_pgnt(dev, first_tx, p, &used);
#else
	netfront_make_txreqs(dev, first_tx, p, &used);
#endif
	ASSERT(slots >= used); /* we should have taken at most the number of slots that we estimated before */
	ASSERT(slots <= XEN_NETIF_NR_SLOTS_MIN); /* we should never take more slots than the backend supports */

	/* partially checksummed (offload enabled), or checksummed */
	first_tx->flags |= co_type ? ((NETTXF_csum_blank) | (NETTXF_data_validated)) : (NETTXF_data_validated);

	push |= (((dev)->tx.req_prod_pvt - (dev)->tx.rsp_cons) <= NET_TX_RING_SIZE / 2);
	if (push)
		netfront_xmit_push(dev);

#ifdef CONFIG_NETFRONT_STATS
	++dev->txpkts;
	dev->txbytes += p->tot_len;
#endif
	dprintk("tx: %c%c%c %u bytes (%u slots)\n", sego ? 'S' : '-', co_type ? 'C' : '-', push ? 'P' : '-', p->tot_len, slots);
	return ERR_OK;
}

void netfront_xmit_push(struct netfront_dev *dev)
{
	unsigned long flags;

	netfront_xmit_notify(dev);

	/* Collect any outstanding responses to make room for more requests */
	local_irq_save(flags);
	netfront_tx_buf_gc(dev);
	local_irq_restore(flags);
}

void netfront_set_rx_pbuf_handler(struct netfront_dev *dev,
				  void (*thenetif_rx)(struct pbuf *p, void *arg),
				  void *arg)
{
	if (dev->netif_rx_pbuf && dev->netif_rx_pbuf != netif_rx_pbuf)
		printk("Replacing netif_rx_pbuf handler for dev %s\n", dev->nodename);

	dev->netif_rx = NULL;
	dev->netif_rx_pbuf = thenetif_rx;
	dev->netif_rx_arg = arg;
}
#endif

static void free_netfront(struct netfront_dev *dev)
{
	int i;
	int separate_tx_rx_irq = (dev->tx_evtchn != dev->rx_evtchn);

	free(dev->mac);
	free(dev->backend);

#ifdef CONFIG_NETMAP
	if (dev->netmap)
		return;
#endif

	for(i=0; i<NET_TX_RING_SIZE; i++)
		down(&dev->tx_sem);

	mask_evtchn(dev->tx_evtchn);
	if (separate_tx_rx_irq)
		mask_evtchn(dev->rx_evtchn);

	gnttab_end_access(dev->rx_ring_ref);
	gnttab_end_access(dev->tx_ring_ref);

	free_page(dev->rx.sring);
	free_page(dev->tx.sring);

	unbind_evtchn(dev->tx_evtchn);
	if (separate_tx_rx_irq)
		unbind_evtchn(dev->rx_evtchn);

#ifdef CONFIG_NETFRONT_PERSISTENT_GRANTS
	for(i=0; i<NET_RX_RING_SIZE; i++) {
		if (dev->rx_buffers[i].page) {
			gnttab_end_access(dev->rx_buffers[i].gref);
			free_page(dev->rx_buffers[i].page);
		}
	}
#else
	for(i=0; i<NET_RX_BUFFERS; i++) {
		if (dev->rx_buffer_pool[i].page) {
			if (dev->rx_buffer_pool[i].gref != GRANT_INVALID_REF)
				gnttab_end_access(dev->rx_buffer_pool[i].gref);
			free_page(dev->rx_buffer_pool[i].page);
		}
	}
#endif

#if defined CONFIG_NETFRONT_PERSISTENT_GRANTS || !defined CONFIG_NETFRONT_LWIP_ONLY
	for(i=0; i<NET_TX_RING_SIZE; i++) {
		if (dev->tx_buffers[i].page) {
#ifndef CONFIG_NETFRONT_PERSISTENT_GRANTS
			if (dev->tx_buffers[i].gref != GRANT_INVALID_REF)
#endif
			gnttab_end_access(dev->tx_buffers[i].gref);
			free_page(dev->tx_buffers[i].page);
		}
	}
#endif
}
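A brief usage sketch for the netfront_xmit_pbuf() transmit function at the top of this example: a non-zero co_type requests checksum offload (and selects the GSO type when the pbuf is flagged for segmentation), and push != 0 forces the ring to be pushed immediately. The wrapper name is illustrative only:

/* Transmit a TCPv4 pbuf with checksum offload and push the ring at once. */
static err_t netfront_send_tcp4_sketch(struct netfront_dev *dev, struct pbuf *p)
{
	return netfront_xmit_pbuf(dev, p, XEN_NETIF_GSO_TYPE_TCPV4, 1);
}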
Example #17
0
/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	int ret = 0, saved_lock_depth = -1;
	struct rt_mutex_waiter waiter;
	unsigned long flags;

	debug_rt_mutex_init_waiter(&waiter);
	waiter.task = NULL;

	spin_lock_irqsave(&lock->wait_lock, flags);
	init_lists(lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock)) {
		spin_unlock_irqrestore(&lock->wait_lock, flags);
		return 0;
	}

	/*
	 * We drop the BKL here before we go into the wait loop to avoid a
	 * possible deadlock in the scheduler.
	 */
	if (unlikely(current->lock_depth >= 0))
		saved_lock_depth = rt_release_bkl(lock, flags);

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout))
		hrtimer_start(&timeout->timer, timeout->timer.expires,
			      HRTIMER_MODE_ABS);

	for (;;) {
		unsigned long saved_flags;

		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		/*
		 * waiter.task is NULL the first time we come here and
		 * when we have been woken up by the previous owner
		 * but the lock got stolen by a higher prio task.
		 */
		if (!waiter.task) {
			ret = task_blocks_on_rt_mutex(lock, &waiter,
						      detect_deadlock, flags);
			/*
			 * If we got woken up by the owner then start loop
			 * all over without going into schedule to try
			 * to get the lock now:
			 */
			if (unlikely(!waiter.task))
				continue;

			if (unlikely(ret))
				break;
		}
		saved_flags = current->flags & PF_NOSCHED;
		current->flags &= ~PF_NOSCHED;

		spin_unlock_irq(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(&waiter);

		if (waiter.task)
			schedule_rt_mutex(lock);

		spin_lock_irq(&lock->wait_lock);

		current->flags |= saved_flags;
		set_current_state(state);
	}

	set_current_state(TASK_RUNNING);

	if (unlikely(waiter.task))
		remove_waiter(lock, &waiter, flags);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	/*
	 * Readjust priority, when we did not get the lock. We might
	 * have been the pending owner and boosted. Since we did not
	 * take the lock, the PI boost has to go.
	 */
	if (unlikely(ret))
		rt_mutex_adjust_prio(current);

	/* Must we reacquire the BKL? */
	if (unlikely(saved_lock_depth >= 0))
		rt_reacquire_bkl(saved_lock_depth);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}
Example #18
0
/*
 * Slow path lock function spin_lock style: this variant is very
 * careful not to miss any non-lock wakeups.
 *
 * The wakeup side uses wake_up_process_mutex, which, combined with
 * the xchg code of this function, is a transparent sleep/wakeup
 * mechanism nested within any existing sleep/wakeup mechanism. This
 * enables the seamless use of arbitrary (blocking) spinlocks within
 * sleep/wakeup event loops.
 */
static void fastcall noinline __sched
rt_spin_lock_slowlock(struct rt_mutex *lock)
{
	struct rt_mutex_waiter waiter;
	unsigned long saved_state, state, flags;

	debug_rt_mutex_init_waiter(&waiter);
	waiter.task = NULL;

	spin_lock_irqsave(&lock->wait_lock, flags);
	init_lists(lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock)) {
		spin_unlock_irqrestore(&lock->wait_lock, flags);
		return;
	}

	BUG_ON(rt_mutex_owner(lock) == current);

	/*
	 * Here we save whatever state the task was in originally,
	 * we'll restore it at the end of the function and we'll take
	 * any intermediate wakeup into account as well, independently
	 * of the lock sleep/wakeup mechanism. When we get a real
	 * wakeup the task->state is TASK_RUNNING and we change
	 * saved_state accordingly. If we did not get a real wakeup
	 * then we return with the saved state.
	 */
	saved_state = xchg(&current->state, TASK_UNINTERRUPTIBLE);

	for (;;) {
		unsigned long saved_flags;
		int saved_lock_depth = current->lock_depth;

		/* Try to acquire the lock */
		if (try_to_take_rt_mutex(lock))
			break;
		/*
		 * waiter.task is NULL the first time we come here and
		 * when we have been woken up by the previous owner
		 * but the lock got stolen by a higher prio task.
		 */
		if (!waiter.task) {
			task_blocks_on_rt_mutex(lock, &waiter, 0, flags);
			/* Wakeup during boost ? */
			if (unlikely(!waiter.task))
				continue;
		}

		/*
		 * Prevent schedule() from dropping the BKL while waiting
		 * for the lock! We restore lock_depth when we come back.
		 */
		saved_flags = current->flags & PF_NOSCHED;
		current->lock_depth = -1;
		current->flags &= ~PF_NOSCHED;
		spin_unlock_irqrestore(&lock->wait_lock, flags);

		debug_rt_mutex_print_deadlock(&waiter);

		schedule_rt_mutex(lock);

		spin_lock_irqsave(&lock->wait_lock, flags);
		current->flags |= saved_flags;
		current->lock_depth = saved_lock_depth;
		state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
		if (unlikely(state == TASK_RUNNING))
			saved_state = TASK_RUNNING;
	}

	state = xchg(&current->state, saved_state);
	if (unlikely(state == TASK_RUNNING))
		current->state = TASK_RUNNING;

	/*
	 * Extremely rare case, if we got woken up by a non-mutex wakeup,
	 * and we managed to steal the lock despite us not being the
	 * highest-prio waiter (due to SCHED_OTHER changing prio), then we
	 * can end up with a non-NULL waiter.task:
	 */
	if (unlikely(waiter.task))
		remove_waiter(lock, &waiter, flags);
	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock);

	spin_unlock_irqrestore(&lock->wait_lock, flags);

	debug_rt_mutex_free_waiter(&waiter);
}
Example #19
0
int read(int fd, void *buf, size_t nbytes)
{
    switch (files[fd].type) {
        case FTYPE_SAVEFILE:
	case FTYPE_CONSOLE: {
	    int ret;
            DEFINE_WAIT(w);
            while(1) {
                add_waiter(w, console_queue);
                ret = xencons_ring_recv(files[fd].cons.dev, buf, nbytes);
                if (ret)
                    break;
                schedule();
            }
            remove_waiter(w, console_queue);
            return ret;
        }
#ifdef HAVE_LWIP
	case FTYPE_SOCKET:
	    return lwip_read(files[fd].socket.fd, buf, nbytes);
#endif
#ifdef CONFIG_NETFRONT
	case FTYPE_TAP: {
	    ssize_t ret;
	    ret = netfront_receive(files[fd].tap.dev, buf, nbytes);
	    if (ret <= 0) {
		errno = EAGAIN;
		return -1;
	    }
	    return ret;
	}
#endif
#ifdef CONFIG_KBDFRONT
        case FTYPE_KBD: {
            int ret, n;
            n = nbytes / sizeof(union xenkbd_in_event);
            ret = kbdfront_receive(files[fd].kbd.dev, buf, n);
	    if (ret <= 0) {
		errno = EAGAIN;
		return -1;
	    }
	    return ret * sizeof(union xenkbd_in_event);
        }
#endif
#ifdef CONFIG_FBFRONT
        case FTYPE_FB: {
            int ret, n;
            n = nbytes / sizeof(union xenfb_in_event);
            ret = fbfront_receive(files[fd].fb.dev, buf, n);
	    if (ret <= 0) {
		errno = EAGAIN;
		return -1;
	    }
	    return ret * sizeof(union xenfb_in_event);
        }
#endif
#ifdef CONFIG_BLKFRONT
        case FTYPE_BLK: {
	    return blkfront_posix_read(fd, buf, nbytes);
        }
#endif
#ifdef CONFIG_TPMFRONT
        case FTYPE_TPMFRONT: {
	    return tpmfront_posix_read(fd, buf, nbytes);
        }
#endif
#ifdef CONFIG_TPM_TIS
        case FTYPE_TPM_TIS: {
	    return tpm_tis_posix_read(fd, buf, nbytes);
        }
#endif
	default:
	    break;
    }
    printk("read(%d): Bad descriptor\n", fd);
    errno = EBADF;
    return -1;
}
Example #20
0
/* Just poll without blocking */
static int select_poll(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds)
{
    int i, n = 0;
#ifdef HAVE_LWIP
    int sock_n = 0, sock_nfds = 0;
    fd_set sock_readfds, sock_writefds, sock_exceptfds;
    struct timeval timeout = { .tv_sec = 0, .tv_usec = 0};
#endif

#ifdef LIBC_VERBOSE
    static int nb;
    static int nbread[NOFILE], nbwrite[NOFILE], nbexcept[NOFILE];
    static s_time_t lastshown;

    nb++;
#endif

#ifdef HAVE_LWIP
    /* first poll network */
    FD_ZERO(&sock_readfds);
    FD_ZERO(&sock_writefds);
    FD_ZERO(&sock_exceptfds);
    for (i = 0; i < nfds; i++) {
	if (files[i].type == FTYPE_SOCKET) {
	    if (FD_ISSET(i, readfds)) {
		FD_SET(files[i].socket.fd, &sock_readfds);
		sock_nfds = i+1;
	    }
	    if (FD_ISSET(i, writefds)) {
		FD_SET(files[i].socket.fd, &sock_writefds);
		sock_nfds = i+1;
	    }
	    if (FD_ISSET(i, exceptfds)) {
		FD_SET(files[i].socket.fd, &sock_exceptfds);
		sock_nfds = i+1;
	    }
	}
    }
    if (sock_nfds > 0) {
        DEBUG("lwip_select(");
        dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout);
        DEBUG("); -> ");
        sock_n = lwip_select(sock_nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout);
        dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout);
        DEBUG("\n");
    }
#endif

    /* Then see others as well. */
    for (i = 0; i < nfds; i++) {
	switch(files[i].type) {
	default:
	    if (FD_ISSET(i, readfds) || FD_ISSET(i, writefds) || FD_ISSET(i, exceptfds))
		printk("bogus fd %d in select\n", i);
	    /* Fallthrough.  */
	case FTYPE_CONSOLE:
	    if (FD_ISSET(i, readfds)) {
                if (xencons_ring_avail(files[i].cons.dev))
		    n++;
		else
		    FD_CLR(i, readfds);
            }
	    if (FD_ISSET(i, writefds))
                n++;
	    FD_CLR(i, exceptfds);
	    break;
#ifdef CONFIG_XENBUS
	case FTYPE_XENBUS:
	    if (FD_ISSET(i, readfds)) {
                if (files[i].xenbus.events)
		    n++;
		else
		    FD_CLR(i, readfds);
	    }
	    FD_CLR(i, writefds);
	    FD_CLR(i, exceptfds);
	    break;
#endif
	case FTYPE_EVTCHN:
	case FTYPE_TAP:
	case FTYPE_BLK:
	case FTYPE_KBD:
	case FTYPE_FB:
	    if (FD_ISSET(i, readfds)) {
		if (files[i].read)
		    n++;
		else
		    FD_CLR(i, readfds);
	    }
	    FD_CLR(i, writefds);
	    FD_CLR(i, exceptfds);
	    break;
#ifdef HAVE_LWIP
	case FTYPE_SOCKET:
	    if (FD_ISSET(i, readfds)) {
	        /* Optimize no-network-packet case.  */
		if (sock_n && FD_ISSET(files[i].socket.fd, &sock_readfds))
		    n++;
		else
		    FD_CLR(i, readfds);
	    }
            if (FD_ISSET(i, writefds)) {
		if (sock_n && FD_ISSET(files[i].socket.fd, &sock_writefds))
		    n++;
		else
		    FD_CLR(i, writefds);
            }
            if (FD_ISSET(i, exceptfds)) {
		if (sock_n && FD_ISSET(files[i].socket.fd, &sock_exceptfds))
		    n++;
		else
		    FD_CLR(i, exceptfds);
            }
	    break;
#endif
	}
#ifdef LIBC_VERBOSE
	if (FD_ISSET(i, readfds))
	    nbread[i]++;
	if (FD_ISSET(i, writefds))
	    nbwrite[i]++;
	if (FD_ISSET(i, exceptfds))
	    nbexcept[i]++;
#endif
    }
#ifdef LIBC_VERBOSE
    if (NOW() > lastshown + 1000000000ull) {
	lastshown = NOW();
	printk("%lu MB free, ", num_free_pages() / ((1 << 20) / PAGE_SIZE));
	printk("%d(%d): ", nb, sock_n);
	for (i = 0; i < nfds; i++) {
	    if (nbread[i] || nbwrite[i] || nbexcept[i])
		printk(" %d(%c):", i, file_types[files[i].type]);
	    if (nbread[i])
	    	printk(" %dR", nbread[i]);
	    if (nbwrite[i])
		printk(" %dW", nbwrite[i]);
	    if (nbexcept[i])
		printk(" %dE", nbexcept[i]);
	}
	printk("\n");
	memset(nbread, 0, sizeof(nbread));
	memset(nbwrite, 0, sizeof(nbwrite));
	memset(nbexcept, 0, sizeof(nbexcept));
	nb = 0;
    }
#endif
    return n;
}

/* The strategy is to
 * - announce that we will maybe sleep
 * - poll a bit; if successful, return
 * - if timeout, return
 * - really sleep (unless somebody woke us up in the meantime) */
int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
    int n, ret;
    fd_set myread, mywrite, myexcept;
    struct thread *thread = get_current();
    s_time_t start = NOW(), stop;
#ifdef CONFIG_NETFRONT
    DEFINE_WAIT(netfront_w);
#endif
    DEFINE_WAIT(event_w);
#ifdef CONFIG_BLKFRONT
    DEFINE_WAIT(blkfront_w);
#endif
#ifdef CONFIG_XENBUS
    DEFINE_WAIT(xenbus_watch_w);
#endif
#ifdef CONFIG_KBDFRONT
    DEFINE_WAIT(kbdfront_w);
#endif
    DEFINE_WAIT(console_w);

    assert(thread == main_thread);

    DEBUG("select(%d, ", nfds);
    dump_set(nfds, readfds, writefds, exceptfds, timeout);
    DEBUG(");\n");

    if (timeout)
	stop = start + SECONDS(timeout->tv_sec) + timeout->tv_usec * 1000;
    else
	/* just make gcc happy */
	stop = start;

    /* Tell people we're going to sleep before looking at what they are
     * saying, hence letting them wake us if events happen between here and
     * schedule() */
#ifdef CONFIG_NETFRONT
    add_waiter(netfront_w, netfront_queue);
#endif
    add_waiter(event_w, event_queue);
#ifdef CONFIG_BLKFRONT
    add_waiter(blkfront_w, blkfront_queue);
#endif
#ifdef CONFIG_XENBUS
    add_waiter(xenbus_watch_w, xenbus_watch_queue);
#endif
#ifdef CONFIG_KBDFRONT
    add_waiter(kbdfront_w, kbdfront_queue);
#endif
    add_waiter(console_w, console_queue);

    if (readfds)
        myread = *readfds;
    else
        FD_ZERO(&myread);
    if (writefds)
        mywrite = *writefds;
    else
        FD_ZERO(&mywrite);
    if (exceptfds)
        myexcept = *exceptfds;
    else
        FD_ZERO(&myexcept);

    DEBUG("polling ");
    dump_set(nfds, &myread, &mywrite, &myexcept, timeout);
    DEBUG("\n");
    n = select_poll(nfds, &myread, &mywrite, &myexcept);

    if (n) {
	dump_set(nfds, readfds, writefds, exceptfds, timeout);
	if (readfds)
	    *readfds = myread;
	if (writefds)
	    *writefds = mywrite;
	if (exceptfds)
	    *exceptfds = myexcept;
	DEBUG(" -> ");
	dump_set(nfds, readfds, writefds, exceptfds, timeout);
	DEBUG("\n");
	wake(thread);
	ret = n;
	goto out;
    }
    if (timeout && NOW() >= stop) {
	if (readfds)
	    FD_ZERO(readfds);
	if (writefds)
	    FD_ZERO(writefds);
	if (exceptfds)
	    FD_ZERO(exceptfds);
	timeout->tv_sec = 0;
	timeout->tv_usec = 0;
	wake(thread);
	ret = 0;
	goto out;
    }

    if (timeout)
	thread->wakeup_time = stop;
    schedule();

    if (readfds)
        myread = *readfds;
    else
        FD_ZERO(&myread);
    if (writefds)
        mywrite = *writefds;
    else
        FD_ZERO(&mywrite);
    if (exceptfds)
        myexcept = *exceptfds;
    else
        FD_ZERO(&myexcept);

    n = select_poll(nfds, &myread, &mywrite, &myexcept);

    if (n) {
	if (readfds)
	    *readfds = myread;
	if (writefds)
	    *writefds = mywrite;
	if (exceptfds)
	    *exceptfds = myexcept;
	ret = n;
	goto out;
    }
    errno = EINTR;
    ret = -1;

out:
#ifdef CONFIG_NETFRONT
    remove_waiter(netfront_w, netfront_queue);
#endif
    remove_waiter(event_w, event_queue);
#ifdef CONFIG_BLKFRONT
    remove_waiter(blkfront_w, blkfront_queue);
#endif
#ifdef CONFIG_XENBUS
    remove_waiter(xenbus_watch_w, xenbus_watch_queue);
#endif
#ifdef CONFIG_KBDFRONT
    remove_waiter(kbdfront_w, kbdfront_queue);
#endif
    remove_waiter(console_w, console_queue);
    return ret;
}
Example #21
0
/* The strategy is to
 * - announce that we will maybe sleep
 * - poll a bit; if successful, return
 * - if timeout, return
 * - really sleep (unless somebody woke us up in the meantime) */
int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
	struct timeval *timeout)
{
    int n, ret;
    fd_set myread, mywrite, myexcept;
    struct thread *thread = get_current();
    s_time_t start = NOW(), stop;
    DEFINE_WAIT(w1);
    DEFINE_WAIT(w2);
    DEFINE_WAIT(w3);
    DEFINE_WAIT(w4);
    DEFINE_WAIT(w5);
    DEFINE_WAIT(w6);
    int n_sockets;
    int n_non_sockets;

    assert(thread == main_thread);

    n_sockets = 0;
    n_non_sockets = 0;
    for (n = 0; n <= nfds; n++) {
	if ((readfds && FD_ISSET(n, readfds)) ||
	    (writefds && FD_ISSET(n, writefds)) ||
	    (exceptfds && FD_ISSET(n, exceptfds))) {
	    if (files[n].type == FTYPE_SOCKET)
		n_sockets++;
	    else
		n_non_sockets++;
	}
    }
    if (n_sockets != 0 && n_non_sockets != 0) {
	static int cntr;
	if (cntr < 1000) {
	    printk("WARNING: select combining socket and non-socket FDs (n = %d vs %d); warning %d/1000\n",
		   n_sockets, n_non_sockets, cntr);
	    cntr++;
	}
    }
    if (n_non_sockets == 0)
	return do_lwip_select(nfds, readfds, writefds, exceptfds, timeout);

    if (timeout)
	stop = start + SECONDS(timeout->tv_sec) + timeout->tv_usec * 1000;
    else
	/* just make gcc happy */
	stop = start;

    /* Tell people we're going to sleep before looking at what they are
     * saying, hence letting them wake us if events happen between here and
     * schedule() */
    add_waiter(w1, netfront_queue);
    add_waiter(w2, event_queue);
    add_waiter(w3, blkfront_queue);
    add_waiter(w4, xenbus_watch_queue);
    add_waiter(w5, kbdfront_queue);
    add_waiter(w6, console_queue);

    if (readfds)
        myread = *readfds;
    else
        FD_ZERO(&myread);
    if (writefds)
        mywrite = *writefds;
    else
        FD_ZERO(&mywrite);
    if (exceptfds)
        myexcept = *exceptfds;
    else
        FD_ZERO(&myexcept);

    DEBUG("polling ");
    dump_set(nfds, &myread, &mywrite, &myexcept, timeout);
    DEBUG("\n");
    n = select_poll(nfds, &myread, &mywrite, &myexcept);

    if (n) {
	dump_set(nfds, readfds, writefds, exceptfds, timeout);
	if (readfds)
	    *readfds = myread;
	if (writefds)
	    *writefds = mywrite;
	if (exceptfds)
	    *exceptfds = myexcept;
	DEBUG(" -> ");
	dump_set(nfds, readfds, writefds, exceptfds, timeout);
	DEBUG("\n");
	wake(thread);
	ret = n;
	goto out;
    }
    if (timeout && NOW() >= stop) {
	if (readfds)
	    FD_ZERO(readfds);
	if (writefds)
	    FD_ZERO(writefds);
	if (exceptfds)
	    FD_ZERO(exceptfds);
	timeout->tv_sec = 0;
	timeout->tv_usec = 0;
	wake(thread);
	ret = 0;
	goto out;
    }

    if (timeout)
	thread->wakeup_time = stop;
    schedule();

    if (readfds)
        myread = *readfds;
    else
        FD_ZERO(&myread);
    if (writefds)
        mywrite = *writefds;
    else
        FD_ZERO(&mywrite);
    if (exceptfds)
        myexcept = *exceptfds;
    else
        FD_ZERO(&myexcept);

    n = select_poll(nfds, &myread, &mywrite, &myexcept);

    if (n) {
	if (readfds)
	    *readfds = myread;
	if (writefds)
	    *writefds = mywrite;
	if (exceptfds)
	    *exceptfds = myexcept;
	ret = n;
	goto out;
    }
    errno = EINTR;
    ret = -1;

out:
    remove_waiter(w1);
    remove_waiter(w2);
    remove_waiter(w3);
    remove_waiter(w4);
    remove_waiter(w5);
    remove_waiter(w6);
    return ret;
}