/*
 * Callback from vmbus_event when something is in inbound ring.
 */
static void hv_uio_channel_cb(void *context)
{
	struct vmbus_channel *chan = context;
	struct hv_device *hv_dev = chan->device_obj;
	struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev);

	chan->inbound.ring_buffer->interrupt_mask = 1;
	virt_mb();

	uio_event_notify(&pdata->info);
}
/*
 * This is the irqcontrol callback to be registered to uio_info.
 * It can be used to disable/enable interrupt from user space processes.
 *
 * @param info
 *	pointer to uio_info.
 * @param irq_state
 *	state value. 1 to enable interrupt, 0 to disable interrupt.
 */
static int hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
{
	struct hv_uio_private_data *pdata = info->priv;
	struct hv_device *dev = pdata->device;

	dev->channel->inbound.ring_buffer->interrupt_mask = !irq_state;
	virt_mb();

	return 0;
}
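/*
 * A minimal user-space sketch of how the two hooks above are driven
 * through the standard UIO character device interface: a 4-byte write()
 * reaches hv_uio_irqcontrol() as irq_state, and read() blocks until
 * hv_uio_channel_cb() calls uio_event_notify(). The device path and the
 * processing step are assumptions for illustration.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/uio0", O_RDWR);	/* hypothetical device node */

	if (fd < 0)
		return 1;

	for (;;) {
		int32_t enable = 1;
		uint32_t count;

		/* Clears interrupt_mask via hv_uio_irqcontrol(). */
		if (write(fd, &enable, sizeof(enable)) != sizeof(enable))
			break;

		/* Blocks until the channel callback notifies us. */
		if (read(fd, &count, sizeof(count)) != sizeof(count))
			break;

		printf("event %u: drain the inbound ring here\n", count);
	}

	close(fd);
	return 0;
}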
u32 hv_end_read(struct hv_ring_buffer_info *rbi)
{
	rbi->ring_buffer->interrupt_mask = 0;
	virt_mb();

	/*
	 * Now check to see if the ring buffer is still empty.
	 * If it is not, we raced and we need to process new
	 * incoming messages.
	 */
	return hv_get_bytes_to_read(rbi);
}
/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns 0 on success, error otherwise.
 */
int xb_write(const void *data, unsigned len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	int rc;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		rc = wait_event_interruptible(
			xb_waitq,
			(intf->req_prod - intf->req_cons) !=
			XENSTORE_RING_SIZE);
		if (rc < 0)
			return rc;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		notify_remote_via_evtchn(xen_store_evtchn);
	}

	return 0;
}
int xb_read(void *data, unsigned len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	int rc;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		rc = xb_wait_for_data_to_read();
		if (rc < 0)
			return rc;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (!check_indexes(cons, prod)) {
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);

		/* Implies mb(): other side will see the updated consumer. */
		notify_remote_via_evtchn(xen_store_evtchn);
	}

	return 0;
}
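/*
 * The ordering discipline shared by xb_write() and xb_read(), restated
 * as a self-contained single-producer/single-consumer ring with C11
 * acquire/release standing in for the virt_*() barriers. All names here
 * are illustrative assumptions; this is a sketch of the barrier pairing,
 * not the xenstore implementation.
 */
#include <stdatomic.h>

#define DEMO_RING_SIZE 1024	/* power of two, like XENSTORE_RING_SIZE */

struct demo_ring {
	char buf[DEMO_RING_SIZE];
	_Atomic unsigned int cons;	/* advanced only by the consumer */
	_Atomic unsigned int prod;	/* advanced only by the producer */
};

/* Producer side: mirrors xb_write(). */
static int demo_put(struct demo_ring *r, const char *data, unsigned int len)
{
	/* Acquire on cons plays the role of the virt_mb() that keeps
	 * the data copy from moving before the free-space check. */
	unsigned int cons = atomic_load_explicit(&r->cons, memory_order_acquire);
	unsigned int prod = atomic_load_explicit(&r->prod, memory_order_relaxed);

	if (DEMO_RING_SIZE - (prod - cons) < len)
		return -1;			/* not enough free space */

	for (unsigned int i = 0; i < len; i++)
		r->buf[(prod + i) & (DEMO_RING_SIZE - 1)] = data[i];

	/* Release plays the role of virt_wmb(): the consumer must not
	 * observe the new producer index before the data is there. */
	atomic_store_explicit(&r->prod, prod + len, memory_order_release);
	return 0;
}

/* Consumer side: mirrors xb_read(). */
static int demo_get(struct demo_ring *r, char *data, unsigned int len)
{
	/* Acquire on prod plays the role of virt_rmb(): read the data
	 * only after seeing the producer index that covers it. */
	unsigned int prod = atomic_load_explicit(&r->prod, memory_order_acquire);
	unsigned int cons = atomic_load_explicit(&r->cons, memory_order_relaxed);

	if (prod - cons < len)
		return -1;			/* not enough data yet */

	for (unsigned int i = 0; i < len; i++)
		data[i] = r->buf[(cons + i) & (DEMO_RING_SIZE - 1)];

	/* Release plays the role of the virt_mb() before the consumer
	 * index update: the producer must not reuse this space until
	 * the copy above is done. */
	atomic_store_explicit(&r->cons, cons + len, memory_order_release);
	return 0;
}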
static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;

	virt_mb();
	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
		return;

	/* check interrupt_mask before read_index */
	virt_rmb();

	/*
	 * This is the only case we need to signal when the
	 * ring transitions from being empty to non-empty.
	 */
	if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
		vmbus_setevent(channel);
}
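/*
 * A worked illustration of the final test above; the helper name and
 * the concrete index values are hypothetical.
 */
#include <stdbool.h>

static bool write_was_empty_to_nonempty(unsigned int old_write,
					unsigned int read_index)
{
	/*
	 * old_write is where this packet began. If the reader had
	 * already consumed up to that point (say read_index ==
	 * old_write == 100), the ring was empty when we wrote, and the
	 * reader needs a wakeup. If instead read_index == 100 but
	 * old_write == 200, then 100 bytes were already unread, the
	 * reader is (or soon will be) running, and a signal would be
	 * redundant.
	 */
	return old_write == read_index;
}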
/*
 * Update host ring buffer after iterating over packets. If the host has
 * stopped queuing new entries because it found the ring buffer full, and
 * sufficient space is being freed up, signal the host. But be careful to
 * only signal the host when necessary, both for performance reasons and
 * because Hyper-V protects itself by throttling guests that signal
 * inappropriately.
 *
 * Determining when to signal is tricky. There are three key data inputs
 * that must be handled in this order to avoid race conditions:
 *
 * 1. Update the read_index
 * 2. Read the pending_send_sz
 * 3. Read the current write_index
 *
 * The interrupt_mask is not used to determine when to signal. The
 * interrupt_mask is used only on the guest->host ring buffer when
 * sending requests to the host. The host does not use it on the host->
 * guest ring buffer to indicate whether it should be signaled.
 */
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	u32 curr_write_sz, pending_sz, bytes_read, start_read_index;

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	virt_rmb();
	start_read_index = rbi->ring_buffer->read_index;
	rbi->ring_buffer->read_index = rbi->priv_read_index;

	/*
	 * Older versions of Hyper-V (before WS2012 and Win8) do not
	 * implement pending_send_sz and simply poll if the host->guest
	 * ring buffer is full. No signaling is needed or expected.
	 */
	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
		return;

	/*
	 * Issue a full memory barrier before making the signaling decision.
	 * If reading pending_send_sz were to be reordered and happen
	 * before we commit the new read_index, a race could occur. If the
	 * host were to set the pending_send_sz after we have sampled
	 * pending_send_sz, and the ring buffer blocks before we commit the
	 * read index, we could miss sending the interrupt. Issue a full
	 * memory barrier to address this.
	 */
	virt_mb();

	/*
	 * If the pending_send_sz is zero, then the ring buffer is not
	 * blocked and there is no need to signal. This is by far the
	 * most common case, so exit quickly for best performance.
	 */
	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
	if (!pending_sz)
		return;

	/*
	 * Ensure the read of write_index in hv_get_bytes_to_write()
	 * happens after the read of pending_send_sz.
	 */
	virt_rmb();
	curr_write_sz = hv_get_bytes_to_write(rbi);
	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);

	/*
	 * We want to signal the host only if we're transitioning
	 * from a "not enough free space" state to an "enough free
	 * space" state. For example, it's possible that this function
	 * could run and free up enough space to signal the host, and then
	 * run again and free up additional space before the host has a
	 * chance to clear the pending_send_sz. The 2nd invocation would
	 * be a null transition from "enough free space" to "enough free
	 * space", which doesn't warrant a signal.
	 *
	 * Exactly filling the ring buffer is treated as "not enough
	 * space". The ring buffer always must have at least one byte
	 * empty so the empty and full conditions are distinguishable.
	 * hv_get_bytes_to_write() doesn't fully tell the truth in
	 * this regard.
	 *
	 * So first check if we were in the "enough free space" state
	 * before we began the iteration. If so, the host was not
	 * blocked, and there's no need to signal.
	 */
	if (curr_write_sz - bytes_read > pending_sz)
		return;

	/*
	 * Similarly, if the new state is "not enough space", then
	 * there's no need to signal.
	 */
	if (curr_write_sz <= pending_sz)
		return;

	vmbus_setevent(channel);
}
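/*
 * The two exit checks above, distilled into a pure predicate with a
 * worked numeric example. The function name is hypothetical and the
 * typedef stands in for the kernel type.
 */
#include <stdbool.h>
#include <stdint.h>

typedef uint32_t u32;

static bool pkt_iter_should_signal(u32 curr_write_sz, u32 bytes_read,
				   u32 pending_sz)
{
	/* Free space already exceeded pending_sz before this pass:
	 * the host was never blocked. */
	if (curr_write_sz - bytes_read > pending_sz)
		return false;

	/* Still not strictly more room than the host asked for:
	 * the host stays blocked; a later reader will signal. */
	if (curr_write_sz <= pending_sz)
		return false;

	return true;
}

/*
 * Example: pending_sz = 4096 and this pass read bytes_read = 7168
 * bytes, leaving curr_write_sz = 8192 free. Before the pass only
 * 8192 - 7168 = 1024 bytes were free (host blocked); now 8192 > 4096,
 * so the predicate returns true -- exactly the blocked-to-unblocked
 * transition that warrants vmbus_setevent().
 */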
/* Write to the ring buffer. */
int hv_ringbuffer_write(struct vmbus_channel *channel,
			const struct kvec *kv_list, u32 kv_count)
{
	int i;
	u32 bytes_avail_towrite;
	u32 totalbytes_towrite = sizeof(u64);
	u32 next_write_location;
	u32 old_write;
	u64 prev_indices;
	unsigned long flags;
	struct hv_ring_buffer_info *outring_info = &channel->outbound;

	if (channel->rescind)
		return -ENODEV;

	for (i = 0; i < kv_count; i++)
		totalbytes_towrite += kv_list[i].iov_len;

	spin_lock_irqsave(&outring_info->ring_lock, flags);

	bytes_avail_towrite = hv_get_bytes_to_write(outring_info);

	/*
	 * If there is only room for the packet, assume it is full.
	 * Otherwise, the next time around, we think the ring buffer
	 * is empty since the read index == write index.
	 */
	if (bytes_avail_towrite <= totalbytes_towrite) {
		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
		return -EAGAIN;
	}

	/* Write to the ring buffer */
	next_write_location = hv_get_next_write_location(outring_info);

	old_write = next_write_location;

	for (i = 0; i < kv_count; i++) {
		next_write_location = hv_copyto_ringbuffer(outring_info,
							   next_write_location,
							   kv_list[i].iov_base,
							   kv_list[i].iov_len);
	}

	/* Set previous packet start */
	prev_indices = hv_get_ring_bufferindices(outring_info);

	next_write_location = hv_copyto_ringbuffer(outring_info,
						   next_write_location,
						   &prev_indices,
						   sizeof(u64));

	/* Issue a full memory barrier before updating the write index */
	virt_mb();

	/* Now, update the write location */
	hv_set_next_write_location(outring_info, next_write_location);

	spin_unlock_irqrestore(&outring_info->ring_lock, flags);

	hv_signal_on_write(old_write, channel);

	if (channel->rescind)
		return -ENODEV;

	return 0;
}
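/*
 * Why the check above rejects "bytes_avail_towrite <= totalbytes_towrite"
 * rather than "<": a sketch of the one-free-byte invariant. The helper
 * mirrors the usual hv_get_bytes_to_write() arithmetic but is an
 * illustrative stand-alone version.
 */
#include <stdio.h>

static unsigned int demo_bytes_to_write(unsigned int read_index,
					unsigned int write_index,
					unsigned int ring_size)
{
	if (write_index >= read_index)
		return ring_size - (write_index - read_index);
	return read_index - write_index;
}

int main(void)
{
	/* Empty ring: read == write, all 4096 bytes look available. */
	printf("%u\n", demo_bytes_to_write(0, 0, 4096));	/* 4096 */

	/*
	 * If a writer were allowed to consume all 4096 bytes, the write
	 * index would wrap around to equal the read index again, and an
	 * empty ring would be indistinguishable from a full one. The
	 * strict "<=" check therefore fails such a write with -EAGAIN,
	 * always leaving at least one byte unused.
	 */
	return 0;
}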
int hv_ringbuffer_read(struct vmbus_channel *channel,
		       void *buffer, u32 buflen, u32 *buffer_actual_len,
		       u64 *requestid, bool raw)
{
	u32 bytes_avail_toread;
	u32 next_read_location = 0;
	u64 prev_indices = 0;
	struct vmpacket_descriptor desc;
	u32 offset;
	u32 packetlen;
	int ret = 0;
	struct hv_ring_buffer_info *inring_info = &channel->inbound;

	if (buflen <= 0)
		return -EINVAL;

	*buffer_actual_len = 0;
	*requestid = 0;

	bytes_avail_toread = hv_get_bytes_to_read(inring_info);
	/* Make sure there is something to read */
	if (bytes_avail_toread < sizeof(desc)) {
		/*
		 * No error is set when there is even no header, drivers are
		 * supposed to analyze buffer_actual_len.
		 */
		return ret;
	}

	next_read_location = hv_get_next_read_location(inring_info);
	next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
						    sizeof(desc),
						    next_read_location);

	offset = raw ? 0 : (desc.offset8 << 3);
	packetlen = (desc.len8 << 3) - offset;
	*buffer_actual_len = packetlen;
	*requestid = desc.trans_id;

	if (bytes_avail_toread < packetlen + offset)
		return -EAGAIN;

	if (packetlen > buflen)
		return -ENOBUFS;

	next_read_location =
		hv_get_next_readlocation_withoffset(inring_info, offset);

	next_read_location = hv_copyfrom_ringbuffer(inring_info,
						    buffer,
						    packetlen,
						    next_read_location);

	next_read_location = hv_copyfrom_ringbuffer(inring_info,
						    &prev_indices,
						    sizeof(u64),
						    next_read_location);

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	virt_mb();

	/* Update the read index */
	hv_set_next_read_location(inring_info, next_read_location);

	hv_signal_on_read(channel);

	return ret;
}
void hv_begin_read(struct hv_ring_buffer_info *rbi)
{
	rbi->ring_buffer->interrupt_mask = 1;
	virt_mb();
}
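/*
 * A sketch of how hv_begin_read() and hv_end_read() are meant to pair
 * in a channel callback; the function name is hypothetical, the
 * processing body is elided, and the exact loop shape varies by driver.
 */
static void demo_channel_callback(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;

	/* Mask host signaling while we drain the ring. */
	hv_begin_read(rbi);

	do {
		/* ... read and process all available packets ... */
	} while (hv_end_read(rbi) != 0);

	/*
	 * hv_end_read() unmasks and then re-reads the ring state. If the
	 * host queued a packet between our last read and the unmask, it
	 * saw interrupt_mask == 1 and did not signal, so we must loop and
	 * consume that packet here rather than wait for an interrupt that
	 * will never arrive.
	 */
}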