/* * Update host ring buffer after iterating over packets. */ void hv_pkt_iter_close(struct vmbus_channel *channel) { struct hv_ring_buffer_info *rbi = &channel->inbound; u32 orig_write_sz = hv_get_bytes_to_write(rbi); /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated. */ rmb(); rbi->ring_buffer->read_index = rbi->priv_read_index; /* * Issue a full memory barrier before making the signaling decision. * Here is the reason for having this barrier: * If the reading of the pend_sz (in this function) * were to be reordered and read before we commit the new read * index (in the calling function) we could * have a problem. If the host were to set the pending_sz after we * have sampled pending_sz and go to sleep before we commit the * read index, we could miss sending the interrupt. Issue a full * memory barrier to address this. */ mb(); /* If host has disabled notifications then skip */ if (rbi->ring_buffer->interrupt_mask) return; if (rbi->ring_buffer->feature_bits.feat_pending_send_sz) { u32 pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); /* * If there was space before we began iteration, * then host was not blocked. Also handles case where * pending_sz is zero then host has nothing pending * and does not need to be signaled. */ if (orig_write_sz > pending_sz) return; /* If pending write will not fit, don't give false hope. */ if (hv_get_bytes_to_write(rbi) < pending_sz) return; } vmbus_setevent(channel); }
static bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) { u32 cur_write_sz; u32 pending_sz; /* * Issue a full memory barrier before making the signaling decision. * Here is the reason for having this barrier: * If the reading of the pend_sz (in this function) * were to be reordered and read before we commit the new read * index (in the calling function) we could * have a problem. If the host were to set the pending_sz after we * have sampled pending_sz and go to sleep before we commit the * read index, we could miss sending the interrupt. Issue a full * memory barrier to address this. */ mb(); pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); /* If the other end is not blocked on write don't bother. */ if (pending_sz == 0) return false; cur_write_sz = hv_get_bytes_to_write(rbi); if (cur_write_sz >= pending_sz) return true; return false; }
/* * Update host ring buffer after iterating over packets. */ void hv_pkt_iter_close(struct vmbus_channel *channel) { struct hv_ring_buffer_info *rbi = &channel->inbound; u32 curr_write_sz, pending_sz, bytes_read, start_read_index; /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated. */ virt_rmb(); start_read_index = rbi->ring_buffer->read_index; rbi->ring_buffer->read_index = rbi->priv_read_index; if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz) return; /* * Issue a full memory barrier before making the signaling decision. * Here is the reason for having this barrier: * If the reading of the pend_sz (in this function) * were to be reordered and read before we commit the new read * index (in the calling function) we could * have a problem. If the host were to set the pending_sz after we * have sampled pending_sz and go to sleep before we commit the * read index, we could miss sending the interrupt. Issue a full * memory barrier to address this. */ virt_mb(); pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); if (!pending_sz) return; /* * Ensure the read of write_index in hv_get_bytes_to_write() * happens after the read of pending_send_sz. */ virt_rmb(); curr_write_sz = hv_get_bytes_to_write(rbi); bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index); /* * If there was space before we began iteration, * then host was not blocked. */ if (curr_write_sz - bytes_read > pending_sz) return; /* If pending write will not fit, don't give false hope. */ if (curr_write_sz <= pending_sz) return; vmbus_setevent(channel); }
/* * Update host ring buffer after iterating over packets. If the host has * stopped queuing new entries because it found the ring buffer full, and * sufficient space is being freed up, signal the host. But be careful to * only signal the host when necessary, both for performance reasons and * because Hyper-V protects itself by throttling guests that signal * inappropriately. * * Determining when to signal is tricky. There are three key data inputs * that must be handled in this order to avoid race conditions: * * 1. Update the read_index * 2. Read the pending_send_sz * 3. Read the current write_index * * The interrupt_mask is not used to determine when to signal. The * interrupt_mask is used only on the guest->host ring buffer when * sending requests to the host. The host does not use it on the host-> * guest ring buffer to indicate whether it should be signaled. */ void hv_pkt_iter_close(struct vmbus_channel *channel) { struct hv_ring_buffer_info *rbi = &channel->inbound; u32 curr_write_sz, pending_sz, bytes_read, start_read_index; /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated. */ virt_rmb(); start_read_index = rbi->ring_buffer->read_index; rbi->ring_buffer->read_index = rbi->priv_read_index; /* * Older versions of Hyper-V (before WS2102 and Win8) do not * implement pending_send_sz and simply poll if the host->guest * ring buffer is full. No signaling is needed or expected. */ if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz) return; /* * Issue a full memory barrier before making the signaling decision. * If reading pending_send_sz were to be reordered and happen * before we commit the new read_index, a race could occur. If the * host were to set the pending_send_sz after we have sampled * pending_send_sz, and the ring buffer blocks before we commit the * read index, we could miss sending the interrupt. Issue a full * memory barrier to address this. 
*/ virt_mb(); /* * If the pending_send_sz is zero, then the ring buffer is not * blocked and there is no need to signal. This is far by the * most common case, so exit quickly for best performance. */ pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); if (!pending_sz) return; /* * Ensure the read of write_index in hv_get_bytes_to_write() * happens after the read of pending_send_sz. */ virt_rmb(); curr_write_sz = hv_get_bytes_to_write(rbi); bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index); /* * We want to signal the host only if we're transitioning * from a "not enough free space" state to a "enough free * space" state. For example, it's possible that this function * could run and free up enough space to signal the host, and then * run again and free up additional space before the host has a * chance to clear the pending_send_sz. The 2nd invocation would * be a null transition from "enough free space" to "enough free * space", which doesn't warrant a signal. * * Exactly filling the ring buffer is treated as "not enough * space". The ring buffer always must have at least one byte * empty so the empty and full conditions are distinguishable. * hv_get_bytes_to_write() doesn't fully tell the truth in * this regard. * * So first check if we were in the "enough free space" state * before we began the iteration. If so, the host was not * blocked, and there's no need to signal. */ if (curr_write_sz - bytes_read > pending_sz) return; /* * Similarly, if the new state is "not enough space", then * there's no need to signal. */ if (curr_write_sz <= pending_sz) return; vmbus_setevent(channel); }
/*
 * Write to the ring buffer.
 *
 * Copies every element of kv_list, followed by a trailing u64 holding the
 * ring buffer indices from before the write index is advanced, into the
 * channel's outbound ring buffer while holding the ring lock.
 *
 * @channel:  channel whose outbound ring buffer is written.
 * @kv_list:  array of buffers to copy, in order.
 * @kv_count: number of entries in kv_list.
 *
 * Returns 0 on success, -EAGAIN when the ring does not have strictly more
 * free space than the packet needs, and -ENODEV when channel->rescind is
 * set either before the write or after it has completed.
 */
int hv_ringbuffer_write(struct vmbus_channel *channel,
			const struct kvec *kv_list, u32 kv_count)
{
	struct hv_ring_buffer_info *outring_info = &channel->outbound;
	u32 needed = sizeof(u64);	/* room for the trailing indices */
	u32 room;
	u32 start_pos;
	u32 write_pos;
	u64 prev_indices;
	unsigned long flags;
	int i;

	if (channel->rescind)
		return -ENODEV;

	/* Total payload size: all kvec elements plus the trailing u64. */
	for (i = 0; i < kv_count; i++)
		needed += kv_list[i].iov_len;

	spin_lock_irqsave(&outring_info->ring_lock, flags);

	room = hv_get_bytes_to_write(outring_info);

	/*
	 * A packet that would consume all of the free space is rejected:
	 * afterwards read index == write index, which the next call would
	 * interpret as an empty ring buffer.
	 */
	if (room <= needed) {
		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
		return -EAGAIN;
	}

	/* Copy the payload, remembering where this packet starts. */
	start_pos = hv_get_next_write_location(outring_info);
	write_pos = start_pos;

	for (i = 0; i < kv_count; i++)
		write_pos = hv_copyto_ringbuffer(outring_info, write_pos,
						 kv_list[i].iov_base,
						 kv_list[i].iov_len);

	/* Append the previous packet start (current ring indices). */
	prev_indices = hv_get_ring_bufferindices(outring_info);
	write_pos = hv_copyto_ringbuffer(outring_info, write_pos,
					 &prev_indices, sizeof(u64));

	/* Full memory barrier: data must be in place before the index moves. */
	virt_mb();

	/* Publish the new write location. */
	hv_set_next_write_location(outring_info, write_pos);

	spin_unlock_irqrestore(&outring_info->ring_lock, flags);

	hv_signal_on_write(start_pos, channel);

	return channel->rescind ? -ENODEV : 0;
}