static void perf_output_put_handle(struct perf_output_handle *handle)
{
	struct ring_buffer *rb = handle->rb;
	unsigned long head;

again:
	head = local_read(&rb->head);

	if (!local_dec_and_test(&rb->nest))
		goto out;

	/*
	 * Since the mmap() consumer (userspace) can run on a different CPU:
	 *
	 *   kernel				user
	 *
	 *   READ ->data_tail			READ ->data_head
	 *   smp_mb()	(A)			smp_rmb()	(C)
	 *   WRITE $data			READ $data
	 *   smp_wmb()	(B)			smp_mb()	(D)
	 *   STORE ->data_head			WRITE ->data_tail
	 *
	 * Where A pairs with D, and B pairs with C.
	 *
	 * I don't think A needs to be a full barrier because we won't in fact
	 * write data until we see the store from userspace. So we simply don't
	 * issue the data WRITE until we observe it. Be conservative for now.
	 *
	 * OTOH, D needs to be a full barrier since it separates the data READ
	 * from the tail WRITE.
	 *
	 * For B a WMB is sufficient since it separates two WRITEs, and for C
	 * an RMB is sufficient since it separates two READs.
	 *
	 * See perf_output_begin().
	 */
	smp_wmb();
	rb->user_page->data_head = head;

	if (unlikely(head != local_read(&rb->head))) {
		local_inc(&rb->nest);
		goto again;
	}

	if (handle->wakeup != local_read(&rb->wakeup))
		perf_output_wakeup(handle);

out:
	preempt_enable();
}
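The barrier diagram above shows both halves of the protocol, but only the kernel half appears in code. For concreteness, the sketch below fills in the userspace consumer half (barriers C and D). It is a minimal illustration, not perf's actual tooling code: the function name and parameters are made up, and C11-style compiler fences stand in for the kernel's smp_rmb()/smp_mb().

#include <stdint.h>
#include <linux/perf_event.h>

/*
 * Hypothetical consumer of the mmap()'ed ring buffer: "up" is the
 * control page, "data" the data pages, "size" their length (a power
 * of two). Barrier letters refer to the diagram above.
 */
static void drain_ring(volatile struct perf_event_mmap_page *up,
		       const unsigned char *data, uint64_t size)
{
	uint64_t head, tail, i;
	struct perf_event_header hdr;

	head = up->data_head;			 /* READ ->data_head */
	__atomic_thread_fence(__ATOMIC_ACQUIRE); /* (C), pairs with kernel's B */

	tail = up->data_tail;
	while (tail != head) {
		/* READ $data: byte-wise copy so a record that wraps the
		 * end of the buffer is still reassembled correctly. */
		for (i = 0; i < sizeof(hdr); i++)
			((unsigned char *)&hdr)[i] = data[(tail + i) & (size - 1)];
		if (!hdr.size)
			break;			 /* malformed record; bail */
		/* ... decode hdr.size bytes starting at tail here ... */
		tail += hdr.size;
	}

	__atomic_thread_fence(__ATOMIC_SEQ_CST); /* (D), full barrier, pairs with A */
	up->data_tail = tail;			 /* WRITE ->data_tail */
}

Only after the full barrier (D) does the tail store become safe: it tells the kernel the data has been read, so ordering it after the data reads is what keeps the producer from overwriting records the consumer is still parsing.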
static void perf_output_put_handle(struct perf_output_handle *handle)
{
	struct ring_buffer *rb = handle->rb;
	unsigned long head;

again:
	head = local_read(&rb->head);

	/*
	 * IRQ/NMI can happen here, which means we can miss a head update.
	 */

	if (!local_dec_and_test(&rb->nest))
		goto out;

	/*
	 * Publish the known good head. Rely on the full barrier implied
	 * by local_dec_and_test() to order the rb->head read and this
	 * write.
	 */
	rb->user_page->data_head = head;

	/*
	 * Now check if we missed an update, rely on the (compiler)
	 * barrier in local_dec_and_test() to re-read rb->head.
	 */
	if (unlikely(head != local_read(&rb->head))) {
		local_inc(&rb->nest);
		goto again;
	}

	if (handle->wakeup != local_read(&rb->wakeup))
		perf_output_wakeup(handle);

out:
	preempt_enable();
}
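The nest counter and the re-read of rb->head are the load-bearing parts of this variant. The following is a hedged reduction of that publish-then-recheck pattern to portable C11 atomics; every name is hypothetical, and using a seq_cst fetch_sub to stand in for the full barrier the comment attributes to the dec-and-test is an assumption of the sketch, not a claim about local_dec_and_test() itself.

#include <stdatomic.h>

/* Illustrative stand-ins for perf's ring-buffer fields. */
struct ring {
	atomic_ulong head;	/* producer offset, advanced by writers   */
	atomic_ulong nest;	/* writer nesting depth (incremented in a
				 * matching "begin" path, not shown)      */
	atomic_ulong user_head;	/* the value published to the consumer    */
};

static void put_handle(struct ring *rb)
{
	unsigned long head;

again:
	head = atomic_load_explicit(&rb->head, memory_order_relaxed);

	/* A nested writer (the IRQ/NMI case noted above) can advance
	 * rb->head here, making our copy stale. */

	/* Only the outermost writer publishes; the seq_cst fetch_sub
	 * plays the role of the "full barrier implied by the
	 * dec-and-test" in the comment above. */
	if (atomic_fetch_sub(&rb->nest, 1) != 1)
		return;

	atomic_store_explicit(&rb->user_head, head, memory_order_release);

	/* Re-read head: if a nested writer moved it after our load, the
	 * value just published is stale, so take the outermost slot
	 * back and publish again. */
	if (head != atomic_load_explicit(&rb->head, memory_order_relaxed)) {
		atomic_fetch_add(&rb->nest, 1);
		goto again;
	}
}

The recheck is what makes a stale publish harmless: the nested writer that raced with us also bumped rb->head, so the outermost writer always loops around and publishes the newest value before returning.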