/* Select a channel capable of hardware P+Q validation, or NULL to force
 * the synchronous fallback (also forced when the offload is compiled out).
 */
static inline struct dma_chan *
pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks,
	    size_t len)
{
#ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
	return async_tx_find_channel(submit, DMA_PQ_VAL, NULL, 0, blocks,
				     disks, len);
#else
	return NULL;
#endif
}
/**
 * async_memcpy - attempt to copy memory with a dma engine.
 * @dest: destination page
 * @src: src page
 * @dest_offset: offset into 'dest' to start transaction
 * @src_offset: offset into 'src' to start transaction
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK
 *
 * Returns the in-flight descriptor on the hardware path, NULL when the
 * copy was completed synchronously on the cpu.
 */
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
	     unsigned int src_offset, size_t len,
	     struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
						      &dest, 1, &src, 1, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
		dma_addr_t dma_dest, dma_src;
		unsigned long dma_prep_flags = 0;

		/* only request a completion interrupt when a callback waits */
		if (submit->cb_fn)
			dma_prep_flags |= DMA_PREP_INTERRUPT;
		if (submit->flags & ASYNC_TX_FENCE)
			dma_prep_flags |= DMA_PREP_FENCE;
		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
					DMA_FROM_DEVICE);

		dma_src = dma_map_page(device->dev, src, src_offset, len,
				       DMA_TO_DEVICE);

		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
						    len, dma_prep_flags);
		if (!tx) {
			/* descriptor allocation failed: undo both mappings
			 * and fall back to the synchronous copy below
			 */
			dma_unmap_page(device->dev, dma_dest, len,
				       DMA_FROM_DEVICE);
			dma_unmap_page(device->dev, dma_src, len,
				       DMA_TO_DEVICE);
		}
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);
		async_tx_submit(chan, tx, submit);
	} else {
		void *dest_buf, *src_buf;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		/* kmap_atomic: either page may live in highmem */
		dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
		src_buf = kmap_atomic(src, KM_USER1) + src_offset;

		memcpy(dest_buf, src_buf, len);

		/* unmap in reverse order of mapping */
		kunmap_atomic(src_buf, KM_USER1);
		kunmap_atomic(dest_buf, KM_USER0);

		async_tx_sync_epilog(submit);
	}

	return tx;
}
/* Select a channel capable of hardware xor-validate, or NULL to force the
 * synchronous fallback (also forced when the offload is compiled out).
 */
static inline struct dma_chan *
xor_val_chan(struct async_submit_ctl *submit, struct page *dest,
	     struct page **src_list, int src_cnt, size_t len)
{
#ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
	return async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list,
				     src_cnt, len);
#else
	return NULL;
#endif
}
/* Compute dest = srcs[0]*{coef[0]} ^ srcs[1]*{coef[1]} over GF(2^8) for one
 * page, preferring a PQ-capable dma channel and falling back to the raid6
 * multiplication tables on the cpu. Returns the descriptor on the async
 * path, NULL when performed synchronously.
 */
static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
		  size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, srcs, 2, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *amul, *bmul;
	u8 ax, bx;
	u8 *a, *b, *c;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[2];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		/* only the Q destination slot (index 1) is used */
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		/* dest is mapped bidirectional: the engine may also read it */
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
		dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
	}

	/* run the operation synchronously */
	async_tx_quiesce(&submit->depend_tx);
	/* per-coefficient GF(2^8) multiplication lookup tables */
	amul = raid6_gfmul[coef[0]];
	bmul = raid6_gfmul[coef[1]];
	a = page_address(srcs[0]);
	b = page_address(srcs[1]);
	c = page_address(dest);

	while (len--) {
		ax = amul[*a++];
		bx = bmul[*b++];
		*c++ = ax ^ bx;
	}

	return NULL;
}
/**
 * async_memset - attempt to fill memory with a dma engine.
 * @dest: destination page
 * @val: fill value
 * @offset: offset into 'dest' (in bytes) to start transaction
 * @len: length in bytes
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: memset depends on the result of this transaction
 * @cb_fn: function to call when the memset completes
 * @cb_param: parameter to pass to the callback routine
 *
 * Returns the in-flight descriptor on the hardware path, NULL when the
 * fill was done synchronously on the cpu.
 */
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
	size_t len, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
						      &dest, 1, NULL, 0, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device) {
		dma_addr_t dma_dest;
		/* interrupt on completion only when a callback is waiting */
		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;

		dma_dest = dma_map_page(device->dev, dest, offset, len,
					DMA_FROM_DEVICE);

		tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
						    dma_prep_flags);
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);
		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else { /* run the memset synchronously */
		void *dest_buf;
		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* NOTE(review): page_address() assumes 'dest' is not a
		 * highmem page on this path -- confirm against callers
		 */
		dest_buf = (void *) (((char *) page_address(dest)) +
			offset);

		/* wait for any prerequisite operations */
		if (depend_tx) {
			/* if ack is already set then we cannot be sure
			 * we are referring to the correct operation
			 */
			BUG_ON(depend_tx->ack);
			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for depend_tx\n",
					__func__);
		}

		memset(dest_buf, val, len);

		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}
/* Multiply one page by a GF(2^8) coefficient: dest = src * {coef}.
 * Prefers a PQ-capable dma channel (as a Q-only operation) and falls back
 * to a table-lookup loop on the cpu. Returns the descriptor on the async
 * path, NULL when performed synchronously.
 */
static struct dma_async_tx_descriptor *
async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
	   struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, &src, 1, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *qmul; /* Q multiplier table */
	u8 *d, *s;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[1];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		/* P output disabled; only slot 1 (Q) receives the product */
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		/* dest is mapped bidirectional: the engine may also read it */
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
	}

	/* no channel available, or failed to allocate a descriptor, so
	 * perform the operation synchronously
	 */
	async_tx_quiesce(&submit->depend_tx);
	qmul = raid6_gfmul[coef];
	d = page_address(dest);
	s = page_address(src);

	while (len--)
		*d++ = qmul[*s++];

	return NULL;
}
/**
 * async_gen_syndrome - asynchronously calculate a raid6 syndrome
 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
 * @offset: common offset into each block (src and dest) to start transaction
 * @disks: number of blocks (including missing P or Q, see below)
 * @len: length of operation in bytes
 * @submit: submission/completion modifiers
 *
 * General note: This routine assumes a field of GF(2^8) with a
 * primitive polynomial of 0x11d and a generator of {02}.
 *
 * 'disks' note: callers can optionally omit either P or Q (but not
 * both) from the calculation by setting blocks[disks-2] or
 * blocks[disks-1] to NULL.  When P or Q is omitted 'len' must be <=
 * PAGE_SIZE as a temporary buffer of this size is used in the
 * synchronous path.  'disks' always accounts for both destination
 * buffers.  If any source buffers (blocks[i] where i < disks - 2) are
 * set to NULL those buffers will be replaced with the raid6_zero_page
 * in the synchronous path and omitted in the hardware-asynchronous
 * path.
 *
 * 'blocks' note: if submit->scribble is NULL then the contents of
 * 'blocks' may be overwritten to perform address conversions
 * (dma_map_page() or page_address()).
 */
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
		   size_t len, struct async_submit_ctl *submit)
{
	int src_cnt = disks - 2;	/* data disks only, P and Q excluded */
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &P(blocks, disks), 2,
						      blocks, src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	dma_addr_t *dma_src = NULL;

	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));

	/* find space for the dma address conversion: the caller-provided
	 * scribble region, or reuse 'blocks' in place when a dma_addr_t
	 * fits in a page-pointer slot
	 */
	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) blocks;

	if (dma_src && device &&
	    (src_cnt <= dma_maxpq(device, 0) ||
	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
	    is_dma_pq_aligned(device, offset, 0, len)) {
		/* run the p+q asynchronously */
		pr_debug("%s: (async) disks: %d len: %zu\n",
			 __func__, disks, len);
		return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
					     disks, len, dma_src, submit);
	}

	/* run the pq synchronously */
	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);

	/* wait for any prerequisite operations */
	async_tx_quiesce(&submit->depend_tx);

	/* substitute the shared scratch page for a missing P or Q
	 * destination; the sync path requires both slots to be writable
	 */
	if (!P(blocks, disks)) {
		P(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	if (!Q(blocks, disks)) {
		Q(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	do_sync_gen_syndrome(blocks, offset, disks, len, submit);

	return NULL;
}
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * @dest: destination page
 * @src_list: array of source pages
 * @offset: common src/dst offset to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
 *
 * xor_blocks always uses the dest as a source so the
 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 * the calculation.  The assumption with dma eninges is that they only
 * use the destination buffer as a source when it is explicity specified
 * in the source list.
 *
 * src_list note: if the dest is also a source it must be at index zero.
 * The contents of this array will be overwritten if a scribble region
 * is not specified.
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	  int src_cnt, size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	dma_addr_t *dma_src = NULL;

	BUG_ON(src_cnt <= 1);

	/* find space for the dma address conversion: the caller-provided
	 * scribble region, or overwrite src_list in place when addresses fit
	 */
	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) src_list;

	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
				    dma_src, submit);
	} else {
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);
		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
			  __func__);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);

		return NULL;
	}
}
/**
 * async_pqxor - attempt to calculate RS-syndrome and XOR in parallel using
 *	a dma engine.
 * @pdest: destination page for P-parity (XOR)
 * @qdest: destination page for Q-parity (GF-XOR)
 * @src_list: array of source pages
 * @scoef_list: array of source coefficients used in GF-multiplication
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_ASSUME_COHERENT,
 *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ASYNC_TX_ASYNC_ONLY
 * @depend_tx: depends on the result of this transaction.
 * @callback: function to call when the operation completes
 * @callback_param: parameter to pass to the callback routine
 *
 * Returns the in-flight descriptor on the hardware path, NULL when the
 * operation ran synchronously or could not be performed at all (hardware
 * required but unavailable, or a destination missing on the sync path).
 */
struct dma_async_tx_descriptor *
async_pqxor(struct page *pdest, struct page *qdest,
	struct page **src_list, unsigned char *scoef_list,
	unsigned int offset, int src_cnt, size_t len,
	enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback callback, void *callback_param)
{
	struct page *dest[] = {pdest, qdest};
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_PQ_XOR,
						      dest, 2, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	/* caller insisted on hardware but no channel is available */
	if (!device && (flags & ASYNC_TX_ASYNC_ONLY))
		return NULL;

	if (device) { /* run the xor asynchronously */
		tx = do_async_pqxor(device, chan, pdest, qdest, src_list,
			       scoef_list, offset, src_cnt, len, flags,
			       depend_tx, callback, callback_param);
	} else { /* run the pqxor synchronously */
		/* may do synchronous PQ only when both destinations exist */
		if (!pdest || !qdest)
			return NULL;

		/* wait for any prerequisite operations */
		if (depend_tx) {
			/* if ack is already set then we cannot be sure
			 * we are referring to the correct operation
			 */
			BUG_ON(depend_tx->ack);
			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for depend_tx\n",
					__FUNCTION__);
		}

		do_sync_pqxor(pdest, qdest, src_list,
			offset, src_cnt, len, flags, depend_tx,
			callback, callback_param);
	}

	return tx;
}
/**
 * async_memset - attempt to fill memory with a dma engine.
 * @dest: destination page
 * @val: fill value
 * @offset: offset into 'dest' (in bytes) to start transaction
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK
 *
 * Returns the in-flight descriptor on the hardware path, NULL when the
 * fill was done synchronously on the cpu.
 */
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset, size_t len,
	     struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
						      &dest, 1, NULL, 0, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device && is_dma_fill_aligned(device, offset, 0, len)) {
		dma_addr_t dma_dest;
		unsigned long dma_prep_flags = 0;

		/* only interrupt on completion when a callback is waiting */
		if (submit->cb_fn)
			dma_prep_flags |= DMA_PREP_INTERRUPT;
		if (submit->flags & ASYNC_TX_FENCE)
			dma_prep_flags |= DMA_PREP_FENCE;
		dma_dest = dma_map_page(device->dev, dest, offset, len,
					DMA_FROM_DEVICE);

		tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
						    dma_prep_flags);
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);
		async_tx_submit(chan, tx, submit);
	} else { /* run the memset synchronously */
		void *dest_buf;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* NOTE(review): page_address() assumes 'dest' is not a
		 * highmem page on this path -- confirm against callers
		 */
		dest_buf = page_address(dest) + offset;

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		memset(dest_buf, val, len);

		async_tx_sync_epilog(submit);
	}

	return tx;
}
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * @dest: destination page
 * @src_list: array of source pages
 * @offset: common src/dst offset to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
 *
 * xor_blocks always uses the dest as a source so the
 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 * the calculation.  The assumption with dma eninges is that they only
 * use the destination buffer as a source when it is explicity specified
 * in the source list.
 *
 * src_list note: if the dest is also a source it must be at index zero.
 * The contents of this array will be overwritten if a scribble region
 * is not specified.
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	  int src_cnt, size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	dma_addr_t *dma_src = NULL;

	BUG_ON(src_cnt <= 1);

	/* find space for the dma address conversion: the caller-provided
	 * scribble region, or overwrite src_list in place when addresses fit
	 */
	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) src_list;

	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);
		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
				    dma_src, submit);
	} else {
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);
		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
			  __func__);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);

		return NULL;
	}
}
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
 * flag must be set to not include dest data in the calculation.  The
 * assumption with dma eninges is that they only use the destination
 * buffer as a source when it is explicity specified in the source list.
 * @dest: destination page
 * @src_list: array of source pages (if the dest is also a source it must be
 *	at index zero).  The contents of this array may be overwritten.
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
 *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	int src_cnt, size_t len, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);

	BUG_ON(src_cnt <= 1);

	if (chan) {
		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
				    flags, depend_tx, cb_fn, cb_param);
	} else {
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len,
			    flags, cb_fn, cb_param);

		return NULL;
	}
}
/**
 * async_xor_zero_sum - attempt a xor parity check with a dma engine.
 * @dest: destination page used if the xor is performed synchronously
 * @src_list: array of source pages.  The dest page must be listed as a source
 *	at index zero.  The contents of this array may be overwritten.
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @result: 0 if sum == 0 else non-zero
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 *
 * Returns the in-flight descriptor on the hardware path, NULL when the
 * check was carried out synchronously (xor into 'dest' + zero test).
 */
struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list,
	unsigned int offset, int src_cnt, size_t len,
	u32 *result, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
						      &dest, 1, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	BUG_ON(src_cnt <= 1);

	if (device && src_cnt <= device->max_xor) {
		/* convert the page list in place to dma addresses */
		dma_addr_t *dma_src = (dma_addr_t *) src_list;
		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
		int i;

		pr_debug("%s: (async) len: %zu\n", __func__, len);

		for (i = 0; i < src_cnt; i++)
			dma_src[i] = dma_map_page(device->dev, src_list[i],
						  offset, len, DMA_TO_DEVICE);

		tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
						      len, result,
						      dma_prep_flags);
		if (!tx) {
			/* Out of descriptors: wait for our dependency, then
			 * flush the channel's pending queue before retrying.
			 * Without dma_async_issue_pending() completed
			 * descriptors are never handed to hardware and
			 * recycled, so this retry loop could spin forever.
			 */
			if (depend_tx)
				dma_wait_for_async_tx(depend_tx);

			while (!tx) {
				dma_async_issue_pending(chan);
				tx = device->device_prep_dma_zero_sum(chan,
					dma_src, src_cnt, len, result,
					dma_prep_flags);
			}
		}

		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		unsigned long xor_flags = flags;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* xor_blocks reads dest as an implied source, so drop it from
		 * the list; keep the intermediate tx un-acked because we wait
		 * on it below
		 */
		xor_flags |= ASYNC_TX_XOR_DROP_DST;
		xor_flags &= ~ASYNC_TX_ACK;

		tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
			depend_tx, NULL, NULL);

		if (tx) {
			if (dma_wait_for_async_tx(tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for tx\n",
					__func__);
			async_tx_ack(tx);
		}

		/* the recomputed parity landed in dest; zero means it matched */
		*result = page_is_zero(dest, offset, len) ? 0 : 1;

		tx = NULL;

		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
 * flag must be set to not include dest data in the calculation.  The
 * assumption with dma eninges is that they only use the destination
 * buffer as a source when it is explicity specified in the source list.
 * @dest: destination page
 * @src_list: array of source pages (if the dest is also a source it must be
 *	at index zero).  The contents of this array may be overwritten.
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
 *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 *
 * Sources are consumed in chunks (bounded by the device's max_xor, or
 * MAX_XOR_BLOCKS on the sync path); each iteration's output becomes an
 * extra source for the next, chaining intermediate transactions until
 * all sources are folded into dest.
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	int src_cnt, size_t len, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;
	dma_async_tx_callback _cb_fn;
	void *_cb_param;
	unsigned long local_flags;
	int xor_src_cnt;
	int i = 0, src_off = 0;

	BUG_ON(src_cnt <= 1);

	while (src_cnt) {
		local_flags = flags;
		if (device) { /* run the xor asynchronously */
			xor_src_cnt = min(src_cnt, device->max_xor);
			/* if we are submitting additional xors
			 * only set the callback on the last transaction
			 */
			if (src_cnt > xor_src_cnt) {
				local_flags &= ~ASYNC_TX_ACK;
				_cb_fn = NULL;
				_cb_param = NULL;
			} else {
				_cb_fn = cb_fn;
				_cb_param = cb_param;
			}

			tx = do_async_xor(device, chan, dest,
					  &src_list[src_off], offset,
					  xor_src_cnt, len, local_flags,
					  depend_tx, _cb_fn, _cb_param);
		} else { /* run the xor synchronously */
			/* in the sync case the dest is an implied source
			 * (assumes the dest is at the src_off index)
			 */
			if (flags & ASYNC_TX_XOR_DROP_DST) {
				src_cnt--;
				src_off++;
			}

			/* process up to 'MAX_XOR_BLOCKS' sources */
			xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);

			/* if we are submitting additional xors
			 * only set the callback on the last transaction
			 */
			if (src_cnt > xor_src_cnt) {
				local_flags &= ~ASYNC_TX_ACK;
				_cb_fn = NULL;
				_cb_param = NULL;
			} else {
				_cb_fn = cb_fn;
				_cb_param = cb_param;
			}

			/* wait for any prerequisite operations */
			if (depend_tx) {
				/* if ack is already set then we cannot be sure
				 * we are referring to the correct operation
				 */
				BUG_ON(async_tx_test_ack(depend_tx));
				if (dma_wait_for_async_tx(depend_tx) ==
					DMA_ERROR)
					panic("%s: DMA_ERROR waiting for "
						"depend_tx\n",
						__func__);
			}

			do_sync_xor(dest, &src_list[src_off], offset,
				xor_src_cnt, len, local_flags, depend_tx,
				_cb_fn, _cb_param);
		}

		/* the previous tx is hidden from the client,
		 * so ack it
		 */
		if (i && depend_tx)
			async_tx_ack(depend_tx);

		/* chain the next iteration after this one */
		depend_tx = tx;

		if (src_cnt > xor_src_cnt) {
			/* drop completed sources */
			src_cnt -= xor_src_cnt;
			src_off += xor_src_cnt;

			/* unconditionally preserve the destination */
			flags &= ~ASYNC_TX_XOR_ZERO_DST;

			/* use the intermediate result a source, but remember
			 * it's dropped, because it's implied, in the sync case
			 */
			src_list[--src_off] = dest;
			src_cnt++;
			flags |= ASYNC_TX_XOR_DROP_DST;
		} else
			src_cnt = 0;
		i++;
	}

	return tx;
}
/**
 * async_syndrome_val - asynchronously validate a raid6 syndrome
 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
 * @offset: common offset into each block (src and dest) to start transaction
 * @disks: number of blocks (including missing P or Q, see below)
 * @len: length of operation in bytes
 * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
 * @spare: temporary result buffer for the synchronous case
 * @submit: submission / completion modifiers
 *
 * The same notes from async_gen_syndrome apply to the 'blocks',
 * and 'disks' parameters of this routine.  The synchronous path
 * requires a temporary result buffer and submit->scribble to be
 * specified.
 */
struct dma_async_tx_descriptor *
async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
		   size_t len, enum sum_check_flags *pqres, struct page *spare,
		   struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
						      NULL, 0, blocks, disks,
						      len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx;
	enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
	dma_addr_t *dma_src = NULL;

	BUG_ON(disks < 4);	/* at least two data disks plus P and Q */

	/* find space for the dma address conversion: the caller-provided
	 * scribble region, or reuse 'blocks' in place when addresses fit
	 */
	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) blocks;

	if (dma_src && device && disks <= dma_maxpq(device, 0) &&
	    is_dma_pq_aligned(device, offset, 0, len)) {
		struct device *dev = device->dev;
		/* the last two slots hold the P and Q addresses */
		dma_addr_t *pq = &dma_src[disks-2];
		int i;

		pr_debug("%s: (async) disks: %d len: %zu\n",
			 __func__, disks, len);
		if (!P(blocks, disks))
			dma_flags |= DMA_PREP_PQ_DISABLE_P;
		if (!Q(blocks, disks))
			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		for (i = 0; i < disks; i++)
			if (likely(blocks[i])) {
				BUG_ON(is_raid6_zero_block(blocks[i]));
				dma_src[i] = dma_map_page(dev, blocks[i],
							  offset, len,
							  DMA_TO_DEVICE);
			}

		for (;;) {
			tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
							    disks - 2,
							    raid6_gfexp,
							    len, pqres,
							    dma_flags);
			if (likely(tx))
				break;
			/* out of descriptors: drain dependencies and flush
			 * pending work so descriptors can be recycled,
			 * then retry
			 */
			async_tx_quiesce(&submit->depend_tx);
			dma_async_issue_pending(chan);
		}
		async_tx_submit(chan, tx, submit);

		return tx;
	} else {
		struct page *p_src = P(blocks, disks);
		struct page *q_src = Q(blocks, disks);
		/* save the submit state: it is reused (and clobbered) by the
		 * recomputation calls below and restored before returning
		 */
		enum async_tx_flags flags_orig = submit->flags;
		dma_async_tx_callback cb_fn_orig = submit->cb_fn;
		void *scribble = submit->scribble;
		void *cb_param_orig = submit->cb_param;
		void *p, *q, *s;

		pr_debug("%s: (sync) disks: %d len: %zu\n",
			 __func__, disks, len);

		/* caller must provide a temporary result buffer and
		 * allow the input parameters to be preserved
		 */
		BUG_ON(!spare || !scribble);

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		/* recompute p and/or q into the temporary buffer and then
		 * check to see the result matches the current value
		 */
		tx = NULL;
		*pqres = 0;
		if (p_src) {
			init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
					  NULL, NULL, scribble);
			tx = async_xor(spare, blocks, offset, disks-2, len,
				       submit);
			async_tx_quiesce(&tx);
			p = page_address(p_src) + offset;
			s = page_address(spare) + offset;
			*pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
		}

		if (q_src) {
			/* regenerate Q only: drop P from the block list and
			 * direct the syndrome into the spare page
			 */
			P(blocks, disks) = NULL;
			Q(blocks, disks) = spare;
			init_async_submit(submit, 0, NULL, NULL, NULL,
					  scribble);
			tx = async_gen_syndrome(blocks, offset, disks, len,
						submit);
			async_tx_quiesce(&tx);
			q = page_address(q_src) + offset;
			s = page_address(spare) + offset;
			*pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
		}

		/* restore P, Q and submit */
		P(blocks, disks) = p_src;
		Q(blocks, disks) = q_src;

		submit->cb_fn = cb_fn_orig;
		submit->cb_param = cb_param_orig;
		submit->flags = flags_orig;
		async_tx_sync_epilog(submit);

		return NULL;
	}
}
/** * async_xor_zero_sum - attempt a PQ parities check with a dma engine. * @pdest: P-parity destination to check * @qdest: Q-parity destination to check * @src_list: array of source pages; the 1st pointer is qdest, the 2nd - pdest. * @scoef_list: coefficients to use in GF-multiplications * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @presult: 0 if P parity is OK else non-zero * @qresult: 0 if Q parity is OK else non-zero * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK * @depend_tx: depends on the result of this transaction. * @callback: function to call when the xor completes * @callback_param: parameter to pass to the callback routine */ struct dma_async_tx_descriptor * async_pqxor_zero_sum(struct page *pdest, struct page *qdest, struct page **src_list, unsigned char *scf, unsigned int offset, int src_cnt, size_t len, u32 *presult, u32 *qresult, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, dma_async_tx_callback cb_fn, void *cb_param) { struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_PQ_ZERO_SUM, src_list, 2, &src_list[2], src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; BUG_ON(src_cnt <= 1); BUG_ON(!qdest || qdest != src_list[0] || pdest != src_list[1]); if (device) { dma_addr_t *dma_src = (dma_addr_t *)src_list; unsigned long dma_prep_flags = cb_fn ? 
DMA_PREP_INTERRUPT : 0; int i; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); tx = device->device_prep_dma_pqzero_sum(chan, dma_src, src_cnt, scf, len, presult, qresult, dma_prep_flags); if (!tx) { if (depend_tx) dma_wait_for_async_tx(depend_tx); while (!tx) tx = device->device_prep_dma_pqzero_sum(chan, dma_src, src_cnt, scf, len, presult, qresult, dma_prep_flags); } async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); } else { unsigned long lflags = flags; /* TBD: support for lengths size of more than PAGE_SIZE */ lflags &= ~ASYNC_TX_ACK; spin_lock(&spare_lock); do_sync_pqxor(spare_pages[0], spare_pages[1], &src_list[2], offset, src_cnt - 2, len, lflags, depend_tx, NULL, NULL); if (presult && pdest) *presult = memcmp(page_address(pdest), page_address(spare_pages[0]), len) == 0 ? 0 : 1; if (qresult && qdest) *qresult = memcmp(page_address(qdest), page_address(spare_pages[1]), len) == 0 ? 0 : 1; spin_unlock(&spare_lock); } return tx; }
/**
 * async_memcpy - attempt to copy memory with a dma engine.
 * @dest: destination page
 * @src: src page
 * @dest_offset: offset into 'dest' to start transaction
 * @src_offset: offset into 'src' to start transaction
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK
 *
 * Returns the in-flight descriptor on the hardware path, NULL when the
 * copy was completed synchronously on the cpu.
 */
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
	     unsigned int src_offset, size_t len,
	     struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
						      &dest, 1, &src, 1, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;
	struct dmaengine_unmap_data *unmap = NULL;

	if (device)
		/* one src + one dest slot; GFP_NOWAIT as we may be in
		 * atomic context -- allocation failure falls back to sync
		 */
		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);

	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
		unsigned long dma_prep_flags = 0;

		if (submit->cb_fn)
			dma_prep_flags |= DMA_PREP_INTERRUPT;
		if (submit->flags & ASYNC_TX_FENCE)
			dma_prep_flags |= DMA_PREP_FENCE;

		unmap->to_cnt = 1;
		unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
					      DMA_TO_DEVICE);
		unmap->from_cnt = 1;
		unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
					      DMA_FROM_DEVICE);
		unmap->len = len;

		tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
						    unmap->addr[0], len,
						    dma_prep_flags);
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);

		/* hand unmap ownership to the descriptor's completion */
		dma_set_unmap(tx, unmap);
		async_tx_submit(chan, tx, submit);
	} else {
		void *dest_buf, *src_buf;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		/* kmap_atomic: either page may live in highmem */
		dest_buf = kmap_atomic(dest) + dest_offset;
		src_buf = kmap_atomic(src) + src_offset;

		memcpy(dest_buf, src_buf, len);

		kunmap_atomic(src_buf);
		kunmap_atomic(dest_buf);

		async_tx_sync_epilog(submit);
	}

	/* drop our reference; if tx took one above the data stays mapped
	 * until completion
	 */
	dmaengine_unmap_put(unmap);

	return tx;
}
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * @dest: destination page
 * @src_list: array of source pages
 * @offset: common src/dst offset to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
 *
 * xor_blocks always uses the dest as a source so the
 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 * the calculation.  The assumption with dma engines is that they only
 * use the destination buffer as a source when it is explicitly specified
 * in the source list.
 *
 * src_list note: if the dest is also a source it must be at index zero.
 * The contents of this array will be overwritten if a scribble region
 * is not specified.
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	  int src_cnt, size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dmaengine_unmap_data *unmap = NULL;

	/* a single-source "xor" is meaningless */
	BUG_ON(src_cnt <= 1);

	/* need one mapping slot per source plus one for the destination */
	if (device)
		unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT);

	if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
		struct dma_async_tx_descriptor *tx;
		int i, j;

		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		unmap->len = len;
		/* collapse NULL entries so the engine only sees real sources */
		for (i = 0, j = 0; i < src_cnt; i++) {
			if (!src_list[i])
				continue;
			unmap->to_cnt++;
			unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
							offset, len, DMA_TO_DEVICE);
		}

		/* map it bidirectional as it may be re-used as a source */
		unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
					      DMA_BIDIRECTIONAL);
		unmap->bidi_cnt = 1;

		tx = do_async_xor(chan, unmap, submit);
		dmaengine_unmap_put(unmap);
		return tx;
	} else {
		dmaengine_unmap_put(unmap);
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);
		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
			  __func__);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);

		return NULL;
	}
}
/**
 * async_gen_syndrome - asynchronously calculate a raid6 syndrome
 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
 * @offset: common offset into each block (src and dest) to start transaction
 * @disks: number of blocks (including missing P or Q, see below)
 * @len: length of operation in bytes
 * @submit: submission/completion modifiers
 *
 * General note: This routine assumes a field of GF(2^8) with a
 * primitive polynomial of 0x11d and a generator of {02}.
 *
 * 'disks' note: callers can optionally omit either P or Q (but not
 * both) from the calculation by setting blocks[disks-2] or
 * blocks[disks-1] to NULL.  When P or Q is omitted 'len' must be <=
 * PAGE_SIZE as a temporary buffer of this size is used in the
 * synchronous path.  'disks' always accounts for both destination
 * buffers.  If any source buffers (blocks[i] where i < disks - 2) are
 * set to NULL those buffers will be replaced with the raid6_zero_page
 * in the synchronous path and omitted in the hardware-asynchronous
 * path.
 */
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
		   size_t len, struct async_submit_ctl *submit)
{
	int src_cnt = disks - 2;
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &P(blocks, disks), 2,
						      blocks, src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dmaengine_unmap_data *unmap = NULL;

	/* at least one of P or Q must be requested; gf tables cap disks at 255 */
	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));

	if (device)
		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);

	/* XORing P/Q is only implemented in software */
	if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
	    (src_cnt <= dma_maxpq(device, 0) ||
	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
	    is_dma_pq_aligned(device, offset, 0, len)) {
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = 0;
		unsigned char coefs[src_cnt];
		int i, j;

		/* run the p+q asynchronously */
		pr_debug("%s: (async) disks: %d len: %zu\n",
			 __func__, disks, len);

		/* convert source addresses being careful to collapse 'empty'
		 * sources and update the coefficients accordingly
		 */
		unmap->len = len;
		for (i = 0, j = 0; i < src_cnt; i++) {
			if (blocks[i] == NULL)
				continue;
			unmap->addr[j] = dma_map_page(device->dev, blocks[i],
						      offset, len, DMA_TO_DEVICE);
			coefs[j] = raid6_gfexp[i];
			unmap->to_cnt++;
			j++;
		}

		/*
		 * DMAs use destinations as sources,
		 * so use BIDIRECTIONAL mapping
		 */
		unmap->bidi_cnt++;
		if (P(blocks, disks))
			unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
							offset, len, DMA_BIDIRECTIONAL);
		else {
			/* caller omitted P; tell the engine to skip it */
			unmap->addr[j++] = 0;
			dma_flags |= DMA_PREP_PQ_DISABLE_P;
		}

		unmap->bidi_cnt++;
		if (Q(blocks, disks))
			unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
							offset, len, DMA_BIDIRECTIONAL);
		else {
			/* caller omitted Q; tell the engine to skip it */
			unmap->addr[j++] = 0;
			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
		}

		tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
		dmaengine_unmap_put(unmap);
		return tx;
	}

	dmaengine_unmap_put(unmap);

	/* run the pq synchronously */
	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);

	/* wait for any prerequisite operations */
	async_tx_quiesce(&submit->depend_tx);

	/* substitute the scribble page for a missing destination; the
	 * BUG_ONs enforce the <= PAGE_SIZE constraint documented above */
	if (!P(blocks, disks)) {
		P(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	if (!Q(blocks, disks)) {
		Q(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	do_sync_gen_syndrome(blocks, offset, disks, len, submit);

	return NULL;
}
/**
 * async_memcpy - attempt to copy memory with a dma engine.
 * @dest: destination page
 * @src: src page
 * @dest_offset: offset into 'dest' to start transaction
 * @src_offset: offset into 'src' to start transaction
 * @len: length in bytes
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST
 * @depend_tx: memcpy depends on the result of this transaction
 * @cb_fn: function to call when the memcpy completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
	     unsigned int src_offset, size_t len, enum async_tx_flags flags,
	     struct dma_async_tx_descriptor *depend_tx,
	     dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
						      &dest, 1, &src, 1, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device) {
		dma_addr_t dma_dest, dma_src;
		/* only request a completion interrupt when there is a callback */
		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;

		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
					DMA_FROM_DEVICE);

		dma_src = dma_map_page(device->dev, src, src_offset, len,
				       DMA_TO_DEVICE);

		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
						    len, dma_prep_flags);
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);
		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		void *dest_buf, *src_buf;
		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* wait for any prerequisite operations */
		if (depend_tx) {
			/* if ack is already set then we cannot be sure
			 * we are referring to the correct operation
			 */
			BUG_ON(depend_tx->ack);
			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for depend_tx\n",
				      __func__);
		}

		/* dest and src may be mapped at the same time, so they must
		 * use distinct kmap slots: KM_USER0 for dest, KM_USER1 for
		 * src (mapping both into KM_USER0 would make the second
		 * kmap_atomic clobber the first)
		 */
		if (flags & ASYNC_TX_KMAP_DST)
			dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
		else
			dest_buf = page_address(dest) + dest_offset;

		if (flags & ASYNC_TX_KMAP_SRC)
			src_buf = kmap_atomic(src, KM_USER1) + src_offset;
		else
			src_buf = page_address(src) + src_offset;

		memcpy(dest_buf, src_buf, len);

		/* kunmap in reverse order of the kmap_atomic calls */
		if (flags & ASYNC_TX_KMAP_SRC)
			kunmap_atomic(src_buf, KM_USER1);

		if (flags & ASYNC_TX_KMAP_DST)
			kunmap_atomic(dest_buf, KM_USER0);

		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}