Exemplo n.º 1
0
static inline struct dma_chan *
pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, size_t len)
{
	#ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
	return NULL;
	#endif
	return async_tx_find_channel(submit, DMA_PQ_VAL, NULL, 0,  blocks,
				     disks, len);
}
Exemplo n.º 2
0
/**
 * async_memcpy - attempt to copy memory with a dma engine.
 * @dest: destination page
 * @src: src page
 * @dest_offset: offset into 'dest' to start transaction
 * @src_offset: offset into 'src' to start transaction
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK
 */
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
	     unsigned int src_offset, size_t len,
	     struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
						      &dest, 1, &src, 1, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
		dma_addr_t dma_dest, dma_src;
		unsigned long dma_prep_flags = 0;

		if (submit->cb_fn)
			dma_prep_flags |= DMA_PREP_INTERRUPT;
		if (submit->flags & ASYNC_TX_FENCE)
			dma_prep_flags |= DMA_PREP_FENCE;
		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
					DMA_FROM_DEVICE);

		dma_src = dma_map_page(device->dev, src, src_offset, len,
				       DMA_TO_DEVICE);

		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
						    len, dma_prep_flags);
		if (!tx) {
			dma_unmap_page(device->dev, dma_dest, len,
				       DMA_FROM_DEVICE);
			dma_unmap_page(device->dev, dma_src, len,
				       DMA_TO_DEVICE);
		}
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);
		async_tx_submit(chan, tx, submit);
	} else {
		void *dest_buf, *src_buf;
		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
		src_buf = kmap_atomic(src, KM_USER1) + src_offset;

		memcpy(dest_buf, src_buf, len);

		kunmap_atomic(src_buf, KM_USER1);
		kunmap_atomic(dest_buf, KM_USER0);

		async_tx_sync_epilog(submit);
	}

	return tx;
}
static inline struct dma_chan *
xor_val_chan(struct async_submit_ctl *submit, struct page *dest,
		 struct page **src_list, int src_cnt, size_t len)
{
	#ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
	return NULL;
	#endif
	return async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list,
				     src_cnt, len);
}
Exemplo n.º 4
0
static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
		  size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, srcs, 2, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *amul, *bmul;
	u8 ax, bx;
	u8 *a, *b, *c;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[2];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
		dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
	}

	/* run the operation synchronously */
	async_tx_quiesce(&submit->depend_tx);
	amul = raid6_gfmul[coef[0]];
	bmul = raid6_gfmul[coef[1]];
	a = page_address(srcs[0]);
	b = page_address(srcs[1]);
	c = page_address(dest);

	while (len--) {
		ax    = amul[*a++];
		bx    = bmul[*b++];
		*c++ = ax ^ bx;
	}

	return NULL;
}
Exemplo n.º 5
0
/**
 * async_memset - attempt to fill memory with a dma engine.
 * @dest: destination page
 * @val: fill value
 * @offset: offset in pages to start transaction
 * @len: length in bytes
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: memset depends on the result of this transaction
 * @cb_fn: function to call when the memcpy completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset,
    size_t len, enum async_tx_flags flags,
    struct dma_async_tx_descriptor *depend_tx,
    dma_async_tx_callback cb_fn, void *cb_param)
{
    struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
                              &dest, 1, NULL, 0, len);
    struct dma_device *device = chan ? chan->device : NULL;
    struct dma_async_tx_descriptor *tx = NULL;

    if (device) {
        dma_addr_t dma_dest;
        unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;

        dma_dest = dma_map_page(device->dev, dest, offset, len,
                    DMA_FROM_DEVICE);

        tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
                            dma_prep_flags);
    }

    if (tx) {
        pr_debug("%s: (async) len: %zu\n", __func__, len);
        async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
    } else { /* run the memset synchronously */
        void *dest_buf;
        pr_debug("%s: (sync) len: %zu\n", __func__, len);

        dest_buf = (void *) (((char *) page_address(dest)) + offset);

        /* wait for any prerequisite operations */
        if (depend_tx) {
            /* if ack is already set then we cannot be sure
             * we are referring to the correct operation
             */
            BUG_ON(depend_tx->ack);
            if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
                panic("%s: DMA_ERROR waiting for depend_tx\n",
                    __func__);
        }

        memset(dest_buf, val, len);

        async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
    }

    return tx;
}
Exemplo n.º 6
0
static struct dma_async_tx_descriptor *
async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
	   struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &dest, 1, &src, 1, len);
	struct dma_device *dma = chan ? chan->device : NULL;
	const u8 *qmul; /* Q multiplier table */
	u8 *d, *s;

	if (dma) {
		dma_addr_t dma_dest[2];
		dma_addr_t dma_src[1];
		struct device *dev = dma->dev;
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;

		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
					     len, dma_flags);
		if (tx) {
			async_tx_submit(chan, tx, submit);
			return tx;
		}

		/* could not get a descriptor, unmap and fall through to
		 * the synchronous path
		 */
		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
	}

	/* no channel available, or failed to allocate a descriptor, so
	 * perform the operation synchronously
	 */
	async_tx_quiesce(&submit->depend_tx);
	qmul  = raid6_gfmul[coef];
	d = page_address(dest);
	s = page_address(src);

	while (len--)
		*d++ = qmul[*s++];

	return NULL;
}
/**
 * async_gen_syndrome - asynchronously calculate a raid6 syndrome
 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
 * @offset: common offset into each block (src and dest) to start transaction
 * @disks: number of blocks (including missing P or Q, see below)
 * @len: length of operation in bytes
 * @submit: submission/completion modifiers
 *
 * General note: This routine assumes a field of GF(2^8) with a
 * primitive polynomial of 0x11d and a generator of {02}.
 *
 * 'disks' note: callers can optionally omit either P or Q (but not
 * both) from the calculation by setting blocks[disks-2] or
 * blocks[disks-1] to NULL.  When P or Q is omitted 'len' must be <=
 * PAGE_SIZE as a temporary buffer of this size is used in the
 * synchronous path.  'disks' always accounts for both destination
 * buffers.  If any source buffers (blocks[i] where i < disks - 2) are
 * set to NULL those buffers will be replaced with the raid6_zero_page
 * in the synchronous path and omitted in the hardware-asynchronous
 * path.
 *
 * 'blocks' note: if submit->scribble is NULL then the contents of
 * 'blocks' may be overwritten to perform address conversions
 * (dma_map_page() or page_address()).
 */
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
		   size_t len, struct async_submit_ctl *submit)
{
	int src_cnt = disks - 2;
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &P(blocks, disks), 2,
						      blocks, src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	dma_addr_t *dma_src = NULL;

	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));

	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) blocks;

	if (dma_src && device &&
	    (src_cnt <= dma_maxpq(device, 0) ||
	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
	    is_dma_pq_aligned(device, offset, 0, len)) {
		/* run the p+q asynchronously */
		pr_debug("%s: (async) disks: %d len: %zu\n",
			 __func__, disks, len);
		return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
					     disks, len, dma_src, submit);
	}

	/* run the pq synchronously */
	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);

	/* wait for any prerequisite operations */
	async_tx_quiesce(&submit->depend_tx);

	if (!P(blocks, disks)) {
		P(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	if (!Q(blocks, disks)) {
		Q(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	do_sync_gen_syndrome(blocks, offset, disks, len, submit);

	return NULL;
}
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * @dest: destination page
 * @src_list: array of source pages
 * @offset: common src/dst offset to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
 *
 * xor_blocks always uses the dest as a source so the
 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 * the calculation.  The assumption with dma eninges is that they only
 * use the destination buffer as a source when it is explicity specified
 * in the source list.
 *
 * src_list note: if the dest is also a source it must be at index zero.
 * The contents of this array will be overwritten if a scribble region
 * is not specified.
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	  int src_cnt, size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	dma_addr_t *dma_src = NULL;

	BUG_ON(src_cnt <= 1);

	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) src_list;

	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
				    dma_src, submit);
	} else {
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);
		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
			  __func__);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);

		return NULL;
	}
}
Exemplo n.º 9
0
/**
 * async_pqxor - attempt to calculate RS-syndrome and XOR in parallel using
 *	a dma engine.
 * @pdest: destination page for P-parity (XOR)
 * @qdest: destination page for Q-parity (GF-XOR)
 * @src_list: array of source pages
 * @src_coef_list: array of source coefficients used in GF-multiplication
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_ASSUME_COHERENT,
 *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ASYNC_TX_ASYNC_ONLY
 * @depend_tx: depends on the result of this transaction.
 * @callback: function to call when the operation completes
 * @callback_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_pqxor(struct page *pdest, struct page *qdest,
	struct page **src_list, unsigned char *scoef_list,
	unsigned int offset, int src_cnt, size_t len, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback callback, void *callback_param)
{
	struct page *dest[] = {pdest, qdest};
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_PQ_XOR,
						      dest, 2, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (!device && (flags & ASYNC_TX_ASYNC_ONLY))
		return NULL;

	if (device) { /* run the xor asynchronously */
		tx = do_async_pqxor(device, chan, pdest, qdest, src_list,
			       scoef_list, offset, src_cnt, len, flags,
			       depend_tx, callback,callback_param);
	} else { /* run the pqxor synchronously */
		/* may do synchronous PQ only when both destinations exsists */
		if (!pdest || !qdest)
			return NULL;

		/* wait for any prerequisite operations */
		if (depend_tx) {
			/* if ack is already set then we cannot be sure
			 * we are referring to the correct operation
			 */
			BUG_ON(depend_tx->ack);
			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for depend_tx\n",
					__FUNCTION__);
		}

		do_sync_pqxor(pdest, qdest, src_list,
			offset,	src_cnt, len, flags, depend_tx,
			callback, callback_param);
	}

	return tx;
}
Exemplo n.º 10
0
struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset, size_t len,
             struct async_submit_ctl *submit)
{
    struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
                            &dest, 1, NULL, 0, len);
    struct dma_device *device = chan ? chan->device : NULL;
    struct dma_async_tx_descriptor *tx = NULL;

    if (device && is_dma_fill_aligned(device, offset, 0, len)) {
        dma_addr_t dma_dest;
        unsigned long dma_prep_flags = 0;

        if (submit->cb_fn)
            dma_prep_flags |= DMA_PREP_INTERRUPT;
        if (submit->flags & ASYNC_TX_FENCE)
            dma_prep_flags |= DMA_PREP_FENCE;
        dma_dest = dma_map_page(device->dev, dest, offset, len,
                                DMA_FROM_DEVICE);

        tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
                                            dma_prep_flags);
    }

    if (tx) {
        pr_debug("%s: (async) len: %zu\n", __func__, len);
        async_tx_submit(chan, tx, submit);
    } else { /* run the memset synchronously */
        void *dest_buf;
        pr_debug("%s: (sync) len: %zu\n", __func__, len);

        dest_buf = page_address(dest) + offset;

        /* wait for any prerequisite operations */
        async_tx_quiesce(&submit->depend_tx);

        memset(dest_buf, val, len);

        async_tx_sync_epilog(submit);
    }

    return tx;
}
Exemplo n.º 11
0
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * @dest: destination page
 * @src_list: array of source pages
 * @offset: common src/dst offset to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
 *
 * xor_blocks always uses the dest as a source so the
 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 * the calculation.  The assumption with dma eninges is that they only
 * use the destination buffer as a source when it is explicity specified
 * in the source list.
 *
 * src_list note: if the dest is also a source it must be at index zero.
 * The contents of this array will be overwritten if a scribble region
 * is not specified.
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	  int src_cnt, size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	dma_addr_t *dma_src = NULL;

	BUG_ON(src_cnt <= 1);

	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) src_list;

	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
		
		pr_debug("%s (async): len: %zu\n", __func__, len);

		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
				    dma_src, submit);
	} else {
		
		pr_debug("%s (sync): len: %zu\n", __func__, len);
		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
			  __func__);

		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		
		async_tx_quiesce(&submit->depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);

		return NULL;
	}
}
Exemplo n.º 12
0
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 *	xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
 *	flag must be set to not include dest data in the calculation.  The
 *	assumption with dma eninges is that they only use the destination
 *	buffer as a source when it is explicity specified in the source list.
 * @dest: destination page
 * @src_list: array of source pages (if the dest is also a source it must be
 *	at index zero).  The contents of this array may be overwritten.
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
 *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	int src_cnt, size_t len, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	BUG_ON(src_cnt <= 1);

	if (chan) {
		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
				    flags, depend_tx, cb_fn, cb_param);
	} else {
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len,
			    flags, cb_fn, cb_param);

		return NULL;
	}
}
Exemplo n.º 13
0
/**
 * async_xor_zero_sum - attempt a xor parity check with a dma engine.
 * @dest: destination page used if the xor is performed synchronously
 * @src_list: array of source pages.  The dest page must be listed as a source
 * 	at index zero.  The contents of this array may be overwritten.
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @result: 0 if sum == 0 else non-zero
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list,
	unsigned int offset, int src_cnt, size_t len,
	u32 *result, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
						      &dest, 1, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	BUG_ON(src_cnt <= 1);

	if (device && src_cnt <= device->max_xor) {
		dma_addr_t *dma_src = (dma_addr_t *) src_list;
		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
		int i;

		pr_debug("%s: (async) len: %zu\n", __func__, len);

		for (i = 0; i < src_cnt; i++)
			dma_src[i] = dma_map_page(device->dev, src_list[i],
						  offset, len, DMA_TO_DEVICE);

		tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
						      len, result,
						      dma_prep_flags);
		if (!tx) {
			if (depend_tx)
				dma_wait_for_async_tx(depend_tx);

			while (!tx)
				tx = device->device_prep_dma_zero_sum(chan,
					dma_src, src_cnt, len, result,
					dma_prep_flags);
		}

		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		unsigned long xor_flags = flags;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		xor_flags |= ASYNC_TX_XOR_DROP_DST;
		xor_flags &= ~ASYNC_TX_ACK;

		tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
			depend_tx, NULL, NULL);

		if (tx) {
			if (dma_wait_for_async_tx(tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for tx\n",
					__func__);
			async_tx_ack(tx);
		}

		*result = page_is_zero(dest, offset, len) ? 0 : 1;

		tx = NULL;

		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}
Exemplo n.º 14
0
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 *	xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
 *	flag must be set to not include dest data in the calculation.  The
 *	assumption with dma eninges is that they only use the destination
 *	buffer as a source when it is explicity specified in the source list.
 * @dest: destination page
 * @src_list: array of source pages (if the dest is also a source it must be
 *	at index zero).  The contents of this array may be overwritten.
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
 *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: xor depends on the result of this transaction.
 * @cb_fn: function to call when the xor completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	int src_cnt, size_t len, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;
	dma_async_tx_callback _cb_fn;
	void *_cb_param;
	unsigned long local_flags;
	int xor_src_cnt;
	int i = 0, src_off = 0;

	BUG_ON(src_cnt <= 1);

	while (src_cnt) {
		local_flags = flags;
		if (device) { /* run the xor asynchronously */
			xor_src_cnt = min(src_cnt, device->max_xor);
			/* if we are submitting additional xors
			 * only set the callback on the last transaction
			 */
			if (src_cnt > xor_src_cnt) {
				local_flags &= ~ASYNC_TX_ACK;
				_cb_fn = NULL;
				_cb_param = NULL;
			} else {
				_cb_fn = cb_fn;
				_cb_param = cb_param;
			}

			tx = do_async_xor(device, chan, dest,
					  &src_list[src_off], offset,
					  xor_src_cnt, len, local_flags,
					  depend_tx, _cb_fn, _cb_param);
		} else { /* run the xor synchronously */
			/* in the sync case the dest is an implied source
			 * (assumes the dest is at the src_off index)
			 */
			if (flags & ASYNC_TX_XOR_DROP_DST) {
				src_cnt--;
				src_off++;
			}

			/* process up to 'MAX_XOR_BLOCKS' sources */
			xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);

			/* if we are submitting additional xors
			 * only set the callback on the last transaction
			 */
			if (src_cnt > xor_src_cnt) {
				local_flags &= ~ASYNC_TX_ACK;
				_cb_fn = NULL;
				_cb_param = NULL;
			} else {
				_cb_fn = cb_fn;
				_cb_param = cb_param;
			}

			/* wait for any prerequisite operations */
			if (depend_tx) {
				/* if ack is already set then we cannot be sure
				 * we are referring to the correct operation
				 */
				BUG_ON(async_tx_test_ack(depend_tx));
				if (dma_wait_for_async_tx(depend_tx) ==
					DMA_ERROR)
					panic("%s: DMA_ERROR waiting for "
						"depend_tx\n",
						__func__);
			}

			do_sync_xor(dest, &src_list[src_off], offset,
				xor_src_cnt, len, local_flags, depend_tx,
				_cb_fn, _cb_param);
		}

		/* the previous tx is hidden from the client,
		 * so ack it
		 */
		if (i && depend_tx)
			async_tx_ack(depend_tx);

		depend_tx = tx;

		if (src_cnt > xor_src_cnt) {
			/* drop completed sources */
			src_cnt -= xor_src_cnt;
			src_off += xor_src_cnt;

			/* unconditionally preserve the destination */
			flags &= ~ASYNC_TX_XOR_ZERO_DST;

			/* use the intermediate result a source, but remember
			 * it's dropped, because it's implied, in the sync case
			 */
			src_list[--src_off] = dest;
			src_cnt++;
			flags |= ASYNC_TX_XOR_DROP_DST;
		} else
			src_cnt = 0;
		i++;
	}

	return tx;
}
Exemplo n.º 15
0
/**
 * async_syndrome_val - asynchronously validate a raid6 syndrome
 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
 * @offset: common offset into each block (src and dest) to start transaction
 * @disks: number of blocks (including missing P or Q, see below)
 * @len: length of operation in bytes
 * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
 * @spare: temporary result buffer for the synchronous case
 * @submit: submission / completion modifiers
 *
 * The same notes from async_gen_syndrome apply to the 'blocks',
 * and 'disks' parameters of this routine.  The synchronous path
 * requires a temporary result buffer and submit->scribble to be
 * specified.
 */
struct dma_async_tx_descriptor *
async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
		   size_t len, enum sum_check_flags *pqres, struct page *spare,
		   struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
						      NULL, 0,  blocks, disks,
						      len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx;
	enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
	dma_addr_t *dma_src = NULL;

	BUG_ON(disks < 4);

	if (submit->scribble)
		dma_src = submit->scribble;
	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
		dma_src = (dma_addr_t *) blocks;

	if (dma_src && device && disks <= dma_maxpq(device, 0) &&
	    is_dma_pq_aligned(device, offset, 0, len)) {
		struct device *dev = device->dev;
		dma_addr_t *pq = &dma_src[disks-2];
		int i;

		pr_debug("%s: (async) disks: %d len: %zu\n",
			 __func__, disks, len);
		if (!P(blocks, disks))
			dma_flags |= DMA_PREP_PQ_DISABLE_P;
		if (!Q(blocks, disks))
			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
		if (submit->flags & ASYNC_TX_FENCE)
			dma_flags |= DMA_PREP_FENCE;
		for (i = 0; i < disks; i++)
			if (likely(blocks[i])) {
				BUG_ON(is_raid6_zero_block(blocks[i]));
				dma_src[i] = dma_map_page(dev, blocks[i],
							  offset, len,
							  DMA_TO_DEVICE);
			}

		for (;;) {
			tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
							    disks - 2,
							    raid6_gfexp,
							    len, pqres,
							    dma_flags);
			if (likely(tx))
				break;
			async_tx_quiesce(&submit->depend_tx);
			dma_async_issue_pending(chan);
		}
		async_tx_submit(chan, tx, submit);

		return tx;
	} else {
		struct page *p_src = P(blocks, disks);
		struct page *q_src = Q(blocks, disks);
		enum async_tx_flags flags_orig = submit->flags;
		dma_async_tx_callback cb_fn_orig = submit->cb_fn;
		void *scribble = submit->scribble;
		void *cb_param_orig = submit->cb_param;
		void *p, *q, *s;

		pr_debug("%s: (sync) disks: %d len: %zu\n",
			 __func__, disks, len);

		/* caller must provide a temporary result buffer and
		 * allow the input parameters to be preserved
		 */
		BUG_ON(!spare || !scribble);

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		/* recompute p and/or q into the temporary buffer and then
		 * check to see the result matches the current value
		 */
		tx = NULL;
		*pqres = 0;
		if (p_src) {
			init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
					  NULL, NULL, scribble);
			tx = async_xor(spare, blocks, offset, disks-2, len, submit);
			async_tx_quiesce(&tx);
			p = page_address(p_src) + offset;
			s = page_address(spare) + offset;
			*pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
		}

		if (q_src) {
			P(blocks, disks) = NULL;
			Q(blocks, disks) = spare;
			init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
			tx = async_gen_syndrome(blocks, offset, disks, len, submit);
			async_tx_quiesce(&tx);
			q = page_address(q_src) + offset;
			s = page_address(spare) + offset;
			*pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
		}

		/* restore P, Q and submit */
		P(blocks, disks) = p_src;
		Q(blocks, disks) = q_src;

		submit->cb_fn = cb_fn_orig;
		submit->cb_param = cb_param_orig;
		submit->flags = flags_orig;
		async_tx_sync_epilog(submit);

		return NULL;
	}
}
Exemplo n.º 16
0
/**
 * async_xor_zero_sum - attempt a PQ parities check with a dma engine.
 * @pdest: P-parity destination to check
 * @qdest: Q-parity destination to check
 * @src_list: array of source pages; the 1st pointer is qdest, the 2nd - pdest.
 * @scoef_list: coefficients to use in GF-multiplications
 * @offset: offset in pages to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @presult: 0 if P parity is OK else non-zero
 * @qresult: 0 if Q parity is OK else non-zero
 * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: depends on the result of this transaction.
 * @callback: function to call when the xor completes
 * @callback_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_pqxor_zero_sum(struct page *pdest, struct page *qdest,
	struct page **src_list, unsigned char *scf,
	unsigned int offset, int src_cnt, size_t len,
	u32 *presult, u32 *qresult, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx,
						      DMA_PQ_ZERO_SUM,
						      src_list, 2, &src_list[2],
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	BUG_ON(src_cnt <= 1);
	BUG_ON(!qdest || qdest != src_list[0] || pdest != src_list[1]);

	if (device) {
		dma_addr_t *dma_src = (dma_addr_t *)src_list;
		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
		int i;

		for (i = 0; i < src_cnt; i++)
			dma_src[i] = dma_map_page(device->dev, src_list[i],
						  offset, len, DMA_TO_DEVICE);

		tx = device->device_prep_dma_pqzero_sum(chan, dma_src, src_cnt,
						      scf, len,
						      presult, qresult,
						      dma_prep_flags);

		if (!tx) {
			if (depend_tx)
				dma_wait_for_async_tx(depend_tx);

			while (!tx)
				tx = device->device_prep_dma_pqzero_sum(chan,
						dma_src, src_cnt, scf, len,
						presult, qresult,
						dma_prep_flags);
		}

		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		unsigned long lflags = flags;

		/* TBD: support for lengths size of more than PAGE_SIZE */

		lflags &= ~ASYNC_TX_ACK;
		spin_lock(&spare_lock);
		do_sync_pqxor(spare_pages[0], spare_pages[1],
			&src_list[2], offset,
			src_cnt - 2, len, lflags,
			depend_tx, NULL, NULL);

		if (presult && pdest)
			*presult = memcmp(page_address(pdest),
					   page_address(spare_pages[0]),
					   len) == 0 ? 0 : 1;
		if (qresult && qdest)
			*qresult = memcmp(page_address(qdest),
					   page_address(spare_pages[1]),
					   len) == 0 ? 0 : 1;
		spin_unlock(&spare_lock);
	}

	return tx;
}
/**
 * async_memcpy - attempt to copy memory with a dma engine.
 * @dest: destination page
 * @src: src page
 * @dest_offset: offset into 'dest' to start transaction
 * @src_offset: offset into 'src' to start transaction
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK
 */
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
	     unsigned int src_offset, size_t len,
	     struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
						      &dest, 1, &src, 1, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;
	struct dmaengine_unmap_data *unmap = NULL;

	if (device)
		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);

	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
		unsigned long dma_prep_flags = 0;

		if (submit->cb_fn)
			dma_prep_flags |= DMA_PREP_INTERRUPT;
		if (submit->flags & ASYNC_TX_FENCE)
			dma_prep_flags |= DMA_PREP_FENCE;

		unmap->to_cnt = 1;
		unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
					      DMA_TO_DEVICE);
		unmap->from_cnt = 1;
		unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
					      DMA_FROM_DEVICE);
		unmap->len = len;

		tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
						    unmap->addr[0], len,
						    dma_prep_flags);
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);

		dma_set_unmap(tx, unmap);
		async_tx_submit(chan, tx, submit);
	} else {
		void *dest_buf, *src_buf;
		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		dest_buf = kmap_atomic(dest) + dest_offset;
		src_buf = kmap_atomic(src) + src_offset;

		memcpy(dest_buf, src_buf, len);

		kunmap_atomic(src_buf);
		kunmap_atomic(dest_buf);

		async_tx_sync_epilog(submit);
	}

	dmaengine_unmap_put(unmap);

	return tx;
}
/**
 * async_xor - attempt to xor a set of blocks with a dma engine.
 * @dest: destination page
 * @src_list: array of source pages
 * @offset: common src/dst offset to start transaction
 * @src_cnt: number of source pages
 * @len: length in bytes
 * @submit: submission / completion modifiers
 *
 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
 *
 * xor_blocks always uses the dest as a source so the
 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 * the calculation.  The assumption with dma eninges is that they only
 * use the destination buffer as a source when it is explicity specified
 * in the source list.
 *
 * src_list note: if the dest is also a source it must be at index zero.
 * The contents of this array will be overwritten if a scribble region
 * is not specified.
 */
struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
	  int src_cnt, size_t len, struct async_submit_ctl *submit)
{
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dmaengine_unmap_data *unmap = NULL;

	BUG_ON(src_cnt <= 1);

	if (device)
		unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT);

	if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
		struct dma_async_tx_descriptor *tx;
		int i, j;

		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		unmap->len = len;
		for (i = 0, j = 0; i < src_cnt; i++) {
			if (!src_list[i])
				continue;
			unmap->to_cnt++;
			unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
							offset, len, DMA_TO_DEVICE);
		}

		/* map it bidirectional as it may be re-used as a source */
		unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
					      DMA_BIDIRECTIONAL);
		unmap->bidi_cnt = 1;

		tx = do_async_xor(chan, unmap, submit);
		dmaengine_unmap_put(unmap);
		return tx;
	} else {
		dmaengine_unmap_put(unmap);
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);
		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
			  __func__);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
		async_tx_quiesce(&submit->depend_tx);

		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);

		return NULL;
	}
}
Exemplo n.º 19
0
/**
 * async_gen_syndrome - asynchronously calculate a raid6 syndrome
 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
 * @offset: common offset into each block (src and dest) to start transaction
 * @disks: number of blocks (including missing P or Q, see below)
 * @len: length of operation in bytes
 * @submit: submission/completion modifiers
 *
 * General note: This routine assumes a field of GF(2^8) with a
 * primitive polynomial of 0x11d and a generator of {02}.
 *
 * 'disks' note: callers can optionally omit either P or Q (but not
 * both) from the calculation by setting blocks[disks-2] or
 * blocks[disks-1] to NULL.  When P or Q is omitted 'len' must be <=
 * PAGE_SIZE as a temporary buffer of this size is used in the
 * synchronous path.  'disks' always accounts for both destination
 * buffers.  If any source buffers (blocks[i] where i < disks - 2) are
 * set to NULL those buffers will be replaced with the raid6_zero_page
 * in the synchronous path and omitted in the hardware-asynchronous
 * path.
 */
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
		   size_t len, struct async_submit_ctl *submit)
{
	int src_cnt = disks - 2;
	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
						      &P(blocks, disks), 2,
						      blocks, src_cnt, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dmaengine_unmap_data *unmap = NULL;

	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));

	if (device)
		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);

	/* XORing P/Q is only implemented in software */
	if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
	    (src_cnt <= dma_maxpq(device, 0) ||
	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
	    is_dma_pq_aligned(device, offset, 0, len)) {
		struct dma_async_tx_descriptor *tx;
		enum dma_ctrl_flags dma_flags = 0;
		unsigned char coefs[src_cnt];
		int i, j;

		/* run the p+q asynchronously */
		pr_debug("%s: (async) disks: %d len: %zu\n",
			 __func__, disks, len);

		/* convert source addresses being careful to collapse 'empty'
		 * sources and update the coefficients accordingly
		 */
		unmap->len = len;
		for (i = 0, j = 0; i < src_cnt; i++) {
			if (blocks[i] == NULL)
				continue;
			unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
						      len, DMA_TO_DEVICE);
			coefs[j] = raid6_gfexp[i];
			unmap->to_cnt++;
			j++;
		}

		/*
		 * DMAs use destinations as sources,
		 * so use BIDIRECTIONAL mapping
		 */
		unmap->bidi_cnt++;
		if (P(blocks, disks))
			unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
							offset, len, DMA_BIDIRECTIONAL);
		else {
			unmap->addr[j++] = 0;
			dma_flags |= DMA_PREP_PQ_DISABLE_P;
		}

		unmap->bidi_cnt++;
		if (Q(blocks, disks))
			unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
						       offset, len, DMA_BIDIRECTIONAL);
		else {
			unmap->addr[j++] = 0;
			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
		}

		tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
		dmaengine_unmap_put(unmap);
		return tx;
	}

	dmaengine_unmap_put(unmap);

	/* run the pq synchronously */
	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);

	/* wait for any prerequisite operations */
	async_tx_quiesce(&submit->depend_tx);

	if (!P(blocks, disks)) {
		P(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	if (!Q(blocks, disks)) {
		Q(blocks, disks) = pq_scribble_page;
		BUG_ON(len + offset > PAGE_SIZE);
	}
	do_sync_gen_syndrome(blocks, offset, disks, len, submit);

	return NULL;
}
Exemplo n.º 20
0
/**
 * async_memcpy - attempt to copy memory with a dma engine.
 * @dest: destination page
 * @src: src page
 * @offset: offset in pages to start transaction
 * @len: length in bytes
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
 * @depend_tx: memcpy depends on the result of this transaction
 * @cb_fn: function to call when the memcpy completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
	unsigned int src_offset, size_t len, enum async_tx_flags flags,
	struct dma_async_tx_descriptor *depend_tx,
	dma_async_tx_callback cb_fn, void *cb_param)
{
	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
						      &dest, 1, &src, 1, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device) {
		dma_addr_t dma_dest, dma_src;
		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;

		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
					DMA_FROM_DEVICE);

		dma_src = dma_map_page(device->dev, src, src_offset, len,
				       DMA_TO_DEVICE);

		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
						    len, dma_prep_flags);
	}

	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);
		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
	} else {
		void *dest_buf, *src_buf;
		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* wait for any prerequisite operations */
		if (depend_tx) {
			/* if ack is already set then we cannot be sure
			 * we are referring to the correct operation
			 */
			BUG_ON(depend_tx->ack);
			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
				panic("%s: DMA_ERROR waiting for depend_tx\n",
					__func__);
		}

		if (flags & ASYNC_TX_KMAP_DST)
			dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
		else
			dest_buf = page_address(dest) + dest_offset;

		if (flags & ASYNC_TX_KMAP_SRC)
			src_buf = kmap_atomic(src, KM_USER0) + src_offset;
		else
			src_buf = page_address(src) + src_offset;

		memcpy(dest_buf, src_buf, len);

		if (flags & ASYNC_TX_KMAP_DST)
			kunmap_atomic(dest_buf, KM_USER0);

		if (flags & ASYNC_TX_KMAP_SRC)
			kunmap_atomic(src_buf, KM_USER0);

		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
	}

	return tx;
}