/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache.  (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
 */
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
	int cpu = get_cpu();

	/*
	 * Disable interrupts so that we don't recurse into memcpy()
	 * in an interrupt handler, nor accidentally reference
	 * the PA of the source from an interrupt routine.  Also
	 * notify the simulator that we're playing games so we don't
	 * generate spurious coherency warnings.
	 */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/* Set up the new dest mapping */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/* Set up the new source mapping */
	type1 = kmap_atomic_idx_push();
	idx += (type0 - type1);
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Actually move the data. */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
	 * Remap the source as locally-cached and not OLOC'ed so that
	 * we can inval without also invaling the remote cpu's cache.
	 * This also avoids known errata with inv'ing cacheable oloc data.
	 */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*
	 * Do the actual invalidation, covering the full L2 cache line
	 * at the end since __memcpy_asm() is somewhat aggressive.
	 */
	__inv_buffer((void *)newsrc, len);

	/*
	 * We're done: notify the simulator that all is back to normal,
	 * and re-enable interrupts and pre-emption.
	 */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
	put_cpu();
}
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
	int cpu = get_cpu();

	/*
                                                             
                                                       
                                                         
                                                             
                                         
  */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/*                             */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/*                               */
	type1 = kmap_atomic_idx_push();
	idx += (type0 - type1);
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte);  /*             */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte);   /*                                     */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*                         */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
                                                              
                                                              
                                                                   
  */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte); /*                           */
	__set_pte(ptep, src_pte);   /*                                     */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*
                                                               
                                                           
  */
	__inv_buffer((void *)newsrc, len);

	/*
                                                                
                                             
  */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
	put_cpu();
}