/*
 * mspec_zero_block
 *
 * Zero the specified block of memory.  On SN2 the block is zeroed by
 * the Block Transfer Engine (BTE); everywhere else a plain memset()
 * suffices.
 */
static inline int
mspec_zero_block(unsigned long addr, int len)
{
	int status;

	if (is_sn2) {
		if (is_shub2()) {
			int nid;
			void *p;
			int i;

			/*
			 * Shub2 caches fetchop (AMO) variables; cycle
			 * through the node's AMO cache entries to flush
			 * any cached lines before the BTE rewrites the
			 * backing memory.
			 */
			nid = nasid_to_cnodeid(get_node_number(__pa(addr)));
			p = (void *)TO_AMO(scratch_page[nid]);

			for (i = 0; i < SH2_AMO_CACHE_ENTRIES; i++) {
				FETCHOP_LOAD_OP(p, FETCHOP_LOAD);
				p += FETCHOP_VAR_SIZE;
			}
		}

		/* The BTE takes physical addresses; strip the uncached
		 * mapping offset and let it zero-fill the block. */
		status = bte_copy(0, addr & ~__IA64_UNCACHED_OFFSET, len,
				  BTE_WACQUIRE | BTE_ZERO_FILL, NULL);
	} else {
		memset((char *)addr, 0, len);
		status = 0;
	}

	return status;
}
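/*
 * Hedged usage sketch (illustration only, not driver code): a typical
 * caller hands mspec_zero_block() a kernel virtual address and a
 * page-sized length, e.g. when recycling a page before handing it back
 * to userspace.  "page_addr" is a hypothetical parameter.
 */
static int example_recycle_mspec_page(unsigned long page_addr)
{
	/* Returns a BTE status code on SN2, 0 on other platforms. */
	return mspec_zero_block(page_addr, PAGE_SIZE);
}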
/*
 * Transfer the bte_test_buffer from our node to the specified
 * destination and print out timing results.
 */
static void
brt_time_xfer(int dest_node, int iterations, int xfer_lines)
{
	int iteration;
	char *src, *dst;
	u64 xfer_len, src_phys, dst_phys;
	u64 itc_before, itc_after, mem_intvl, bte_intvl;

	xfer_len = xfer_lines * L1_CACHE_BYTES;

	src = nodepda->bte_if[0].bte_test_buf;
	src_phys = __pa(src);
	dst = NODEPDA(dest_node)->bte_if[1].bte_test_buf;
	dst_phys = __pa(dst);
	mem_intvl = 0;

	for (iteration = 0; iteration < iterations; iteration++) {
		/* Optionally time a processor memcpy() of the same
		 * buffer for comparison with the BTE. */
		if (tm_memcpy) {
			itc_before = ia64_get_itc();
			memcpy(dst, src, xfer_len);
			itc_after = ia64_get_itc();
			mem_intvl = itc_after - itc_before;
		}

		/* Time the same transfer done by the BTE. */
		itc_before = ia64_get_itc();
		bte_copy(src_phys, dst_phys, xfer_len, BTE_NOTIFY, NULL);
		itc_after = ia64_get_itc();
		bte_intvl = itc_after - itc_before;

		if (tm_memcpy) {
			printk("%3d,%3d,%3d,%5d,%4ld,%7ld,%3ld,"
			       "%7ld,%7ld,%7ld\n",
			       smp_processor_id(), NASID_GET(src),
			       NASID_GET(dst), xfer_lines,
			       NSEC(bte_setup_time),
			       NSEC(bte_transfer_time),
			       NSEC(bte_tear_down_time),
			       NSEC(bte_execute_time),
			       NSEC(bte_intvl), NSEC(mem_intvl));
		} else {
			printk("%3d,%3d,%3d,%5d,%4ld,%7ld,%3ld,"
			       "%7ld,%7ld\n",
			       smp_processor_id(), NASID_GET(src),
			       NASID_GET(dst), xfer_lines,
			       NSEC(bte_setup_time),
			       NSEC(bte_transfer_time),
			       NSEC(bte_tear_down_time),
			       NSEC(bte_execute_time),
			       NSEC(bte_intvl));
		}
	}
}
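/*
 * Sketch of an ITC-to-nanosecond conversion (an assumption about what
 * the NSEC() macro above does, not its actual definition): scale raw
 * ITC ticks by the per-cpu cycles-per-usec value that
 * brt_notify_thrd() below also relies on.
 */
static inline long example_itc_to_nsec(long itc_ticks)
{
	/* 1 usec == cyc_per_usec ticks, so ticks * 1000 / cyc == nsec. */
	return (itc_ticks * 1000) / local_cpu_data->cyc_per_usec;
}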
/*
 * Wrapper for bte_copy().
 *
 *	dst_pa - physical address of the destination of the transfer.
 *	src_pa - physical address of the source of the transfer.
 *	len - number of bytes to transfer from source to destination.
 *
 * Note: xp_remote_memcpy_sn2() should never be called while holding a
 * spinlock.
 */
static enum xp_retval
xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
		     size_t len)
{
	bte_result_t ret;

	ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
	if (ret == BTE_SUCCESS)
		return xpSuccess;

	if (is_shub2()) {
		dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
			"0x%016lx src_pa=0x%016lx len=%ld\n", ret, dst_pa,
			src_pa, len);
	} else {
		dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
			"src_pa=0x%016lx len=%ld\n", ret, dst_pa, src_pa, len);
	}

	return xpBteCopyError;
}
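/*
 * Hedged caller sketch (not from xp_sn2.c): bte_copy() expects
 * cache-line aligned addresses and length, so a cautious caller might
 * validate alignment before handing off.  The xpUnknownReason return
 * here is an arbitrary illustrative choice.
 */
static enum xp_retval example_remote_copy(unsigned long dst_pa,
					  unsigned long src_pa, size_t len)
{
	/* Reject anything not aligned to an L1 cache line. */
	if ((dst_pa | src_pa | len) & (L1_CACHE_BYTES - 1))
		return xpUnknownReason;

	return xp_remote_memcpy_sn2(dst_pa, src_pa, len);
}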
/*
 * Locate a nasid which doesn't exist.  Perform a bte_copy from that
 * node to our local node.
 */
static int
brt_tst_invalid_xfers(void)
{
	int i;
	int free_nasid = -1;
	int cpu;
	int error_cnt;
	u64 ret_code;

	if (ix_srcnasid != -1) {
		free_nasid = ix_srcnasid;
	} else {
		/* Only looking for nasids from C-Nodes. */
		for (i = 0; i < PLAT_MAX_NODE_NUMBER; i += 2) {
			if (local_node_data->physical_node_map[i] == -1) {
				free_nasid = i;
				break;
			}
		}
	}

	if (free_nasid == -1) {
		printk("tst_invalid_xfers: No free nodes found.  "
		       "Exiting.\n");
		return 0;
	}

	printk("tst_invalid_xfers: Using source nasid of %d\n", free_nasid);

	error_cnt = 0;
	for (i = 0; i < ix_iterations; i++) {
		if (verbose >= 1) {
			printk("-------------------------------"
			       "-------------------------------"
			       "--------------\n");
		}
		if ((verbose >= 1) || !(i % 10)) {
			printk("  Loop %d\n", i);
		}

		/* Attempt the invalid transfer once from every cpu. */
		for (cpu = 0; cpu < smp_num_cpus; cpu++) {
			set_cpus_allowed(current, (1UL << cpu));
			if (verbose > 1) {
				printk("Testing with CPU %d\n",
				       smp_processor_id());
			}

			/* >>> Need a better means of calculating a
			 * remote addr. */
			ret_code = bte_copy(TO_NODE(free_nasid, 0),
					    __pa(nodepda->bte_if[0].
						 bte_test_buf),
					    4 * L1_CACHE_BYTES,
					    BTE_NOTIFY, NULL);
			error_cnt += (ret_code ? 1 : 0);
		}
	}

	/* Every transfer should have failed; anything else is a bug. */
	ret_code = ((error_cnt != (ix_iterations * smp_num_cpus)) ? 1 : 0);
	return ret_code;
}
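/*
 * Sketch (illustration only): the scoring above inverts the usual
 * sense of a test -- a bte_copy() from the nonexistent nasid is
 * *supposed* to error out, so the run fails only when some transfer
 * unexpectedly succeeded.
 */
static int example_score_invalid_xfers(int errors_seen, int attempts)
{
	return (errors_seen != attempts) ? 1 /* FAIL */ : 0 /* PASS */;
}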
/*
 * One of these threads is started per cpu.  Each thread is responsible
 * for loading that cpu's bte interface and then writing to the
 * test buffer.  The transfers are set up in a round-robin fashion.
 * The end result is that each test buffer is being written into
 * by the previous node and both cpus at the same time as the
 * local bte is transferring it to the next node.
 */
static int
brt_notify_thrd(void *__bind_cpu)
{
	int bind_cpu = (long int)__bind_cpu;
	int cpu = cpu_logical_map(bind_cpu);
	nodepda_t *nxt_node;
	long tmout_itc_intvls;
	long tmout;
	long passes;
	long good_xfer_cnt;
	u64 src_phys, dst_phys;
	int i;
	volatile char *src_buf;
	u64 *notify;

	atomic_inc(&brt_thread_cnt);
	daemonize();
	set_user_nice(current, 19);
	sigfillset(&current->blocked);

	/* Migrate to the right CPU. */
	set_cpus_allowed(current, 1UL << cpu);

	/* Calculate the uSec timeout itc offset. */
	tmout_itc_intvls = local_cpu_data->cyc_per_usec * hang_usec;

	/* Round-robin: each node targets the next, wrapping at the end. */
	if (local_cnodeid() == (numnodes - 1)) {
		nxt_node = NODEPDA(0);
	} else {
		nxt_node = NODEPDA(local_cnodeid() + 1);
	}

	src_buf = nodepda->bte_if[0].bte_test_buf;
	src_phys = __pa(src_buf);
	dst_phys = __pa(nxt_node->bte_if[0].bte_test_buf);

	notify = kmalloc(L1_CACHE_BYTES, GFP_KERNEL);
	ASSERT(!((u64) notify & L1_CACHE_MASK));

	/* Start idle so the first pass issues a transfer (kmalloc()
	 * does not zero the allocation). */
	*notify = 0;

	printk("BTE Hang %d xfer 0x%lx -> 0x%lx, Notify=0x%lx\n",
	       smp_processor_id(), src_phys, dst_phys, (u64) notify);

	passes = 0;
	good_xfer_cnt = 0;

	/* Loop until signalled to exit. */
	while (!brt_exit_flag) {
		/*
		 * A hang will prevent further transfers.
		 * NOTE: Sometimes, it appears like a hang occurred and
		 * then transfers begin again.  This just means that
		 * there is NUMA congestion and the hang_usec param
		 * should be increased.
		 */
		if (!(*notify & IBLS_BUSY)) {
			if ((bte_copy(src_phys, dst_phys,
				      4UL * L1_CACHE_BYTES,
				      BTE_NOTIFY,
				      (void *)notify)) != BTE_SUCCESS) {
				printk("<0>Cpu %d Could not "
				       "allocate a bte.\n",
				       smp_processor_id());
				continue;
			}

			tmout = ia64_get_itc() + tmout_itc_intvls;

			while ((*notify & IBLS_BUSY) &&
			       (ia64_get_itc() < tmout)) {
				/* Push data out with the processor. */
				for (i = 0; i < (4 * L1_CACHE_BYTES);
				     i += L1_CACHE_BYTES) {
					src_buf[i] = (passes % 128);
				}
			}

			if (*notify & IBLS_BUSY) {
				printk("<0>Cpu %d BTE appears to have "
				       "hung.\n", smp_processor_id());
			} else {
				good_xfer_cnt++;
			}
		}

		/* Every x passes, take a little break. */
		if (!(++passes % 40)) {
			passes = 0;
			schedule_timeout(HZ / 100);
		}
	}

	kfree(notify);
	printk("Cpu %d had %ld good passes\n",
	       smp_processor_id(), good_xfer_cnt);
	atomic_dec(&brt_thread_cnt);
	return 0;
}
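/*
 * Sketch of the notification protocol used above (an assumption about
 * the general pattern, not new driver code): the BTE clears IBLS_BUSY
 * in the caller-supplied, cache-aligned notify word when the transfer
 * completes, so completion is detected by polling that bit until a
 * timeout expires.
 */
static int example_wait_for_bte(volatile u64 *notify, long timeout_itc)
{
	long tmout = ia64_get_itc() + timeout_itc;

	while ((*notify & IBLS_BUSY) && (ia64_get_itc() < tmout))
		cpu_relax();	/* spin until done or timed out */

	return (*notify & IBLS_BUSY) ? -1 /* hung */ : 0;
}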