/*
 * Build a DMA request and hand it to the matching queue.
 *
 * A chain descriptor is obtained from alloc_chain() and filled in with the
 * destination, source, length and transfer type.  What happens next depends
 * on `type`:
 *
 *   - DMA_TYPE_SPU while `nospudma` is non-zero: the data is copied
 *     immediately with spu_memload() and the chain's semaphore is signalled;
 *     the request is never queued.
 *   - DMA_TYPE_SPU (otherwise) or DMA_TYPE_BBA_RX: the request is pushed on
 *     the front of spu_chain_head and, when spu_transfering is zero,
 *     spu_cb(0) is called.  BBA RX requests additionally record
 *     thd_current in waiting_thd.
 *   - any other type: `nospudma` is cleared, the request is pushed on the
 *     front of chain_head and renderdone_cb is installed as
 *     ta_render_done_cb.
 *
 * The queues are only touched with IRQs disabled, unless the caller is
 * already running inside an interrupt (irq_inside_int()).
 *
 * Returns the chain descriptor, or NULL when alloc_chain() fails.
 */
dma_chain_t *dma_initiate(void *dest, void *src, uint32 length, int type)
{
    int saved_irq = 0;
    dma_chain_t *req = alloc_chain();

    if (req == NULL)
        return NULL;

    req->dest = (uint32) dest;
    req->src = (uint8 *) src;
    req->count = length;
    req->type = type;
    req->waiting_thd = 0;

    /* Everything that is neither SPU nor BBA RX goes on the generic queue. */
    if (type != DMA_TYPE_SPU && type != DMA_TYPE_BBA_RX) {
        nospudma = 0;

        if (!irq_inside_int())
            saved_irq = irq_disable();

        req->next = chain_head;
        chain_head = req;
        ta_render_done_cb = renderdone_cb;

        if (!irq_inside_int())
            irq_restore(saved_irq);

        return req;
    }

    /* SPU DMA switched off: do the copy synchronously and report completion. */
    if (type == DMA_TYPE_SPU && nospudma) {
        /* NOTE(review): border colour changes look like a visual debug marker. */
        vid_border_color(0, 0, 255);
        spu_memload(req->dest, req->src, req->count);
        vid_border_color(0, 0, 0);
        sem_signal(req->sema);
        return req;
    }

    /* Queued SPU / BBA RX transfer. */
    vid_border_color(0, 0, 255);

    if (!irq_inside_int())
        saved_irq = irq_disable();

    /*
     * A check that rejected BBA RX requests while an SPU request headed the
     * queue used to live here; it is disabled, so BBA RX requests are always
     * accepted and only remember which thread issued them.
     */
    if (type == DMA_TYPE_BBA_RX)
        req->waiting_thd = thd_current;

    req->next = spu_chain_head;
    spu_chain_head = req;

    if (!spu_transfering)
        spu_cb(0);

    if (!irq_inside_int())
        irq_restore(saved_irq);

    return req;
}
void thread_iter(int dram_refs, int nvm_refs, int interleave_dram, int interleave_nvm) { long it_n; unsigned long time_dram, time_nvm, total_time_dram_ns, total_time_nvm_ns; uint64_t seed; uint64_t j; chain_t *C_dram[MAX_NUM_CHAINS]; chain_t *C_nvm[MAX_NUM_CHAINS]; int missing_dram_refs, missing_nvm_refs; int dram_stalls, nvm_stalls; struct timespec task_time_start, task_time_end; unsigned long task_time_diff_ns; #ifndef NDEBUG pid_t tid = (pid_t) syscall(SYS_gettid); #endif assert(NELEMS < UINT64_MAX); for (j=0; j < NCHAINS; j++) { seed = SEED_IN + j*j; C_dram[j] = alloc_chain(seed, NELEMS, 64LLU, 0, 0); C_nvm[j] = alloc_chain(seed, NELEMS, 64LLU, 0, 1); __asm__(""); } bind_cpu(thread_self()); // cache must be trashed after bind_cpu() call trash_cache(NELEMS); total_time_dram_ns = 0; total_time_nvm_ns = 0; missing_dram_refs = dram_refs; missing_nvm_refs = nvm_refs; #ifndef NDEBUG printf("DRAM accesses to be made: %ld\n", dram_refs); printf("NVM accesses to be made: %ld\n", nvm_refs); #endif //delay_cycles(8000000000); //printf("STARTING MEASURES\n"); clock_gettime(CLOCK_MONOTONIC, &task_time_start); for (it_n = 0; (missing_dram_refs > 0) || (missing_nvm_refs > 0); ++it_n) { __asm__(""); // calculate the number o memory accesses to be made on each memory type if (missing_dram_refs > interleave_dram) { missing_dram_refs -= interleave_dram; dram_stalls = interleave_dram; } else { dram_stalls = missing_dram_refs; missing_dram_refs = 0; } if (missing_nvm_refs > interleave_nvm) { missing_nvm_refs -= interleave_nvm; nvm_stalls = interleave_nvm; } else { nvm_stalls = missing_nvm_refs; missing_nvm_refs = 0; } time_dram = 0; time_nvm = 0; // do memory accesses interleaved by dividing the number of accesses in smaller amount // as configured by user force_ldm_stalls((chain_t **)&C_dram, 64LLU, 8, dram_stalls, NELEMS, it_n, &time_dram); force_ldm_stalls((chain_t **)&C_nvm, 64LLU, 8, nvm_stalls, NELEMS, it_n, &time_nvm); total_time_dram_ns += time_dram; total_time_nvm_ns += 
time_nvm; #ifndef NDEBUG printf("%ld DRAM accesses took: %ld ns\n", dram_stalls, time_dram); printf("%ld NVM accesses took: %ld ns\n", nvm_stalls, time_nvm); #endif } clock_gettime(CLOCK_MONOTONIC, &task_time_end); task_time_diff_ns = ((task_time_end.tv_sec * 1000000000) + task_time_end.tv_nsec) - ((task_time_start.tv_sec * 1000000000) + task_time_start.tv_nsec); // the memory latency is the total time divided by the number of accesses for each memory type if (dram_refs > 0) total_time_dram_ns /= dram_refs; else total_time_dram_ns = 0; if (nvm_refs > 0) total_time_nvm_ns /= nvm_refs; else total_time_nvm_ns = 0; printf("DRAM latency: %ld ns\n", total_time_dram_ns); printf("NVM latency: %ld ns\n", total_time_nvm_ns); printf("Measure time: %.3lf ms\n", (double)task_time_diff_ns/1000000.0); printf("Expected time: %.3ld ms\n", ((total_time_dram_ns * dram_refs) + (total_time_nvm_ns * nvm_refs)) / 1000000); for (j=0; j < NCHAINS; j++) { free(C_dram[j]); free(C_nvm[j]); } }