void thread_entry(int cid, int nc) { const int R = 8; int m, n, p; uint64_t s = 0xdeadbeefU; if (have_vec) { m = HCBM; n = HCBN; p = HCBK; } else { m = CBM; n = CBN; p = CBK; } t a[m*p]; t b[p*n]; t c[m*n]; for (size_t i = 0; i < m; i++) for (size_t j = 0; j < p; j++) a[i*p+j] = (t)(s = lfsr(s)); for (size_t i = 0; i < p; i++) for (size_t j = 0; j < n; j++) b[i*n+j] = (t)(s = lfsr(s)); memset(c, 0, m*n*sizeof(c[0])); size_t instret, cycles; if (have_vec) { for (int i = 0; i < R; i++) { instret = -rdinstret(); cycles = -rdcycle(); mm_rb_hwacha(m, n, p, a, p, b, n, c, n); instret += rdinstret(); cycles += rdcycle(); } } else { for (int i = 0; i < R; i++) { instret = -rdinstret(); cycles = -rdcycle(); mm(m, n, p, a, p, b, n, c, n); instret += rdinstret(); cycles += rdcycle(); } } asm volatile("fence"); printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n", cid, RBM, RBN, RBK, CBM, CBN, CBK); printf("C%d: %d instructions\n", cid, (int)(instret)); printf("C%d: %d cycles\n", cid, (int)(cycles)); printf("C%d: %d flops\n", cid, 2*m*n*p); printf("C%d: %d Mflops @ 1 GHz\n", cid, 2000*m*n*p/(cycles)); #if 1 for (size_t i = 0; i < m; i++) { for (size_t j = 0; j < n; j++) { t s = 0; for (size_t k = 0; k < p; k++) s += a[i*p+k] * b[k*n+j]; s *= R; if (fabs(c[i*n+j]-s) > fabs(1e-6*s)) { printf("C%d: c[%lu][%lu] %f != %f\n", cid, i, j, c[i*n+j], s); exit(1); } } } #endif barrier(nc); exit(0); }
static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top) { // copy phdrs to user stack size_t stack_top = current.stack_top - current.phdr_size; memcpy((void*)stack_top, (void*)current.phdr, current.phdr_size); current.phdr = stack_top; // copy argv to user stack for (size_t i = 0; i < argc; i++) { size_t len = strlen((char*)(uintptr_t)argv[i])+1; stack_top -= len; memcpy((void*)stack_top, (void*)(uintptr_t)argv[i], len); argv[i] = (void*)stack_top; } // copy envp to user stack const char* envp[] = { // environment goes here }; size_t envc = sizeof(envp) / sizeof(envp[0]); for (size_t i = 0; i < envc; i++) { size_t len = strlen(envp[i]) + 1; stack_top -= len; memcpy((void*)stack_top, envp[i], len); envp[i] = (void*)stack_top; } // align stack stack_top &= -sizeof(void*); struct { long key; long value; } aux[] = { {AT_ENTRY, current.entry}, {AT_PHNUM, current.phnum}, {AT_PHENT, current.phent}, {AT_PHDR, current.phdr}, {AT_PAGESZ, RISCV_PGSIZE}, {AT_SECURE, 0}, {AT_RANDOM, stack_top}, {AT_NULL, 0} }; // place argc, argv, envp, auxp on stack #define PUSH_ARG(type, value) do { \ *((type*)sp) = (type)value; \ sp += sizeof(type); \ } while (0) #define STACK_INIT(type) do { \ unsigned naux = sizeof(aux)/sizeof(aux[0]); \ stack_top -= (1 + argc + 1 + envc + 1 + 2*naux) * sizeof(type); \ stack_top &= -16; \ long sp = stack_top; \ PUSH_ARG(type, argc); \ for (unsigned i = 0; i < argc; i++) \ PUSH_ARG(type, argv[i]); \ PUSH_ARG(type, 0); /* argv[argc] = NULL */ \ for (unsigned i = 0; i < envc; i++) \ PUSH_ARG(type, envp[i]); \ PUSH_ARG(type, 0); /* envp[envc] = NULL */ \ for (unsigned i = 0; i < naux; i++) { \ PUSH_ARG(type, aux[i].key); \ PUSH_ARG(type, aux[i].value); \ } \ } while (0) STACK_INIT(uintptr_t); if (current.cycle0) { // start timer if so requested current.time0 = rdtime(); current.cycle0 = rdcycle(); current.instret0 = rdinstret(); } trapframe_t tf; init_tf(&tf, current.entry, stack_top); __clear_cache(0, 0); write_csr(sscratch, kstack_top); start_user(&tf); }