/** =========================================================================
 *  Soft reset
 *
 *  Reads switch register 6 of the node identified by get_core_id() and
 *  writes the value back unchanged, first to node 0 and then to the local
 *  node.
 *
 *  NOTE(review): presumably the write-back to register 6 is what triggers
 *  the reset, and node 0 is written first so that the local node is the
 *  last one to go down - confirm against the hardware documentation.
 **/
void chip_soft_reset(void)
{
    unsigned reg_value;

    read_sswitch_reg(get_core_id(), 6, &reg_value);

    write_sswitch_reg(0, 6, reg_value);
    write_sswitch_reg(get_core_id(), 6, reg_value);
}
///////////////////////////////////////////////////////// // main testing function ///////////////////////////////////////////////////////// int main(int argc, const char * const argv[]) { (void)argc; (void)argv; boolean_T pass; int coreid, k; float filt[200]; float tmp[2]; ///////////////////////////////////////////////////////// // main test loop // each core loops over a kernel instance ///////////////////////////////////////////////////////// coreid = get_core_id(); printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid); if (coreid>3) coreid=coreid-4; synch_barrier(); perf_begin(); for(k = 0; k < getKernelIts(); k++) { // matlab kernel mlButter(fv1, *(float (*)[200])&fv0[200 * coreid], filt); } synch_barrier(); perf_end(); ///////////////////////////////////////////////////////// // check results ///////////////////////////////////////////////////////// synch_barrier(); tmp[0] = sum(filt); tmp[1] = var(filt); pass = checkRes(tmp, *(float (*)[4])&fv2[coreid << 2]); flagPassFail(pass, get_core_id()); ///////////////////////////////////////////////////////// // synchronize and exit ///////////////////////////////////////////////////////// return !pass; }
/**
 * Fill in the shrink-image request for one core.
 *
 * With _NO_IPC_TEST_ defined the shrink is executed directly through
 * shrinkImage_on_core() and CoreNo is not used. Otherwise the arguments are
 * stored in the message slot p_queue_msg[CoreNo]; nothing is sent from here.
 */
void register_shrink_on_core(const uint8_T *Img, const uint32_T SubArea[4], const uint32_t yStart, const uint32_t yEnd, const uint32_t yEvenOdd, const uint32_t yHeight, const uint32_t xWidth, const uint32_t xWidthSmall, uint8_T *ImgSmall, const uint32_T CoreNo)
{
#ifdef _NO_IPC_TEST_
    shrinkImage_on_core(Img, SubArea, yStart, yEnd, yEvenOdd,
                        yHeight, xWidth, xWidthSmall, ImgSmall);
#else
    /*
     * The Shr_* names are members of the unions declared in
     * struct processing_info; see those declarations for the mapping.
     * The stores are kept in their original order for that reason.
     */
    process_message_t *const msg = p_queue_msg[CoreNo];

    msg->core_id              = get_core_id(CoreNo);
    msg->info.processing_type = pt_shrink;

    memcpy(msg->info.Shr_SubArea, SubArea, 4 * sizeof(uint32_T));

    msg->info.Shr_Img         = Img;
    msg->info.Shr_ImgSmall    = ImgSmall;
    msg->info.Shr_yStart      = yStart;
    msg->info.Shr_yEnd        = yEnd;
    msg->info.Shr_yEvenOdd    = yEvenOdd;
    msg->info.Shr_yHeight     = yHeight;
    msg->info.Shr_xWidth      = xWidth;
    msg->info.Shr_xWidthSmall = xWidthSmall;
#endif
}
/**
 * Tries to determine the physical package a cpu belongs to.
 *
 * cpu == -1 means "the cpu reported by get_cpu()". Returns the package id,
 * or -1 if the cpu is unknown or none of the strategies below produced an
 * answer.
 */
int get_pkg(int cpu)
{
    char buffer[10];
    int pkg = -1;

    if (cpu == -1) {
        cpu = get_cpu();
    }
    if (cpu == -1) {
        return pkg;
    }

    /* preferred source: sysfs topology information */
    sprintf(path, "/sys/devices/system/cpu/cpu%i/topology/physical_package_id", cpu);
    if (read_file(path, buffer, sizeof(buffer))) {
        pkg = atoi(buffer);
    }
    if (pkg != -1) {
        return pkg;
    }

    /* fallbacks if sysfs is not working; the first one that applies wins */

    /* a single CPU or a single package: the answer can only be 0 */
    if ((num_cpus() == 1) || (num_packages() == 1)) {
        return 0;
    }

    /* physical package id from /proc/cpuinfo */
    if (!get_proc_cpuinfo_data("physical id", buffer, cpu)) {
        return atoi(buffer);
    }

    /* as many packages as cpus: assume pkg_id = cpu_id */
    if (num_cpus() == num_packages()) {
        return cpu;
    }

    /* one core per package: assume pkg_id = core_id */
    if (num_cores_per_package() == 1) {
        return get_core_id(cpu);
    }

    /* as many packages as numa nodes: assume pkg_id = numa node */
    if (num_numa_nodes() == num_packages()) {
        return get_numa_node(cpu);
    }

    /*
     * NOTE pkg_id in UMA systems with multiple sockets and more than one
     * core per socket can't be determined without correct topology
     * information in sysfs.
     */
    return pkg;
}
/**
 * Barrier built from a reduction (mp_reduce) followed by a broadcast
 * (mp_send_ab on the sequentializer, mp_receive_forward on everyone else).
 *
 * @param measurement  Optional. When non-NULL AND QRM_DBG_ENABLED is defined,
 *                     receives bench_tsc() taken between the reduction and
 *                     the broadcast. Without QRM_DBG_ENABLED it is never
 *                     written.
 *                     NOTE(review): confirm that callers do not rely on the
 *                     timestamp in non-debug builds.
 *
 * _num_barrier is only incremented in QRM_DBG_ENABLED builds; in other
 * builds its current value is passed through unchanged.
 */
void mp_barrier(cycles_t *measurement)
{
    coreid_t tid = get_core_id();

#ifdef QRM_DBG_ENABLED
    // Debug builds number the barriers so that the values travelling
    // through the reduction and the broadcast can be cross-checked.
    ++_num_barrier;
    uint32_t _num_barrier_recv = _num_barrier;
#endif

    debug_printfff(DBG__REDUCE, "barrier enter #%d\n", _num_barrier);

    // Reduction
    // --------------------------------------------------
    // The call is always made; only debug builds keep its result.
#ifdef QRM_DBG_ENABLED
    uint32_t _tmp =
#endif
    mp_reduce(_num_barrier);

#ifdef QRM_DBG_ENABLED
    // Sanity check: on the sequentializer the reduced value must equal
    // get_num_threads() * _num_barrier.
    if (tid==get_sequentializer()) {
        assert (_tmp == get_num_threads()*_num_barrier);
    }
    if (measurement)
        *measurement = bench_tsc();
#endif

    // Broadcast
    // --------------------------------------------------
    if (tid == get_sequentializer()) {
        mp_send_ab(_num_barrier);
    } else {
        // As above: the receive always happens, the value is only kept
        // in debug builds.
#ifdef QRM_DBG_ENABLED
        _num_barrier_recv =
#endif
        mp_receive_forward(0);
    }

#ifdef QRM_DBG_ENABLED
    // The received barrier number must match the local one.
    if (_num_barrier_recv != _num_barrier) {
        debug_printf("ASSERTION fail %d != %d\n", _num_barrier_recv, _num_barrier);
    }
    assert (_num_barrier_recv == _num_barrier);

    // Add a shared memory barrier to absolutely make sure that
    // everybody finished the barrier before leaving - this simplifies
    // debugging, as the program will get stuck if barriers are
    // broken, rather than some threads (wrongly) continuing and
    // causing problems somewhere else
    // NOTE(review): the disabled code below spells the channel DBG_REDUCE
    // while the rest of this function uses DBG__REDUCE - check before enabling.
#if 0 // Enable separately
    debug_printfff(DBG_REDUCE, "finished barrier .. waiting for others\n");
    shl_barrier_shm(get_num_threads());
#endif
#endif

    debug_printfff(DBG__REDUCE, "barrier complete #%d\n", _num_barrier);
}
/*
 * Test entry point: only core 0 runs the suite; every core returns 0.
 */
int main()
{
    if (get_core_id() != 0) {
        return 0;
    }

    run_suite(testcases);
    return 0;
}
/*
 * Mark every core reported by get_core_count() as a possible CPU.
 *
 * The count is clamped to NR_CPUS before it is used, so set_cpu_possible()
 * is never called with a CPU number the kernel was not configured for.
 * The core count and the id of the calling core are logged; both are
 * unsigned, hence %u.
 */
void __init smp_init_cpus(void)
{
	unsigned i;
	unsigned int ncpus = get_core_count();
	unsigned int core_id = get_core_id();

	pr_info("%s: Core Count = %u\n", __func__, ncpus);
	pr_info("%s: Core Id = %u\n", __func__, core_id);

	/* Hardware may report more cores than this kernel build supports. */
	if (ncpus > NR_CPUS) {
		ncpus = NR_CPUS;
		pr_info("%s: limiting core count by %u\n", __func__, ncpus);
	}

	for (i = 0; i < ncpus; ++i)
		set_cpu_possible(i, true);
}
int main() { int coreid, i, error = 0; coreid = get_core_id(); // set start value of jrand function next = 1; if (coreid == 0) { int f=0; initialize_aes(); // 1 iterations of enc+dec for (f=0;f<1;f++){ compute_aes(); //check output for (i = 0; i < 16; i++){ if (encoutbuf[i] != check_encoutbuf[i]) { error+=1; /* printf("Error occured in encryption\n",0,0,0,0); */ //printf("encrypted: %d, expected: %d\n",encoutbuf[i],check_encoutbuf[i],0,0); } if (decoutbuf[i] != check_decoutbuf[i]) { error+=1; /* printf("Error occured in decryption\n",0,0,0,0); */ //printf("decrypted: %d, expected: %d\n",decoutbuf[i],check_decoutbuf[i],0,0); } } } int *DEFAULT_RESULT; if (error == 0) { //printf ("OOOOOOK!!!!!!\n",0,0,0,0); DEFAULT_RESULT = (int*)0x10003ffc; *(DEFAULT_RESULT) = 1; } else { //printf ("Not OK!! %d\n",error,0,0,0); DEFAULT_RESULT = (int*)0x10003ffc; *(DEFAULT_RESULT) = error; } } synch_barrier(); eoc(0); }
/*
 * Mark every core reported by get_core_count() as a possible CPU, limited
 * to NR_CPUS.
 *
 * The core count, the id of the calling core and (if applied) the limit are
 * logged; all three are unsigned int, hence %u.
 */
void __init smp_init_cpus(void)
{
	unsigned i;
	unsigned int ncpus = get_core_count();
	unsigned int core_id = get_core_id();

	pr_info("%s: Core Count = %u\n", __func__, ncpus);
	pr_info("%s: Core Id = %u\n", __func__, core_id);

	/* Never register more CPUs than the kernel was built for. */
	if (ncpus > NR_CPUS) {
		ncpus = NR_CPUS;
		pr_info("%s: limiting core count by %u\n", __func__, ncpus);
	}

	for (i = 0; i < ncpus; ++i)
		set_cpu_possible(i, true);
}
/**
 * Brief: Queues a task on the core chosen by get_core_id() and, if that
 *        core is not marked as running, starts a detached thread for it.
 * Param: The task to be executed (moved into the core's queue).
 * Return: The id assigned to the task by get_task_id().
 */
std::size_t add_task(TASK task) {
  const std::size_t target_core = get_core_id();  // Core picked for this task.
  const std::size_t new_id = get_task_id();       // Id assigned to this task.

  task_pack<TASK> pack;
  pack.id = new_id;
  pack.task = std::move(task);

  // Register a default-constructed result record before the task is queued.
  set_result(new_id, T(), false);

  auto &core = cores_[target_core];
  core.add_task(std::move(pack));

  if (core.running) {
    return new_id;
  }

  // The core is idle: mark it running and start a fresh, detached thread.
  core.running = true;
  core_threads_[target_core].reset(
    new std::thread(&Scheduler<T, TASK>::run_core, this, target_core)
  );
  core_threads_[target_core]->detach();

  return new_id;
}
/**
 * Brief: Queues a task on the core chosen by get_core_id() and, if that
 *        core is not marked as running, starts a detached thread for it.
 * Param: The task to be executed (moved into the core's queue).
 * Return: The id assigned to the task by get_task_id().
 */
std::size_t add_task(TASK task) {
  const std::size_t target_core = get_core_id();  // Core picked for this task.
  const std::size_t new_id = get_task_id();       // Id assigned to this task.

  task_pack<TASK> pack;
  pack.id = new_id;
  pack.task = std::move(task);

  // Register a record for this task before it is queued.
  set_result(new_id, 0, false);

  auto &core = cores_[target_core];
  core.add_task(std::move(pack));

  if (core.running) {
    return new_id;
  }

  // The core is available, so start executing immediately on a detached
  // thread. (Original note: the mutex will be unlocked at the end of the
  // run_core method.)
  core.running = true;
  core_threads_[target_core].reset(
    new std::thread(&Scheduler<void, TASK>::run_core, this, target_core)
  );
  core_threads_[target_core]->detach();

  return new_id;
}
/** * Cachec invalidate zeroized memory on all cores (happens between the calculations / after a calculation result and at boot time) */ void cacheinval_on_core(const uint8_T *TBuf, const uint32_T TSize, const uint32_T number_of_cores) { #ifndef _NO_IPC_TEST_ int32_t i=0; //To see which entries have to be placed to which parameter look at the union declarations in struct processing_info. //Very ugly, I know. But this software is only a scientific proof of concept and it's near its end, hence this kind of code can be written now during the last days ... process_message_t * p_msg = 0; for (i = CORE_AMOUNT-1; i >= (int)(CORE_AMOUNT-number_of_cores); i-- ) { p_msg = p_queue_msg[i]; p_msg->core_id = get_core_id(i); p_msg->info.processing_type = pt_cacheinval; p_msg->info.Tvec = TBuf; p_msg->info.Tsize = TSize; } send_to_cores(pt_cacheinval, number_of_cores, NULL, NULL, NULL); #endif }
/**
 * Fill in the ssd()/jacobian() request for one core.
 *
 * All arguments are stored in the message slot p_queue_msg[CoreNo]: the
 * small arrays (w, BoundBox, MarginAddon, DSPRange) are copied by value,
 * the image vectors are passed as pointer + allocatedSize + offset. The
 * current value of g_NewImageDataArrived is recorded as well. Nothing is
 * sent from here.
 */
void prepare_ipc_message(const processing_type_e ProcessingType, const real32_T w[3], const uint32_T BoundBox[4], const uint32_T MarginAddon[3], const real32_T DSPRange[4], const emxArray_uint8_T *Tvec, const uint32_T TOffset, const emxArray_uint8_T *Rvec, const uint32_T ROffset, const uint32_T d, const uint32_T CoreNo, const uint32_T i_from, const uint32_T i_to)
{
    process_message_t *const msg = p_queue_msg[CoreNo];

    msg->core_id              = get_core_id(CoreNo);
    msg->info.processing_type = ProcessingType;

    /* fixed-size parameter arrays, copied by value */
    memcpy(msg->info.w,           w,           3 * sizeof(real32_T));
    memcpy(msg->info.BoundBox,    BoundBox,    4 * sizeof(uint32_T));
    memcpy(msg->info.MarginAddon, MarginAddon, 3 * sizeof(uint32_T));
    memcpy(msg->info.DSPRange,    DSPRange,    4 * sizeof(real32_T));

    /* template image */
    msg->info.Tvec    = &Tvec->data[0];
    msg->info.Tsize   = Tvec->allocatedSize;
    msg->info.TOffset = TOffset;

    /* reference image */
    msg->info.Rvec    = &Rvec->data[0];
    msg->info.Rsize   = Rvec->allocatedSize;
    msg->info.ROffset = ROffset;

    /* remaining scalars */
    msg->info.d      = d;
    msg->info.i_from = i_from;
    msg->info.i_to   = i_to;

    msg->info.NewImageDataArrived = g_NewImageDataArrived;
}
///////////////////////////////////////////////////////// // main testing function ///////////////////////////////////////////////////////// int main(int argc, const char * const argv[]) { (void)argc; (void)argv; int coreid; int it; int k; boolean_T pass, flag; float y[100]; int ix; float b_y; float xbar; float r; float c_y; float tmp[2]; float golden[4]; ///////////////////////////////////////////////////////// // main test loop // each core loops over a kernel instance ///////////////////////////////////////////////////////// coreid = get_core_id(); printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid); if (coreid>3) coreid=coreid-4; synch_barrier(); perf_begin(); for(it = 0; it < KERNEL_ITS; it++) { // matlab kernel for (ix = 0; ix < 100; ix++) { y[ix] = (real32_T)fLog(fv0[ix + 100 * coreid]); } } synch_barrier(); perf_end(); synch_barrier(); ///////////////////////////////////////////////////////// // check results ///////////////////////////////////////////////////////// pass = true; b_y = y[0]; ix = 0; xbar = y[0]; for (k = 0; k < 99; k++) { b_y += y[k + 1]; ix++; xbar += y[ix]; } xbar *= 1.0F/100.0F; ix = 0; r = y[0] - xbar; c_y = r * r; for (k = 0; k < 99; k++) { ix++; r = y[ix] - xbar; c_y += r * r; } c_y *= 1.0F/99.0F; tmp[0] = b_y; tmp[1] = c_y; pass = true; for (ix = 0; ix < 2; ix++) { for (k = 0; k < 2; k++) { golden[k + (ix << 1)] = fv1[(k + (ix << 1)) + (coreid << 2)]; } flag = true; flag = flag && (tmp[ix] <= golden[ix << 1]); flag = flag && (tmp[ix] >= golden[1 + (ix << 1)]); printErrors(!flag, ix, tmp[ix] ,golden[(ix << 1)] ,golden[1 + (ix << 1)]); pass = pass && flag; } flagPassFail(pass, get_core_id()); synch_barrier(); ///////////////////////////////////////////////////////// // synchronize and exit ///////////////////////////////////////////////////////// return !pass; }
#include "memTester.h"

/*
 * memTester thread function (sl_def/sl_enddef macro family).
 *
 * Takes one shared parameter, syscall_gateway. It copies it into a local,
 * passes that to syscall_target(), prints a banner with the thread index,
 * core id and place id, and then calls run(0).
 *
 * NOTE(review): the second argument 2 of the output_* calls is presumably
 * the output stream - confirm against the output_* declarations.
 */
sl_def(memTester,, sl_shparm(sl_place_t, syscall_gateway)) {
    // Read the shared parameter and register it as the syscall target.
    sl_place_t syscall_gateway = sl_getp(syscall_gateway);
    syscall_target(&syscall_gateway);

    // i is this thread's index; it is only used for the banner below.
    sl_index(i);

    unsigned pid = get_current_place();
    unsigned core_id = get_core_id();

    // Banner: "MemTester (thread <i>) now running on core <core_id>, place_id <pid>"
    output_string("MemTester (thread ", 2);
    output_uint((unsigned int)i, 2);
    output_string(") now running on core ", 2);
    output_uint(core_id, 2);
    output_string(", place_id ", 2);
    output_hex(pid, 2);
    output_char('\n', 2);
    output_char('\n', 2);
    output_char('\n', 2);

    run(0);

    //Let the compiler know we care
    // (the shared parameter is written back with the value it already has)
    sl_setp(syscall_gateway, sl_getp(syscall_gateway));
}
sl_enddef

// Space reserved for testing small pages (4KiB):
// 0x440000 - 0x550000 inclusive
// Smallest page = 4KB = 0x440000 : 0x440FFF
//
///////////////////////////////////////////////////////// // main testing function ///////////////////////////////////////////////////////// int main(int argc, const char * const argv[]) { (void)argc; (void)argv; boolean_T pass, flag; int coreid; float omega, ampl, runningPhase; float sig[200]; int k; int i; float y; int ix; float xbar; float r; float b_y; float tmp[2]; float golden[4]; boolean_T c_y; boolean_T exitg1; ///////////////////////////////////////////////////////// // main test loop // each core loops over a kernel instance ///////////////////////////////////////////////////////// coreid = get_core_id(); printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid); if (coreid>3) coreid=coreid-4; synch_barrier(); perf_begin(); omega = fv1[coreid]; ampl = fv2[coreid]; for(k = 0; k < getKernelIts(); k++) { runningPhase = omega; // matlab kernel for (i = 0; i < 200; i++) { sig[i] = ampl * fSin(runningPhase); runningPhase += omega; if(runningPhase > pi2[0]) { runningPhase -= pi2[0]; } } } synch_barrier(); perf_end(); ///////////////////////////////////////////////////////// // check results ///////////////////////////////////////////////////////// synch_barrier(); y = sig[0]; ix = 0; xbar = sig[0]; for (k = 0; k < 199; k++) { y += sig[k + 1]; ix++; xbar += sig[ix]; } xbar = fDiv(xbar,200.0F); ix = 0; r = sig[0] - xbar; b_y = r * r; for (k = 0; k < 199; k++) { ix++; r = sig[ix] - xbar; b_y += r * r; } b_y = fDiv(b_y,199.0F); tmp[0] = y; tmp[1] = b_y; pass = true; for (k = 0; k < 2; k++) { for (ix = 0; ix < 2; ix++) { golden[ix + (k << 1)] = fv0[(ix + (k << 1)) + (coreid << 2)]; } flag = true; flag = flag && (tmp[k] <= golden[k << 1]); flag = flag && (tmp[k] >= golden[1 + (k << 1)]); printErrors(!flag, k, tmp[k] ,golden[k << 1], golden[1 + (k << 1)]); pass = pass && flag; } flagPassFail(pass, get_core_id()); ///////////////////////////////////////////////////////// // synchronize and exit ///////////////////////////////////////////////////////// 
return !pass; }
int main() { /* Variable Definition */ int coreid; int i,j; int start_frame; int index; int time; coreid = get_core_id(); if (coreid == 0) { // initialization NB_BLOB = 0 ; start_frame = 0; for(i=0;i<NFRAME;i++){ // for each frame //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%// //%%%%%%%%%%%%%%% * DATA TRANSFER FROM L2 TO L1 * %%%%%%%%%%%%%%%%%%%%%%%%%%%%%// index = 0; N_pixel = N_pixelL2[i]; reset_timer(); start_timer(); for(j=start_frame; j<start_frame + N_pixel*2; j++){ pixel[index++]=pixelL2[j]; } stop_timer(); printf("FRAME: %d (%d-%d) Transfer Time: %d\n",i,start_frame,j,get_time()); start_frame = j; //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%// //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%// //%%%%%%%%%%%%%%% * PROCESSING * %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%// init_data(); reset_timer(); start_timer(); blob_formation(); stop_timer(); printf("Blob Formation Time: %d\n",get_time(),0,0,0); reset_timer(); start_timer(); prevBlob_filter(); stop_timer(); printf("Filtering prev Blob List Time: %d\n",get_time(),0,0,0); reset_timer(); start_timer(); newBlob_filter(); stop_timer(); printf("Filtering new Blob List Time: %d\n",get_time(),0,0,0); reset_timer(); start_timer(); blob_merge(); stop_timer(); printf("Blob Merging Time: %d\n",get_time(),0,0,0); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%// //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%// //%%%%%%%%%%%%%%% * CHECKSUM * %%%%%%%%%%%%%%%%%%%%%%%%%%%%%// printf("FRAME = %d\n",i,0,0,0); printf("NB_BLOB = %d\n",NB_BLOB,0,0,0); for(j=0;j<NB_BLOB;j++){ printf("Blob %d: centroid = (%d,%d), weight = %d, ",j,BLOB_LIST[j].xc,BLOB_LIST[j].yc,BLOB_LIST[j].W); printf("xmax = %d, xmin = %d, ymax = %d, ymin = %d\n",BLOB_LIST[j].xmax, BLOB_LIST[j].xmin, BLOB_LIST[j].ymax, BLOB_LIST[j].ymin); if(BLOB_LIST[j].xc == 
results[(i*B_MAX+j)*6 ]) printf("OK xc!\t",0,0,0,0); else printf("FAIL xc!\t",0,0,0,0); if(BLOB_LIST[j].yc == results[(i*B_MAX+j)*6+1 ]) printf("OK yc!\t",0,0,0,0); else printf("FAIL yc!\t",0,0,0,0); if(BLOB_LIST[j].xmax == results[(i*B_MAX+j)*6+2 ]) printf("OK xmax!\t",0,0,0,0); else printf("FAIL xmax!\t",0,0,0,0); if(BLOB_LIST[j].xmin == results[(i*B_MAX+j)*6+3 ]) printf("OK xmin!\t",0,0,0,0); else printf("FAIL xmin!\t",0,0,0,0); if(BLOB_LIST[j].ymax == results[(i*B_MAX+j)*6+4 ]) printf("OK ymax!\t",0,0,0,0); else printf("FAIL ymax!\t",0,0,0,0); if(BLOB_LIST[j].ymin == results[(i*B_MAX+j)*6+5 ]) printf("OK ymin!\n",0,0,0,0); else printf("FAIL ymin!\n",0,0,0,0); } printf("\n\n",0,0,0,0); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%// } eoc(0); } }
/*
 * Advance the rand_r() state at *p once for every core id below the calling
 * core's id, so core k performs k draws.
 */
void init_rng(unsigned *p)
{
    unsigned drawn = 0;

    while (drawn < get_core_id()) {
        rand_r(p);
        ++drawn;
    }
}
/*
 * Duplicate the mappings of src_ctx into dst_ctx.
 *
 * Walks every mapping of src_ctx with src_ctx->lock held for writing and
 * appends a corresponding new mapping to dst_ctx->mapping_list:
 *   - read-only mappings (PROT_WRITE clear) share the source object, whose
 *     refcnt is incremented;
 *   - writable mappings get two new private objects chained to the original
 *     object: the source mapping is re-pointed at the first, the new mapping
 *     uses the second.
 * Finishes with a global TLB flush.
 *
 * Returns 0 on success, -1 if an object or mapping could not be allocated
 * (the lock is released first).
 *
 * NOTE(review): on the -1 paths nothing done so far is undone. The refcnt
 * increment, objects already obtained from object_new(), the re-pointing of
 * pmap->object and mappings already added to dst_ctx all stay in place.
 * Check whether the caller tears the contexts down.
 * NOTE(review): only src_ctx->lock is taken here; dst_ctx is modified
 * without taking its lock - presumably it is not yet visible to anyone else.
 */
int mm_ctx_dup(mm_context *dst_ctx, mm_context *src_ctx) {
    /* Copy the src ctx to the dst creating mappings as we go
     *
     * Return -1 if there is a problem
     */
    struct mm_mapping *pmap;
    int err;    /* not used in this function */

    kdebug("MM_CTX_DUP core = %d\n", get_core_id());
    mm_context_dump(src_ctx);
    mm_context_dump(dst_ctx);

    rwlock_wrlock(&src_ctx->lock);
    pmap = first_mapping(src_ctx);

    while (pmap) {
        struct mm_object *new_obj1, *new_obj2;
        struct mm_mapping *new_map;

        if (!(pmap->prot & PROT_WRITE)) {
            /* This map is read-only - share the existing object */
            new_obj1 = pmap->object;
            ++new_obj1->refcnt;

            /* Create a new mapping */
            if ((new_map = mapping_new(pmap->start_addr, pmap->length, pmap->prot, pmap->fd, new_obj1)) == NULL) {
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }
        } else {
            /* This map is writable - make a new object */
            /* Create an mm_object */
            /* TODO: COW */
            /*
             * Create 2 new objects
             * Point them at the existing obj
             * Create new map
             * Point each map at an obj
             */
            if ((new_obj1 = object_new()) == NULL) {
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }
            if ((new_obj2 = object_new()) == NULL) {
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }
            /* Both new objects are private and chain to the original one */
            new_obj1->chain = pmap->object;
            new_obj1->share_type = share_private;
            new_obj2->chain = pmap->object;
            new_obj2->share_type = share_private;
            //pmap->object->share_type = cow;

            /* Point the original map at new_obj1 */
            pmap->object = new_obj1;

            /* Create a new mapping to point at new_obj2 */
            if ((new_map = mapping_new(pmap->start_addr, pmap->length, pmap->prot, pmap->fd, new_obj2)) == NULL) {
                rwlock_unlock(&src_ctx->lock);
                return -1;
            }
            /* The source mapping now refers to a different object */
            mm_change_commit(src_ctx, pmap, PM_USER);
        }
        /* Hand the new mapping over to the destination context */
        dst_ctx->mapping_list.add_tail(new_map);
        mm_check_uncommit(dst_ctx, new_map);

        /* Go to the next parent mapping */
        pmap = next_mapping(src_ctx, pmap);
    }
    cpu_tlb_flush_global();
    rwlock_unlock(&src_ctx->lock);
    // kdebug("MM_CTX_DUP done\n");
    // mm_context_dump(src_ctx);
    // mm_context_dump(dst_ctx);
    // pagestruct_audit();
    return 0;
}
///////////////////////////////////////////////////////// // main testing function ///////////////////////////////////////////////////////// int main(int argc, const char * const argv[]) { (void)argc; (void)argv; int coreid, k; boolean_T pass; float V[25]; float s[5]; float U[25]; int b_k; float y[25]; float b_y; float c_y; float d_y; float tmp[3]; init_fp_regs(); ///////////////////////////////////////////////////////// // main test loop // each core loops over a kernel instance ///////////////////////////////////////////////////////// coreid = get_core_id(); printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid); if (coreid>3) coreid=coreid-4; synch_barrier(); perf_begin(); for(k = 0; k < getKernelIts(); k++) { // call matlab kernel eml_xgesvd(*(float (*)[25])&fv0[25 * coreid], U, s, V); } synch_barrier(); perf_end(); ///////////////////////////////////////////////////////// // check results ///////////////////////////////////////////////////////// synch_barrier(); for (b_k = 0; b_k < 25; b_k++) { y[b_k] = fAbs(U[b_k]); } b_y = y[0]; c_y = s[0]; for (b_k = 0; b_k < 4; b_k++) { c_y += s[b_k + 1]; } for (b_k = 0; b_k < 24; b_k++) { b_y += y[b_k + 1]; } for (b_k = 0; b_k < 25; b_k++) { y[b_k] = fAbs(V[b_k]); } d_y = y[0]; for (b_k = 0; b_k < 24; b_k++) { d_y += y[b_k + 1]; } tmp[0] = b_y; tmp[1] = c_y; tmp[2] = d_y; pass = true; for (b_k = 0; b_k < 3; b_k++) { pass = pass && (tmp[b_k] <= fv1[(0 + (b_k << 1)) + 6 * coreid]); pass = pass && (tmp[b_k] >= fv1[(1 + (b_k << 1)) + 6 * coreid]); } flagPassFail(pass, get_core_id()); synch_barrier(); ///////////////////////////////////////////////////////// // synchronize and exit ///////////////////////////////////////////////////////// return !pass; }
int main() { /* Initialize the RNG */ unsigned rng, i, trials = TRIALS / get_num_cores(), x, y, active; rng = 0; init_rng(&rng); barrier(&b0); /* Do the simulation */ for (i = 0; i < trials; ++i) { x = rand_mc(&rng) / ((1u<<31)/10000); y = rand_mc(&rng) / ((1u<<31)/10000); if (x * x + y * y <= 100000000) count[get_core_id()]++; } printf("core %u: %u\n", get_core_id(), count[get_core_id()]); barrier(&b1); if (get_core_id() == 0) barrier_init(&b0); barrier(&b2); if (get_core_id() == 0) barrier_init(&b1); /* Do the final reduction */ for (active = get_num_cores()/2; active > 0; active /= 2) { if (get_core_id() < active) { unsigned idx0 = get_core_id(), idx1 = get_core_id() + active; count[idx0] = count[idx0] + count[idx1]; printf("%u active cores, sum = %u\n", active, count[idx0]); } barrier(&b0); if (get_core_id() == 0) barrier_init(&b2); barrier(&b1); if (get_core_id() == 0) barrier_init(&b0); barrier(&b2); if (get_core_id() == 0) barrier_init(&b1); } if (get_core_id() == 0) { unsigned pi_whole = count[0]*4/TRIALS, pi_frac = count[0]*4%TRIALS/(TRIALS/100); printf("pi is approximately %u.%02u\n", pi_whole, pi_frac); } return 0; }
///////////////////////////////////////////////////////// // main testing function ///////////////////////////////////////////////////////// int main(int argc, const char * const argv[]) { (void)argc; (void)argv; int coreid; int it; boolean_T pass; boolean_T flag; float y[10]; int ix; float b_y; int b_k; float xbar; float r; float c_y; float check[2]; float golden[4]; ///////////////////////////////////////////////////////// // main test loop // each core loops over a kernel instance ///////////////////////////////////////////////////////// coreid = get_core_id(); printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid); if (coreid>3) coreid=coreid-4; synch_barrier(); perf_begin(); for(it = 0; it < getKernelIts(); it++) { // matlab kernel for (ix = 0; ix < 10; ix++) { b_y = 0.0F; for (b_k = 0; b_k < 10; b_k++) { b_y += fv1[(ix + 10 * b_k) + 100 * coreid] * fv0[b_k + 10 * coreid]; } y[ix] = b_y + fv3[coreid] * fv2[ix + 10 * coreid]; } } synch_barrier(); perf_end(); synch_barrier(); ///////////////////////////////////////////////////////// // check results ///////////////////////////////////////////////////////// b_y = y[0]; ix = 0; xbar = y[0]; for (b_k = 0; b_k < 9; b_k++) { b_y += y[b_k + 1]; ix++; xbar += y[ix]; } xbar *= 1.0F/10.0F; ix = 0; r = y[0] - xbar; c_y = r * r; for (b_k = 0; b_k < 9; b_k++) { ix++; r = y[ix] - xbar; c_y += r * r; } c_y *= 1.0F/9.0F; check[0] = b_y; check[1] = c_y; pass = true; for (ix = 0; ix < 2; ix++) { for (b_k = 0; b_k < 2; b_k++) { golden[b_k + (ix << 1)] = fv4[(b_k + (ix << 1)) + (coreid << 2)]; } flag = true; flag = pass && (check[ix] <= golden[ix << 1]); flag = pass && (check[ix] >= golden[1 + (ix << 1)]); printErrors(!flag, ix, check[ix], golden[ix<<1], golden[1+(ix<<1)]); pass = pass && flag; } flagPassFail(pass, get_core_id()); ///////////////////////////////////////////////////////// // synchronize and exit ///////////////////////////////////////////////////////// return !pass; }