static void *magma_ssytrd_sb2st_parallel_section(void *arg) { magma_int_t my_core_id = ((magma_sbulge_id_data*)arg) -> id; magma_sbulge_data* data = ((magma_sbulge_id_data*)arg) -> data; magma_int_t allcores_num = data -> threads_num; magma_int_t n = data -> n; magma_int_t nb = data -> nb; magma_int_t nbtiles = data -> nbtiles; magma_int_t grsiz = data -> grsiz; magma_int_t Vblksiz = data -> Vblksiz; magma_int_t compT = data -> compT; float *A = data -> A; magma_int_t lda = data -> lda; float *V = data -> V; magma_int_t ldv = data -> ldv; float *TAU = data -> TAU; float *T = data -> T; magma_int_t ldt = data -> ldt; volatile magma_int_t* prog = data -> prog; pthread_barrier_t* barrier = &(data -> barrier); //magma_int_t sys_corenbr = 1; #ifdef ENABLE_TIMER real_Double_t timeB=0.0, timeT=0.0; #endif // with MKL and when using omp_set_num_threads instead of mkl_set_num_threads // it need that all threads setting it to 1. magma_setlapack_numthreads(1); #ifdef MAGMA_SETAFFINITY //#define PRINTAFFINITY #ifdef PRINTAFFINITY affinity_set print_set; print_set.print_affinity(my_core_id, "starting affinity"); #endif affinity_set original_set; affinity_set new_set(my_core_id); int check = 0; int check2 = 0; // bind threads check = original_set.get_affinity(); if (check == 0) { check2 = new_set.set_affinity(); if (check2 != 0) printf("Error in sched_setaffinity (single cpu)\n"); } else { printf("Error in sched_getaffinity\n"); } #ifdef PRINTAFFINITY print_set.print_affinity(my_core_id, "set affinity"); #endif #endif if(compT==1) { /* compute the Q1 overlapped with the bulge chasing+T. * if all_cores_num=1 it call Q1 on GPU and then bulgechasing. * otherwise the first thread run Q1 on GPU and * the other threads run the bulgechasing. * */ if(allcores_num==1) { //========================= // bulge chasing //========================= #ifdef ENABLE_TIMER timeB = magma_wtime(); #endif magma_stile_bulge_parallel(0, 1, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog); #ifdef ENABLE_TIMER timeB = magma_wtime()-timeB; printf(" Finish BULGE timing= %f \n" ,timeB); #endif //========================= // compute the T's to be used when applying Q2 //========================= #ifdef ENABLE_TIMER timeT = magma_wtime(); #endif magma_stile_bulge_computeT_parallel(0, 1, V, ldv, TAU, T, ldt, n, nb, Vblksiz); #ifdef ENABLE_TIMER timeT = magma_wtime()-timeT; printf(" Finish T's timing= %f \n" ,timeT); #endif }else{ // allcore_num > 1 magma_int_t id = my_core_id; magma_int_t tot = allcores_num; //========================= // bulge chasing //========================= #ifdef ENABLE_TIMER if(id == 0) timeB = magma_wtime(); #endif magma_stile_bulge_parallel(id, tot, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog); pthread_barrier_wait(barrier); #ifdef ENABLE_TIMER if(id == 0){ timeB = magma_wtime()-timeB; printf(" Finish BULGE timing= %f \n" ,timeB); } #endif //========================= // compute the T's to be used when applying Q2 //========================= #ifdef ENABLE_TIMER if(id == 0) timeT = magma_wtime(); #endif magma_stile_bulge_computeT_parallel(id, tot, V, ldv, TAU, T, ldt, n, nb, Vblksiz); pthread_barrier_wait(barrier); #ifdef ENABLE_TIMER if (id == 0){ timeT = magma_wtime()-timeT; printf(" Finish T's timing= %f \n" ,timeT); } #endif } // allcore == 1 }else{ // WANTZ = 0 //========================= // bulge chasing //========================= #ifdef ENABLE_TIMER if(my_core_id == 0) timeB = magma_wtime(); #endif magma_stile_bulge_parallel(my_core_id, allcores_num, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog); pthread_barrier_wait(barrier); #ifdef ENABLE_TIMER if(my_core_id == 0){ timeB = magma_wtime()-timeB; printf(" Finish BULGE timing= %f \n" ,timeB); } #endif } // WANTZ > 0 #ifdef MAGMA_SETAFFINITY // unbind threads if (check == 0){ check2 = original_set.set_affinity(); if (check2 != 0) printf("Error in sched_setaffinity (restore cpu list)\n"); } #ifdef PRINTAFFINITY print_set.print_affinity(my_core_id, "restored_affinity"); #endif #endif return 0; }
static void *magma_ssytrd_hb2st_parallel_section(void *arg) { magma_int_t my_core_id = ((magma_sbulge_id_data*)arg) -> id; magma_sbulge_data* data = ((magma_sbulge_id_data*)arg) -> data; magma_int_t allcores_num = data -> threads_num; magma_int_t n = data -> n; magma_int_t nb = data -> nb; magma_int_t nbtiles = data -> nbtiles; magma_int_t grsiz = data -> grsiz; magma_int_t Vblksiz = data -> Vblksiz; magma_int_t compT = data -> compT; float *A = data -> A; magma_int_t lda = data -> lda; float *V = data -> V; magma_int_t ldv = data -> ldv; float *TAU = data -> TAU; float *T = data -> T; magma_int_t ldt = data -> ldt; volatile magma_int_t* prog = data -> prog; pthread_barrier_t* barrier = &(data -> barrier); magma_int_t sys_corenbr = 1; float timeB=0.0, timeT=0.0; #if defined(SETAFFINITY) // bind threads cpu_set_t set; // bind threads CPU_ZERO( &set ); CPU_SET( my_core_id, &set ); sched_setaffinity( 0, sizeof(set), &set) ; #endif if(compT==1) { /* compute the Q1 overlapped with the bulge chasing+T. * if all_cores_num=1 it call Q1 on GPU and then bulgechasing. * otherwise the first thread run Q1 on GPU and * the other threads run the bulgechasing. * */ if(allcores_num==1) { //========================= // bulge chasing //========================= timeB = magma_wtime(); magma_stile_bulge_parallel(0, 1, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog); timeB = magma_wtime()-timeB; printf(" Finish BULGE timing= %f \n" ,timeB); //========================= // compute the T's to be used when applying Q2 //========================= timeT = magma_wtime(); magma_stile_bulge_computeT_parallel(0, 1, V, ldv, TAU, T, ldt, n, nb, Vblksiz); timeT = magma_wtime()-timeT; printf(" Finish T's timing= %f \n" ,timeT); }else{ // allcore_num > 1 magma_int_t id = my_core_id; magma_int_t tot = allcores_num; //========================= // bulge chasing //========================= if(id == 0)timeB = magma_wtime(); magma_stile_bulge_parallel(id, tot, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog); pthread_barrier_wait(barrier); if(id == 0){ timeB = magma_wtime()-timeB; printf(" Finish BULGE timing= %f \n" ,timeB); } //========================= // compute the T's to be used when applying Q2 //========================= if(id == 0)timeT = magma_wtime(); magma_stile_bulge_computeT_parallel(id, tot, V, ldv, TAU, T, ldt, n, nb, Vblksiz); pthread_barrier_wait(barrier); if (id == 0){ timeT = magma_wtime()-timeT; printf(" Finish T's timing= %f \n" ,timeT); } } // allcore == 1 }else{ // WANTZ = 0 //========================= // bulge chasing //========================= if(my_core_id == 0) timeB = magma_wtime(); magma_stile_bulge_parallel(my_core_id, allcores_num, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog); pthread_barrier_wait(barrier); if(my_core_id == 0){ timeB = magma_wtime()-timeB; printf(" Finish BULGE timing= %f \n" ,timeB); } } // WANTZ > 0 #if defined(SETAFFINITY) // unbind threads sys_corenbr = sysconf(_SC_NPROCESSORS_ONLN); CPU_ZERO( &set ); for(magma_int_t i=0; i<sys_corenbr; i++) CPU_SET( i, &set ); sched_setaffinity( 0, sizeof(set), &set) ; #endif return 0; }