예제 #1
0
static void *magma_ssytrd_sb2st_parallel_section(void *arg)
{
    magma_int_t my_core_id  = ((magma_sbulge_id_data*)arg) -> id;
    magma_sbulge_data* data = ((magma_sbulge_id_data*)arg) -> data;

    magma_int_t allcores_num   = data -> threads_num;
    magma_int_t n              = data -> n;
    magma_int_t nb             = data -> nb;
    magma_int_t nbtiles        = data -> nbtiles;
    magma_int_t grsiz          = data -> grsiz;
    magma_int_t Vblksiz        = data -> Vblksiz;
    magma_int_t compT          = data -> compT;
    float *A         = data -> A;
    magma_int_t lda            = data -> lda;
    float *V         = data -> V;
    magma_int_t ldv            = data -> ldv;
    float *TAU       = data -> TAU;
    float *T         = data -> T;
    magma_int_t ldt            = data -> ldt;
    volatile magma_int_t* prog = data -> prog;

    pthread_barrier_t* barrier = &(data -> barrier);

    //magma_int_t sys_corenbr    = 1;

    #ifdef ENABLE_TIMER
    real_Double_t timeB=0.0, timeT=0.0;
    #endif

    // with MKL and when using omp_set_num_threads instead of mkl_set_num_threads
    // it need that all threads setting it to 1.
    magma_setlapack_numthreads(1);

#ifdef MAGMA_SETAFFINITY
//#define PRINTAFFINITY
#ifdef PRINTAFFINITY
    affinity_set print_set;
    print_set.print_affinity(my_core_id, "starting affinity");
#endif
    affinity_set original_set;
    affinity_set new_set(my_core_id);
    int check  = 0;
    int check2 = 0;
    // bind threads
    check = original_set.get_affinity();
    if (check == 0) {
        check2 = new_set.set_affinity();
        if (check2 != 0)
            printf("Error in sched_setaffinity (single cpu)\n");
    }
    else
    {
        printf("Error in sched_getaffinity\n");
    }
#ifdef PRINTAFFINITY
    print_set.print_affinity(my_core_id, "set affinity");
#endif
#endif

    if(compT==1)
    {
        /* compute the Q1 overlapped with the bulge chasing+T.
         * if all_cores_num=1 it call Q1 on GPU and then bulgechasing.
         * otherwise the first thread run Q1 on GPU and
         * the other threads run the bulgechasing.
         * */

        if(allcores_num==1)
        {

            //=========================
            //    bulge chasing
            //=========================
            #ifdef ENABLE_TIMER
            timeB = magma_wtime();
            #endif
            
            magma_stile_bulge_parallel(0, 1, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);

            #ifdef ENABLE_TIMER
            timeB = magma_wtime()-timeB;
            printf("  Finish BULGE   timing= %f \n" ,timeB);
            #endif
            //=========================
            // compute the T's to be used when applying Q2
            //=========================
            #ifdef ENABLE_TIMER
            timeT = magma_wtime();
            #endif

            magma_stile_bulge_computeT_parallel(0, 1, V, ldv, TAU, T, ldt, n, nb, Vblksiz);

            #ifdef ENABLE_TIMER
            timeT = magma_wtime()-timeT;
            printf("  Finish T's     timing= %f \n" ,timeT);
            #endif

        }else{ // allcore_num > 1

            magma_int_t id  = my_core_id;
            magma_int_t tot = allcores_num;


                //=========================
                //    bulge chasing
                //=========================
                #ifdef ENABLE_TIMER
                if(id == 0)
                    timeB = magma_wtime();
                #endif

                magma_stile_bulge_parallel(id, tot, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);
                pthread_barrier_wait(barrier);

                #ifdef ENABLE_TIMER
                if(id == 0){
                    timeB = magma_wtime()-timeB;
                    printf("  Finish BULGE   timing= %f \n" ,timeB);
                }
                #endif

                //=========================
                // compute the T's to be used when applying Q2
                //=========================
                #ifdef ENABLE_TIMER
                if(id == 0)
                    timeT = magma_wtime();
                #endif

                magma_stile_bulge_computeT_parallel(id, tot, V, ldv, TAU, T, ldt, n, nb, Vblksiz);
                pthread_barrier_wait(barrier);

                #ifdef ENABLE_TIMER
                if (id == 0){
                    timeT = magma_wtime()-timeT;
                    printf("  Finish T's     timing= %f \n" ,timeT);
                }
                #endif

        } // allcore == 1

    }else{ // WANTZ = 0

        //=========================
        //    bulge chasing
        //=========================
        #ifdef ENABLE_TIMER
        if(my_core_id == 0)
            timeB = magma_wtime();
        #endif

        magma_stile_bulge_parallel(my_core_id, allcores_num, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);
        pthread_barrier_wait(barrier);

        #ifdef ENABLE_TIMER
        if(my_core_id == 0){
            timeB = magma_wtime()-timeB;
            printf("  Finish BULGE   timing= %f \n" ,timeB);
        }
        #endif

    } // WANTZ > 0

#ifdef MAGMA_SETAFFINITY
    // unbind threads
    if (check == 0){
        check2 = original_set.set_affinity();
        if (check2 != 0)
            printf("Error in sched_setaffinity (restore cpu list)\n");
    }
#ifdef PRINTAFFINITY
    print_set.print_affinity(my_core_id, "restored_affinity");
#endif
#endif

    return 0;
}
예제 #2
0
/**
 * Per-thread worker for the single-precision hb2st stage.
 *
 * @param arg  Pointer to a magma_sbulge_id_data: `id` is this worker's index,
 *             `data` is the magma_sbulge_data shared by all workers.
 * @return     Always a null pointer.
 *
 * Each worker optionally pins itself to the CPU numbered by its id
 * (SETAFFINITY), runs the bulge chasing, and, when compT == 1, computes the
 * T's used later when applying Q2. Workers meet at the shared barrier after
 * each kernel, except in the compT == 1 / single-thread configuration.
 * Worker 0 (or the only worker) prints the elapsed time of each phase.
 * On exit the affinity mask the thread had on entry is restored.
 */
static void *magma_ssytrd_hb2st_parallel_section(void *arg)
{
    magma_int_t my_core_id  = ((magma_sbulge_id_data*)arg) -> id;
    magma_sbulge_data* data = ((magma_sbulge_id_data*)arg) -> data;

    magma_int_t allcores_num   = data -> threads_num;
    magma_int_t n              = data -> n;
    magma_int_t nb             = data -> nb;
    magma_int_t nbtiles        = data -> nbtiles;
    magma_int_t grsiz          = data -> grsiz;
    magma_int_t Vblksiz        = data -> Vblksiz;
    magma_int_t compT          = data -> compT;
    float *A                   = data -> A;
    magma_int_t lda            = data -> lda;
    float *V                   = data -> V;
    magma_int_t ldv            = data -> ldv;
    float *TAU                 = data -> TAU;
    float *T                   = data -> T;
    magma_int_t ldt            = data -> ldt;
    volatile magma_int_t* prog = data -> prog;

    pthread_barrier_t* barrier = &(data -> barrier);

    // Timestamps are kept in double. A float has a 24-bit mantissa, so a
    // large absolute clock value (presumably what magma_wtime() returns)
    // would be rounded by more than the intervals measured below.
    double timeB = 0.0, timeT = 0.0;

#if defined(SETAFFINITY)
    // Bind this worker to a single CPU, remembering the mask it started
    // with so it can be put back on exit. pid 0 means the calling thread.
    cpu_set_t original_set;
    cpu_set_t set;
    int have_original = (sched_getaffinity(0, sizeof(original_set), &original_set) == 0);
    if (have_original) {
        CPU_ZERO( &set );
        CPU_SET( my_core_id, &set );
        if (sched_setaffinity(0, sizeof(set), &set) != 0) {
            printf("Error in sched_setaffinity (single cpu)\n");
        }
    }
    else {
        // Without a saved mask there is nothing safe to restore later,
        // so leave the thread unbound.
        printf("Error in sched_getaffinity\n");
    }
#endif

    if (compT == 1)
    {
        if (allcores_num == 1)
        {
            //=========================
            //    bulge chasing
            //=========================
            timeB = magma_wtime();

            magma_stile_bulge_parallel(0, 1, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);

            timeB = magma_wtime()-timeB;
            printf("  Finish BULGE   timing= %f \n" ,timeB);

            //=========================
            // compute the T's to be used when applying Q2
            //=========================
            timeT = magma_wtime();

            magma_stile_bulge_computeT_parallel(0, 1, V, ldv, TAU, T, ldt, n, nb, Vblksiz);

            timeT = magma_wtime()-timeT;
            printf("  Finish T's     timing= %f \n" ,timeT);
        }
        else // allcores_num > 1
        {
            magma_int_t id  = my_core_id;
            magma_int_t tot = allcores_num;

            //=========================
            //    bulge chasing
            //=========================
            if (id == 0) {
                timeB = magma_wtime();
            }

            magma_stile_bulge_parallel(id, tot, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);
            pthread_barrier_wait(barrier);

            // Measured after the barrier, so it covers the slowest worker.
            if (id == 0) {
                timeB = magma_wtime()-timeB;
                printf("  Finish BULGE   timing= %f \n" ,timeB);
            }

            //=========================
            // compute the T's to be used when applying Q2
            //=========================
            if (id == 0) {
                timeT = magma_wtime();
            }

            magma_stile_bulge_computeT_parallel(id, tot, V, ldv, TAU, T, ldt, n, nb, Vblksiz);
            pthread_barrier_wait(barrier);

            if (id == 0) {
                timeT = magma_wtime()-timeT;
                printf("  Finish T's     timing= %f \n" ,timeT);
            }
        }
    }
    else // compT != 1: only the bulge chasing is performed
    {
        //=========================
        //    bulge chasing
        //=========================
        if (my_core_id == 0) {
            timeB = magma_wtime();
        }

        magma_stile_bulge_parallel(my_core_id, allcores_num, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);
        pthread_barrier_wait(barrier);

        if (my_core_id == 0) {
            timeB = magma_wtime()-timeB;
            printf("  Finish BULGE   timing= %f \n" ,timeB);
        }
    }

#if defined(SETAFFINITY)
    // Unbind: restore the entry mask rather than widening to every online
    // CPU, which would discard any restriction inherited from the caller.
    if (have_original) {
        if (sched_setaffinity(0, sizeof(original_set), &original_set) != 0) {
            printf("Error in sched_setaffinity (restore cpu list)\n");
        }
    }
#endif

    return NULL;
}