Exemplo n.º 1
0
/**
 * Compute, in parallel, the T factors associated with the blocks of
 * Householder vectors stored in V.
 *
 * Every calling thread walks the same sequence of blocks; a block is
 * processed by the thread whose my_core_id matches the owner derived from
 * the block id returned by magma_bulge_findVTAUTpos.
 *
 * @param my_core_id  index of the calling thread
 * @param cores_num   number of cooperating threads; nothing is done when <= 0
 * @param V           storage of the Householder vectors (accessed via V())
 * @param ldv         leading dimension passed to slarft for V
 * @param TAU         scalar factors of the reflectors (accessed via TAU())
 * @param T           output storage of the T factors (accessed via T())
 * @param ldt         leading dimension passed to slarft for T
 * @param n           matrix order; nothing is done when n <= 0
 * @param nb          band/tile size used to step down the rows of a block column
 * @param Vblksiz     number of reflectors grouped in one block
 */
static void magma_stile_bulge_computeT_parallel(magma_int_t my_core_id, magma_int_t cores_num, float *V, magma_int_t ldv, float *TAU,
                                                float *T, magma_int_t ldt, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz)
{
    //%===========================
    //%   local variables
    //%===========================
    magma_int_t firstcolj;
    magma_int_t rownbm;
    magma_int_t st,ed,fst,vlen,vnb,colj;
    magma_int_t blkid,vpos,taupos,tpos;
    magma_int_t blkpercore, myid;
    
    /* cores_num is used as a divisor below, so reject non-positive values too. */
    if(n<=0 || cores_num<=0)
        return ;
    
    magma_int_t blkcnt = magma_bulge_get_blkcnt(n, nb, Vblksiz);
    
    /* With fewer blocks than cores the quotient is 0; clamp it to 1 so that
     * blkid/blkpercore below never divides by zero. */
    blkpercore = blkcnt/cores_num;
    if(blkpercore==0)
        blkpercore = 1;
    
    magma_int_t nbGblk  = magma_ceildiv(n-1, Vblksiz);
    
    /* magma_int_t may be wider than int; cast so the arguments match %d. */
    if(my_core_id==0) printf("  COMPUTE T parallel threads %d with  N %d   NB %d   Vblksiz %d \n",(int)cores_num,(int)n,(int)nb,(int)Vblksiz);
    
    for (magma_int_t bg = nbGblk; bg>0; bg--)
    {
        firstcolj = (bg-1)*Vblksiz + 1;
        rownbm    = magma_ceildiv(n-(firstcolj+1), nb);
        if(bg==nbGblk) 
            rownbm    = magma_ceildiv(n-firstcolj ,nb);  // last blk has size=1 used for real to handle A(N,N-1)

        for (magma_int_t m = rownbm; m>0; m--)
        {
            vlen = 0;
            vnb  = 0;
            /* Row index of the first reflector (k=0) of this block; it is the
             * reference from which the block height vlen is measured. */
            colj      = (bg-1)*Vblksiz;
            fst       = (rownbm -m)*nb+colj +1;
            /* Scan the reflectors of the block to find how many are present
             * (vnb) and how many rows they span together (vlen). */
            for (magma_int_t k=0; k<Vblksiz; k++)
            {
                colj     = (bg-1)*Vblksiz + k;
                st       = (rownbm -m)*nb+colj +1;
                ed       = min(st+nb-1,n-1);
                if(st>ed)
                    break;
                /* A length-one reflector is only kept for column n-2. */
                if((st==ed)&&(colj!=n-2))
                    break;
                
                vlen=ed-fst+1;
                vnb=k+1;
            }        
            colj     = (bg-1)*Vblksiz;
            magma_bulge_findVTAUTpos(n, nb, Vblksiz, colj, fst, ldv, ldt, &vpos, &taupos, &tpos, &blkid);
            /* Consecutive runs of blkpercore block ids map to the same thread. */
            myid = blkid/blkpercore;
            if(my_core_id==(myid%cores_num)){
                if((vlen>0)&&(vnb>0))
                    lapackf77_slarft( "F", "C", &vlen, &vnb, V(vpos), &ldv, TAU(taupos), T(tpos), &ldt);
            }
        }
    }
}
Exemplo n.º 2
0
    /**
     * Legacy entry point kept so callers of the old reduction code keep
     * working: it forwards to magma_bulge_findVTAUTpos, which is where the
     * position computation is maintained.
     *
     * The leading dimensions are fixed to ldv = NB + Vblksiz and
     * ldt = Vblksiz before forwarding.
     *
     * @param N        matrix order
     * @param NB       band/tile size
     * @param Vblksiz  number of reflectors grouped in one block
     * @param sweep    sweep (column) index of the reflector
     * @param st       starting row index of the reflector
     * @param Vpos     [out] position inside the V storage
     * @param TAUpos   [out] position inside the TAU storage
     * @param Tpos     [out] position inside the T storage
     * @param myblkid  [out] id of the block holding the reflector
     */
    void findVTpos(magma_int_t N, magma_int_t NB, magma_int_t Vblksiz, magma_int_t sweep, magma_int_t st, magma_int_t *Vpos, magma_int_t *TAUpos, magma_int_t *Tpos, magma_int_t *myblkid)
    {
        // to be able to use and compare with the old reduction function.
        // route the old function to the new ones because the changes are done on the new function.
        magma_int_t ldv = NB + Vblksiz;
        magma_int_t ldt = Vblksiz;
        magma_bulge_findVTAUTpos(N,  NB, Vblksiz,  sweep,  st,  ldv, ldt,
                               Vpos, TAUpos, Tpos, myblkid);
    }
Exemplo n.º 3
0
/**
 * Build the T factors for every block of Householder vectors held in V,
 * sharing the work between cores_num threads.
 *
 * All threads traverse the same list of blocks. The T factors do not depend
 * on one another, so each block is handled by exactly one thread: the one
 * whose my_core_id equals (blkid / blocks-per-core) % cores_num, where blkid
 * is reported by magma_bulge_findVTAUTpos.
 *
 * @param my_core_id  index of the calling thread
 * @param cores_num   number of cooperating threads
 * @param V           storage of the Householder vectors (accessed via V())
 * @param ldv         leading dimension passed to slarft for V
 * @param TAU         scalar factors of the reflectors (accessed via TAU())
 * @param T           output storage of the T factors (accessed via T())
 * @param ldt         leading dimension passed to slarft for T
 * @param n           matrix order; the routine returns at once when n <= 0
 * @param nb          tile size used to step down the rows of a block column
 * @param Vblksiz     number of reflectors grouped in one block
 */
static void magma_stile_bulge_computeT_parallel(magma_int_t my_core_id, magma_int_t cores_num, float *V, magma_int_t ldv, float *TAU,
                                                float *T, magma_int_t ldt, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz)
{
    magma_int_t vpos, taupos, tpos, blkid;

    if (n <= 0) {
        return;
    }

    /* Blocks handed to one core; kept at least 1 because it is used as a divisor. */
    magma_int_t blocks_per_core = magma_bulge_get_blkcnt(n, nb, Vblksiz) / cores_num;
    if (blocks_per_core == 0) {
        blocks_per_core = 1;
    }

    #ifdef ENABLE_DEBUG
    if (my_core_id == 0) {
        printf("  COMPUTE T parallel threads %d with  N %d   NB %d   Vblksiz %d \n",cores_num,n,nb,Vblksiz);
    }
    #endif

    /* Walk the block columns from the last one back to the first. The order
     * is irrelevant for correctness since every T is computed independently. */
    const magma_int_t num_col_blocks = magma_ceildiv((n-1), Vblksiz);
    for (magma_int_t col_block = num_col_blocks; col_block-- > 0; ) {
        const int is_last_col_block = (col_block == num_col_blocks - 1);

        /* Row at the top of this block column and its first column. */
        const magma_int_t top_row   = col_block * Vblksiz + 1;
        const magma_int_t first_col = col_block * Vblksiz;

        /* Number of tiles stacked in this block column; the last block column
         * counts one more row than the others. */
        magma_int_t rows_below;
        if (is_last_col_block) {
            rows_below = n - top_row;
        } else {
            rows_below = n - (top_row + 1);
        }
        const magma_int_t num_tiles = magma_ceildiv(rows_below, nb);

        for (magma_int_t tile = 0; tile < num_tiles; tile++) {
            /* Dimensions (Vm x Vn) of the block of reflectors for this tile. */
            const magma_int_t tile_row = top_row + tile * nb;
            magma_int_t Vm = min(nb + Vblksiz - 1, n - tile_row);

            /* Only the top tile of the last block column may hold as many
             * reflectors as it has rows; elsewhere one fewer is allowed. */
            magma_int_t reflector_cap = Vm - 1;
            if (is_last_col_block && tile == 0) {
                reflector_cap = Vm;
            }
            magma_int_t Vn = min(Vblksiz, reflector_cap);

            /* Locate this block inside the V, TAU and T storage. */
            magma_bulge_findVTAUTpos(n, nb, Vblksiz, first_col, tile_row, ldv, ldt, &vpos, &taupos, &tpos, &blkid);

            const magma_int_t owner = (blkid / blocks_per_core) % cores_num;
            if (my_core_id == owner && Vm > 0 && Vn > 0) {
                lapackf77_slarft( "F", "C", &Vm, &Vn, V(vpos), &ldv, TAU(taupos), T(tpos), &ldt);
            }
        }
    }
}