Exemplo n.º 1
0
  // Orders M into block triangular form (BTF) and splits it into parts.
  //
  // Sequence:
  //   1. strong_component() is called with nblks, order_btf_array and
  //      btf_tabs; afterwards nblks is used as the block count and
  //      btf_tabs(0..nblks) as the block boundaries.
  //   2. order_btf_array is applied to the columns and then to the rows
  //      of M via permute_col()/permute_row().
  //   3. break_into_parts() is invoked on the permuted matrix.
  //
  // Side effects: sets btf_flag to BASKER_TRUE and btf_nblks to nblks.
  //
  // M: matrix to reorder (passed by reference to the permute routines).
  // Returns 0 unconditionally.
  //
  // Every diagnostic in this routine is compiled in only when
  // BASKER_DEBUG_ORDER_BTF is defined, so a release build stays silent.
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::find_btf(BASKER_MATRIX &M)
  {
    Int nblks = 0;

    strong_component(M,nblks,order_btf_array,btf_tabs);

    btf_flag = BASKER_TRUE;

    #ifdef BASKER_DEBUG_ORDER_BTF
    // Int is a template parameter and need not be `int`; cast explicitly
    // so the printf conversion always matches the argument type.
    printf("BTF nblks returned: %ld \n", static_cast<long>(nblks));
    BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");
    if(nblks<2)
      {
        printf("BTF did not find enough blks\n");
      }

    printf("\n\nBTF tabs: \n");
    for(Int i=0; i < nblks+1; i++)
      {
        printf("%ld, ", static_cast<long>(btf_tabs(i)));
      }
    printf("\n");
    #endif

    permute_col(M, order_btf_array);
    permute_row(M, order_btf_array);

    break_into_parts(M, nblks, btf_tabs);

    btf_nblks = nblks;

    // Printed after break_into_parts() so btf_tabs_offset is read in the
    // same place as before; now debug-only like the rest of the output.
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("------------BTF CUT: %ld --------------\n",
           static_cast<long>(btf_tabs(btf_tabs_offset)));
    #endif

    return 0;
  }//end find BTF
Exemplo n.º 2
0
  // Applies the update  y -= M(:, block) * x  for one column block of M,
  // restricted to entries whose permuted row index passes a row cutoff.
  //
  // For each stored entry i in local columns
  //   [btf_tabs(tab) - M.scol, btf_tabs(tab+1) - M.scol)
  // the row is mapped through gperm; entries with a mapped row greater
  // than the cutoff are skipped, all others perform
  //   y(row + M.srow) -= M.val(i) * x(column + M.scol).
  //
  // tab : index into btf_tabs selecting the column block.
  // M   : matrix whose col_ptr / row_idx / val arrays are traversed.
  // x   : read at the global column index (local column + M.scol).
  // y   : updated in place.
  // Returns 0 unconditionally.
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::spmv_BTF
  (
   Int tab,
   BASKER_MATRIX &M,
   ENTRY_1DARRAY x,
   ENTRY_1DARRAY y
   )
  {
    // Column range of the selected block, expressed in M-local indices.
    const Int col_begin = btf_tabs(tab)   - M.scol;
    const Int col_end   = btf_tabs(tab+1) - M.scol;
    const Int row_shift = M.srow;

    // Row cutoff: start of this block for tab > 0, otherwise one less
    // than the row offset of M.
    Int row_cut = row_shift-1;
    if(tab > 0)
      {
        row_cut = btf_tabs(tab);
      }

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("BTF_UPDATE, TAB: %d [%d %d] [%d %d] \n",
           tab, row_shift, row_cut, col_begin, col_end);
    #endif

    for(Int col = col_begin; col < col_end; ++col)
      {
        const Int nz_end = M.col_ptr(col+1);
        for(Int nz = M.col_ptr(col); nz < nz_end; ++nz)
          {
            const Int prow = gperm(M.row_idx(nz));

            // Entries past the cutoff are skipped rather than ending the
            // column scan, so the permuted rows need not be sorted.
            if(prow <= row_cut)
              {
                #ifdef BASKER_DEBUG_SOLVE_RHS
                printf("BTF_UPDATE-val, j: %d y: %f x: %f, val: %f \n",
                       prow, y[prow], x[col+M.scol], M.val[nz]);
                #endif

                y(prow+row_shift) -= M.val(nz)*x(col+M.scol);
              }
          }//over all nnz in column
      }//over all columns of the block

    return 0;
  }//end spmv_BTF();
Exemplo n.º 3
0
  // Serial solve across the BTF partition.
  //
  // Steps, in order:
  //   1. The first gn entries of y are copied into x and y is zeroed.
  //   2. For each trailing block, from (btf_nblks - btf_tabs_offset - 1)
  //      down to 0: lower_tri_solve on LBTF(blk), upper_tri_solve on
  //      UBTF(blk), then spmv_BTF with BTF_C to update x from y.
  //   3. If btf_tabs_offset is nonzero, neg_spmv(BTF_B, y, x).
  //   4. serial_forward_solve(x, y) followed by
  //      serial_backward_solve(y, x).
  //   5. Entries btf_tabs(btf_tabs_offset) .. gn-1 of y are copied to x.
  //
  // y, x: work/result vectors of length at least gn; both are modified.
  // Returns 0 unconditionally.
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::serial_btf_solve
  (
   ENTRY_1DARRAY y,
   ENTRY_1DARRAY x
   )
  {
    // Stage the incoming contents of y in x and clear y.
    for(Int idx = 0; idx < gn; ++idx)
      {
        x(idx) = y(idx);
        y(idx) = (Entry) 0.0;
      }

    // Sweep the trailing blocks from the last one back to the first.
    const Int num_tail_blks = btf_nblks-btf_tabs_offset;
    for(Int blk = num_tail_blks-1; blk >= 0; --blk)
      {
        #ifdef BASKER_DEBUG_SOLVE_RHS
        printf("\n\n btf b: %d \n", blk);
        #endif

        // L\x -> y on this block
        lower_tri_solve(LBTF(blk),x,y);

        // U\x -> y on this block
        upper_tri_solve(UBTF(blk),x,y);

        #ifdef BASKER_DEBUG_SOLVE_RHS
        printf("Before spmv\n");
        printf("Inner Vector y print\n");
        printVec(y, gn);
        printf("Inner Vector x print\n");
        printVec(x, gn);
        printf("\n");
        #endif

        // Fold this block's result back into x through BTF_C.
        spmv_BTF(blk+btf_tabs_offset, BTF_C, y, x);

        #ifdef BASKER_DEBUG_SOLVE_RHS
        printf("After spmv\n");
        printf("Inner Vector y print\n");
        printVec(y, gn);
        printf("Inner Vector x print\n");
        printVec(x, gn);
        #endif
      }

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("Done, BTF-C Solve \n");
    printf("\n x \n");
    printVec(x, gn);
    printf("\n y \n");
    printVec(y, gn);
    printf("\n\n");
    #endif

    // BTF_B update is only performed when btf_tabs_offset is nonzero.
    if(btf_tabs_offset != 0)
      {
        neg_spmv(BTF_B,y,x);
      }

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("Done, SPMV BTF_B UPDATE \n");
    printf("\n x \n");
    printVec(x, gn);
    printf("\n y \n");
    printVec(y, gn);
    printf("\n\n");
    #endif

    // Forward then backward solve: L\x -> y, U\y -> x
    serial_forward_solve(x,y);
    serial_backward_solve(y,x);

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("copying lower starting: %d \n",
           btf_tabs[btf_tabs_offset]);
    #endif

    // Copy the tail of y (from the first trailing block onward) into x.
    const Int tail_start = btf_tabs(btf_tabs_offset);
    for(Int idx = tail_start; idx < gn; ++idx)
      {
        x(idx) = y(idx);
      }

    return 0;

  }//end serial_btf_solve
Exemplo n.º 4
0
  // BTF ordering followed by a per-block AMD ordering and scheduling.
  //
  // Sequence:
  //   1. strong_component() is called with nblks, order_btf_array and
  //      btf_tabs; btf_nblks and btf_flag are then set.
  //   2. order_btf_array is applied to the columns and rows of M.
  //   3. order_blk_amd_array (M.ncol entries), btf_blk_nnz and
  //      btf_blk_work (nblks+1 entries each) are allocated and zeroed,
  //      then filled by btf_blk_amd().
  //   4. order_blk_amd_array is applied to the columns and rows of M and
  //      sort_matrix(M) is called.
  //   5. break_into_parts2() and find_btf_schedule() are run with nblks
  //      and btf_tabs.
  //
  // M: matrix to reorder (passed by reference to every step).
  // Returns 0 unconditionally.
  //
  // All diagnostics are compiled in only when BASKER_DEBUG_ORDER_BTF is
  // defined, matching find_btf(); a release build prints nothing.
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::find_btf2
  (
   BASKER_MATRIX &M
  )
  {
    Int nblks = 0;

    strong_component(M,nblks,order_btf_array,btf_tabs);

    btf_nblks = nblks;

    btf_flag = BASKER_TRUE;

    #ifdef BASKER_DEBUG_ORDER_BTF
    // Int is a template parameter and need not be `int`; cast explicitly
    // so the printf conversion always matches the argument type.
    printf("BTF nblks returned: %ld \n", static_cast<long>(nblks));
    //BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");
    if(nblks<2)
      {
        printf("BTF did not find enough blks\n");
      }

    printf("num_threads: %ld \n", static_cast<long>(num_threads));
    printf("\n\nBTF tabs: \n");
    for(Int i=0; i < nblks+1; i++)
      {
        printf("%ld, ", static_cast<long>(btf_tabs(i)));
      }
    printf("\n");
    #endif

    permute_col(M, order_btf_array);
    permute_row(M, order_btf_array);

    MALLOC_INT_1DARRAY(order_blk_amd_array, M.ncol);
    init_value(order_blk_amd_array, M.ncol, (Int)0);
    MALLOC_INT_1DARRAY(btf_blk_nnz, nblks+1);
    init_value(btf_blk_nnz, nblks+1, (Int) 0);
    MALLOC_INT_1DARRAY(btf_blk_work, nblks+1);
    init_value(btf_blk_work, nblks+1, (Int) 0);

    //Find AMD blk ordering, get nnz, and get work
    btf_blk_amd( M, order_blk_amd_array,
                 btf_blk_nnz, btf_blk_work);

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("blk_perm:\n");
    for(Int i = 0; i < M.ncol; i++)
      {
        printf("(%ld,%ld) ", static_cast<long>(i),
               static_cast<long>(order_blk_amd_array(i)));
      }
    printf("\n");
    printf("id/blk_size/blk_nnz/work: \n");
    for(Int i = 0; i < nblks; i++)
      {
        printf("(%ld, %ld, %ld, %ld) ", static_cast<long>(i),
               static_cast<long>(btf_tabs(i+1)-btf_tabs(i)),
               static_cast<long>(btf_blk_nnz(i)),
               static_cast<long>(btf_blk_work(i)));
      }
    printf("\n");
    #endif

    permute_col(M, order_blk_amd_array);
    permute_row(M, order_blk_amd_array);
    sort_matrix(M);

    break_into_parts2(M, nblks, btf_tabs);

    //find schedule
    find_btf_schedule(M, nblks, btf_tabs);

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("------------BTF CUT: %ld --------------\n",
           static_cast<long>(btf_tabs(btf_tabs_offset)));
    #endif

    return 0;
  }//end find BTF(nnz)
Exemplo n.º 5
0
  // Builds a per-block AMD permutation for every BTF block and records a
  // fill / work figure for each block.
  //
  // M        : matrix whose col_ptr / row_idx describe the pattern.
  // p        : output permutation, indexed by global column.
  // btf_nnz  : output, one entry per block b in [0, btf_nblks).
  // btf_work : output, one entry per block b in [0, btf_nblks).
  //
  // Behaviour:
  //   * Options.incomplete == BASKER_TRUE: p becomes the identity over
  //     M.ncol entries and every block gets nnz = work = 1; no AMD call.
  //   * Block with fewer than 3 columns: natural order is written to p,
  //     work = size^3 and nnz = 0.5*size^2 + size (converted to Int).
  //   * Otherwise the block's pattern is copied into scratch arrays
  //     (entries with a row before the block start are dropped, rows are
  //     rebased to the block start) and handed to
  //     BaskerSSWrapper<Int>::amd_order; l_nnz and lu_work passed to that
  //     call are then stored in btf_nnz(b) / btf_work(b), and the
  //     returned order is scattered into p.
  //
  // NOTE(review): the small-block path subtracts M.scol when writing p,
  // the AMD path does not; these agree only when M.scol == 0 - confirm
  // that callers always pass a matrix with scol == 0.
  void Basker<Int,Entry,Exe_Space>::btf_blk_amd
  (
   BASKER_MATRIX &M, 
   INT_1DARRAY p, 
   INT_1DARRAY btf_nnz, 
   INT_1DARRAY btf_work
  )
  {
    if(Options.incomplete == BASKER_TRUE)
      {
        //We note that AMD on incomplete ILUK
        //seems really bad and leads to a zero on the diag.
        //Therefore, we simply return the natural ordering
        for(Int i = 0 ; i < M.ncol; i++)
          {
            p(i) = i;
          }
        //We make up work to be 1,
        //since BTF is not supported in our iluk
        for(Int b = 0; b < btf_nblks; b++)
          {
            btf_nnz(b)  = 1;
            btf_work(b) = 1;
          }

        return;
      }

    // Scratch pattern storage sized for the whole matrix, reused for
    // every block.
    INT_1DARRAY temp_col;
    MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
    INT_1DARRAY temp_row;
    MALLOC_INT_1DARRAY(temp_row, M.nnz);

    for(Int b = 0; b < btf_nblks; b++)
      {
        const Int blk_start = btf_tabs(b);
        const Int blk_end   = btf_tabs(b+1);
        Int blk_size = blk_end - blk_start;

        if(blk_size < 3)
          {
            // Too small to be worth ordering: keep natural order.
            for(Int ii = 0; ii < blk_size; ++ii)
              {
                p(ii+blk_start) = blk_start+ii-M.scol;
              }
            btf_work(b) = blk_size*blk_size*blk_size;
            btf_nnz(b)  = (.5*(blk_size*blk_size) + blk_size);
            continue;
          }

        INT_1DARRAY tempp;
        MALLOC_INT_1DARRAY(tempp, blk_size+1);

        //Fill in temp matrix with the pattern of this block, rows
        //rebased to the block start
        Int nnz = 0;
        Int column = 1;
        temp_col(0) = 0;
        for(Int k = blk_start; k < blk_end; k++)
          {
            for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
              {
                // Skip entries that lie before the block's first row.
                if(M.row_idx(i) < blk_start)
                  continue;

                temp_row(nnz) = M.row_idx(i) - blk_start;
                nnz++;
              }// end over all row_idx
            temp_col(column) = nnz;
            column++;
          }//end over all columns k

        #ifdef BASKER_DEBUG_ORDER_AMD
        // Int is a template parameter; cast so %ld always matches.
        printf("col_ptr: ");
        for(Int i = 0 ; i < blk_size+1; i++)
          {
            printf("%ld, ", static_cast<long>(temp_col(i)));
          }
        printf("\n");
        printf("row_idx: ");
        for(Int i = 0; i < nnz; i++)
          {
            printf("%ld, ", static_cast<long>(temp_row(i)));
          }
        printf("\n");
        #endif

        double l_nnz = 0;
        double lu_work = 0;
        BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)), 
                                        &(temp_row(0)),&(tempp(0)), 
                                        l_nnz, lu_work);

        btf_nnz(b)  = l_nnz;
        btf_work(b) = lu_work;

        #ifdef BASKER_DEBUG_ORDER_AMD
        printf("blk: %ld order: \n", static_cast<long>(b));
        for(Int ii = 0; ii < blk_size; ii++)
          {
            printf("%ld, ", static_cast<long>(tempp(ii)));
          }
        #endif

        //Add to the bigger perm vector
        for(Int ii = 0; ii < blk_size; ii++)
          {
            p(tempp(ii)+blk_start) = ii+blk_start;
          }

        FREE_INT_1DARRAY(tempp);

      }//over all blk_tabs

    // The final dump used to be keyed on BASKER_DEBUG_AMD_ORDER only,
    // while every other diagnostic here uses BASKER_DEBUG_ORDER_AMD.
    // Accept both spellings so one switch enables all of the output.
    #if defined(BASKER_DEBUG_ORDER_AMD) || defined(BASKER_DEBUG_AMD_ORDER)
    printf("blk amd final order\n");
    for(Int ii = 0; ii < M.ncol; ii++)
      {
        printf("%ld, ", static_cast<long>(p(ii)));
      }
    printf("\n");
    #endif

    FREE_INT_1DARRAY(temp_col);
    FREE_INT_1DARRAY(temp_row);

  }//end blk_amd()
Exemplo n.º 6
0
  // Builds a per-block AMD permutation for the BTF blocks from
  // btf_tabs_offset up to (not including) btf_nblks.
  //
  // M : matrix whose col_ptr / row_idx describe the pattern.
  // p : output permutation, indexed by global column.  Entries that
  //     belong to blocks before btf_tabs_offset are not written here.
  //
  // Behaviour per block:
  //   * Fewer than 3 columns: natural order is written to p.
  //   * Otherwise the block's pattern is copied into scratch arrays
  //     (entries with a row before the block start are dropped, rows are
  //     rebased to the block start), ordered with
  //     BaskerSSWrapper<Int>::amd_order, and the result is scattered
  //     into p.
  //
  // NOTE(review): the small-block path subtracts M.scol when writing p,
  // the AMD path does not; these agree only when M.scol == 0 - confirm
  // against callers.
  void Basker<Int,Entry,Exe_Space>::blk_amd(BASKER_MATRIX &M, INT_1DARRAY p)
  {
    // Scratch pattern storage sized for the whole matrix, reused for
    // every block.
    INT_1DARRAY temp_col;
    MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
    INT_1DARRAY temp_row;
    MALLOC_INT_1DARRAY(temp_row, M.nnz);

    for(Int b = btf_tabs_offset; b < btf_nblks; b++)
      {
        const Int blk_start = btf_tabs(b);
        const Int blk_end   = btf_tabs(b+1);
        Int blk_size = blk_end - blk_start;

        if(blk_size < 3)
          {
            // Too small to be worth ordering: keep natural order.
            for(Int ii = 0; ii < blk_size; ++ii)
              {
                p(ii+blk_start) = blk_start+ii-M.scol;
              }
            continue;
          }

        INT_1DARRAY tempp;
        MALLOC_INT_1DARRAY(tempp, blk_size+1);

        //Fill in temp matrix with the pattern of this block, rows
        //rebased to the block start
        Int nnz = 0;
        Int column = 1;
        temp_col(0) = 0;
        for(Int k = blk_start; k < blk_end; k++)
          {
            for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
              {
                // Skip entries that lie before the block's first row.
                if(M.row_idx(i) < blk_start)
                  continue;

                temp_row(nnz) = M.row_idx(i) - blk_start;
                nnz++;
              }// end over all row_idx
            temp_col(column) = nnz;
            column++;
          }//end over all columns k

        #ifdef BASKER_DEBUG_ORDER_AMD
        // Int is a template parameter; cast so %ld always matches.
        printf("col_ptr: ");
        for(Int i = 0 ; i < blk_size+1; i++)
          {
            printf("%ld, ", static_cast<long>(temp_col(i)));
          }
        printf("\n");
        printf("row_idx: ");
        for(Int i = 0; i < nnz; i++)
          {
            printf("%ld, ", static_cast<long>(temp_row(i)));
          }
        printf("\n");
        #endif

        BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)), 
                                        &(temp_row(0)),&(tempp(0)));

        #ifdef BASKER_DEBUG_ORDER_AMD
        printf("blk: %ld order: \n", static_cast<long>(b));
        for(Int ii = 0; ii < blk_size; ii++)
          {
            printf("%ld, ", static_cast<long>(tempp(ii)));
          }
        #endif

        //Add to the bigger perm vector
        for(Int ii = 0; ii < blk_size; ii++)
          {
            p(tempp(ii)+blk_start) = ii+blk_start;
          }

        FREE_INT_1DARRAY(tempp);

      }//over all blk_tabs

    // The final dump used to be keyed on BASKER_DEBUG_AMD_ORDER only,
    // while every other diagnostic here uses BASKER_DEBUG_ORDER_AMD.
    // Accept both spellings so one switch enables all of the output.
    #if defined(BASKER_DEBUG_ORDER_AMD) || defined(BASKER_DEBUG_AMD_ORDER)
    printf("blk amd final order\n");
    for(Int ii = 0; ii < M.ncol; ii++)
      {
        printf("%ld, ", static_cast<long>(p(ii)));
      }
    printf("\n");
    #endif

    FREE_INT_1DARRAY(temp_col);
    FREE_INT_1DARRAY(temp_row);

  }//end blk_amd()