コード例 #1
0
ファイル: shylubasker_def.hpp プロジェクト: trilinos/Trilinos
  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::Factor(Int option)
  {
#ifdef BASKER_KOKKOS_TIME
    Kokkos::Impl::Timer timer;
#endif

    factor_notoken(option);

#ifdef BASKER_KOKKOS_TIME
    stats.time_nfactor += timer.seconds();
#endif

    // NDE
    MALLOC_ENTRY_1DARRAY(x_view_ptr_copy, gn); //used in basker_solve_rhs - move alloc
    MALLOC_ENTRY_1DARRAY(y_view_ptr_copy, gm);
    MALLOC_INT_1DARRAY(perm_inv_comp_array , gm); //y
    MALLOC_INT_1DARRAY(perm_comp_array, gn); //x

    MALLOC_INT_1DARRAY(perm_comp_iworkspace_array, gn); 
    MALLOC_ENTRY_1DARRAY(perm_comp_fworkspace_array, gn);
    permute_composition_for_solve();

    factor_flag = BASKER_TRUE;

    return 0;
  }//end Factor()
コード例 #2
0
ファイル: shylubasker_def.hpp プロジェクト: trilinos/Trilinos
  int Basker<Int,Entry,Exe_Space>::Factor_Inc(Int Options)
  {
    factor_inc_lvl(Options);

    // NDE
    MALLOC_ENTRY_1DARRAY(x_view_ptr_copy, gn); //used in basker_solve_rhs - move alloc
    MALLOC_ENTRY_1DARRAY(y_view_ptr_copy, gm);
    MALLOC_INT_1DARRAY(perm_inv_comp_array , gm); //y
    MALLOC_INT_1DARRAY(perm_comp_array, gn); //x

    MALLOC_INT_1DARRAY(perm_comp_iworkspace_array, gn);
    MALLOC_ENTRY_1DARRAY(perm_comp_fworkspace_array, gn);
    permute_composition_for_solve();

    return 0;
  }
コード例 #3
0
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::permute_row(
				      BASKER_MATRIX &M,
				       INT_1DARRAY row)
  {

    if(M.nnz == 0)
      {
	return 0;
      }
    Int nnz = M.nnz;
    INT_1DARRAY temp_i;
    MALLOC_INT_1DARRAY(temp_i, nnz);
    init_value(temp_i, nnz, (Int)0);

    //permute
    for(Int k = 0; k < nnz; k++)
      {
        temp_i[k] = row[M.row_idx[k]];
      }
    //Copy back
    for(Int k = 0; k < nnz; k++)
      {
        M.row_idx[k] = temp_i[k];
      }
    FREE_INT_1DARRAY(temp_i);
    return 0;
  }//end permute_row(matrix,int)
コード例 #4
0
ファイル: basker_order_btf.hpp プロジェクト: petsc/Trilinos
  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::strong_component
  (
   BASKER_MATRIX &M,
   Int           &nblks,
   INT_1DARRAY   &perm,
   INT_1DARRAY   &CC
   )
  {

    typedef long int   l_int;
    
    INT_1DARRAY perm_in;
    MALLOC_INT_1DARRAY(perm_in, M.ncol);
    MALLOC_INT_1DARRAY(perm, M.ncol);
    //JDB:Note, this needs to be changed just fixed for int/long
    MALLOC_INT_1DARRAY(CC, M.ncol+1);
    for(l_int i = 0; i < M.ncol; i++)
      {
	perm_in(i) = i;
      }
    //printf("SC one \n");
    //my_strong_component(M,nblks,perm,perm_in, CC);
    BaskerSSWrapper<Int>::my_strong_component(M.ncol,
			&(M.col_ptr(0)),
			&(M.row_idx(0)),
			nblks,
			&(perm(0)),
			&(perm_in(0)), 
			&(CC(0)));

    #ifdef BASKER_DEBUG_ORDER_BTF
    FILE *fp;
    fp = fopen("btf.txt", "w");
    for(Int i = 0; i < M.ncol; i++)
      {
        fprintf(fp, "%d \n", perm(i));
      }
    fclose(fp);
    #endif

    
    //printf("FOUND NBLKS: %d \n", nblks);

    return 0;

  }//end strong_component <long int>
コード例 #5
0
 BASKER_INLINE
 void BaskerMatrix<Int, Entry, Exe_Space>::init_col()
 {
   BASKER_ASSERT(ncol >= 0, "INIT_COL, ncol > 0");
   MALLOC_INT_1DARRAY(col_ptr, ncol+1);
   for(Int i = 0; i < ncol+1; ++i)
   {
     col_ptr(i) = (Int) BASKER_MAX_IDX;
   }
 }//end init_col()
コード例 #6
0
  BASKER_INLINE
  void BaskerMatrix<Int,Entry,Exe_Space>::malloc_perm(Int n)
  {
    BASKER_ASSERT(n > 0, "matrix malloc_perm");
    MALLOC_INT_1DARRAY(lpinv,n);    
 
    //Fix later.  //NDE determine what the issue is...
    init_perm();

  }//end init_perm(Int)
コード例 #7
0
  BASKER_INLINE
  void BaskerMatrix<Int, Entry, Exe_Space>::init_vectors
  (
   Int _m, Int _n, Int _nnz
   )
  {
    nrow = _m;
    ncol = _n;
    nnz  = _nnz;
    mnnz = _nnz;
    
    if(ncol >= 0)
      {
	BASKER_ASSERT((ncol+1)>0, "matrix init_vector ncol");
	MALLOC_INT_1DARRAY(col_ptr,ncol+1);
      }
    if(nnz > 0)
      {
	BASKER_ASSERT(nnz > 0, "matrix init_vector nnz");
	MALLOC_INT_1DARRAY(row_idx,nnz);
	MALLOC_ENTRY_1DARRAY(val,nnz);
	#ifdef BASKER_INC_LVL
	MALLOC_INT_1DARRAY(inc_lvl, nnz);
	#endif

      }
    else if(nnz==0)
      {

	BASKER_ASSERT((nnz+1)>0, "nnz+1 init_vector ");
	MALLOC_INT_1DARRAY(row_idx, nnz+1);
	row_idx(0) = (Int) 0;
	MALLOC_ENTRY_1DARRAY(val, nnz+1);
	val(0) = (Entry) 0;
	#ifdef BASKER_INC_LVL
	MALLOC_INT_1DARRAY(inc_lvl, nnz+1);
	#endif

      }

  }//end init_vectors(Int, Int, Int)
コード例 #8
0
  BASKER_INLINE
  void BaskerMatrix<Int,Entry,Exe_Space>::init_inc_lvl()
  {

    MALLOC_INT_1DARRAY(inc_lvl, nnz+1);
    //for(Int i = 0; i < nnz+1; i++)
    //  {
    //	inc_lvl(i) = 0; 
    // }
    inc_lvl_flg = BASKER_TRUE;

  }
コード例 #9
0
 void BaskerMatrix<Int,Entry,Exe_Space>::init_pend()
 {
   if(ncol > 0)
   {
     BASKER_ASSERT((ncol+1)>0, "matrix init_pend")
     MALLOC_INT_1DARRAY(pend,ncol+1);
     for(Int i =0; i < ncol+1; ++i)
     {
       pend(i) = BASKER_MAX_IDX;
     }
   }
 }//end init_pend()
コード例 #10
0
ファイル: basker_order_amd.hpp プロジェクト: agrippa/Trilinos
  BASKER_FINLINE
  void Basker<Int, Entry,Exe_Space>::csymamd_order
  (
   BASKER_MATRIX &M,
   INT_1DARRAY p,
   INT_1DARRAY cmember
   )
  {    

    amd_flag = BASKER_TRUE;

    //Debug,
    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("cmember: \n");
    for(Int i = 0; i < M.ncol; ++i)
      {
	printf("(%d, %d), ", i, cmember(i));
      }
    printf("\n"); 
    #endif
    
    //If doing  iluk, we will not want this.
    //See amd blk notes
    if(Options.incomplete == BASKER_TRUE)
      {
	for(Int i = 0; i < M.ncol; i++)
	  {
	    p(i) = i;
	  }
	//printf("Short csym \n");
	return;
      }



    INT_1DARRAY temp_p;
    BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough");
    MALLOC_INT_1DARRAY(temp_p, M.ncol+1);
    init_value(temp_p, M.ncol+1, (Int) 0);
    
    my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)),
		     &(temp_p(0)), &(cmember(0)));


    for(Int i = 0; i < M.ncol; ++i)
      {
	p(temp_p(i)) = i;
      }


  }//end csymamd()
コード例 #11
0
ファイル: shylubasker_def.hpp プロジェクト: trilinos/Trilinos
  int Basker<Int,Entry,Exe_Space>::GetPerm(Int **lp, Int **rp)
  {
    INT_1DARRAY lp_array;
    MALLOC_INT_1DARRAY(lp_array, gn);
    INT_1DARRAY rp_array;
    MALLOC_INT_1DARRAY(rp_array, gn);

    get_total_perm(lp_array, rp_array);

    (*lp) = new Int[gn];
    (*rp) = new Int[gn];

    for(Int i = 0; i < gn; ++i)
    {
      (*lp)[i] = lp_array(i);
      (*rp)[i] = rp_array(i);
    }

    FREE_INT_1DARRAY(lp_array);
    FREE_INT_1DARRAY(rp_array);

    return BASKER_SUCCESS;
  }//end GetPerm()
コード例 #12
0
  BASKER_FINLINE
  void Basker<Int, Entry,Exe_Space>::csymamd_order
  (
   BASKER_MATRIX &M,
   INT_1DARRAY p,
   INT_1DARRAY cmember
   )
  {    

    amd_flag = BASKER_TRUE;

    //Debug,
    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("cmember: \n");
    for(Int i = 0; i < M.ncol; ++i)
      {
	printf("(%d, %d), ", i, cmember(i));
      }
    printf("\n"); 
    #endif



    INT_1DARRAY temp_p;
    BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough");
    MALLOC_INT_1DARRAY(temp_p, M.ncol+1);
    init_value(temp_p, M.ncol+1, (Int) 0);
    
    my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)),
		     &(temp_p(0)), &(cmember(0)));


    for(Int i = 0; i < M.ncol; ++i)
      {
	p(temp_p(i)) = i;
      }


  }//end csymamd()
コード例 #13
0
ファイル: basker_order_btf.hpp プロジェクト: petsc/Trilinos
  BASKER_INLINE
  int Basker<Int, Entry,Exe_Space>::break_into_parts
  (
   BASKER_MATRIX &M,
   Int           nblks,
   INT_1DARRAY   btf_tabs
   )
  {

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("break_into_parts called \n");
    printf("nblks: %d \n", nblks);
    #endif
    
    Options.btf = BASKER_TRUE;

    //Alg.  
    // A -> [BTF_A  BTF_B] 
    //      [0      BTF_C]
    //1. Run backward through the btf_tabs to find size C
    //2. Form A,B,C based on size in 1.

    //Short circuit, 
    //If nblks  == 1, than only BTF_A exists
    if(nblks == 1)
      {
	
        #ifdef BASKER_DEBUG_ORDER_BTF
	printf("Short Circuit part_call \n");
	#endif
	BTF_A = A;
	//Options.btf = BASKER_FALSE;
	btf_tabs_offset = 1;
	return 0;
      }

    //Step 1.
    Int t_size            = 0;
    Int scol              = M.ncol;
    Int blk_idx           = nblks;
    BASKER_BOOL  move_fwd = BASKER_TRUE;
    while(move_fwd==BASKER_TRUE)
      {

	Int blk_size = btf_tabs(blk_idx)-
	                     btf_tabs(blk_idx-1);

	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("move_fwd loop \n");
	BASKER_ASSERT(blk_idx>=0, "btf blk idx off");
	BASKER_ASSERT(blk_size>0, "btf blk size wrong");
	printf("blk_idx: %d blk_size: %d \n", 
	       blk_idx, blk_size);
	std::cout << blk_size << std::endl;
	#endif


	if((blk_size < Options.btf_large) &&
	   ((((double)t_size+blk_size)/(double)M.ncol) < Options.btf_max_percent))
	  {
	    #ifdef BASKER_DEBUG_ORDER_BTF
	    printf("first choice \n");
	    printf("blksize test: %d %d %d \n",
		   blk_size, Options.btf_large, 
		   BASKER_BTF_LARGE);
	    printf("blkpercent test: %f %f %f \n", 
		   ((double)t_size+blk_size)/(double)M.ncol, 
		   Options.btf_max_percent, 
		   (double) BASKER_BTF_MAX_PERCENT);
	    #endif

	   
		t_size = t_size+blk_size;
		blk_idx = blk_idx-1;
		scol   = btf_tabs[blk_idx];
	   
	  }
	else
	  {
	    //printf("second choice \n");
	    //#ifdef BASKER_DEBUG_ORDER_BTF
	    printf("Cut: blk_size: %d percent: %f \n",
		   blk_size, ((double)t_size+blk_size)/(double)M.ncol);
	    
	    if((((double)t_size+blk_size)/(double)M.ncol)
	       == 1.0)
	      {
		blk_idx = 0;
		t_size = t_size + blk_size;
		scol = btf_tabs[blk_idx];
		
	      }

	    //#endif
	    move_fwd = BASKER_FALSE;
	  }
      }//end while(move_fwd)

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Done finding BTF cut.  Cut size: %d scol: %d \n",
	   t_size, scol);
    //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n");
    BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n");
    #endif
    
    //Comeback and change
    btf_tabs_offset = blk_idx;


    //2. Move into Blocks

    if(btf_tabs_offset != 0)
      {
    //--Move A into BTF_A;
    BTF_A.set_shape(0, scol, 0, scol);
    BTF_A.nnz = M.col_ptr(scol);
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Init BTF_A. ncol: %d nnz: %d \n",
	   scol, BTF_A.nnz);
    #endif

    if(BTF_A.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr");
	MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1);
	BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz");
	MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz);
	BTF_A.fill();
      }
    
    Int annz = 0;
    for(Int k = 0; k < scol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("copy column: %d into A_BTF, [%d %d] \n", 
	       k, M.col_ptr(k), M.col_ptr(k+1));
	#endif
	
	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    //printf("annz: %d i: %d \n", annz, i);
	    BTF_A.row_idx(annz) = M.row_idx(i);
	    BTF_A.val(annz)     = M.val(i);
	    annz++;
	  }
	
	BTF_A.col_ptr(k+1) = annz;
      }

      }//no A
 
    //Fill in B and C at the same time
    INT_1DARRAY cws;
    BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC");
    MALLOC_INT_1DARRAY(cws, M.ncol-scol+1);
    init_value(cws, M.ncol-scol+1, (Int)M.ncol);
    BTF_B.set_shape(0   , scol,
		    scol, M.ncol-scol);
    BTF_C.set_shape(scol, M.ncol-scol,
		    scol, M.ncol-scol);

    
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Set Shape BTF_B: %d %d %d %d \n",
	   BTF_B.srow, BTF_B.nrow,
	   BTF_B.scol, BTF_B.ncol);
    printf("Set Shape BTF_C: %d %d %d %d \n",
	   BTF_C.srow, BTF_C.nrow,
	   BTF_C.scol, BTF_C.nrow);
    #endif
    
    //Scan and find nnz
    //We can do this much better!!!!
    Int bnnz = 0;
    Int cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Lower, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		cnnz++;
	      }
	  }//over all nnz in k
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF_B nnz: %d \n", bnnz);
    printf("BTF_C nnz: %d \n", cnnz);
    #endif
    
    BTF_B.nnz = bnnz;
    BTF_C.nnz = cnnz;
    
    //Malloc need space
    if((BTF_B.v_fill == BASKER_FALSE) &&
       (BTF_B.nnz > 0))
      //if(BTF_B.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol");
	MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1);
	BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz");
	MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz);
	BTF_B.fill();
      }
    if(BTF_C.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol");
	MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1);
	BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz");
	MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz);
	BTF_C.fill();
      }

    //scan again (Very bad!!!)
    bnnz = 0;
    cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx[i]);
		#endif
		
		BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit");
		//BTF_B.row_idx[bnnz] = M.row_idx[i];
		//Note: do not offset because B srow = 0
		BTF_B.row_idx(bnnz) = M.row_idx(i);
		BTF_B.val(bnnz)     = M.val(i);
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz Lower,k: %d  %d %d %f \n",
		       k, scol, M.row_idx[i], 
		       M.val(i));
		#endif
		//BTF_C.row_idx[cnnz] = M.row_idx[i];
		BTF_C.row_idx(cnnz) = M.row_idx(i)-scol;
		BTF_C.val(cnnz)     = M.val(i);
		cnnz++;
	      }
	  }//over all nnz in k
	if(BTF_B.nnz > 0)
	  {
	    BTF_B.col_ptr(k-scol+1) = bnnz;
	  }
	BTF_C.col_ptr(k-scol+1) = cnnz;
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("After BTF_B nnz: %d \n", bnnz);
    printf("After BTF_C nnz: %d \n", cnnz);
    #endif

    //printf("\n\n");
    //printf("DEBUG\n");
    //BTF_C.print();
    //printf("\n\n");

    return 0;

  }//end break_into_parts
コード例 #14
0
  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::permute_col
  (
   BASKER_MATRIX &M,
   INT_1DARRAY col
   )
  {
    if((M.ncol == 0)||(M.nnz == 0))
      return 0;

    Int n = M.ncol;
    Int nnz = M.nnz;
    //printf("Using n: %d nnz: %d \n", n, nnz);
    INT_1DARRAY temp_p;
    MALLOC_INT_1DARRAY(temp_p, n+1);
    init_value(temp_p, n+1, (Int)0);
    INT_1DARRAY temp_i;
    MALLOC_INT_1DARRAY(temp_i, nnz);
    init_value(temp_i, nnz, (Int)0);
    ENTRY_1DARRAY temp_v;
    MALLOC_ENTRY_1DARRAY(temp_v, nnz);
    init_value(temp_v, nnz, (Entry)0.0);
    //printf("done with init \n");
   
    //Determine column ptr of output matrix
    for(Int j = 0; j < n; j++)
      {
        Int i = col (j);
        temp_p (i+1) = M.col_ptr (j+1) - M.col_ptr (j);
      }
    //Get ptrs from lengths
    temp_p (0) = 0;
  
    for(Int j = 0; j < n; j++)
      {
        temp_p (j+1) = temp_p (j+1) + temp_p (j);
      }
    //copy idxs
    
    for(Int ii = 0; ii < n; ii++)
      {
        Int ko = temp_p (col (ii) );
        for(Int k = M.col_ptr (ii); k < M.col_ptr (ii+1); k++)
          {
            temp_i (ko) = M.row_idx (k);
            temp_v (ko) = M.val (k);
            ko++;
          }
      }
    
    //copy back int A
    for(Int ii=0; ii < n+1; ii++)
      {
        M.col_ptr (ii) = temp_p (ii);
      }
    for(Int ii=0; ii < nnz; ii++)
      {
        M.row_idx (ii) = temp_i (ii);
        M.val (ii) = temp_v (ii);
      }
    FREE_INT_1DARRAY(temp_p);
    FREE_INT_1DARRAY(temp_i);
    FREE_ENTRY_1DARRAY(temp_v);

    return 0;
  }//end permute_col(int) 
コード例 #15
0
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::btf_order()
  {
    //1. Matching ordering on whole matrix
    //currently finds matching and permutes
    //found bottle-neck to work best with circuit problems
    sort_matrix(A);
    //printMTX("A_nonmatch.mtx", A);
    match_ordering(0);
    //printf("DEBUG1: done match\n");
    //for debuging
    sort_matrix(A);
    //printMTX("A_match.mtx", A);
   
    //2. BTF ordering on whole matrix
    // Gets estimate of work on all blocks
    //currently finds btf-hybrid and permutes
    //A -> [BTF_A, BTF_C; 0 , BTF B]


    printf("outter num_threads:%d \n", num_threads);
    MALLOC_INT_1DARRAY(btf_schedule, num_threads+1);
    init_value(btf_schedule, num_threads+1, 0);
    find_btf(A); 

   
    
    if(btf_tabs_offset != 0)
      {

        //  printf("A/B block stuff called\n");
	//3. ND on BTF_A
	//currently finds ND and permute BTF_A
	//Would like to change so finds permuation, 
	//and move into 2D-Structure
	//printMTX("A_BTF_FROM_A.mtx", BTF_A);
	sort_matrix(BTF_A);
	scotch_partition(BTF_A);
    
	//need to do a row perm on BTF_B too
	if(btf_nblks > 1)
	  {
	    permute_row(BTF_B, part_tree.permtab);
	  }
	//needed because  moving into 2D-Structure,
	//assumes sorted columns
	sort_matrix(BTF_A);
	if(btf_nblks > 1)
	  {
	    sort_matrix(BTF_B);
	    sort_matrix(BTF_C);
	  }
	//For debug
	//printMTX("A_BTF_PART_AFTER.mtx", BTF_A);
	
	//4. Init tree structure
	//This reduces the ND ordering into that fits,
	//thread counts
	init_tree_thread();
	

	//5. Permute BTF_A
	//Constrained symamd on A
	INT_1DARRAY cmember;
	MALLOC_INT_1DARRAY(cmember, BTF_A.ncol+1);
	init_value(cmember,BTF_A.ncol+1,(Int) 0);
	for(Int i = 0; i < tree.nblks; ++i)
	  {
	    for(Int j = tree.col_tabs(i); j < tree.col_tabs(i+1); ++j)
	      {
		cmember(j) = i;
	      }
	  }
	//INT_1DARRAY csymamd_perm = order_csym_array;
	MALLOC_INT_1DARRAY(order_csym_array, BTF_A.ncol+1);
	//MALLOC_INT_1DARRAY(csymamd_perm, BTF_A.ncol+1);
	init_value(order_csym_array, BTF_A.ncol+1,(Int) 0);
	//init_value(csymamd_perm, BTF_A.ncol+1,(Int) 0);
	
	csymamd_order(BTF_A, order_csym_array, cmember);
	//csymamd_order(BTF_A, csymamd_perm, cmember);
	
	//permute(BTF_A, csymamd_perm, csymamd_perm);
	permute_col(BTF_A, order_csym_array);
	sort_matrix(BTF_A);
	permute_row(BTF_A, order_csym_array);
	sort_matrix(BTF_A);
	//printMTX("A_BTF_AMD.mtx", BTF_A);
	
	
	if(btf_nblks > 1)
	  {
	    permute_row(BTF_B, order_csym_array);
	    sort_matrix(BTF_B);
	    //printMTX("B_BTF_AMD.mtx", BTF_B);
	    sort_matrix(BTF_C);
	    //printMTX("C_BTF_AMD.mtx", BTF_C);
	  }
    
    
	//6. Move to 2D Structure
	//finds the shapes for both view and submatrices,
	//need to be changed over to just submatrices
	matrix_to_views_2D(BTF_A);
	//finds the starting point of A for submatrices
	find_2D_convert(BTF_A);
	//now we can fill submatrices
        #ifdef BASKER_KOKKOS
	kokkos_order_init_2D<Int,Entry,Exe_Space> iO(this);
	Kokkos::parallel_for(TeamPolicy(num_threads,1), iO);
	Kokkos::fence();
        #else
	//Comeback
        #endif

	//printMTX("BTF_A.mtx", BTF_A); 
	
      }//if btf_tab_offset == 0

    
    if(btf_nblks > 1)
      {
	sort_matrix(BTF_C);
	//printMTX("C_TEST.mtx", BTF_C);
	//Permute C

	MALLOC_INT_1DARRAY(order_c_csym_array, BTF_C.ncol+1);
	init_value(order_c_csym_array, BTF_C.ncol+1,(Int) 0);
	
	printf("BEFORE \n");

	//csymamd_order(BTF_C, order_c_csym_array, cmember);

	blk_amd(BTF_C, order_c_csym_array);

	printf("After perm\n");
	
	permute_col(BTF_C, order_c_csym_array);
	sort_matrix(BTF_C);
	permute_row(BTF_C, order_c_csym_array);
	sort_matrix(BTF_C);

	if(btf_tabs_offset != 0)
	  {
	    permute_col(BTF_B, order_c_csym_array);
	    sort_matrix(BTF_B);
	    //printMTX("BTF_B.mtx", BTF_B);
	  }

	//printMTX("BTF_C.mtx", BTF_C);

      }
  
    
    //printf("Done with ordering\n");
    
    return 0;
  }//end btf_order
コード例 #16
0
  BASKER_INLINE
  int Basker<Int, Entry,Exe_Space>::match_ordering(int option)
  {


    //printf("match_order called\n");

    /* ---- Tests --------

    INT_1DARRAY mperm;
    MALLOC_INT_1DARRAY(mperm, A.nrow);
    mc64(2,mperm);

    

    INT_1DARRAY mperm2;
    MALLOC_INT_1DARRAY(mperm2, A.nrow);
    mwm(A,mperm2);
    
    return 0;

    */
      
    Int job = 2; //5 is the default for SuperLU_DIST
    //INT_1DARRAY mperm = order_match_array;
    MALLOC_INT_1DARRAY(order_match_array, A.nrow);
    //MALLOC_INT_1DARRAY(mperm, A.nrow);
    mwm(A,order_match_array);
    //mc64(job,order_match_array);
    //mc64(job,mperm);

    match_flag = BASKER_TRUE;
    
    #ifdef BASKER_DEBUG_ORDER
    printf("Matching Perm \n");
    for(Int i = 0; i < A.nrow; i++)
      {
	printf("%d, \n", order_match_array(i));
	//printf("%d, \n", mperm[i]);
      }
    printf("\n");
    #endif

    //We want to test what the match ordering does if
    //have explicit zeros
    #ifdef BASKER_DEBUG_ORDER
    FILE *fp;
    fp = fopen("match_order.txt", "w");
    for(Int i = 0; i < A.nrow; i++)
      {
	fprintf(fp, "%d \n", order_match_array(i));
      }
    fclose(fp);
    #endif

    
    permute_row(A,order_match_array);
    //permute_row(A,mperm);
    //May have to call row_idx sort
    return 0;
    
  }//end match_ordering()
コード例 #17
0
  BASKER_INLINE
  void BaskerMatrix<Int,Entry,Exe_Space>::convert2D
  (
   BASKER_MATRIX &M,
   BASKER_BOOL   alloc,
   Int kid
  )
  {
    if(nnz == 0)
    {
      for(Int i = 0; i < ncol+1; i++)
      {
        col_ptr(i) = 0;
      }

      MALLOC_INT_1DARRAY(row_idx, 1);
      row_idx(0) = (Int) 0;
      MALLOC_ENTRY_1DARRAY(val, 1);
      val(0) = (Entry) 0;
      return;
    }

    //info();
    //We could check some flag ??
    //We assume a pre-scan has already happened
    if(alloc == BASKER_TRUE)
    {
      //printf("ALLOC\n");
      if(nnz > 0)
      {
        BASKER_ASSERT(nnz > 0, "matrix row nnz 2");
        MALLOC_INT_1DARRAY(row_idx, nnz);
      }
      else if(nnz ==0)
      {
        BASKER_ASSERT((nnz+1)>0, "matrix row nnz 3");
        MALLOC_INT_1DARRAY(row_idx, nnz+1);
      }
    }
    //init_value(row_idx, nnz, (Int) 0);
    //printf("clear row: %d \n", nnz);
    for(Int i = 0; i < nnz; ++i)
    {
      //printf("clear row_idx(%d) \n", i);
      row_idx(i) = 0;
    }

    if(alloc == BASKER_TRUE)
    {
      if(nnz > 0)
      {
        BASKER_ASSERT(nnz > 0, "matrix nnz 4");
        MALLOC_ENTRY_1DARRAY(val, nnz);
      }
      else if(nnz == 0)
      {
        BASKER_ASSERT((nnz+1) > 0, "matrix nnz 5");
        MALLOC_ENTRY_1DARRAY(val, nnz+1);
      }
    }
    //init_value(val, nnz, (Entry) 0);
    for(Int i = 0; i < nnz; ++i)
    {
      val(i) = 0;
    }

    Int temp_count = 0;

    for(Int k = scol; k < scol+ncol; ++k)
    {
      //note col_ptr[k-scol] contains the starting index
      if(col_ptr(k-scol) == BASKER_MAX_IDX)
      {
        col_ptr(k-scol) = temp_count;
        //printf("continue called, k: %d  \n", k);
        continue;
      }

      for(Int i = col_ptr(k-scol); i < M.col_ptr(k+1); i++)
      {
        Int j = M.row_idx(i);
        //printf("i: %d j:%d \n", i,j);
        if(j >= srow+nrow)
        {
          break;
        }
        //printf("writing row_dix: %d i: %d  val: %d nnz: %d srow: %d nrow: %d \n",
        //	   temp_count, i, j, nnz, 
        //	   srow, nrow);
        //BASKER_ASSERT(temp_count < nnz, "2DConvert, too many values");

        //row_idx[temp_count] = j;

        if(j-srow <0)
        {
          std::cout << "kid: " << kid 
                    << " j: " << j 
                    << " srow: " << srow 
                    << " k: " << k
                    << " idx: " << i
                    << std::endl;
          BASKER_ASSERT(0==1, "j-srow NO");
        }


        row_idx(temp_count) = j-srow;
        val(temp_count) = M.val(i);
        temp_count++;
      }
      col_ptr(k-scol) = temp_count;
    }

    //col_ptr[0] = 0;

    //NO!!1
    //Slide over the col counts
    for(Int i = ncol; i > 0; i--)
    {
      col_ptr(i) = col_ptr(i-1);
    }
    col_ptr(0) = (Int) 0;

    //info();
    //print();

  }//end convert2d(Matrix)
コード例 #18
0
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::solve_interface
  (
   Entry *_x, //Solution (len = gn)
   Entry *_y
   )
  {

    //Need to modify to use global perm 
    INT_1DARRAY temp_array;
    MALLOC_INT_1DARRAY(temp_array, gn);
  
    //===== Move to view===========
    ENTRY_1DARRAY  x;
    ENTRY_1DARRAY  y;

    MALLOC_ENTRY_1DARRAY(x, gn);
    MALLOC_ENTRY_1DARRAY(y, gm);
    
    for(Int i =0; i < gn; i++)
      {
	x(i) = (Entry) 0;
        y(i) = (Entry) _y[i];
      }
    
    //printf("RHS: \n");
    //printVec(y, gn);
    //printf("\n");


    //===== Permute
    //printf("Permute RHS\n");
    //==== Need to make this into one global perm
    if(match_flag == BASKER_TRUE)
      {
	//printf("match order\n");
	//printVec("match.txt", order_match_array, gn);
	permute_inv(y,order_match_array, gn);
      }
    if(btf_flag == BASKER_TRUE)
      {
	//printf("btf order\n");
	//printVec("btf.txt", order_btf_array, gn);
	permute_inv(y,order_btf_array, gn);
        //printVec("btf_amd.txt", order_c_csym_array, gn);
        permute_inv(y,order_blk_amd_array, gn);

      }
    if(nd_flag == BASKER_TRUE)
      {
	//printf("ND order \n");
	//printVec("nd.txt", part_tree.permtab, gn);
	for(Int i = 0; i < BTF_A.ncol; ++i)
	  {
	    temp_array(i) = part_tree.permtab(i);
	  }
	for(Int i = BTF_A.ncol; i < gn; ++i)
	  {
	    temp_array(i) = i;
	  }
	//permute_inv(y,part_tree.permtab, gn);
	permute_inv(y, temp_array,gn);
      }
    if(amd_flag == BASKER_TRUE)
      {
	//printf("AMD order \n");
	//printVec("amd.txt",order_csym_array, gn);
	for(Int i = 0; i < BTF_A.ncol; ++i)
	  {
	    temp_array(i) = order_csym_array(i);
	  }
	for(Int i = BTF_A.ncol; i < gn; ++i)
	  {
	    temp_array(i) = i;
	  }
	//permute_inv(y,order_csym_array, gn);
	permute_inv(y,temp_array, gn);
      }


    //printVec("perm.txt" , gperm, gn);
    permute_inv(y,gperm, gn);


    solve_interface(x,y);

    //Inverse perm
    //Note: don't need to inverse a row only perm
    if(btf_flag == BASKER_TRUE)
      {
	//printf("btf order\n");
	//printVec(order_btf_array, gn);
	permute(x,order_btf_array, gn);
      }
    if(nd_flag == BASKER_TRUE)
      {
	//printf("ND order \n");
	//printVec(part_tree.permtab, gn);
	for(Int i = 0; i < BTF_A.ncol; ++i)
	  {
	    temp_array(i) = part_tree.permtab(i);
	  }
	for(Int i = BTF_A.ncol; i < gn; i++)
	  {
	    temp_array(i) = i;
	  }
	//permute(x,part_tree.permtab, gn);
	permute(x,temp_array, gn);
      }
    if(amd_flag == BASKER_TRUE)
      {
	//printf("AMD order \n");
	//printVec(order_csym_array, gn);
	for(Int i = 0; i < BTF_A.ncol; ++i)
	  {
	    temp_array(i) = order_csym_array(i);
	  }
	for(Int i = BTF_A.ncol; i < gn; ++i)
	  {
	    temp_array(i) = order_csym_array(i);
	  }
	//permute(x,order_csym_array, gn);
	permute(x,temp_array,gn);
      }
   


    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("\n\n");
    printf("X: \n");
    for(Int i = 0; i < gn; i++)
      {
	printf("%f, " , x(i));
      }
    printf("\n\n");
    printf("RHS: \n");
    for(Int i =0; i < gm; i++)
      {
	printf("%f, ", y(i)); 
      }
    printf("\n\n");
    #endif

    for(Int i = 0; i < gn; i++)
      {
        _x[i] = x(i);
      } 

   
    FREE_ENTRY_1DARRAY(x);
    FREE_ENTRY_1DARRAY(y);
    FREE_INT_1DARRAY(temp_array);
   

    return 0;
  }
コード例 #19
0
ファイル: basker_order_amd.hpp プロジェクト: agrippa/Trilinos
  void Basker<Int,Entry,Exe_Space>::blk_amd(BASKER_MATRIX &M, INT_1DARRAY p)
  {
    
    //p == length(M)
    //Scan over all blks
    //Note, that this needs to be made parallel in the 
    //future (Future Josh will be ok with this, right?)

    //This is a horrible way to do this!!!!!
    //KLU does this very nice, but they also make all the little blks
    INT_1DARRAY temp_col;
    MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
    INT_1DARRAY temp_row;
    MALLOC_INT_1DARRAY(temp_row, M.nnz);


    for(Int b = btf_tabs_offset; b < btf_nblks; b++)
      {
	Int blk_size = btf_tabs(b+1) - btf_tabs(b);
	if(blk_size < 3)
	  {
	    
	    //printf("debug, blk_size: %d \n", blk_size);
	    for(Int ii = 0; ii < blk_size; ++ii)
	      {
		//printf("set %d \n", btf_tabs(b)+ii-M.scol);
		p(ii+btf_tabs(b)) = btf_tabs(b)+ii-M.scol;
	      }
	    continue;
	  }
	
	INT_1DARRAY tempp;
	MALLOC_INT_1DARRAY(tempp, blk_size+1);
	
	
	//Fill in temp matrix
	Int nnz = 0;
	Int column = 1;
	temp_col(0) = 0;
	for(Int k = btf_tabs(b); k < btf_tabs(b+1); k++)
	  {
	    for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
	      {
		if(M.row_idx(i) < btf_tabs(b))
		  continue;
		  
		temp_row(nnz) = M.row_idx(i) - btf_tabs(b);
		nnz++;
	      }// end over all row_idx
	    temp_col(column) = nnz;
	    column++;
	  }//end over all columns k
	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("col_ptr: ");
	for(Int i = 0 ; i < blk_size+1; i++)
	  {
	    printf("%d, ", temp_col(i));
	  }
	printf("\n");
	printf("row_idx: ");
	for(Int i = 0; i < nnz; i++)
	  {
	    printf("%d, ", temp_row(i));
	  }
	printf("\n");
	#endif


	BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)), 
					&(temp_row(0)),&(tempp(0)));


	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("blk: %d order: \n", b);
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    printf("%d, ", tempp(ii));
	  }
	#endif

				     
	//Add to the bigger perm vector
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    //printf("loc: %d val: %d \n", 
	    //ii+btf_tabs(b), tempp(ii)+btf_tabs(b));

	    p(tempp(ii)+btf_tabs(b)) = ii+btf_tabs(b);
	  }


	FREE_INT_1DARRAY(tempp);
	
      }//over all blk_tabs

    #ifdef BASKER_DEBUG_AMD_ORDER
    printf("blk amd final order\n");
    for(Int ii = 0; ii < M.ncol; ii++)
      {
	printf("%d, ", p(ii));
      }
    printf("\n");
    #endif

    FREE_INT_1DARRAY(temp_col);
    FREE_INT_1DARRAY(temp_row);
    
  }//end blk_amd()
コード例 #20
0
  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::factor_inc_lvl(Int option)
  {

    printf("Factor Inc Level Called \n");

    gn = A.ncol;
    gm = A.nrow;

    if(Options.btf == BASKER_TRUE)
      {

	//JDB: We can change this for the new inteface

	//call reference copy constructor
	gn = A.ncol;
	gm = A.nrow;
	A = BTF_A; 
	//printf("\n\n Switching A, newsize: %d \n",
	//   A.ncol);
	//printMTX("A_FACTOR.mtx", A);
      }
   

    //Spit into Domain and Sep
    //----------------------Domain-------------------------//
    #ifdef BASKER_KOKKOS

    //====TIMER==
    #ifdef BASKER_TIME
    Kokkos::Impl::Timer       timer;
    #endif
    //===TIMER===

    typedef Kokkos::TeamPolicy<Exe_Space>        TeamPolicy;

    if(btf_tabs_offset != 0)
      {

	kokkos_nfactor_domain_inc_lvl <Int,Entry,Exe_Space>
	  domain_nfactor(this);
	Kokkos::parallel_for(TeamPolicy(num_threads,1), 
			     domain_nfactor);
	Kokkos::fence();
    

	//=====Check for error======
	while(true)
	  {
	INT_1DARRAY thread_start;
	MALLOC_INT_1DARRAY(thread_start, num_threads+1);
	init_value(thread_start, num_threads+1, 
		   (Int) BASKER_MAX_IDX);
	int nt = nfactor_domain_error(thread_start);
	if(nt == BASKER_SUCCESS)
	  {
	    break;
	  }
	else
	  {
	    printf("restart \n");
	    kokkos_nfactor_domain_remalloc <Int, Entry, Exe_Space>
	      diag_nfactor_remalloc(this, thread_start);
	    Kokkos::parallel_for(TeamPolicy(num_threads,1),
				 diag_nfactor_remalloc);
	    Kokkos::fence();
	  }
      }//end while
   

    //====TIMER===
    #ifdef BASKER_TIME
    printf("Time DOMAIN: %f \n", timer.seconds());
    timer.reset();
    #endif
    //====TIMER====
    

    #else// else basker_kokkos
    #pragma omp parallel
    {


    }//end omp parallel
    #endif //end basker_kokkos

      }
    //-------------------End--Domian--------------------------//

   
    //---------------------------Sep--------------------------//

    
    
    if(btf_tabs_offset != 0)
      {
        //for(Int l=1; l<=1; l++)
       for(Int l=1; l <= tree.nlvls; l++)
      {

        //Come back for syncs
        
	//#ifdef BASKER_OLD_BARRIER
	Int lthreads = pow(2,l);
	Int lnteams = num_threads/lthreads;
	//#else
	//Int lthreads = 1;
	//Int lnteams = num_threads/lthreads;
	//#endif

	
	//printf("\n\n   ============ SEP: %d ======\n\n",l);

	#ifdef BASKER_KOKKOS
	Kokkos::Impl::Timer  timer_inner_sep;
	#ifdef BASKER_NO_LAMBDA
       
	kokkos_nfactor_sep2_inc_lvl <Int, Entry, Exe_Space>
	  sep_nfactor(this,l);
	
	Kokkos::parallel_for(TeamPolicy(lnteams,lthreads),
			     sep_nfactor);
	Kokkos::fence();
	
	#ifdef BASKER_TIME
	printf("Time INNERSEP: %d %f \n", 
	       l, timer_inner_sep.seconds());
	#endif
        #else //ELSE BASKER_NO_LAMBDA
	//Note: to be added
        #endif //end BASKER_NO_LAMBDA
	#else
	#pragma omp parallel
	{

	}//end omp parallel
	#endif
      }//end over each level

    #ifdef BASKER_TIME
    printf("Time SEP: %f \n", timer.seconds());
    #endif
      }

   
    //-------------------------End Sep----------------//


    //-------------------IF BTF-----------------------//
    if(Options.btf == BASKER_TRUE)
      {
	//=====Timer
	#ifdef BASKER_TIME
	Kokkos::Impl::Timer  timer_btf;
	#endif
	//====Timer
	
	//======Call diag factor====
	/*
	kokkos_nfactor_diag <Int, Entry, Exe_Space> 
	  diag_nfactor(this);
	Kokkos::parallel_for(TeamPolicy(num_threads,1),
			     diag_nfactor);
	Kokkos::fence();
	*/
	//=====Check for error======
	//while(true)
	// {
	    //INT_1DARRAY thread_start;
	    // MALLOC_INT_1DARRAY(thread_start, num_threads+1);
	    //init_value(thread_start, num_threads+1, 
	    //	       (Int) BASKER_MAX_IDX);
	    //int nt = nfactor_diag_error(thread_start);
	    // if(nt == BASKER_SUCCESS)
	    //  {
	    ///		break;
	    // }
	    //else
	    // {
		/*
		break;
		printf("restart \n");
		kokkos_nfactor_diag_remalloc <Int, Entry, Exe_Space>
		  diag_nfactor_remalloc(this, thread_start);
		Kokkos::parallel_for(TeamPolicy(num_threads,1),
				     diag_nfactor);
		Kokkos::fence();
		*/
	    //}
	    // }//end while

	//====TIMER
	#ifdef BASKER_TIME
	printf("Time BTF: %f \n", 
	       timer_btf.seconds());
	#endif
	//===TIMER

      }//end btf call

    
    return 0;
  }//end factor_lvl_inc()
コード例 #21
0
ファイル: basker_order_btf.hpp プロジェクト: petsc/Trilinos
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::find_btf2
  (
   BASKER_MATRIX &M
  )
  {
    Int          nblks = 0;

    strong_component(M,nblks,order_btf_array,btf_tabs);

    btf_nblks = nblks;

    btf_flag = BASKER_TRUE;

    //#ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF nblks returned: %d \n", nblks);
    //BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");
    //#endif

    #ifdef BASKER_DEBUG_ORDER_BTF
    if(nblks<2)
      {
	printf("BTF did not find enough blks\n");
      }
    #endif


    //#ifdef BASKER_DEBUG_ORDER_BTF
    /*
    printf("\nBTF perm: \n");
    for(Int i=0; i <M.nrow; i++)
      {
	printf("%d, ", order_btf_array(i));
	//printf("%d, ", btf_perm(i));
      }
    */
    printf("num_threads: %d \n", num_threads);
    printf("\n\nBTF tabs: \n");
    for(Int i=0; i < nblks+1; i++)
      {
	printf("%d, ", btf_tabs(i));
      }
    printf("\n");
    // #endif

    permute_col(M, order_btf_array);
    permute_row(M, order_btf_array);

    MALLOC_INT_1DARRAY(order_blk_amd_array, M.ncol);
    init_value(order_blk_amd_array, M.ncol, (Int)0);
    MALLOC_INT_1DARRAY(btf_blk_nnz, nblks+1);
    init_value(btf_blk_nnz, nblks+1, (Int) 0);
    MALLOC_INT_1DARRAY(btf_blk_work, nblks+1);
    init_value(btf_blk_work, nblks+1, (Int) 0);


    //Find AMD blk ordering, get nnz, and get work
    btf_blk_amd( M, order_blk_amd_array,
		 btf_blk_nnz, btf_blk_work);


    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("blk_perm:\n");
    for(Int i = 0; i < M.ncol; i++)
      {
	printf("(%d,%d) ", i, order_blk_amd_array(i));
      }
    printf("\n");
    printf("id/blk_size/blk_nnz/work: \n");
    for(Int i = 0; i < nblks; i++)
      {
	printf("(%d, %d, %d, %d) ", i,
	       btf_tabs(i+1)-btf_tabs(i), 
	       btf_blk_nnz(i), btf_blk_work(i));
      }
    printf("\n");
    #endif

    //printMTX("A_BEFORE.mtx", M);
    //printVec("AMD.txt", order_blk_amd_array, M.ncol);
    

    permute_col(M, order_blk_amd_array);
    permute_row(M, order_blk_amd_array);
    sort_matrix(M);

    //changed col to row, error.
    //print to see issue
    //printMTX("A_AMD.mtx", M);
    
       
    break_into_parts2(M, nblks, btf_tabs);

    //find schedule
    find_btf_schedule(M, nblks, btf_tabs);


    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("------------BTF CUT: %d --------------\n", 
	   btf_tabs(btf_tabs_offset));
    #endif

    return 0;
  }//end find BTF(nnz)
コード例 #22
0
ファイル: basker_order_amd.hpp プロジェクト: agrippa/Trilinos
  void Basker<Int,Entry,Exe_Space>::btf_blk_amd
  (
   BASKER_MATRIX &M, 
   INT_1DARRAY p, 
   INT_1DARRAY btf_nnz, 
   INT_1DARRAY btf_work
  )
  {
   

    // printf("=============BTF_BLK_AMD_CALLED========\n");
    if(Options.incomplete == BASKER_TRUE)
      {
	//We note that AMD on incomplete ILUK
	//Seems realy bad and leads to a zero on the diag
	//Therefore, we simply return the natural ordering
	for(Int i = 0 ; i < M.ncol; i++)
	  {
	    p(i) = i;
	  }
	//We will makeup work to be 1, 
	//Since BTF is not supported in our iluk
	for(Int b = 0; b < btf_nblks; b++)
	  {
	    btf_nnz(b) = 1;
	    btf_work(b) =1;
	  }
       
	//printf("Short amd blk\n");

	return;
      }

 
    //p == length(M)
    //Scan over all blks
    //Note, that this needs to be made parallel in the 
    //future (Future Josh will be ok with this, right?)

    //This is a horrible way to do this!!!!!
    //KLU does this very nice, but they also make all the little blks
    INT_1DARRAY temp_col;
    MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
    INT_1DARRAY temp_row;
    MALLOC_INT_1DARRAY(temp_row, M.nnz);
    //printf("Done with btf_blk_amd malloc \n");
    //printf("blks: %d \n" , btf_nblks);


    for(Int b = 0; b < btf_nblks; b++)
      {
	Int blk_size = btf_tabs(b+1) - btf_tabs(b);

	//printf("blk: %d blk_size: %d \n",
	//     b, blk_size);

	if(blk_size < 3)
	  {
	    
	    //printf("debug, blk_size: %d \n", blk_size);
	    for(Int ii = 0; ii < blk_size; ++ii)
	      {
		//printf("set %d \n", btf_tabs(b)+ii-M.scol);
		p(ii+btf_tabs(b)) = btf_tabs(b)+ii-M.scol;
	      }
	    btf_work(b) = blk_size*blk_size*blk_size;
	    btf_nnz(b)  = (.5*(blk_size*blk_size) + blk_size);
	    continue;
	  }
	
	INT_1DARRAY tempp;
	MALLOC_INT_1DARRAY(tempp, blk_size+1);
	
	
	//Fill in temp matrix
	Int nnz = 0;
	Int column = 1;
	temp_col(0) = 0;
	for(Int k = btf_tabs(b); k < btf_tabs(b+1); k++)
	  {
	    for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
	      {
		if(M.row_idx(i) < btf_tabs(b))
		  continue;
		  
		temp_row(nnz) = M.row_idx(i) - btf_tabs(b);
		nnz++;
	      }// end over all row_idx
	    temp_col(column) = nnz;
	    column++;
	  }//end over all columns k
	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("col_ptr: ");
	for(Int i = 0 ; i < blk_size+1; i++)
	  {
	    printf("%d, ", temp_col(i));
	  }
	printf("\n");
	printf("row_idx: ");
	for(Int i = 0; i < nnz; i++)
	  {
	    printf("%d, ", temp_row(i));
	  }
	printf("\n");
	#endif


	double l_nnz = 0;
	double lu_work = 0;
	BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)), 
					&(temp_row(0)),&(tempp(0)), 
					l_nnz, lu_work);


	btf_nnz(b)  = l_nnz;
	btf_work(b) = lu_work;
       
	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("blk: %d order: \n", b);
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    printf("%d, ", tempp(ii));
	  }
	#endif

				     
	//Add to the bigger perm vector
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    //printf("loc: %d val: %d \n", 
	    //ii+btf_tabs(b), tempp(ii)+btf_tabs(b));

	    p(tempp(ii)+btf_tabs(b)) = ii+btf_tabs(b);
	  }


	FREE_INT_1DARRAY(tempp);
	
      }//over all blk_tabs

    #ifdef BASKER_DEBUG_AMD_ORDER
    printf("blk amd final order\n");
    for(Int ii = 0; ii < M.ncol; ii++)
      {
	printf("%d, ", p(ii));
      }
    printf("\n");
    #endif

    FREE_INT_1DARRAY(temp_col);
    FREE_INT_1DARRAY(temp_row);
    
  }//end blk_amd()
コード例 #23
0
ファイル: shylubasker_def.hpp プロジェクト: trilinos/Trilinos
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::Factor(Int nrow, Int ncol,
	        Int nnz, Int *col_ptr, Int *row_idx, Entry *val) 
  {
    int err = 0;

    if (Options.verbose == BASKER_TRUE)
    {
      std::cout << "Basker Factor Called" << std::endl;
      std::cout << "Matrix: " << nrow << " " << ncol << " " << nnz << std::endl;
    }

    /*
       int err = A.copy_values(nrow, ncol, nnz, col_ptr, 
       row_idx, val);
    */

    if((Options.same_pattern == BASKER_TRUE) && (Options.no_pivot == BASKER_FALSE))
    {
      printf("Warning: Same Pattern will not allow pivoting\n");
      Options.no_pivot = BASKER_TRUE;
    }


    if(Options.transpose == BASKER_FALSE)
    {
      //printf("=======NO TRANS=====\n");
      //A.init_matrix("Original Matrix",
      //	      nrow, ncol, nnz, col_ptr, row_idx, val);
      //A.scol = 0;
      //A.srow = 0;
      A.copy_values(nrow, ncol, nnz, col_ptr,
          row_idx, val);
      //printf("Copy done\n");
      //printMTX("A_LOAD.mtx", A);
    }
    else
    {
      //printf("======TRANS=====\n");
      //Will transpose and put in A using little extra
      matrix_transpose(0, 
          nrow,
          0, 
          ncol,
          nnz,
          col_ptr,
          row_idx,
          val,
          A);
    }
    sort_matrix(A);
    if(Options.verbose_matrix_out == BASKER_TRUE)
    {
      printMTX("A_Factor.mtx", A);
    }

    matrix_flag = BASKER_TRUE;

    if(err == BASKER_ERROR)
    {
      return BASKER_ERROR;
    }
    //err = sfactor_copy();
    err = sfactor_copy2();
    if (Options.verbose == BASKER_TRUE)
    {
      printf("Basker Copy Structure Done \n");
    }

    //printf("Done with sfactor_copy: %d \n", err);
    if(err == BASKER_ERROR)
    {
      return BASKER_ERROR;
    }
    //printf("before notoken\n");

    //Kokkos::Impl::Timer timer;

    if(Options.incomplete == BASKER_FALSE)    
    {
      err = factor_notoken(0);
      //printf("Notoken called\n");
    }
    else
    {
      err = factor_inc_lvl(0);
    }
    if(err == BASKER_ERROR)
    {
      return BASKER_ERROR;
    }

    if(Options.verbose == BASKER_TRUE)
    {
      printf("Basker Factor Done \n");
    }

    /*
       std::cout << "Raw Factor Time: "
       << timer.seconds()
       << std::endl;
       */

    //DEBUG_PRINT();

    // NDE
    MALLOC_ENTRY_1DARRAY(x_view_ptr_copy, gn); //used in basker_solve_rhs - move alloc
    MALLOC_ENTRY_1DARRAY(y_view_ptr_copy, gm);
    MALLOC_INT_1DARRAY(perm_inv_comp_array , gm); //y
    MALLOC_INT_1DARRAY(perm_comp_array, gn); //x

    MALLOC_INT_1DARRAY(perm_comp_iworkspace_array, gn);
    MALLOC_ENTRY_1DARRAY(perm_comp_fworkspace_array, gn);
    permute_composition_for_solve();

    factor_flag = BASKER_TRUE;

    return 0;
  }//end Factor()
コード例 #24
0
ファイル: basker_nfactor.hpp プロジェクト: agrippa/Trilinos
  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::factor_notoken(Int option)
  {

    //printf("factor no token called \n");

    gn = A.ncol;
    gm = A.nrow;
    BASKER_MATRIX ATEMP;

    //Kokkos::Impl::Timer tza;
    if(Options.btf == BASKER_TRUE)
      {
	//JDB: We can change this for the new inteface
	gn = A.ncol;
	gm = A.nrow;
	ATEMP = A;
	A = BTF_A; 
      }
    //printf("Switch time: %f \n", tza.seconds());

   

    //Spit into Domain and Sep
    //----------------------Domain-------------------------//
    #ifdef BASKER_KOKKOS

    //====TIMER==
    #ifdef BASKER_TIME
    Kokkos::Impl::Timer       timer;
    #endif
    //===TIMER===

    typedef Kokkos::TeamPolicy<Exe_Space>        TeamPolicy;

    if(btf_tabs_offset != 0)
      {

	if(Options.verbose == BASKER_TRUE)
	  {
	    printf("Factoring Dom num_threads: %d \n",
		   num_threads);
	  }


	Int domain_restart = 0;
	kokkos_nfactor_domain <Int,Entry,Exe_Space>
	  domain_nfactor(this);
	Kokkos::parallel_for(TeamPolicy(num_threads,1), 
			     domain_nfactor);
	Kokkos::fence();
    

    //=====Check for error======
    while(true)
      {
	INT_1DARRAY thread_start;
	MALLOC_INT_1DARRAY(thread_start, num_threads+1);
	init_value(thread_start, num_threads+1, 
		   (Int) BASKER_MAX_IDX);
	int nt = nfactor_domain_error(thread_start);
	if((nt == BASKER_SUCCESS) ||
	   (nt == BASKER_ERROR) ||
	   (domain_restart > BASKER_RESTART))
	  {
	    break;
	  }
	else
	  {
	    domain_restart++;
	    if(Options.verbose == BASKER_TRUE)
	      {
		printf("restart \n");
	      }
	    kokkos_nfactor_domain_remalloc <Int, Entry, Exe_Space>
	      diag_nfactor_remalloc(this, thread_start);
	    Kokkos::parallel_for(TeamPolicy(num_threads,1),
				 diag_nfactor_remalloc);
	    Kokkos::fence();
	  }
      }//end while

    //====TIMER===
    #ifdef BASKER_TIME
    printf("Time DOMAIN: %f \n", timer.seconds());
    timer.reset();
    #endif
    //====TIMER====
    

    #else// else basker_kokkos
    #pragma omp parallel
    {


    }//end omp parallel
    #endif //end basker_kokkos

      }
    //-------------------End--Domian--------------------------//
    
    //printVec("domperm.csc", gpermi, A.nrow);
   
    //---------------------------Sep--------------------------//
   
    if(btf_tabs_offset != 0)
      {
    //for(Int l=1; l<=4; l++)
    for(Int l=1; l <= tree.nlvls; l++)
      {

	//#ifdef BASKER_OLD_BARRIER
	//Int lthreads = pow(2,l);
	//Int lnteams = num_threads/lthreads;
	//#else
	Int lthreads = 1;
	Int lnteams = num_threads/lthreads;
	//#endif

	Int sep_restart = 0;


	if(Options.verbose == BASKER_TRUE)
	  {
	    printf("Factoring Sep num_threads: %d %d \n",
		   lnteams, lthreads);
	  }

	#ifdef BASKER_KOKKOS
	Kokkos::Impl::Timer  timer_inner_sep;
	#ifdef BASKER_NO_LAMBDA

	//kokkos_nfactor_sep <Int, Entry, Exe_Space> 
	//sep_nfactor(this, l);

	kokkos_nfactor_sep2 <Int, Entry, Exe_Space>
	  sep_nfactor(this,l);
	
	Kokkos::parallel_for(TeamPolicy(lnteams,lthreads),
			     sep_nfactor);
	Kokkos::fence();

	//======Check for error=====
	while(true)
	  {
	    INT_1DARRAY thread_start;
	    MALLOC_INT_1DARRAY(thread_start, num_threads+1);
	    init_value(thread_start, num_threads+1,
		      (Int) BASKER_MAX_IDX);
	    int nt = nfactor_sep_error(thread_start);
	    if((nt == BASKER_SUCCESS)||
	       (nt == BASKER_ERROR) ||
	       (sep_restart > BASKER_RESTART))
	      {
		FREE_INT_1DARRAY(thread_start);
		break;
	      }
	    else
	      {
		sep_restart++;
		if (Options.verbose == BASKER_TRUE)
		  {
		    printf("restart \n");
		  }
		Kokkos::parallel_for(TeamPolicy(lnteams,lthreads),  sep_nfactor);
		Kokkos::fence();

	      }
	  }//end while-true


	#ifdef BASKER_TIME
	printf("Time INNERSEP: %d %f \n", 
	       l, timer_inner_sep.seconds());
	#endif
        #else //ELSE BASKER_NO_LAMBDA
	//Note: to be added
        #endif //end BASKER_NO_LAMBDA
	#else
	#pragma omp parallel
	{

	}//end omp parallel
	#endif
      }//end over each level

    #ifdef BASKER_TIME
    printf("Time SEP: %f \n", timer.seconds());
    #endif
      }
    
    //-------------------------End Sep----------------//


    //-------------------IF BTF-----------------------//
    if(Options.btf == BASKER_TRUE)
      {

	Int btf_restart = 0;

	if(Options.verbose == BASKER_TRUE)
	  {
	    printf("Factoring BLKs num_threads: %d \n",
		   num_threads);
	  }

	//=====Timer
	#ifdef BASKER_TIME
	Kokkos::Impl::Timer  timer_btf;
	#endif
	//====Timer
	
	//======Call diag factor====
	kokkos_nfactor_diag <Int, Entry, Exe_Space> 
	  diag_nfactor(this);
	Kokkos::parallel_for(TeamPolicy(num_threads,1),
			     diag_nfactor);
	Kokkos::fence();
	
	//=====Check for error======
	while(true)
	  {
	    INT_1DARRAY thread_start;
	    MALLOC_INT_1DARRAY(thread_start, num_threads+1);
	    init_value(thread_start, num_threads+1, 
		       (Int) BASKER_MAX_IDX);
	    int nt = nfactor_diag_error(thread_start);
	    //printf("RETURNED: %d \n", nt);
	    if((nt == BASKER_SUCCESS) || 
	       (nt == BASKER_ERROR) ||
	       (btf_restart > BASKER_RESTART))
	      {
		break;
	      }
	    else
	      {
		btf_restart++;
		if (Options.verbose == BASKER_TRUE)
		  {
		    printf("restart \n");
		  }
		kokkos_nfactor_diag_remalloc <Int, Entry, Exe_Space>
		  diag_nfactor_remalloc(this, thread_start);
		Kokkos::parallel_for(TeamPolicy(num_threads,1),
				     diag_nfactor_remalloc);
		Kokkos::fence();
	      }
	  }//end while

	//====TIMER
	#ifdef BASKER_TIME
	printf("Time BTF: %f \n", 
	       timer_btf.seconds());
	#endif
	//===TIMER

      }//end btf call

    Kokkos::Impl::Timer tzback;
     if(Options.btf == BASKER_TRUE)
      {
	A = ATEMP;
      }
     //printf("Switch back: %f \n",
     //	    tzback.seconds());
    
    return 0;
  }//end factor_notoken()
コード例 #25
0
ファイル: basker_order_btf.hpp プロジェクト: petsc/Trilinos
  BASKER_INLINE
  int Basker<Int, Entry,Exe_Space>::break_into_parts2
  (
   BASKER_MATRIX &M,
   Int           nblks,
   INT_1DARRAY   btf_tabs
   )
  {

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("break_into_parts2 called \n");
    printf("nblks: %d \n", nblks);
    #endif
    
    Options.btf = BASKER_TRUE;

    //Alg.  
    // A -> [BTF_A  BTF_B] 
    //      [0      BTF_C]
    //1. Run backward through the btf_tabs to find size C
    //2. Form A,B,C based on size in 1.

    //Short circuit, 
    //If nblks  == 1, than only BTF_A exists
    if(nblks == 1)
      {
        //#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Short Circuit part_call \n");
	//#endif
	BTF_A = A;
	//Options.btf = BASKER_FALSE;
	btf_tabs_offset = 1;
	return 0;
      }

    //Step 1.
    //Find total work estimate
    Int total_work_estimate = 0;
    for(Int b = 0; b < nblks; b++)
      {
	total_work_estimate += btf_blk_work(b);
      }
    //Set a class variable to use later
    btf_total_work = total_work_estimate;
    //printf("Total work estimate: %d \n",
    //	   total_work_estimate);
    //printf("num_threads: %d epsilon: %f \n",
    //	   num_threads, 
    //	   ((double)1/num_threads) +
    //	   ((double)BASKER_BTF_IMBALANCE));
    Int break_size    = ceil((double)total_work_estimate*(
		((double)1/num_threads) + 
		((double)BASKER_BTF_IMBALANCE)));

    printf("Break size: %d \n", break_size);

    Int t_size            = 0;
    Int scol              = M.ncol;
    Int blk_idx           = nblks;
    BASKER_BOOL  move_fwd = BASKER_TRUE;

    while(move_fwd==BASKER_TRUE)
      {

	//printf("------TEST blk_idx: %d \n",
	// blk_idx);
	Int blk_work = btf_blk_work(blk_idx-1);
	Int blk_size  = btf_tabs(blk_idx) - 
	                        btf_tabs(blk_idx-1);


	#ifdef BASKER_DEBUG_ORDER_BTF
	printf(" \n move_fwd loop \n");
	BASKER_ASSERT(blk_idx>=0, "btf blk idx off");
	BASKER_ASSERT(blk_work>=0, "btk_work wrong");
	BASKER_ASSERT(blk_size>0, "btf blk size wrong");
	printf("blk_idx: %d blk_work: %d break_size: %d \n",
	       blk_idx, blk_work, break_size);
	#endif

	//Should be end
	//if(((blk_work < break_size) ||
	//  (blk_size < BASKER_BTF_SMALL)) &&
	//  (blk_idx > 1))
       
	//Continue to be in btf
	if(((blk_work < break_size) &&
	    (blk_idx > 1)))
	  {
	    #ifdef BASKER_DEBUG_ORDER_BTF
	    printf("first choice \n");
	    #endif

	    t_size = t_size+blk_size;
	    blk_idx = blk_idx-1;
	    scol   = btf_tabs[blk_idx];
	   
	  }
	//break due to size
	else if(blk_work >= break_size)
	  {
	    printf("break due to size\n");
	    move_fwd = BASKER_FALSE;
	  }
	//break due to end
	else if(blk_idx == 1)
	  {
	    printf("break last blk\n");
	    blk_idx = 0;
	    t_size = t_size + blk_size;
	    scol = btf_tabs[blk_idx];	
	    move_fwd = BASKER_FALSE;

	  }
	//should not be called
	else
	  {
	    BASKER_ASSERT(1==0, "btf order break");
	    move_fwd = BASKER_FALSE;
	  }
      }//end while(move_fwd)

    //#ifdef BASKER_DEBUG_ORDER_BTF
    printf("Done finding BTF2 cut.  Cut size: %d scol: %d \n",
	   t_size, scol);
    printf("Done finding BTF2 cut. blk_idx: %d \n", 
	   blk_idx);
    //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n");
    
    BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n");
    //#endif
    
    //Comeback and change
    btf_tabs_offset = blk_idx;


    //2. Move into Blocks

    if(btf_tabs_offset != 0)
      {
    //--Move A into BTF_A;
    BTF_A.set_shape(0, scol, 0, scol);
    BTF_A.nnz = M.col_ptr(scol);
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Init BTF_A. ncol: %d nnz: %d \n",
	   scol, BTF_A.nnz);
    #endif

    if(BTF_A.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr");
	MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1);
	BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz");
	MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz);
	BTF_A.fill();
      }
    
    Int annz = 0;
    for(Int k = 0; k < scol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("copy column: %d into A_BTF, [%d %d] \n", 
	       k, M.col_ptr(k), M.col_ptr(k+1));
	#endif
	
	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    //printf("annz: %d i: %d \n", annz, i);
	    BTF_A.row_idx(annz) = M.row_idx(i);
	    BTF_A.val(annz)     = M.val(i);
	    annz++;
	  }
	
	BTF_A.col_ptr(k+1) = annz;
      }

      }//no A
 
    //Fill in B and C at the same time
    INT_1DARRAY cws;
    BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC");
    MALLOC_INT_1DARRAY(cws, M.ncol-scol+1);
    init_value(cws, M.ncol-scol+1, (Int)M.ncol);
    BTF_B.set_shape(0   , scol,
		    scol, M.ncol-scol);
    BTF_C.set_shape(scol, M.ncol-scol,
		    scol, M.ncol-scol);

    
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Set Shape BTF_B: %d %d %d %d \n",
	   BTF_B.srow, BTF_B.nrow,
	   BTF_B.scol, BTF_B.ncol);
    printf("Set Shape BTF_C: %d %d %d %d \n",
	   BTF_C.srow, BTF_C.nrow,
	   BTF_C.scol, BTF_C.nrow);
    #endif
    
    //Scan and find nnz
    //We can do this much better!!!!
    Int bnnz = 0;
    Int cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Lower, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		cnnz++;
	      }
	  }//over all nnz in k
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF_B nnz: %d \n", bnnz);
    printf("BTF_C nnz: %d \n", cnnz);
    #endif
    
    BTF_B.nnz = bnnz;
    BTF_C.nnz = cnnz;
    
    //Malloc need space
    if((BTF_B.v_fill == BASKER_FALSE) &&
       (BTF_B.nnz > 0))
      //if(BTF_B.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol");
	MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1);
	BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz");
	MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz);
	BTF_B.fill();
      }
    if(BTF_C.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol");
	MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1);
	BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz");
	MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz);
	BTF_C.fill();
      }

    //scan again (Very bad!!!)
    bnnz = 0;
    cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx[i]);
		#endif
		
		BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit");
		//BTF_B.row_idx[bnnz] = M.row_idx[i];
		//Note: do not offset because B srow = 0
		BTF_B.row_idx(bnnz) = M.row_idx(i);
		BTF_B.val(bnnz)     = M.val(i);
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz Lower,k: %d  %d %d %f \n",
		       k, scol, M.row_idx[i], 
		       M.val(i));
		#endif
		//BTF_C.row_idx[cnnz] = M.row_idx[i];
		BTF_C.row_idx(cnnz) = M.row_idx(i)-scol;
		BTF_C.val(cnnz)     = M.val(i);
		cnnz++;
	      }
	  }//over all nnz in k
	if(BTF_B.nnz > 0)
	  {
	    BTF_B.col_ptr(k-scol+1) = bnnz;
	  }
	BTF_C.col_ptr(k-scol+1) = cnnz;
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("After BTF_B nnz: %d \n", bnnz);
    printf("After BTF_C nnz: %d \n", cnnz);
    #endif

    //printf("\n\n");
    //printf("DEBUG\n");
    //BTF_C.print();
    //printf("\n\n");
    
    return 0;

  }//end break_into_parts2 (based on imbalance)