예제 #1
0
  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::SetThreads(Int nthreads)
  {
    //Need to test if power of 2.
    if((nthreads != 1) && (nthreads%2 != 0))
    {
      BASKER_ASSERT(0==1, "Number of thread error");
      //Set default 1
      num_threads = 1;
      return BASKER_ERROR;
    }

    //Next test if Kokkos has that many threads!
    //This is a common mistake in mpi-based apps
#ifdef KOKKOS_HAVE_OPENMP
    int check_value = Kokkos::OpenMP::max_hardware_threads();
    if(nthreads > check_value)
    {
      BASKER_ASSERT(0==1, "Number of thread not aval in Kokkos");
      num_threads =  1;
      return BASKER_ERROR;
    }
#else
    nthreads = 1;
#endif

    num_threads = nthreads;
    return BASKER_SUCCESS;
  }//end SetThreads()
예제 #2
0
  BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::upper_tri_solve
  (
   BASKER_MATRIX &M,
   ENTRY_1DARRAY x,
   ENTRY_1DARRAY y
   )
  {
    const Int bcol = M.scol;
    const Int brow = M.srow;
    
    //printf("Upper Tri Solve, scol: %d ncol: %d \n",
    //	   M.scol, M.ncol);

    //end over all columns
    for(Int k = M.ncol; k >= 1; k--)
      {
	
	//printf("Upper Tri Solve: k: %d \n", k);

	#ifdef BASKER_DEBUG_SOLVE_RHS
	BASKER_ASSERT(M.val[M.col_ptr[k]-1]!=0.0,"UpperPivot\n");
	#endif

	//if(M.val[M.col_ptr[k]-1]==0.0)
	if(M.val(M.col_ptr(k)-1)==0)
	  {
	    printf("Upper pivot: %d %f \n",
		   M.row_idx[M.col_ptr[k]-1],
		   M.val[M.col_ptr[k]-1]);
	    return -1;
	  }


	//printf("TEST, k: %d  out: %f in: %f pivot: %f\n", 
	//   k, y[k+bcol-1], x[k+bcol-1], 
	//   M.val[M.col_ptr[k]-1]);

	//Comeback and do with and entry divide
	//y[k+bcol-1] = x[k+bcol-1] / M.val[M.col_ptr[k]-1];
	y(k+brow-1)  =  x(k+bcol-1) / M.val(M.col_ptr(k)-1);
	
	
	//for(Int i = M.col_ptr[k]-2; i >= M.col_ptr[k-1]; i--)
	for(Int i = M.col_ptr(k)-2; i >= M.col_ptr(k-1); --i)
	  {
	    //Int j = M.row_idx[i];
	    const Int j = M.row_idx(i);

	    // printf("Updating row_idx: %d %f %f \n",
	    //	   j, x[j], M.val[i]*y[k+bcol-1]);

	    //x[j] -=  M.val[i]*y[k+bcol-1];
	    x(j+brow) -= M.val(i) * y(k+bcol-1);
	  }

      }//end over all columns

    return 0;
  }//end upper_tri_solve
예제 #3
0
  BASKER_INLINE
  void BaskerMatrix<Int,Entry,Exe_Space>::malloc_union_bit()
  {

    BASKER_ASSERT(nrow > 0, "matrix_malloc_union_bit");
    MALLOC_BOOL_1DARRAY(union_bit, nrow);

  }//end malloc_union_bit
예제 #4
0
 BASKER_INLINE
 void BaskerMatrix<Int, Entry, Exe_Space>::init_col()
 {
   BASKER_ASSERT(ncol >= 0, "INIT_COL, ncol > 0");
   MALLOC_INT_1DARRAY(col_ptr, ncol+1);
   for(Int i = 0; i < ncol+1; ++i)
   {
     col_ptr(i) = (Int) BASKER_MAX_IDX;
   }
 }//end init_col()
예제 #5
0
  BASKER_INLINE
  void BaskerMatrix<Int,Entry,Exe_Space>::malloc_perm(Int n)
  {
    BASKER_ASSERT(n > 0, "matrix malloc_perm");
    MALLOC_INT_1DARRAY(lpinv,n);    
 
    //Fix later.  //NDE determine what the issue is...
    init_perm();

  }//end init_perm(Int)
예제 #6
0
  BASKER_INLINE
  void BaskerMatrix<Int, Entry, Exe_Space>::init_vectors
  (
   Int _m, Int _n, Int _nnz
   )
  {
    nrow = _m;
    ncol = _n;
    nnz  = _nnz;
    mnnz = _nnz;
    
    if(ncol >= 0)
      {
	BASKER_ASSERT((ncol+1)>0, "matrix init_vector ncol");
	MALLOC_INT_1DARRAY(col_ptr,ncol+1);
      }
    if(nnz > 0)
      {
	BASKER_ASSERT(nnz > 0, "matrix init_vector nnz");
	MALLOC_INT_1DARRAY(row_idx,nnz);
	MALLOC_ENTRY_1DARRAY(val,nnz);
	#ifdef BASKER_INC_LVL
	MALLOC_INT_1DARRAY(inc_lvl, nnz);
	#endif

      }
    else if(nnz==0)
      {

	BASKER_ASSERT((nnz+1)>0, "nnz+1 init_vector ");
	MALLOC_INT_1DARRAY(row_idx, nnz+1);
	row_idx(0) = (Int) 0;
	MALLOC_ENTRY_1DARRAY(val, nnz+1);
	val(0) = (Entry) 0;
	#ifdef BASKER_INC_LVL
	MALLOC_INT_1DARRAY(inc_lvl, nnz+1);
	#endif

      }

  }//end init_vectors(Int, Int, Int)
예제 #7
0
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::find_btf(BASKER_MATRIX &M)
  {
    Int          nblks = 0;

    strong_component(M,nblks,order_btf_array,btf_tabs);

    btf_flag = BASKER_TRUE;

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF nblks returned: %d \n", nblks);
    BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");
    #endif

    #ifdef BASKER_DEBUG_ORDER_BTF
    if(nblks<2)
      {
	printf("BTF did not find enough blks\n");
      }
    #endif


    #ifdef BASKER_DEBUG_ORDER_BTF
    /*
    printf("\nBTF perm: \n");
    for(Int i=0; i <M.nrow; i++)
      {
	printf("%d, ", order_btf_array(i));
	//printf("%d, ", btf_perm(i));
      }
    */
    printf("\n\nBTF tabs: \n");
    for(Int i=0; i < nblks+1; i++)
      {
	printf("%d, ", btf_tabs(i));
      }
    printf("\n");
    #endif

    permute_col(M, order_btf_array);
    permute_row(M, order_btf_array);

    break_into_parts(M, nblks, btf_tabs);

    btf_nblks = nblks;

    //#ifdef BASKER_DEBUG_ORDER_BTF
    printf("------------BTF CUT: %d --------------\n", 
	   btf_tabs(btf_tabs_offset));
    //#endif

    return 0;
  }//end find BTF
예제 #8
0
 void BaskerMatrix<Int,Entry,Exe_Space>::init_pend()
 {
   if(ncol > 0)
   {
     BASKER_ASSERT((ncol+1)>0, "matrix init_pend")
     MALLOC_INT_1DARRAY(pend,ncol+1);
     for(Int i =0; i < ncol+1; ++i)
     {
       pend(i) = BASKER_MAX_IDX;
     }
   }
 }//end init_pend()
예제 #9
0
  BASKER_FINLINE
  void Basker<Int, Entry,Exe_Space>::csymamd_order
  (
   BASKER_MATRIX &M,
   INT_1DARRAY p,
   INT_1DARRAY cmember
   )
  {    

    amd_flag = BASKER_TRUE;

    //Debug,
    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("cmember: \n");
    for(Int i = 0; i < M.ncol; ++i)
      {
	printf("(%d, %d), ", i, cmember(i));
      }
    printf("\n"); 
    #endif
    
    //If doing  iluk, we will not want this.
    //See amd blk notes
    if(Options.incomplete == BASKER_TRUE)
      {
	for(Int i = 0; i < M.ncol; i++)
	  {
	    p(i) = i;
	  }
	//printf("Short csym \n");
	return;
      }



    INT_1DARRAY temp_p;
    BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough");
    MALLOC_INT_1DARRAY(temp_p, M.ncol+1);
    init_value(temp_p, M.ncol+1, (Int) 0);
    
    my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)),
		     &(temp_p(0)), &(cmember(0)));


    for(Int i = 0; i < M.ncol; ++i)
      {
	p(temp_p(i)) = i;
      }


  }//end csymamd()
예제 #10
0
  BASKER_FINLINE
  void Basker<Int, Entry,Exe_Space>::csymamd_order
  (
   BASKER_MATRIX &M,
   INT_1DARRAY p,
   INT_1DARRAY cmember
   )
  {    

    amd_flag = BASKER_TRUE;

    //Debug,
    #ifdef BASKER_DEBUG_ORDER_AMD
    printf("cmember: \n");
    for(Int i = 0; i < M.ncol; ++i)
      {
	printf("(%d, %d), ", i, cmember(i));
      }
    printf("\n"); 
    #endif



    INT_1DARRAY temp_p;
    BASKER_ASSERT(M.ncol > 0, "AMD perm not long enough");
    MALLOC_INT_1DARRAY(temp_p, M.ncol+1);
    init_value(temp_p, M.ncol+1, (Int) 0);
    
    my_amesos_csymamd(M.ncol, &(M.col_ptr(0)), &(M.row_idx(0)),
		     &(temp_p(0)), &(cmember(0)));


    for(Int i = 0; i < M.ncol; ++i)
      {
	p(temp_p(i)) = i;
      }


  }//end csymamd()
예제 #11
0
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::nfactor_domain_error
  (
   INT_1DARRAY threads_start
   )
  {
    Int nthread_remalloc = 0;
    for(Int ti = 0; ti < num_threads; ti++)
      {

	//Note: jdb we can make this into a switch
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_NOERROR)
	  {
	    threads_start(ti) = BASKER_MAX_IDX;
	    continue;
	  }//end if NOERROR

	if(thread_array(ti).error_type ==
	   BASKER_ERROR_SINGULAR)
	  {
	    printf("ERROR THREAD: %d DOMBLK SINGULAR: %d\n",
		   ti,
		   thread_array(ti).error_blk);
	    return BASKER_ERROR;
	  }//end if SINGULAR
	
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_NOMALLOC)
	  {
	    printf("ERROR THREADS: %d DOMBLK NOMALLOC: %d\n",
		   ti,
		   thread_array(ti).error_blk);
	    return BASKER_ERROR;
	  }//end if NOMALLOC
	
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_REMALLOC)
	  {

	    BASKER_ASSERT(thread_array(ti).error_blk > 0,
			  "nfactor_dom_error error_blk");

	    printf("ERROR THREADS: %d DOMBLK MALLOC: %d \n",
		   ti,
		   thread_array(ti).error_blk);
	    

	    //Resize L
	    BASKER_MATRIX &L = LL(thread_array(ti).error_blk)(0);
	    REALLOC_INT_1DARRAY(L.row_idx,
			       L.nnz,
			       thread_array(ti).error_info);
	    REALLOC_ENTRY_1DARRAY(L.val,
				 L.nnz,
				 thread_array(ti).error_info);
	    L.nnz = thread_array(ti).error_info;

	    //clean up workspace
	    if(L.w_fill == BASKER_TRUE)
	      {
		//Clear workspace
		for(Int i = 0; i < L.iws_size*L.iws_mult; ++i)
		  {
		    L.iws(i) = (Int) 0;
		  }
		for(Int i = 0; i < L.ews_size*L.ews_mult; ++i)
		  {
		    L.ews(i) = (Entry) 0;
		  }
		//Clear perm
		for(Int i = L.srow; i < L.srow+L.nrow; ++i)
		  {
		    gperm(i) = BASKER_MAX_IDX;
		  }
	      }
	    
	    //Resize U
	    BASKER_MATRIX &U = LU(thread_array(ti).error_blk)(0);
	    REALLOC_INT_1DARRAY(U.row_idx,
			       U.nnz,
			       thread_array(ti).error_info);
	    REALLOC_ENTRY_1DARRAY(U.val,
				 U.nnz,
				 thread_array(ti).error_info);
	    U.nnz = thread_array(ti).error_info;
	    
	    threads_start(ti) = thread_array(ti).error_blk;
	

	    //Reset 
	    thread_array(ti).error_type = BASKER_ERROR_NOERROR;
	    thread_array(ti).error_blk  = BASKER_MAX_IDX;
	    thread_array(ti).error_info = BASKER_MAX_IDX;

	    nthread_remalloc++;
    
	  }//if REMALLOC

      }//for all threads
    
    if(nthread_remalloc == 0)
      {
	return BASKER_SUCCESS;
      }
    else
      {
	return nthread_remalloc;
      }

    //Should never be here
    BASKER_ASSERT(0==1, "nfactor_diag_error, should never");
    return BASKER_SUCCESS;
  }//end nfactor_domain_error
예제 #12
0
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::nfactor_diag_error
  (
   INT_1DARRAY threads_start
   )
  {
    Int nthread_remalloc = 0;
    for(Int ti = 0; ti < num_threads; ti++)
      {

	//Note: jdb we can make this into a switch
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_NOERROR)
	  {
	    threads_start(ti) = BASKER_MAX_IDX;
	    continue;
	  }//end if NOERROR

	if(thread_array(ti).error_type ==
	   BASKER_ERROR_SINGULAR)
	  {
	    printf("ERROR THREAD: %d DIAGBLK SINGULAR: %d\n",
		   ti,
		   thread_array(ti).error_blk);
	    return BASKER_ERROR;
	  }//end if SINGULAR
	
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_NOMALLOC)
	  {
	    printf("ERROR THREADS: %d DIAGBLK NOMALLOC: %d\n",
		   ti,
		   thread_array(ti).error_blk);
	    return BASKER_ERROR;
	  }//end if NOMALLOC
	
	if(thread_array(ti).error_type ==
	   BASKER_ERROR_REMALLOC)
	  {

	    BASKER_ASSERT(thread_array(ti).error_blk > 0,
			  "nfactor_diag_error error_blk");

	    printf("ERROR THREADS: %d DIAGBLK MALLOC: %d \n",
		   ti,
		   thread_array(ti).error_blk);
	    
	    	    //Clean the workspace
	    printf("test: %d %d \n",
		   thread_array(ti).iws_size*thread_array(ti).iws_mult,
		   thread_array(ti).ews_size*thread_array(ti).ews_mult);

	    for(Int i = 0; 
		i < thread_array(ti).iws_size*thread_array(ti).iws_mult;
		i++)
	      {
		thread_array(ti).iws(i) = (Int) 0;
	      }
	    for(Int i = 0;
		i < thread_array(ti).ews_size*thread_array(ti).ews_mult;
		i++)
	      {
		thread_array(ti).ews(i) = (Entry) 0;
	      }

	   

	    //Resize L
	    BASKER_MATRIX &L = LBTF(thread_array(ti).error_blk);
	    REALLOC_INT_1DARRAY(L.row_idx,
			       L.nnz,
			       thread_array(ti).error_info);
	    REALLOC_ENTRY_1DARRAY(L.val,
				 L.nnz,
				 thread_array(ti).error_info);
	    L.nnz = thread_array(ti).error_info;
	    for(Int i = 0; i < L.ncol; i++)
	      {
		L.col_ptr(i) = 0;
	      }

	    for(Int i = L.srow; i < (L.srow+L.nrow); i++)
	      {
		gperm(i) = BASKER_MAX_IDX;
	      }

	    //Resize U
	    BASKER_MATRIX &U = UBTF(thread_array(ti).error_blk);
	    REALLOC_INT_1DARRAY(U.row_idx,
			       U.nnz,
			       thread_array(ti).error_info);
	    REALLOC_ENTRY_1DARRAY(U.val,
				 U.nnz,
				 thread_array(ti).error_info);
	    U.nnz = thread_array(ti).error_info;
	    for(Int i = 0; i < U.ncol; i++)
	      {
		U.col_ptr(i) = 0;
	      }

	    
	    printf("Setting thread start(%d) %d \n",
		   ti, thread_array(ti).error_blk);

	    threads_start(ti) = thread_array(ti).error_blk;
	

	    //Reset 
	    thread_array(ti).error_type = BASKER_ERROR_NOERROR;
	    thread_array(ti).error_blk  = BASKER_MAX_IDX;
	    thread_array(ti).error_info = BASKER_MAX_IDX;

	    nthread_remalloc++;
    
	  }//if REMALLOC

      }//for all threads
    
    if(nthread_remalloc == 0)
      {
	return BASKER_SUCCESS;
      }
    else
      {
	return nthread_remalloc;
      }

    //Should never be here
    BASKER_ASSERT(0==1, "nfactor_diag_error, should never");
    return BASKER_SUCCESS;
  }//end nfactor_diag_error
예제 #13
0
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::find_btf2
  (
   BASKER_MATRIX &M
  )
  {
    Int          nblks = 0;

    strong_component(M,nblks,order_btf_array,btf_tabs);

    btf_nblks = nblks;

    btf_flag = BASKER_TRUE;

    //#ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF nblks returned: %d \n", nblks);
    BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");
    //#endif

    #ifdef BASKER_DEBUG_ORDER_BTF
    if(nblks<2)
      {
	printf("BTF did not find enough blks\n");
      }
    #endif


    #ifdef BASKER_DEBUG_ORDER_BTF
    /*
    printf("\nBTF perm: \n");
    for(Int i=0; i <M.nrow; i++)
      {
	printf("%d, ", order_btf_array(i));
	//printf("%d, ", btf_perm(i));
      }
    */
    printf("num_threads: %d \n", num_threads);
    printf("\n\nBTF tabs: \n");
    for(Int i=0; i < nblks+1; i++)
      {
	printf("%d, ", btf_tabs(i));
      }
    printf("\n");
    #endif

    permute_col(M, order_btf_array);
    permute_row(M, order_btf_array);

    MALLOC_INT_1DARRAY(order_blk_amd_array, M.ncol);
    init_value(order_blk_amd_array, M.ncol, (Int)0);
    MALLOC_INT_1DARRAY(btf_blk_nnz, nblks+1);
    init_value(btf_blk_nnz, nblks+1, (Int) 0);
    MALLOC_INT_1DARRAY(btf_blk_work, nblks+1);
    init_value(btf_blk_work, nblks+1, (Int) 0);


    //Find AMD blk ordering, get nnz, and get work
    btf_blk_amd( M, order_blk_amd_array,
		 btf_blk_nnz, btf_blk_work);


    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("blk_perm:\n");
    for(Int i = 0; i < M.ncol; i++)
      {
	printf("(%d,%d) ", i, order_blk_amd_array(i));
      }
    printf("\n");
    printf("id/blk_size/blk_nnz/work: \n");
    for(Int i = 0; i < nblks; i++)
      {
	printf("(%d, %d, %d, %d) ", i,
	       btf_tabs(i+1)-btf_tabs(i), 
	       btf_blk_nnz(i), btf_blk_work(i));
      }
    printf("\n");
    #endif

    //printMTX("A_BEFORE.mtx", M);
    //printVec("AMD.txt", order_blk_amd_array, M.ncol);
    

    permute_col(M, order_blk_amd_array);
    permute_row(M, order_blk_amd_array);
    sort_matrix(M);

    //changed col to row, error.
    //print to see issue
    //printMTX("A_AMD.mtx", M);
    
       
    break_into_parts2(M, nblks, btf_tabs);

    //find schedule
    find_btf_schedule(M, nblks, btf_tabs);


    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("------------BTF CUT: %d --------------\n", 
	   btf_tabs(btf_tabs_offset));
    #endif

    return 0;
  }//end find BTF(nnz)
예제 #14
0
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::lower_tri_solve
  (
   BASKER_MATRIX &M,
   ENTRY_1DARRAY x, 
   ENTRY_1DARRAY y
   )
  {
    const Int bcol = M.scol;
    const Int brow = M.scol;
    
    //M.info();

    //printf("Lower-Tri-Solve-Test, [%d %d %d %d] \n",
    //	  M.srow, M.nrow, M.scol, M.ncol);

    for(Int k = 0; k < M.ncol; ++k)
      {
	//Test if zero pivot value
	#ifdef BASKER_DEBUG_SOLVE_RHS
	BASKER_ASSERT(M.val[M.col_ptr[k]]!=0.0, "LOWER PIVOT 0");
	#endif
	
	if(M.val[M.col_ptr[k]] == 0.0)
	  {
	    printf("Lower Pivot: %d %f \n", 
		   M.row_idx[M.col_ptr[k]],
		   M.val[M.col_ptr[k]]);
	    return -1;
	  }

	//printf("Lower tri.  k: %d out: %f in: %f piv: %f \n",
	//   k+bcol, y[k+bcol], x[k+bcol], M.val[M.col_ptr[k]]);

	//Replace with Entry divide in future
	//y[k+bcol] = x[k+bcol] / M.val[M.col_ptr[k]];
	y(k+brow) = x(k+bcol) / M.val(M.col_ptr(k));
	
	//for(Int i = M.col_ptr[k]+1; i < M.col_ptr[k+1]; i++)
	for(Int i = M.col_ptr(k)+1; i < M.col_ptr(k+1); ++i)
	  {
	   

	    //Int j = gperm[M.row_idx[i]];
	    const Int j = gperm(M.row_idx(i)+brow);
	    
	    #ifdef BASKER_DEBUG_SOLVE_RHS
	    BASKER_ASSERT(j != BASKER_MAX_IDX,"Using nonperm\n");
	    #endif
	    
	    //x[j] -= M.val[i]*y[k+bcol];
	    //printf("gperm: %d x(%d) y(i)  \n",
	    //	   M.row_idx(i) + brow, j, k+bcol);
	    x(j) -= M.val(i)*y(k+bcol);

	  }//over all nnz in a column

      }//over each column

    return 0;

  }//end lower_tri_solve
예제 #15
0
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::test_solve()
  {
    ENTRY_1DARRAY  x_known;
    ENTRY_1DARRAY  x;
    ENTRY_1DARRAY  y;


    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("test_solve called \n");
    printf("Global pivot permuation\n");
    printVec(gperm, gn);
    printf("\n");
    printf("Global pivot permutation inverse\n");
    printVec(gpermi, gn);
    printf("\n");
    #endif


    BASKER_ASSERT(gn > 0, "solve testsolve gn");
    MALLOC_ENTRY_1DARRAY(x_known, gn);
    init_value(x_known, gn , (Entry)1.0);


    //temp
    for(Int i = 0; i < gn; i++)
      {
	//x_known(i) = (Entry)(i+1);
        x_known(i) = (Entry) 1.0;
      }
    //JDB: used for other test
    //permute(x_known, order_csym_array, gn);



    MALLOC_ENTRY_1DARRAY(x, gn);
    init_value(x, gn, (Entry) 0.0);
    BASKER_ASSERT(gm > 0, "solve testsolve gm");
    MALLOC_ENTRY_1DARRAY(y, gm);
    init_value(y, gm, (Entry) 0.0);
    
    if(btf_nblks > 0)
      {
	sort_matrix(BTF_C);
	//printMTX("C_BEFORE_SOLVE.mtx", BTF_C);
      }

    if(Options.btf == BASKER_TRUE)
      {
      
	//printf("btf_tabs_offset: %d ", btf_tabs_offset);
        //printf("btf_nblks: %d \n", btf_nblks);
	if(btf_tabs_offset != 0)
	  {
            //printf("BTF_A spmv\n");
	    spmv(BTF_A, x_known,y);
            if(btf_nblks> 1)
              {
                //printf("btf_B spmv \n");
                spmv(BTF_B, x_known, y);
              }
          }
        if(btf_nblks > 1)
          {
            
            //printf("btf_c spmv \n");
            spmv(BTF_C, x_known, y);
          }
	//return -1;
      }
    else
      {
        //printf("other\n");
	//spmv(BTF_A, x_known,y);
      }
    
    

    
    
    //printf("\n Before Test Points \n");
    //printf("i: %d x: %f y: %f \n", 0, x_known(0), y(0));
    //if(gn > 24)
    //  {
    //   printf("i: %d x: %f y: %f \n", 24, x_known(24), y(24));
    //  }
    
    //pivot permuation
    //printVec("gperm.csc", gpermi, gn);

    for(Int i = 0; i < gn; i++)
      {
        x(gpermi(i)) = y(i);
      }
    for(Int i = 0; i < gn; i++)
      {
        y(i) = x(i);
        x(i) = 0;
      }




    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("\n\n");
    //printf("Known Solution: \n");
    //for(Int i = 0; i < gn; i++)
    //  {
    //	printf("%f, " , x_known(i));
    //  }
    printf("\n\n");
    printf("RHS: \n");
    for(Int i =0; i < gm; i++)
      {
	printf("%d %f,\n ", i, y(i)); 
      }
    printf("\n\n");
    #endif

    

    if(Options.btf == BASKER_FALSE)
      {
        
        //printf("before serial solve\n");
	if(btf_tabs_offset != 0)
	  {
	    serial_solve(y,x);
	  }
	//printf("After serial solve\n");
	//printf("i: %d x: %f y: %f \n", 0, x(0), y(0));
	//printf("i: %d x: %f y: %f \n", 24, x(24), y(24));
      }
    else
      {
	//A\y -> y
	//serial_btf_solve(y,x);
        //printf("before btf serial solve\n");
	serial_btf_solve(y,x);
        
	//printf("After btf solve\n");
	//printf("i: %d x: %f y: %f \n", 0, x(0), y(0));
	//printf("i: %d x: %f y: %f \n", 24, x(24), y(24));
      }


    Entry diff =0.0;
  
    for(Int i = 0; i < gn; i++)
      {
	diff += (x_known(i) - x(i));
      }
    diff = diff/(Entry) gn;

    #ifdef BASKER_DEBUG_SOLVE_RHS
    printf("\n\n");
    printf("Solve Compare: \n");
    for(Int i = 0; i < gn; i++)
      {
	printf("%d %f %f \n", 
	       i, x_known(i), x(i));
 
      }
    printf("\n\n");
    #endif



    printf("\n Test Points \n");
    printf("i: %d x: %f %f \n", 0, x_known(0), x(0));
    if(gn > 24)
      {
        printf("i: %d x: %f %f \n", 10, x_known(10), x(10));
        printf("i: %d x: %f %f \n", 24, x_known(24), x(24));
      }
    printf("\n");
    printf("TEST_SOLVE: ||x-x||/||x| = %e", diff);
    printf("\n");

    if((diff > -1e-2) && (diff < 1e-2))
      {
        printf("TEST PASSED \n");
      }  

    return 0;
  }//end test_solve
예제 #16
0
  BASKER_INLINE
  void BaskerMatrix<Int,Entry,Exe_Space>::convert2D
  (
   BASKER_MATRIX &M,
   BASKER_BOOL   alloc,
   Int kid
  )
  {
    if(nnz == 0)
    {
      for(Int i = 0; i < ncol+1; i++)
      {
        col_ptr(i) = 0;
      }

      MALLOC_INT_1DARRAY(row_idx, 1);
      row_idx(0) = (Int) 0;
      MALLOC_ENTRY_1DARRAY(val, 1);
      val(0) = (Entry) 0;
      return;
    }

    //info();
    //We could check some flag ??
    //We assume a pre-scan has already happened
    if(alloc == BASKER_TRUE)
    {
      //printf("ALLOC\n");
      if(nnz > 0)
      {
        BASKER_ASSERT(nnz > 0, "matrix row nnz 2");
        MALLOC_INT_1DARRAY(row_idx, nnz);
      }
      else if(nnz ==0)
      {
        BASKER_ASSERT((nnz+1)>0, "matrix row nnz 3");
        MALLOC_INT_1DARRAY(row_idx, nnz+1);
      }
    }
    //init_value(row_idx, nnz, (Int) 0);
    //printf("clear row: %d \n", nnz);
    for(Int i = 0; i < nnz; ++i)
    {
      //printf("clear row_idx(%d) \n", i);
      row_idx(i) = 0;
    }

    if(alloc == BASKER_TRUE)
    {
      if(nnz > 0)
      {
        BASKER_ASSERT(nnz > 0, "matrix nnz 4");
        MALLOC_ENTRY_1DARRAY(val, nnz);
      }
      else if(nnz == 0)
      {
        BASKER_ASSERT((nnz+1) > 0, "matrix nnz 5");
        MALLOC_ENTRY_1DARRAY(val, nnz+1);
      }
    }
    //init_value(val, nnz, (Entry) 0);
    for(Int i = 0; i < nnz; ++i)
    {
      val(i) = 0;
    }

    Int temp_count = 0;

    for(Int k = scol; k < scol+ncol; ++k)
    {
      //note col_ptr[k-scol] contains the starting index
      if(col_ptr(k-scol) == BASKER_MAX_IDX)
      {
        col_ptr(k-scol) = temp_count;
        //printf("continue called, k: %d  \n", k);
        continue;
      }

      for(Int i = col_ptr(k-scol); i < M.col_ptr(k+1); i++)
      {
        Int j = M.row_idx(i);
        //printf("i: %d j:%d \n", i,j);
        if(j >= srow+nrow)
        {
          break;
        }
        //printf("writing row_dix: %d i: %d  val: %d nnz: %d srow: %d nrow: %d \n",
        //	   temp_count, i, j, nnz, 
        //	   srow, nrow);
        //BASKER_ASSERT(temp_count < nnz, "2DConvert, too many values");

        //row_idx[temp_count] = j;

        if(j-srow <0)
        {
          std::cout << "kid: " << kid 
                    << " j: " << j 
                    << " srow: " << srow 
                    << " k: " << k
                    << " idx: " << i
                    << std::endl;
          BASKER_ASSERT(0==1, "j-srow NO");
        }


        row_idx(temp_count) = j-srow;
        val(temp_count) = M.val(i);
        temp_count++;
      }
      col_ptr(k-scol) = temp_count;
    }

    //col_ptr[0] = 0;

    //NO!!1
    //Slide over the col counts
    for(Int i = ncol; i > 0; i--)
    {
      col_ptr(i) = col_ptr(i-1);
    }
    col_ptr(0) = (Int) 0;

    //info();
    //print();

  }//end convert2d(Matrix)
예제 #17
0
    static 
    inline
    int my_strong_component 
    (   
     long           &n,
     long           *col_ptr,
     long          *row_idx,
     long           &nblks,
     long           *perm,
     long           *perm_in,
     long          *CC
        )
    {
      typedef long  l_Int;
      
      //l_Int p[n]; //output row_per
      l_Int *p = new l_Int[n];
      //l_Int r[n+1]; //comp_tabs
      l_Int *r = new l_Int[n+1];
      //We will want to add option to use q in the future
      
      //l_Int work[n*4];
      l_Int *work = new l_Int[n*4];
      
      //printf("before amesos call \n");
      /*
        nblks = amesos_btf_l_strongcomp(M.ncol,&(M.col_ptr[0]),
        &(M.row_idx[0]), 
        &(perm_in[0]), p, r, work);
      */
      nblks = amesos_btf_l_strongcomp(n,
                                      col_ptr,
                                      row_idx, 
                                      perm_in, p, r, work);
      //printf("after amesos call \n");
      

      
#ifdef BASKER_DEBUG_ORDER_BTF
      printf("\nBTF perm: \n");
      for(Int i=0; i <n; i++)
      {
        printf("%d, ", p[i]);
      }

      printf("\n\nBTF tabs: <right> \n");
      for(l_Int i=0; i < nblks+1; i++)
      {
        printf("%d, ", r[i]);
      }
      printf("\n");
#endif

    BASKER_ASSERT(n > 0, "M.nrow btf");
    //MALLOC_INT_1DARRAY(perm,M.nrow);
    for(l_Int i = 0; i < n; i++)
    {
      perm[p[i]] = i;
    }
    BASKER_ASSERT((nblks+1) > 0, "nblks+1 btf");
    //MALLOC_INT_1DARRAY(CC, nblks+1);
    for(l_Int i = 0; i < nblks+1; i++)
    {
      CC[i] = r[i];
    }

    delete [] p;
    delete [] r;
    delete [] work;

    return 0;
  }//strong_component<long int, Entry, Exe_Space>
예제 #18
0
    //=========strong componenet===========
    static 
    inline
    int my_strong_component 
    (
     int           &n,
     int           *col_ptr,
     int           *row_idx,
     int           &nblks,
     int           *perm,
     int           *perm_in,
     int           *CC
     )
    {
      typedef int l_Int;
      
      l_Int *p = new l_Int[n];
      l_Int *r = new l_Int[n+1];
      //We will want to add option to use q in the future
      
      l_Int *work = new l_Int[n*4];
      
      //printf("before amesos call \n");
      /*
        nblks = amesos_btf_strongcomp(M.ncol,&(M.col_ptr[0]),
        &(M.row_idx[0]), 
        &(perm_in[0]), p, r, work);
      */
      nblks = amesos_btf_strongcomp(n, col_ptr,
				    row_idx, 
				    perm_in, p, r, work);
      //printf("after amesos call \n");
      
#ifdef BASKER_DEBUG_ORDER_BTF
      printf("\nBTF perm: \n");
      //for(Int i=0; i <M.nrow; i++)
      for(Int i=0; i < n; i++)
        {
          printf("%d, ", p[i]);
        }
      
      printf("\n\nBTF tabs: <right> \n");
      for(Int i=0; i < nblks+1; i++)
        {
          printf("%d, ", r[i]);
        }
      printf("\n");
#endif
      
      //BASKER_ASSERT(M.nrow > 0, "M.nrow btf");
      BASKER_ASSERT(n > 0, "M.nrow btf");
      //MALLOC_INT_1DARRAY(perm,M.nrow);
      // MALLOC_INT_1DARRAY(perm, n);
      for(l_Int i = 0; i < n; i++)
        {
          perm[p[i]] = i;
        }
      BASKER_ASSERT((nblks+1) > 0, "nblks+1 btf");
      //MALLOC_INT_1DARRAY(CC, nblks+1);
      for(l_Int i = 0; i < nblks+1; i++)
        {
          CC[i] = r[i];
        }
      

      delete [] p;
      delete [] r;
      delete [] work;

      return 0;
    }
예제 #19
0
  BASKER_INLINE
  int Basker<Int, Entry,Exe_Space>::break_into_parts2
  (
   BASKER_MATRIX &M,
   Int           nblks,
   INT_1DARRAY   btf_tabs
   )
  {

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("break_into_parts2 called \n");
    printf("nblks: %d \n", nblks);
    #endif
    
    Options.btf = BASKER_TRUE;

    //Alg.  
    // A -> [BTF_A  BTF_B] 
    //      [0      BTF_C]
    //1. Run backward through the btf_tabs to find size C
    //2. Form A,B,C based on size in 1.

    //Short circuit, 
    //If nblks  == 1, than only BTF_A exists
    if(nblks == 1)
      {
        //#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Short Circuit part_call \n");
	//#endif
	BTF_A = A;
	//Options.btf = BASKER_FALSE;
	btf_tabs_offset = 1;
	return 0;
      }

    //Step 1.
    //Find total work estimate
    Int total_work_estimate = 0;
    for(Int b = 0; b < nblks; b++)
      {
	total_work_estimate += btf_blk_work(b);
      }
    //Set a class variable to use later
    btf_total_work = total_work_estimate;
    //printf("Total work estimate: %d \n",
    //	   total_work_estimate);
    //printf("num_threads: %d epsilon: %f \n",
    //	   num_threads, 
    //	   ((double)1/num_threads) +
    //	   ((double)BASKER_BTF_IMBALANCE));
    Int break_size    = ceil((double)total_work_estimate*(
		((double)1/num_threads) + 
		((double)BASKER_BTF_IMBALANCE)));

    printf("Break size: %d \n", break_size);

    Int t_size            = 0;
    Int scol              = M.ncol;
    Int blk_idx           = nblks;
    BASKER_BOOL  move_fwd = BASKER_TRUE;

    while(move_fwd==BASKER_TRUE)
      {

	//printf("------TEST blk_idx: %d \n",
	// blk_idx);
	Int blk_work = btf_blk_work(blk_idx-1);
	Int blk_size  = btf_tabs(blk_idx) - 
	                        btf_tabs(blk_idx-1);


	#ifdef BASKER_DEBUG_ORDER_BTF
	printf(" \n move_fwd loop \n");
	BASKER_ASSERT(blk_idx>=0, "btf blk idx off");
	BASKER_ASSERT(blk_work>=0, "btk_work wrong");
	BASKER_ASSERT(blk_size>0, "btf blk size wrong");
	printf("blk_idx: %d blk_work: %d break_size: %d \n",
	       blk_idx, blk_work, break_size);
	#endif

	//Should be end
	//if(((blk_work < break_size) ||
	//  (blk_size < BASKER_BTF_SMALL)) &&
	//  (blk_idx > 1))
       
	//Continue to be in btf
	if(((blk_work < break_size) &&
	    (blk_idx > 1)))
	  {
	    #ifdef BASKER_DEBUG_ORDER_BTF
	    printf("first choice \n");
	    #endif

	    t_size = t_size+blk_size;
	    blk_idx = blk_idx-1;
	    scol   = btf_tabs[blk_idx];
	   
	  }
	//break due to size
	else if(blk_work >= break_size)
	  {
	    printf("break due to size\n");
	    move_fwd = BASKER_FALSE;
	  }
	//break due to end
	else if(blk_idx == 1)
	  {
	    printf("break last blk\n");
	    blk_idx = 0;
	    t_size = t_size + blk_size;
	    scol = btf_tabs[blk_idx];	
	    move_fwd = BASKER_FALSE;

	  }
	//should not be called
	else
	  {
	    BASKER_ASSERT(1==0, "btf order break");
	    move_fwd = BASKER_FALSE;
	  }
      }//end while(move_fwd)

    //#ifdef BASKER_DEBUG_ORDER_BTF
    printf("Done finding BTF2 cut.  Cut size: %d scol: %d \n",
	   t_size, scol);
    printf("Done finding BTF2 cut. blk_idx: %d \n", 
	   blk_idx);
    //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n");
    
    BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n");
    //#endif
    
    //Comeback and change
    btf_tabs_offset = blk_idx;


    //2. Move into Blocks

    if(btf_tabs_offset != 0)
      {
    //--Move A into BTF_A;
    BTF_A.set_shape(0, scol, 0, scol);
    BTF_A.nnz = M.col_ptr(scol);
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Init BTF_A. ncol: %d nnz: %d \n",
	   scol, BTF_A.nnz);
    #endif

    if(BTF_A.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr");
	MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1);
	BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz");
	MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz);
	BTF_A.fill();
      }
    
    Int annz = 0;
    for(Int k = 0; k < scol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("copy column: %d into A_BTF, [%d %d] \n", 
	       k, M.col_ptr(k), M.col_ptr(k+1));
	#endif
	
	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    //printf("annz: %d i: %d \n", annz, i);
	    BTF_A.row_idx(annz) = M.row_idx(i);
	    BTF_A.val(annz)     = M.val(i);
	    annz++;
	  }
	
	BTF_A.col_ptr(k+1) = annz;
      }

      }//no A
 
    //Fill in B and C at the same time
    INT_1DARRAY cws;
    BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC");
    MALLOC_INT_1DARRAY(cws, M.ncol-scol+1);
    init_value(cws, M.ncol-scol+1, (Int)M.ncol);
    BTF_B.set_shape(0   , scol,
		    scol, M.ncol-scol);
    BTF_C.set_shape(scol, M.ncol-scol,
		    scol, M.ncol-scol);

    
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Set Shape BTF_B: %d %d %d %d \n",
	   BTF_B.srow, BTF_B.nrow,
	   BTF_B.scol, BTF_B.ncol);
    printf("Set Shape BTF_C: %d %d %d %d \n",
	   BTF_C.srow, BTF_C.nrow,
	   BTF_C.scol, BTF_C.nrow);
    #endif
    
    //Scan and find nnz
    //We can do this much better!!!!
    Int bnnz = 0;
    Int cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Lower, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		cnnz++;
	      }
	  }//over all nnz in k
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF_B nnz: %d \n", bnnz);
    printf("BTF_C nnz: %d \n", cnnz);
    #endif
    
    BTF_B.nnz = bnnz;
    BTF_C.nnz = cnnz;
    
    //Malloc need space
    if((BTF_B.v_fill == BASKER_FALSE) &&
       (BTF_B.nnz > 0))
      //if(BTF_B.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol");
	MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1);
	BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz");
	MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz);
	BTF_B.fill();
      }
    if(BTF_C.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol");
	MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1);
	BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz");
	MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz);
	BTF_C.fill();
      }

    //scan again (Very bad!!!)
    bnnz = 0;
    cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx[i]);
		#endif
		
		BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit");
		//BTF_B.row_idx[bnnz] = M.row_idx[i];
		//Note: do not offset because B srow = 0
		BTF_B.row_idx(bnnz) = M.row_idx(i);
		BTF_B.val(bnnz)     = M.val(i);
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz Lower,k: %d  %d %d %f \n",
		       k, scol, M.row_idx[i], 
		       M.val(i));
		#endif
		//BTF_C.row_idx[cnnz] = M.row_idx[i];
		BTF_C.row_idx(cnnz) = M.row_idx(i)-scol;
		BTF_C.val(cnnz)     = M.val(i);
		cnnz++;
	      }
	  }//over all nnz in k
	if(BTF_B.nnz > 0)
	  {
	    BTF_B.col_ptr(k-scol+1) = bnnz;
	  }
	BTF_C.col_ptr(k-scol+1) = cnnz;
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("After BTF_B nnz: %d \n", bnnz);
    printf("After BTF_C nnz: %d \n", cnnz);
    #endif

    //printf("\n\n");
    //printf("DEBUG\n");
    //BTF_C.print();
    //printf("\n\n");
    
    return 0;

  }//end break_into_parts2 (based on imbalance)
예제 #20
0
  BASKER_INLINE
  int Basker<Int, Entry,Exe_Space>::break_into_parts
  (
   BASKER_MATRIX &M,
   Int           nblks,
   INT_1DARRAY   btf_tabs
   )
  {

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("break_into_parts called \n");
    printf("nblks: %d \n", nblks);
    #endif
    
    Options.btf = BASKER_TRUE;

    //Alg.  
    // A -> [BTF_A  BTF_B] 
    //      [0      BTF_C]
    //1. Run backward through the btf_tabs to find size C
    //2. Form A,B,C based on size in 1.

    //Short circuit, 
    //If nblks  == 1, than only BTF_A exists
    if(nblks == 1)
      {
	
        #ifdef BASKER_DEBUG_ORDER_BTF
	printf("Short Circuit part_call \n");
	#endif
	BTF_A = A;
	//Options.btf = BASKER_FALSE;
	btf_tabs_offset = 1;
	return 0;
      }

    //Step 1.
    Int t_size            = 0;
    Int scol              = M.ncol;
    Int blk_idx           = nblks;
    BASKER_BOOL  move_fwd = BASKER_TRUE;
    while(move_fwd==BASKER_TRUE)
      {

	Int blk_size = btf_tabs(blk_idx)-
	                     btf_tabs(blk_idx-1);

	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("move_fwd loop \n");
	BASKER_ASSERT(blk_idx>=0, "btf blk idx off");
	BASKER_ASSERT(blk_size>0, "btf blk size wrong");
	printf("blk_idx: %d blk_size: %d \n", 
	       blk_idx, blk_size);
	std::cout << blk_size << std::endl;
	#endif


	if((blk_size < Options.btf_large) &&
	   ((((double)t_size+blk_size)/(double)M.ncol) < Options.btf_max_percent))
	  {
	    #ifdef BASKER_DEBUG_ORDER_BTF
	    printf("first choice \n");
	    printf("blksize test: %d %d %d \n",
		   blk_size, Options.btf_large, 
		   BASKER_BTF_LARGE);
	    printf("blkpercent test: %f %f %f \n", 
		   ((double)t_size+blk_size)/(double)M.ncol, 
		   Options.btf_max_percent, 
		   (double) BASKER_BTF_MAX_PERCENT);
	    #endif

	   
		t_size = t_size+blk_size;
		blk_idx = blk_idx-1;
		scol   = btf_tabs[blk_idx];
	   
	  }
	else
	  {
	    //printf("second choice \n");
	    //#ifdef BASKER_DEBUG_ORDER_BTF
	    printf("Cut: blk_size: %d percent: %f \n",
		   blk_size, ((double)t_size+blk_size)/(double)M.ncol);
	    
	    if((((double)t_size+blk_size)/(double)M.ncol)
	       == 1.0)
	      {
		blk_idx = 0;
		t_size = t_size + blk_size;
		scol = btf_tabs[blk_idx];
		
	      }

	    //#endif
	    move_fwd = BASKER_FALSE;
	  }
      }//end while(move_fwd)

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Done finding BTF cut.  Cut size: %d scol: %d \n",
	   t_size, scol);
    //BASKER_ASSERT(t_size > 0, "BTF CUT SIZE NOT BIG ENOUGH\n");
    BASKER_ASSERT((scol >= 0) && (scol < M.ncol), "SCOL\n");
    #endif
    
    //Comeback and change
    btf_tabs_offset = blk_idx;


    //2. Move into Blocks

    if(btf_tabs_offset != 0)
      {
    //--Move A into BTF_A;
    BTF_A.set_shape(0, scol, 0, scol);
    BTF_A.nnz = M.col_ptr(scol);
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Init BTF_A. ncol: %d nnz: %d \n",
	   scol, BTF_A.nnz);
    #endif

    if(BTF_A.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_A.ncol >= 0, "BTF_A, col_ptr");
	MALLOC_INT_1DARRAY(BTF_A.col_ptr, BTF_A.ncol+1);
	BASKER_ASSERT(BTF_A.nnz > 0, "BTF_A, nnz");
	MALLOC_INT_1DARRAY(BTF_A.row_idx, BTF_A.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_A.val, BTF_A.nnz);
	BTF_A.fill();
      }
    
    Int annz = 0;
    for(Int k = 0; k < scol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("copy column: %d into A_BTF, [%d %d] \n", 
	       k, M.col_ptr(k), M.col_ptr(k+1));
	#endif
	
	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    //printf("annz: %d i: %d \n", annz, i);
	    BTF_A.row_idx(annz) = M.row_idx(i);
	    BTF_A.val(annz)     = M.val(i);
	    annz++;
	  }
	
	BTF_A.col_ptr(k+1) = annz;
      }

      }//no A
 
    //Fill in B and C at the same time
    INT_1DARRAY cws;
    BASKER_ASSERT((M.ncol-scol+1) > 0, "BTF_SIZE MALLOC");
    MALLOC_INT_1DARRAY(cws, M.ncol-scol+1);
    init_value(cws, M.ncol-scol+1, (Int)M.ncol);
    BTF_B.set_shape(0   , scol,
		    scol, M.ncol-scol);
    BTF_C.set_shape(scol, M.ncol-scol,
		    scol, M.ncol-scol);

    
    
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("Set Shape BTF_B: %d %d %d %d \n",
	   BTF_B.srow, BTF_B.nrow,
	   BTF_B.scol, BTF_B.ncol);
    printf("Set Shape BTF_C: %d %d %d %d \n",
	   BTF_C.srow, BTF_C.nrow,
	   BTF_C.scol, BTF_C.nrow);
    #endif
    
    //Scan and find nnz
    //We can do this much better!!!!
    Int bnnz = 0;
    Int cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Lower, %d %d \n",
		       scol, M.row_idx(i));
		#endif
		cnnz++;
	      }
	  }//over all nnz in k
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF_B nnz: %d \n", bnnz);
    printf("BTF_C nnz: %d \n", cnnz);
    #endif
    
    BTF_B.nnz = bnnz;
    BTF_C.nnz = cnnz;
    
    //Malloc need space
    if((BTF_B.v_fill == BASKER_FALSE) &&
       (BTF_B.nnz > 0))
      //if(BTF_B.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_B.ncol >= 0, "BTF_B ncol");
	MALLOC_INT_1DARRAY(BTF_B.col_ptr, BTF_B.ncol+1);
	BASKER_ASSERT(BTF_B.nnz > 0, "BTF_B.nnz");
	MALLOC_INT_1DARRAY(BTF_B.row_idx, BTF_B.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_B.val, BTF_B.nnz);
	BTF_B.fill();
      }
    if(BTF_C.v_fill == BASKER_FALSE)
      {
	BASKER_ASSERT(BTF_C.ncol >= 0, "BTF_C.ncol");
	MALLOC_INT_1DARRAY(BTF_C.col_ptr, BTF_C.ncol+1);
	BASKER_ASSERT(BTF_C.nnz > 0, "BTF_C.nnz");
	MALLOC_INT_1DARRAY(BTF_C.row_idx, BTF_C.nnz);
	MALLOC_ENTRY_1DARRAY(BTF_C.val, BTF_C.nnz);
	BTF_C.fill();
      }

    //scan again (Very bad!!!)
    bnnz = 0;
    cnnz = 0;
    for(Int k = scol; k < M.ncol; ++k)
      {
	#ifdef BASKER_DEBUG_ORDER_BTF
	printf("Scanning nnz, k: %d \n", k);
	#endif

	for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); ++i)
	  {
	    if(M.row_idx(i) < scol)
	      {
		
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz to Upper, %d %d \n",
		       scol, M.row_idx[i]);
		#endif
		
		BASKER_ASSERT(BTF_B.nnz > 0, "BTF B uninit");
		//BTF_B.row_idx[bnnz] = M.row_idx[i];
		//Note: do not offset because B srow = 0
		BTF_B.row_idx(bnnz) = M.row_idx(i);
		BTF_B.val(bnnz)     = M.val(i);
		bnnz++;
	      }
	    else
	      {
		#ifdef BASKER_DEBUG_ORDER_BTF
		printf("Adding nnz Lower,k: %d  %d %d %f \n",
		       k, scol, M.row_idx[i], 
		       M.val(i));
		#endif
		//BTF_C.row_idx[cnnz] = M.row_idx[i];
		BTF_C.row_idx(cnnz) = M.row_idx(i)-scol;
		BTF_C.val(cnnz)     = M.val(i);
		cnnz++;
	      }
	  }//over all nnz in k
	if(BTF_B.nnz > 0)
	  {
	    BTF_B.col_ptr(k-scol+1) = bnnz;
	  }
	BTF_C.col_ptr(k-scol+1) = cnnz;
      }//over all k

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("After BTF_B nnz: %d \n", bnnz);
    printf("After BTF_C nnz: %d \n", cnnz);
    #endif

    //printf("\n\n");
    //printf("DEBUG\n");
    //BTF_C.print();
    //printf("\n\n");

    return 0;

  }//end break_into_parts