// Partitions M with Scotch and applies the resulting ordering to M.
//
// If Options.symmetric is set, part_scotch() is run on M directly.
// Otherwise it is run on a temporary matrix MMT filled by AplusAT(M, MMT)
// (presumably the pattern of M + M^T -- confirm in AplusAT), and the
// temporary is released with FREE() afterwards. part_scotch() writes its
// result into the member part_tree; part_tree.permtab is then applied to
// the rows and to the columns of M.
//
// Side effects: sets nd_flag to BASKER_TRUE, overwrites part_tree and
// permutes M in place.
//
// M:       matrix to partition and permute (modified in place).
// Returns: always 0.
BASKER_INLINE
  int Basker<Int, Entry, Exe_Space>::scotch_partition
  (BASKER_MATRIX &M)
  {
    // Raised before partitioning so that anything consulting nd_flag while
    // the partition is computed sees the same value as before this fix.
    nd_flag = BASKER_TRUE;

    if(Options.symmetric == BASKER_TRUE)
      {
        //printf("Scotch Symmetric\n");
        part_scotch(M, part_tree);
      }
    else
      {
        //printf("Scotch Nonsymmetric\n");
        BASKER_MATRIX MMT;
        AplusAT(M,MMT);
        //printMTX("AAT.mtx", MMT);
        part_scotch(MMT, part_tree);
        FREE(MMT);
      }

    // This statement used to read `nd_flag == BASKER_TRUE;`, i.e. a
    // comparison whose result was thrown away. An assignment was clearly
    // intended: mark the nested-dissection ordering as available.
    nd_flag = BASKER_TRUE;

    // Apply the same permutation to rows and columns of M.
    permute_row(M, part_tree.permtab);
    permute_col(M, part_tree.permtab);

    //May need to sort row_idx
    return 0;
  }//end scotch_partition()
 // Permutes M in place using separate row and column permutations.
 //
 // M:       matrix to permute (modified in place).
 // row:     permutation handed to permute_row().
 // col:     permutation handed to permute_col().
 // Returns: always 0.
 BASKER_INLINE
 int Basker<Int, Entry, Exe_Space>::permute(BASKER_MATRIX &M,
                                            INT_1DARRAY row,
                                            INT_1DARRAY col)
 {
   // The column pass runs first, followed by the row pass.
   permute_col(M, col);
   permute_row(M, row);

   return 0;
 }//end permute(int, int)
// Example #3
// 0
  // Computes a block-triangular-form (BTF) ordering of M, applies it to M
  // and splits M into parts.
  //
  // Steps, in order:
  //   1. strong_component() fills nblks, order_btf_array and btf_tabs.
  //   2. order_btf_array is applied to the columns and then the rows of M.
  //   3. break_into_parts() is called with the block count and btf_tabs.
  //
  // Side effects: sets btf_flag to BASKER_TRUE, stores the block count in
  // btf_nblks, permutes M in place and always prints the "BTF CUT" line to
  // stdout. With BASKER_DEBUG_ORDER_BTF defined it additionally prints the
  // block count and btf_tabs(0..nblks) and asserts that nblks > 1.
  //
  // All integer diagnostics are cast to long long and printed with %lld:
  // Int is a template parameter, so passing it to "%d" is undefined
  // behaviour whenever Int is wider than int.
  //
  // M:       matrix to reorder and split (modified in place).
  // Returns: always 0.
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::find_btf(BASKER_MATRIX &M)
  {
    Int          nblks = 0;

    strong_component(M,nblks,order_btf_array,btf_tabs);

    btf_flag = BASKER_TRUE;

    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF nblks returned: %lld \n", static_cast<long long>(nblks));
    BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");

    if(nblks<2)
      {
        printf("BTF did not find enough blks\n");
      }

    /*
    printf("\nBTF perm: \n");
    for(Int i=0; i <M.nrow; i++)
      {
        printf("%d, ", order_btf_array(i));
        //printf("%d, ", btf_perm(i));
      }
    */
    // btf_tabs is read at indices 0..nblks inclusive.
    printf("\n\nBTF tabs: \n");
    for(Int i=0; i < nblks+1; i++)
      {
        printf("%lld, ", static_cast<long long>(btf_tabs(i)));
      }
    printf("\n");
    #endif

    // Apply the BTF ordering symmetrically: columns first, then rows.
    permute_col(M, order_btf_array);
    permute_row(M, order_btf_array);

    break_into_parts(M, nblks, btf_tabs);

    btf_nblks = nblks;

    // Deliberately unconditional (the debug guard is commented out).
    // btf_tabs_offset is not assigned in this function; presumably it is
    // set by break_into_parts() -- confirm.
    //#ifdef BASKER_DEBUG_ORDER_BTF
    printf("------------BTF CUT: %lld --------------\n",
           static_cast<long long>(btf_tabs(btf_tabs_offset)));
    //#endif

    return 0;
  }//end find BTF
  // Top-level ordering driver for the member matrix A.
  //
  // Runs, in this order:
  //   1. sort_matrix(A), match_ordering(0), sort_matrix(A).
  //   2. Allocates and zeroes btf_schedule (num_threads+1 entries), then
  //      find_btf(A).
  //   3-6. Only when btf_tabs_offset != 0: Scotch partition of BTF_A,
  //      init_tree_thread(), a constrained ordering of BTF_A via
  //      csymamd_order() (constraint = tree block index of each column),
  //      and conversion of BTF_A to the 2D structure.
  //   Finally, only when btf_nblks > 1: blk_amd() ordering of BTF_C,
  //      applied to the rows and columns of BTF_C and, when
  //      btf_tabs_offset != 0, to the columns of BTF_B.
  //
  // Prints progress messages to stdout unconditionally.
  // Returns: always 0.
  BASKER_INLINE
  int Basker<Int,Entry,Exe_Space>::btf_order()
  {
    //1. Matching ordering on whole matrix
    //currently finds matching and permutes
    //found bottle-neck to work best with circuit problems
    sort_matrix(A);
    //printMTX("A_nonmatch.mtx", A);
    match_ordering(0);
    //printf("DEBUG1: done match\n");
    //for debugging
    sort_matrix(A);
    //printMTX("A_match.mtx", A);
   
    //2. BTF ordering on whole matrix
    // Gets estimate of work on all blocks
    //currently finds btf-hybrid and permutes
    //A -> [BTF_A, BTF_C; 0 , BTF B]
    //NOTE(review): further down, BTF_A's row permutations are applied to
    //the rows of BTF_B and BTF_C's permutation to the columns of BTF_B,
    //which suggests the layout is [BTF_A, BTF_B; 0, BTF_C] -- confirm.


    printf("outter num_threads:%d \n", num_threads);
    //One schedule slot per thread plus one, all initialised to 0
    MALLOC_INT_1DARRAY(btf_schedule, num_threads+1);
    init_value(btf_schedule, num_threads+1, 0);
    find_btf(A); 

   
    
    //Steps 3-6 only run when btf_tabs_offset is non-zero
    //(presumably meaning a BTF_A block exists -- confirm)
    if(btf_tabs_offset != 0)
      {

        //  printf("A/B block stuff called\n");
	//3. ND on BTF_A
	//currently finds ND and permute BTF_A
	//Would like to change so finds permutation, 
	//and move into 2D-Structure
	//printMTX("A_BTF_FROM_A.mtx", BTF_A);
	sort_matrix(BTF_A);
	scotch_partition(BTF_A);
    
	//need to do a row perm on BTF_B too
	//(same permtab that scotch_partition applied to BTF_A)
	if(btf_nblks > 1)
	  {
	    permute_row(BTF_B, part_tree.permtab);
	  }
	//needed because  moving into 2D-Structure,
	//assumes sorted columns
	sort_matrix(BTF_A);
	if(btf_nblks > 1)
	  {
	    sort_matrix(BTF_B);
	    sort_matrix(BTF_C);
	  }
	//For debug
	//printMTX("A_BTF_PART_AFTER.mtx", BTF_A);
	
	//4. Init tree structure
	//This reduces the ND ordering into one that fits
	//the thread count
	init_tree_thread();
	

	//5. Permute BTF_A
	//Constrained symamd on A
	//cmember(j) = index of the tree block whose column range
	//[tree.col_tabs(i), tree.col_tabs(i+1)) contains column j
	//NOTE(review): cmember is allocated here and never explicitly
	//released in this function -- confirm INT_1DARRAY manages its
	//own storage in every build configuration.
	INT_1DARRAY cmember;
	MALLOC_INT_1DARRAY(cmember, BTF_A.ncol+1);
	init_value(cmember,BTF_A.ncol+1,(Int) 0);
	for(Int i = 0; i < tree.nblks; ++i)
	  {
	    for(Int j = tree.col_tabs(i); j < tree.col_tabs(i+1); ++j)
	      {
		cmember(j) = i;
	      }
	  }
	//INT_1DARRAY csymamd_perm = order_csym_array;
	MALLOC_INT_1DARRAY(order_csym_array, BTF_A.ncol+1);
	//MALLOC_INT_1DARRAY(csymamd_perm, BTF_A.ncol+1);
	init_value(order_csym_array, BTF_A.ncol+1,(Int) 0);
	//init_value(csymamd_perm, BTF_A.ncol+1,(Int) 0);
	
	csymamd_order(BTF_A, order_csym_array, cmember);
	//csymamd_order(BTF_A, csymamd_perm, cmember);
	
	//Apply order_csym_array to columns, then rows, of BTF_A,
	//re-sorting after each pass
	//permute(BTF_A, csymamd_perm, csymamd_perm);
	permute_col(BTF_A, order_csym_array);
	sort_matrix(BTF_A);
	permute_row(BTF_A, order_csym_array);
	sort_matrix(BTF_A);
	//printMTX("A_BTF_AMD.mtx", BTF_A);
	
	
	//The same permutation is applied to the rows of BTF_B
	if(btf_nblks > 1)
	  {
	    permute_row(BTF_B, order_csym_array);
	    sort_matrix(BTF_B);
	    //printMTX("B_BTF_AMD.mtx", BTF_B);
	    sort_matrix(BTF_C);
	    //printMTX("C_BTF_AMD.mtx", BTF_C);
	  }
    
    
	//6. Move to 2D Structure
	//finds the shapes for both view and submatrices,
	//need to be changed over to just submatrices
	matrix_to_views_2D(BTF_A);
	//finds the starting point of A for submatrices
	find_2D_convert(BTF_A);
	//now we can fill submatrices
	//(only implemented for Kokkos builds; the #else branch is empty)
        #ifdef BASKER_KOKKOS
	kokkos_order_init_2D<Int,Entry,Exe_Space> iO(this);
	Kokkos::parallel_for(TeamPolicy(num_threads,1), iO);
	Kokkos::fence();
        #else
	//Comeback
        #endif

	//printMTX("BTF_A.mtx", BTF_A); 
	
      }//end if(btf_tabs_offset != 0)

    
    //Ordering of BTF_C; only runs when more than one BTF block was found
    if(btf_nblks > 1)
      {
	sort_matrix(BTF_C);
	//printMTX("C_TEST.mtx", BTF_C);
	//Permute C

	MALLOC_INT_1DARRAY(order_c_csym_array, BTF_C.ncol+1);
	init_value(order_c_csym_array, BTF_C.ncol+1,(Int) 0);
	
	printf("BEFORE \n");

	//csymamd_order(BTF_C, order_c_csym_array, cmember);

	//blk_amd fills order_c_csym_array with the ordering for BTF_C
	blk_amd(BTF_C, order_c_csym_array);

	printf("After perm\n");
	
	//Apply it to columns, then rows, of BTF_C, re-sorting each time
	permute_col(BTF_C, order_c_csym_array);
	sort_matrix(BTF_C);
	permute_row(BTF_C, order_c_csym_array);
	sort_matrix(BTF_C);

	//BTF_B gets the same column permutation as BTF_C
	if(btf_tabs_offset != 0)
	  {
	    permute_col(BTF_B, order_c_csym_array);
	    sort_matrix(BTF_B);
	    //printMTX("BTF_B.mtx", BTF_B);
	  }

	//printMTX("BTF_C.mtx", BTF_C);

      }
  
    
    //printf("Done with ordering\n");
    
    return 0;
  }//end btf_order
// Example #5
// 0
  // Computes a block-triangular-form (BTF) ordering of M, refines it with
  // a per-block AMD ordering, splits M into parts and builds a schedule.
  //
  // Steps, in order:
  //   1. strong_component() fills nblks, order_btf_array and btf_tabs.
  //   2. order_btf_array is applied to the columns and then the rows of M.
  //   3. order_blk_amd_array (M.ncol entries) and btf_blk_nnz /
  //      btf_blk_work (nblks+1 entries each) are allocated and zeroed,
  //      then filled by btf_blk_amd().
  //   4. order_blk_amd_array is applied to the columns and then the rows
  //      of M, and M is sorted.
  //   5. break_into_parts2() and find_btf_schedule() are called with the
  //      block count and btf_tabs.
  //
  // Side effects: sets btf_nblks and btf_flag, (re)allocates the member
  // arrays listed above, permutes M in place, and always prints the block
  // count, num_threads and btf_tabs(0..nblks) to stdout. Additional
  // diagnostics are printed when BASKER_DEBUG_ORDER_BTF is defined.
  //
  // All integer diagnostics are cast to long long and printed with %lld:
  // Int is a template parameter, so passing it to "%d" is undefined
  // behaviour whenever Int is wider than int.
  //
  // M:       matrix to reorder and split (modified in place).
  // Returns: always 0.
  BASKER_INLINE
  int Basker<Int,Entry, Exe_Space>::find_btf2
  (
   BASKER_MATRIX &M
  )
  {
    Int          nblks = 0;

    strong_component(M,nblks,order_btf_array,btf_tabs);

    btf_nblks = nblks;

    btf_flag = BASKER_TRUE;

    // Deliberately unconditional (the debug guard is commented out).
    //#ifdef BASKER_DEBUG_ORDER_BTF
    printf("BTF nblks returned: %lld \n", static_cast<long long>(nblks));
    //BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");
    //#endif

    #ifdef BASKER_DEBUG_ORDER_BTF
    if(nblks<2)
      {
        printf("BTF did not find enough blks\n");
      }
    #endif


    //#ifdef BASKER_DEBUG_ORDER_BTF
    /*
    printf("\nBTF perm: \n");
    for(Int i=0; i <M.nrow; i++)
      {
        printf("%d, ", order_btf_array(i));
        //printf("%d, ", btf_perm(i));
      }
    */
    printf("num_threads: %lld \n", static_cast<long long>(num_threads));
    // btf_tabs is read at indices 0..nblks inclusive.
    printf("\n\nBTF tabs: \n");
    for(Int i=0; i < nblks+1; i++)
      {
        printf("%lld, ", static_cast<long long>(btf_tabs(i)));
      }
    printf("\n");
    // #endif

    // Apply the BTF ordering symmetrically: columns first, then rows.
    permute_col(M, order_btf_array);
    permute_row(M, order_btf_array);

    MALLOC_INT_1DARRAY(order_blk_amd_array, M.ncol);
    init_value(order_blk_amd_array, M.ncol, (Int)0);
    MALLOC_INT_1DARRAY(btf_blk_nnz, nblks+1);
    init_value(btf_blk_nnz, nblks+1, (Int) 0);
    MALLOC_INT_1DARRAY(btf_blk_work, nblks+1);
    init_value(btf_blk_work, nblks+1, (Int) 0);


    //Find AMD blk ordering, get nnz, and get work
    btf_blk_amd( M, order_blk_amd_array,
                 btf_blk_nnz, btf_blk_work);


    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("blk_perm:\n");
    for(Int i = 0; i < M.ncol; i++)
      {
        printf("(%lld,%lld) ",
               static_cast<long long>(i),
               static_cast<long long>(order_blk_amd_array(i)));
      }
    printf("\n");
    printf("id/blk_size/blk_nnz/work: \n");
    for(Int i = 0; i < nblks; i++)
      {
        printf("(%lld, %lld, %lld, %lld) ",
               static_cast<long long>(i),
               static_cast<long long>(btf_tabs(i+1)-btf_tabs(i)),
               static_cast<long long>(btf_blk_nnz(i)),
               static_cast<long long>(btf_blk_work(i)));
      }
    printf("\n");
    #endif

    //printMTX("A_BEFORE.mtx", M);
    //printVec("AMD.txt", order_blk_amd_array, M.ncol);


    // Apply the per-block AMD ordering: columns first, then rows.
    permute_col(M, order_blk_amd_array);
    permute_row(M, order_blk_amd_array);
    sort_matrix(M);

    //changed col to row, error.
    //print to see issue
    //printMTX("A_AMD.mtx", M);


    break_into_parts2(M, nblks, btf_tabs);

    //find schedule
    find_btf_schedule(M, nblks, btf_tabs);


    // btf_tabs_offset is not assigned in this function; presumably it is
    // set by break_into_parts2() -- confirm.
    #ifdef BASKER_DEBUG_ORDER_BTF
    printf("------------BTF CUT: %lld --------------\n",
           static_cast<long long>(btf_tabs(btf_tabs_offset)));
    #endif

    return 0;
  }//end find BTF(nnz)