// Partition M with part_scotch() and apply the resulting permutation
// to both the rows and the columns of M.
//
// If Options.symmetric is BASKER_TRUE the partitioner is run on M itself.
// Otherwise a temporary matrix MMT is built with AplusAT(M, MMT), the
// partitioner is run on that, and the temporary is released with FREE().
//
// Side effects visible here: nd_flag is set to BASKER_TRUE, part_tree is
// handed to part_scotch(), and M is passed to permute_row()/permute_col()
// together with part_tree.permtab.
//
// M      : matrix to partition and permute
// return : always 0
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::scotch_partition
(
 BASKER_MATRIX &M
)
{
  nd_flag = BASKER_TRUE;

  if(Options.symmetric == BASKER_TRUE)
  {
    //printf("Scotch Symmetric\n");
    part_scotch(M, part_tree);
  }
  else
  {
    //printf("Scotch Nonsymmetrix\n");
    BASKER_MATRIX MMT;
    AplusAT(M, MMT);
    //printMTX("AAT.mtx", MMT);
    part_scotch(MMT, part_tree);
    FREE(MMT);
  }

  // NOTE: a stray `nd_flag == BASKER_TRUE;` used to sit here.  It was a
  // comparison whose result was thrown away (no effect), so it has been
  // dropped; nd_flag is already assigned at the top of this function.

  // Row permutation first, then column permutation, both with the same
  // permutation vector.
  permute_row(M, part_tree.permtab);
  permute_col(M, part_tree.permtab);

  //May need to sort row_idx
  return 0;
}//end scotch_partition()
// Apply a column permutation and then a row permutation to M.
//
// M      : matrix handed to permute_col() and permute_row()
// row    : permutation forwarded to permute_row()
// col    : permutation forwarded to permute_col()
// return : always 0
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::permute
(
 BASKER_MATRIX &M,
 INT_1DARRAY row,
 INT_1DARRAY col
)
{
  // Columns are permuted before rows.
  permute_col(M, col);
  permute_row(M, row);

  return 0;
}//end permute(int, int)
BASKER_INLINE int Basker<Int,Entry, Exe_Space>::find_btf(BASKER_MATRIX &M) { Int nblks = 0; strong_component(M,nblks,order_btf_array,btf_tabs); btf_flag = BASKER_TRUE; #ifdef BASKER_DEBUG_ORDER_BTF printf("BTF nblks returned: %d \n", nblks); BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS"); #endif #ifdef BASKER_DEBUG_ORDER_BTF if(nblks<2) { printf("BTF did not find enough blks\n"); } #endif #ifdef BASKER_DEBUG_ORDER_BTF /* printf("\nBTF perm: \n"); for(Int i=0; i <M.nrow; i++) { printf("%d, ", order_btf_array(i)); //printf("%d, ", btf_perm(i)); } */ printf("\n\nBTF tabs: \n"); for(Int i=0; i < nblks+1; i++) { printf("%d, ", btf_tabs(i)); } printf("\n"); #endif permute_col(M, order_btf_array); permute_row(M, order_btf_array); break_into_parts(M, nblks, btf_tabs); btf_nblks = nblks; //#ifdef BASKER_DEBUG_ORDER_BTF printf("------------BTF CUT: %d --------------\n", btf_tabs(btf_tabs_offset)); //#endif return 0; }//end find BTF
// Driver for the full ordering pipeline on the member matrix A.
//
// The sequence below is order-sensitive and is kept exactly as written:
//   1. sort A, run match_ordering(0), sort A again
//   2. allocate/zero btf_schedule and run find_btf(A)
//        A -> [BTF_A, BTF_C; 0 , BTF B]
//   3-6. only when btf_tabs_offset != 0: partition BTF_A with
//        scotch_partition(), build the thread tree, run a constrained
//        symamd on BTF_A, and move BTF_A into the 2D structure
//   7. only when btf_nblks > 1: order BTF_C with blk_amd() and apply the
//      matching column permutation to BTF_B (if btf_tabs_offset != 0)
//
// return : always 0
BASKER_INLINE
int Basker<Int,Entry,Exe_Space>::btf_order()
{
  //--------------------------------------------------------------
  // 1. Matching ordering on whole matrix
  //    currently finds matching and permutes
  //    found bottle-neck to work best with circuit problems
  //--------------------------------------------------------------
  sort_matrix(A);
  //printMTX("A_nonmatch.mtx", A);
  match_ordering(0);
  //printf("DEBUG1: done match\n");
  //for debuging
  sort_matrix(A);
  //printMTX("A_match.mtx", A);

  //--------------------------------------------------------------
  // 2. BTF ordering on whole matrix
  //    Gets estimate of work on all blocks
  //    currently finds btf-hybrid and permutes
  //    A -> [BTF_A, BTF_C; 0 , BTF B]
  //--------------------------------------------------------------
  printf("outter num_threads:%d \n", num_threads);
  MALLOC_INT_1DARRAY(btf_schedule, num_threads+1);
  init_value(btf_schedule, num_threads+1, 0);
  find_btf(A);

  if(btf_tabs_offset != 0)
  {
    // printf("A/B block stuff called\n");

    //------------------------------------------------------------
    // 3. ND on BTF_A
    //    currently finds ND and permute BTF_A
    //    Would like to change so finds permuation,
    //    and move into 2D-Structure
    //------------------------------------------------------------
    //printMTX("A_BTF_FROM_A.mtx", BTF_A);
    sort_matrix(BTF_A);
    scotch_partition(BTF_A);

    //need to do a row perm on BTF_B too
    if(btf_nblks > 1)
    {
      permute_row(BTF_B, part_tree.permtab);
    }

    //needed because moving into 2D-Structure,
    //assumes sorted columns
    sort_matrix(BTF_A);
    if(btf_nblks > 1)
    {
      sort_matrix(BTF_B);
      sort_matrix(BTF_C);
    }
    //For debug
    //printMTX("A_BTF_PART_AFTER.mtx", BTF_A);

    //------------------------------------------------------------
    // 4. Init tree structure
    //    This reduces the ND ordering into that fits,
    //    thread counts
    //------------------------------------------------------------
    init_tree_thread();

    //------------------------------------------------------------
    // 5. Permute BTF_A
    //    Constrained symamd on A
    //------------------------------------------------------------
    INT_1DARRAY cmember;
    MALLOC_INT_1DARRAY(cmember, BTF_A.ncol+1);
    init_value(cmember, BTF_A.ncol+1, (Int) 0);

    // Tag every column with the index of the tree block whose
    // [col_tabs(blk), col_tabs(blk+1)) range contains it.
    for(Int blk = 0; blk < tree.nblks; ++blk)
    {
      for(Int c = tree.col_tabs(blk); c < tree.col_tabs(blk+1); ++c)
      {
        cmember(c) = blk;
      }
    }

    //INT_1DARRAY csymamd_perm = order_csym_array;
    MALLOC_INT_1DARRAY(order_csym_array, BTF_A.ncol+1);
    //MALLOC_INT_1DARRAY(csymamd_perm, BTF_A.ncol+1);
    init_value(order_csym_array, BTF_A.ncol+1, (Int) 0);
    //init_value(csymamd_perm, BTF_A.ncol+1,(Int) 0);

    csymamd_order(BTF_A, order_csym_array, cmember);
    //csymamd_order(BTF_A, csymamd_perm, cmember);

    //permute(BTF_A, csymamd_perm, csymamd_perm);
    permute_col(BTF_A, order_csym_array);
    sort_matrix(BTF_A);
    permute_row(BTF_A, order_csym_array);
    sort_matrix(BTF_A);
    //printMTX("A_BTF_AMD.mtx", BTF_A);

    if(btf_nblks > 1)
    {
      permute_row(BTF_B, order_csym_array);
      sort_matrix(BTF_B);
      //printMTX("B_BTF_AMD.mtx", BTF_B);
      sort_matrix(BTF_C);
      //printMTX("C_BTF_AMD.mtx", BTF_C);
    }

    //------------------------------------------------------------
    // 6. Move to 2D Structure
    //------------------------------------------------------------
    //finds the shapes for both view and submatrices,
    //need to be changed over to just submatrices
    matrix_to_views_2D(BTF_A);
    //finds the starting point of A for submatrices
    find_2D_convert(BTF_A);
    //now we can fill submatrices
#ifdef BASKER_KOKKOS
    kokkos_order_init_2D<Int,Entry,Exe_Space> iO(this);
    Kokkos::parallel_for(TeamPolicy(num_threads,1), iO);
    Kokkos::fence();
#else
    //Comeback
#endif

    //printMTX("BTF_A.mtx", BTF_A);
  }//end if(btf_tabs_offset != 0)

  if(btf_nblks > 1)
  {
    sort_matrix(BTF_C);
    //printMTX("C_TEST.mtx", BTF_C);

    //Permute C
    MALLOC_INT_1DARRAY(order_c_csym_array, BTF_C.ncol+1);
    init_value(order_c_csym_array, BTF_C.ncol+1, (Int) 0);

    printf("BEFORE \n");
    //csymamd_order(BTF_C, order_c_csym_array, cmember);
    blk_amd(BTF_C, order_c_csym_array);
    printf("After perm\n");

    permute_col(BTF_C, order_c_csym_array);
    sort_matrix(BTF_C);
    permute_row(BTF_C, order_c_csym_array);
    sort_matrix(BTF_C);

    // BTF_B gets the same column permutation as BTF_C.
    if(btf_tabs_offset != 0)
    {
      permute_col(BTF_B, order_c_csym_array);
      sort_matrix(BTF_B);
      //printMTX("BTF_B.mtx", BTF_B);
    }
    //printMTX("BTF_C.mtx", BTF_C);
  }

  //printf("Done with ordering\n");

  return 0;
}//end btf_order
// Variant of the BTF search that also computes a per-block AMD ordering
// and a schedule.
//
// Steps, in order:
//   1. strong_component() is called with M, a local block counter,
//      order_btf_array and btf_tabs (presumably filling all three --
//      confirm against strong_component()).
//   2. btf_nblks and btf_flag are set.
//   3. M is permuted by order_btf_array (columns, then rows).
//   4. order_blk_amd_array (length M.ncol), btf_blk_nnz and btf_blk_work
//      (length nblks+1 each) are allocated and zeroed, then handed to
//      btf_blk_amd().
//   5. M is permuted by order_blk_amd_array (columns, then rows) and
//      sorted.
//   6. break_into_parts2() and find_btf_schedule() are called with the
//      block count and btf_tabs.
//
// The block count, thread count and btf_tabs are always printed; further
// diagnostics are compiled in only with BASKER_DEBUG_ORDER_BTF.
//
// All printed index values are cast to long long and printed with %lld,
// because Int is a template parameter and need not be `int`.
//
// M      : matrix to order; permuted and sorted by this call
// return : always 0
BASKER_INLINE
int Basker<Int,Entry, Exe_Space>::find_btf2
(
 BASKER_MATRIX &M
)
{
  Int nblks = 0;

  strong_component(M, nblks, order_btf_array, btf_tabs);

  btf_nblks = nblks;
  btf_flag  = BASKER_TRUE;

  //#ifdef BASKER_DEBUG_ORDER_BTF
  printf("BTF nblks returned: %lld \n", static_cast<long long>(nblks));
  //BASKER_ASSERT(nblks>1, "NOT ENOUGH BTF BLOCKS");
  //#endif

#ifdef BASKER_DEBUG_ORDER_BTF
  if(nblks < 2)
  {
    printf("BTF did not find enough blks\n");
  }
#endif

  //#ifdef BASKER_DEBUG_ORDER_BTF
  /*
  printf("\nBTF perm: \n");
  for(Int i=0; i <M.nrow; i++)
  {
    printf("%d, ", order_btf_array(i));
    //printf("%d, ", btf_perm(i));
  }
  */
  printf("num_threads: %lld \n", static_cast<long long>(num_threads));
  printf("\n\nBTF tabs: \n");
  for(Int i = 0; i < nblks+1; i++)
  {
    printf("%lld, ", static_cast<long long>(btf_tabs(i)));
  }
  printf("\n");
  // #endif

  // Symmetric application of the BTF permutation.
  permute_col(M, order_btf_array);
  permute_row(M, order_btf_array);

  MALLOC_INT_1DARRAY(order_blk_amd_array, M.ncol);
  init_value(order_blk_amd_array, M.ncol, (Int)0);
  MALLOC_INT_1DARRAY(btf_blk_nnz, nblks+1);
  init_value(btf_blk_nnz, nblks+1, (Int) 0);
  MALLOC_INT_1DARRAY(btf_blk_work, nblks+1);
  init_value(btf_blk_work, nblks+1, (Int) 0);

  //Find AMD blk ordering, get nnz, and get work
  btf_blk_amd(M, order_blk_amd_array, btf_blk_nnz, btf_blk_work);

#ifdef BASKER_DEBUG_ORDER_BTF
  printf("blk_perm:\n");
  for(Int i = 0; i < M.ncol; i++)
  {
    printf("(%lld,%lld) ",
           static_cast<long long>(i),
           static_cast<long long>(order_blk_amd_array(i)));
  }
  printf("\n");
  printf("id/blk_size/blk_nnz/work: \n");
  for(Int i = 0; i < nblks; i++)
  {
    printf("(%lld, %lld, %lld, %lld) ",
           static_cast<long long>(i),
           static_cast<long long>(btf_tabs(i+1)-btf_tabs(i)),
           static_cast<long long>(btf_blk_nnz(i)),
           static_cast<long long>(btf_blk_work(i)));
  }
  printf("\n");
#endif

  //printMTX("A_BEFORE.mtx", M);
  //printVec("AMD.txt", order_blk_amd_array, M.ncol);

  // Symmetric application of the per-block AMD permutation, then sort.
  permute_col(M, order_blk_amd_array);
  permute_row(M, order_blk_amd_array);
  sort_matrix(M);

  //changed col to row, error.
  //print to see issue
  //printMTX("A_AMD.mtx", M);

  break_into_parts2(M, nblks, btf_tabs);

  //find schedule
  find_btf_schedule(M, nblks, btf_tabs);

#ifdef BASKER_DEBUG_ORDER_BTF
  printf("------------BTF CUT: %lld --------------\n",
         static_cast<long long>(btf_tabs(btf_tabs_offset)));
#endif

  return 0;
}//end find BTF(nnz)